aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDan Engelbrecht <[email protected]>2025-06-04 08:59:44 +0200
committerGitHub Enterprise <[email protected]>2025-06-04 08:59:44 +0200
commit937510356143f83ecd15d0a9f58b611c7418ed61 (patch)
treed0540f89c30a46f1fd3a041a20d7bed417fcb877
parentfixed size chunking for encrypted files (#410) (diff)
downloadzen-937510356143f83ecd15d0a9f58b611c7418ed61.tar.xz
zen-937510356143f83ecd15d0a9f58b611c7418ed61.zip
faster scavenge (#417)
- Improvement: Multithreaded scavenge pass for zen builds download - Improvement: Optimized check for modified files when verifying state of scavenged paths
-rw-r--r--CHANGELOG.md10
-rw-r--r--src/zen/cmds/admin_cmd.cpp10
-rw-r--r--src/zen/cmds/builds_cmd.cpp212
-rw-r--r--src/zenutil/chunkedcontent.cpp91
-rw-r--r--src/zenutil/include/zenutil/chunkedcontent.h39
5 files changed, 258 insertions, 104 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index f6d7b651c..28ebf2015 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,4 +1,11 @@
##
+- Improvement: Use fixed size block chunking for known encrypted/compressed file types
+- Improvement: Skip trying to compress chunks that are sourced from files that are known to be encrypted/compressed
+- Improvement: Add global open file cache for written files increasing throughput during download by reducing overhead of open/close of file by 80%
+- Improvement: Multithreaded scavenge pass for zen builds download
+- Improvement: Optimized check for modified files when verifying state of scavenged paths
+
+## 5.6.9
- Bugfix: Remove long running exclusive namespace wide locks when dropping buckets or namespaces
- Bugfix: Flush the last block before closing the last new block written to during blockstore compact. UE-291196
- Bugfix: Fix stats for memcached entries in disk cache buckets
@@ -8,9 +15,6 @@
- Improvement: `zen builds upload` now use the system temp directory for temporary files leaving the source folder untouched
- Improvement: NoneDecoder::DecompressToStream and NoneDecoder::CompressToStream not uses direct disk I/O
- Improvement: Add streaming upload from HttpClient to reduce I/O caused by excessive MMap usage
-- Improvement: Use fixed size block chunking for know encrypted/compressed file types
-- Improvement: Skip trying to compress chunks that are sourced from files that are known to be encrypted/compressed
-- Improvement: Add global open file cache for written files increasing throughput during download by reducing overhead of open/close of file by 80%
## 5.6.8
- Feature: Add per bucket cache configuration (Lua options file only)
diff --git a/src/zen/cmds/admin_cmd.cpp b/src/zen/cmds/admin_cmd.cpp
index b3f8a990e..fe2bbbdc7 100644
--- a/src/zen/cmds/admin_cmd.cpp
+++ b/src/zen/cmds/admin_cmd.cpp
@@ -57,10 +57,7 @@ ScrubCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv)
}
else if (int StatusCode = (int)Response.StatusCode)
{
- ZEN_ERROR("scrub start failed: {}: {} ({})",
- StatusCode,
- ReasonStringForHttpResultCode(StatusCode),
- Response.ToText());
+ ZEN_ERROR("scrub start failed: {}: {} ({})", StatusCode, ReasonStringForHttpResultCode(StatusCode), Response.ToText());
}
else
{
@@ -645,10 +642,7 @@ FlushCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv)
}
else if (int StatusCode = (int)Response.StatusCode)
{
- ZEN_ERROR("flush failed: {}: {} ({})",
- StatusCode,
- ReasonStringForHttpResultCode(StatusCode),
- Response.ToText());
+ ZEN_ERROR("flush failed: {}: {} ({})", StatusCode, ReasonStringForHttpResultCode(StatusCode), Response.ToText());
}
else
{
diff --git a/src/zen/cmds/builds_cmd.cpp b/src/zen/cmds/builds_cmd.cpp
index e13c90b4b..fbcb6b900 100644
--- a/src/zen/cmds/builds_cmd.cpp
+++ b/src/zen/cmds/builds_cmd.cpp
@@ -360,7 +360,7 @@ namespace {
WriteByteCount += Size;
}))
{
- throw std::runtime_error(fmt::format("Failed to copy scavanged file '{}' to '{}'", SourceFilePath, TargetFilePath));
+ throw std::runtime_error(fmt::format("Failed to copy scavenged file '{}' to '{}'", SourceFilePath, TargetFilePath));
}
}
@@ -5342,9 +5342,10 @@ namespace {
return HasLocalState;
}
- FolderContent GetValidFolderContent(GetFolderContentStatistics& LocalFolderScanStats,
- const std::filesystem::path& Path,
- std::span<const std::filesystem::path> PathsToCheck)
+ FolderContent GetValidFolderContent(GetFolderContentStatistics& LocalFolderScanStats,
+ const std::filesystem::path& Path,
+ std::span<const std::filesystem::path> PathsToCheck,
+ std::function<void(uint64_t PathCount, uint64_t CompletedPathCount)>&& ProgressCallback)
{
ZEN_TRACE_CPU("GetValidFolderContent");
FolderContent Result;
@@ -5359,8 +5360,6 @@ namespace {
Stopwatch Timer;
auto _ = MakeGuard([&LocalFolderScanStats, &Timer]() { LocalFolderScanStats.ElapsedWallTimeUS = Timer.GetElapsedTimeUs(); });
- ProgressBar ProgressBar(ProgressMode, "Check Files");
-
ParallelWork Work(AbortFlag);
std::atomic<uint64_t> CompletedPathCount = 0;
uint32_t PathIndex = 0;
@@ -5393,18 +5392,11 @@ namespace {
PathIndex += PathRangeCount;
}
Work.Wait(200, [&](bool, ptrdiff_t) {
- // FilteredBytesHashed.Update(ChunkingStats.BytesHashed.load());
- std::string Details = fmt::format("{}/{} checked, {} found",
- CompletedPathCount.load(),
- PathCount,
- LocalFolderScanStats.FoundFileCount.load());
- ProgressBar.UpdateState({.Task = "Checking files ",
- .Details = Details,
- .TotalCount = PathCount,
- .RemainingCount = PathCount - CompletedPathCount.load()},
- false);
+ if (ProgressCallback)
+ {
+ ProgressCallback(PathCount, CompletedPathCount.load());
+ }
});
- ProgressBar.Finish();
}
uint32_t WritePathIndex = 0;
@@ -5675,40 +5667,153 @@ namespace {
ScavengedContents.resize(ScavengePathCount);
ScavengedLookups.resize(ScavengePathCount);
ScavengedPaths.resize(ScavengePathCount);
+
+ ProgressBar ScavengeProgressBar(ProgressMode, "Scavenging");
+ ParallelWork Work(AbortFlag);
+
+ std::atomic<uint64_t> PathsFound(0);
+ std::atomic<uint64_t> ChunksFound(0);
+ std::atomic<uint64_t> PathsScavenged(0);
+
for (size_t ScavengeIndex = 0; ScavengeIndex < ScavengePathCount; ScavengeIndex++)
{
- const ScavengeSource& Source = ScavengeSources[ScavengeIndex];
+ Work.ScheduleWork(
+ GetIOWorkerPool(),
+ [&RemoteLookup,
+ &ScavengeSources,
+ &ScavengedContents,
+ &ScavengedPaths,
+ &ScavengedLookups,
+ &PathsFound,
+ &ChunksFound,
+ &PathsScavenged,
+ ScavengeIndex](std::atomic<bool>&) {
+ if (!AbortFlag)
+ {
+ const ScavengeSource& Source = ScavengeSources[ScavengeIndex];
- ChunkedFolderContent& ScavengedLocalContent = ScavengedContents[ScavengeIndex];
- std::filesystem::path& ScavengePath = ScavengedPaths[ScavengeIndex];
- FolderContent LocalFolderState;
- if (ReadStateFile(Source.StateFilePath, LocalFolderState, ScavengedLocalContent))
- {
- GetFolderContentStatistics ScavengedFolderScanStats;
+ ChunkedFolderContent& ScavengedLocalContent = ScavengedContents[ScavengeIndex];
+ std::filesystem::path& ScavengePath = ScavengedPaths[ScavengeIndex];
- FolderContent ValidFolderContent =
- GetValidFolderContent(ScavengedFolderScanStats, Source.Path, LocalFolderState.Paths);
+ FolderContent LocalFolderState;
+ if (ReadStateFile(Source.StateFilePath, LocalFolderState, ScavengedLocalContent))
+ {
+ if (IsDir(Source.Path))
+ {
+ ChunkedContentLookup& ScavengedLookup = ScavengedLookups[ScavengeIndex];
+ ScavengedLookup = BuildChunkedContentLookup(ScavengedLocalContent);
+
+ std::vector<uint32_t> PathIndexesToScavange;
+ uint32_t ScavengedStatePathCount = gsl::narrow<uint32_t>(ScavengedLocalContent.Paths.size());
+ PathIndexesToScavange.reserve(ScavengedStatePathCount);
+ for (uint32_t ScavengedStatePathIndex = 0; ScavengedStatePathIndex < ScavengedStatePathCount;
+ ScavengedStatePathIndex++)
+ {
+ const IoHash& SequenceHash = ScavengedLocalContent.RawHashes[ScavengedStatePathIndex];
+ if (auto ScavengeSequenceIt = ScavengedLookup.RawHashToSequenceIndex.find(SequenceHash);
+ ScavengeSequenceIt != ScavengedLookup.RawHashToSequenceIndex.end())
+ {
+ const uint32_t ScavengeSequenceIndex = ScavengeSequenceIt->second;
+ if (RemoteLookup.RawHashToSequenceIndex.contains(SequenceHash))
+ {
+ PathIndexesToScavange.push_back(ScavengedStatePathIndex);
+ }
+ else
+ {
+ const uint32_t ScavengeChunkCount =
+ ScavengedLocalContent.ChunkedContent.ChunkCounts[ScavengeSequenceIndex];
+ for (uint32_t ScavengeChunkIndexOffset = 0;
+ ScavengeChunkIndexOffset < ScavengeChunkCount;
+ ScavengeChunkIndexOffset++)
+ {
+ const size_t ScavengeChunkOrderIndex =
+ ScavengedLookup.ChunkSequenceLocationOffset[ScavengeSequenceIndex] +
+ ScavengeChunkIndexOffset;
+ const uint32_t ScavengeChunkIndex =
+ ScavengedLocalContent.ChunkedContent.ChunkOrders[ScavengeChunkOrderIndex];
+ const IoHash& ScavengeChunkHash =
+ ScavengedLocalContent.ChunkedContent.ChunkHashes[ScavengeChunkIndex];
+ if (RemoteLookup.ChunkHashToChunkIndex.contains(ScavengeChunkHash))
+ {
+ PathIndexesToScavange.push_back(ScavengedStatePathIndex);
+ break;
+ }
+ }
+ }
+ }
+ }
- if (!LocalFolderState.AreKnownFilesEqual(ValidFolderContent))
- {
- std::vector<std::filesystem::path> DeletedPaths;
- FolderContent UpdatedContent = GetUpdatedContent(LocalFolderState, ValidFolderContent, DeletedPaths);
+ if (!PathIndexesToScavange.empty())
+ {
+ std::vector<std::filesystem::path> PathsToScavenge;
+ PathsToScavenge.reserve(PathIndexesToScavange.size());
+ for (uint32_t ScavengedStatePathIndex : PathIndexesToScavange)
+ {
+ PathsToScavenge.push_back(ScavengedLocalContent.Paths[ScavengedStatePathIndex]);
+ }
- // If the files are modified since the state was saved we ignore the files since we don't want to incur the
- // cost of scanning/hashing scavenged files
- DeletedPaths.insert(DeletedPaths.end(), UpdatedContent.Paths.begin(), UpdatedContent.Paths.end());
- if (!DeletedPaths.empty())
- {
- ScavengedLocalContent = DeletePathsFromChunkedContent(ScavengedLocalContent, DeletedPaths);
+ GetFolderContentStatistics ScavengedFolderScanStats;
+
+ FolderContent ValidFolderContent =
+ GetValidFolderContent(ScavengedFolderScanStats, Source.Path, PathsToScavenge, {});
+
+ if (!LocalFolderState.AreKnownFilesEqual(ValidFolderContent))
+ {
+ std::vector<std::filesystem::path> DeletedPaths;
+ FolderContent UpdatedContent =
+ GetUpdatedContent(LocalFolderState, ValidFolderContent, DeletedPaths);
+
+ // If the files are modified since the state was saved we ignore the files since we don't
+ // want to incur the cost of scanning/hashing scavenged files
+ DeletedPaths.insert(DeletedPaths.end(),
+ UpdatedContent.Paths.begin(),
+ UpdatedContent.Paths.end());
+ if (!DeletedPaths.empty())
+ {
+ ScavengedLocalContent =
+ DeletePathsFromChunkedContent(ScavengedLocalContent, ScavengedLookup, DeletedPaths);
+ ScavengedLookup = BuildChunkedContentLookup(ScavengedLocalContent);
+ }
+ }
+
+ if (!ScavengedLocalContent.Paths.empty())
+ {
+ ScavengePath = Source.Path;
+ PathsFound += ScavengedLocalContent.Paths.size();
+ ChunksFound += ScavengedLocalContent.ChunkedContent.ChunkHashes.size();
+ }
+ }
+
+ if (ScavengePath.empty())
+ {
+ ScavengedLocalContent = {};
+ ScavengedLookups[ScavengeIndex] = {};
+ ScavengedPaths[ScavengeIndex].clear();
+ }
+ }
+ }
+ PathsScavenged++;
}
- }
+ });
+ }
+ {
+ ZEN_TRACE_CPU("ScavengeScan_Wait");
- if (!ScavengedLocalContent.Paths.empty())
- {
- ScavengePath = Source.Path;
- }
- }
+ Work.Wait(GetUpdateDelayMS(ProgressMode), [&](bool IsAborted, std::ptrdiff_t PendingWork) {
+ ZEN_UNUSED(IsAborted, PendingWork);
+ std::string Details = fmt::format("{}/{} scanned. {} paths and {} chunks found for scavanging",
+ PathsScavenged.load(),
+ ScavengePathCount,
+ PathsFound.load(),
+ ChunksFound.load());
+ ScavengeProgressBar.UpdateState({.Task = "Scavenging ",
+ .Details = Details,
+ .TotalCount = ScavengePathCount,
+ .RemainingCount = ScavengePathCount - PathsScavenged.load()},
+ false);
+ });
}
+ ScavengeProgressBar.Finish();
for (uint32_t ScavengedContentIndex = 0;
ScavengedContentIndex < ScavengedContents.size() && (!SequenceIndexesLeftToFindToRemoteIndex.empty());
@@ -5718,8 +5823,7 @@ namespace {
if (!ScavengePath.empty())
{
const ChunkedFolderContent& ScavengedLocalContent = ScavengedContents[ScavengedContentIndex];
- ChunkedContentLookup& ScavengedLookup = ScavengedLookups[ScavengedContentIndex];
- ScavengedLookup = BuildChunkedContentLookup(ScavengedLocalContent);
+ const ChunkedContentLookup& ScavengedLookup = ScavengedLookups[ScavengedContentIndex];
for (uint32_t ScavengedSequenceIndex = 0;
ScavengedSequenceIndex < ScavengedLocalContent.ChunkedContent.SequenceRawHashes.size();
@@ -5980,7 +6084,7 @@ namespace {
}
if (CacheMappingStats.ScavengedPathsMatchingSequencesCount > 0 || CacheMappingStats.ScavengedChunkMatchingRemoteCount > 0)
{
- ZEN_CONSOLE("Scavenge of {} paths found {} ({}) chunk sequences, {} ({}) chunks in {}",
+ ZEN_CONSOLE("Scavenge of {} paths, found {} ({}) chunk sequences, {} ({}) chunks in {}",
ScavengedPathsCount,
CacheMappingStats.ScavengedPathsMatchingSequencesCount,
NiceBytes(CacheMappingStats.ScavengedPathsMatchingSequencesByteCount),
@@ -6408,7 +6512,7 @@ namespace {
&DiskStats](std::atomic<bool>&) mutable {
if (!AbortFlag)
{
- ZEN_TRACE_CPU("UpdateFolder_WriteScavanged");
+ ZEN_TRACE_CPU("UpdateFolder_WriteScavenged");
FilteredWrittenBytesPerSecond.Start();
@@ -8574,7 +8678,21 @@ namespace {
}
}
- OutLocalFolderContent = GetValidFolderContent(LocalFolderScanStats, Path, PathsToCheck);
+ ProgressBar ProgressBar(ProgressMode, "Check Files");
+ OutLocalFolderContent = GetValidFolderContent(
+ LocalFolderScanStats,
+ Path,
+ PathsToCheck,
+ [&ProgressBar, &LocalFolderScanStats](uint64_t PathCount, uint64_t CompletedPathCount) {
+ std::string Details =
+ fmt::format("{}/{} checked, {} found", CompletedPathCount, PathCount, LocalFolderScanStats.FoundFileCount.load());
+ ProgressBar.UpdateState({.Task = "Checking files ",
+ .Details = Details,
+ .TotalCount = PathCount,
+ .RemainingCount = PathCount - CompletedPathCount},
+ false);
+ });
+ ProgressBar.Finish();
}
bool ScanContent = true;
diff --git a/src/zenutil/chunkedcontent.cpp b/src/zenutil/chunkedcontent.cpp
index 4bec4901a..c7532e098 100644
--- a/src/zenutil/chunkedcontent.cpp
+++ b/src/zenutil/chunkedcontent.cpp
@@ -662,7 +662,9 @@ MergeChunkedFolderContents(const ChunkedFolderContent& Base, std::span<const Chu
}
ChunkedFolderContent
-DeletePathsFromChunkedContent(const ChunkedFolderContent& BaseContent, std::span<const std::filesystem::path> DeletedPaths)
+DeletePathsFromChunkedContent(const ChunkedFolderContent& BaseContent,
+ const ChunkedContentLookup& BaseContentLookup,
+ std::span<const std::filesystem::path> DeletedPaths)
{
ZEN_TRACE_CPU("DeletePathsFromChunkedContent");
@@ -676,14 +678,18 @@ DeletePathsFromChunkedContent(const ChunkedFolderContent& BaseContent, std::span
{
DeletedPathSet.insert(PathCompareString(DeletedPath));
}
- const ChunkedContentLookup BaseLookup = BuildChunkedContentLookup(BaseContent);
- tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> ChunkHashToChunkIndex;
- const size_t ExpectedCount = BaseContent.Paths.size() - DeletedPaths.size();
- Result.Paths.reserve(ExpectedCount);
- Result.RawSizes.reserve(ExpectedCount);
- Result.Attributes.reserve(ExpectedCount);
- Result.RawHashes.reserve(ExpectedCount);
+ const size_t BaseChunkCount = BaseContent.ChunkedContent.ChunkHashes.size();
+ std::vector<uint32_t> NewChunkIndexes(BaseChunkCount, (uint32_t)-1);
+
+ const size_t ExpectedPathCount = BaseContent.Paths.size() - DeletedPaths.size();
+ Result.Paths.reserve(ExpectedPathCount);
+ Result.RawSizes.reserve(ExpectedPathCount);
+ Result.Attributes.reserve(ExpectedPathCount);
+ Result.RawHashes.reserve(ExpectedPathCount);
+
+ Result.ChunkedContent.ChunkHashes.reserve(BaseChunkCount);
+ Result.ChunkedContent.ChunkRawSizes.reserve(BaseChunkCount);
tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> RawHashToSequenceRawHashIndex;
for (uint32_t PathIndex = 0; PathIndex < BaseContent.Paths.size(); PathIndex++)
@@ -703,20 +709,33 @@ DeletePathsFromChunkedContent(const ChunkedFolderContent& BaseContent, std::span
{
RawHashToSequenceRawHashIndex.insert(
{RawHash, gsl::narrow<uint32_t>(Result.ChunkedContent.SequenceRawHashes.size())});
- const uint32_t SequenceRawHashIndex = BaseLookup.RawHashToSequenceIndex.at(RawHash);
- const uint32_t OrderIndexOffset = BaseLookup.SequenceIndexChunkOrderOffset[SequenceRawHashIndex];
- const uint32_t ChunkCount = BaseContent.ChunkedContent.ChunkCounts[SequenceRawHashIndex];
- ChunkingStatistics Stats;
+ const uint32_t SequenceRawHashIndex = BaseContentLookup.RawHashToSequenceIndex.at(RawHash);
+ const uint32_t OrderIndexOffset = BaseContentLookup.SequenceIndexChunkOrderOffset[SequenceRawHashIndex];
+ const uint32_t ChunkCount = BaseContent.ChunkedContent.ChunkCounts[SequenceRawHashIndex];
+
std::span<const uint32_t> OriginalChunkOrder =
std::span<const uint32_t>(BaseContent.ChunkedContent.ChunkOrders).subspan(OrderIndexOffset, ChunkCount);
- AddChunkSequence(Stats,
- Result.ChunkedContent,
- ChunkHashToChunkIndex,
- RawHash,
- OriginalChunkOrder,
- BaseContent.ChunkedContent.ChunkHashes,
- BaseContent.ChunkedContent.ChunkRawSizes);
- Stats.UniqueSequencesFound++;
+
+ Result.ChunkedContent.ChunkCounts.push_back(gsl::narrow<uint32_t>(OriginalChunkOrder.size()));
+
+ for (uint32_t OldChunkIndex : OriginalChunkOrder)
+ {
+ if (uint32_t FoundChunkIndex = NewChunkIndexes[OldChunkIndex]; FoundChunkIndex != (uint32_t)-1)
+ {
+ Result.ChunkedContent.ChunkOrders.push_back(FoundChunkIndex);
+ }
+ else
+ {
+ const uint32_t NewChunkIndex = gsl::narrow<uint32_t>(Result.ChunkedContent.ChunkHashes.size());
+ NewChunkIndexes[OldChunkIndex] = NewChunkIndex;
+ const IoHash& ChunkHash = BaseContent.ChunkedContent.ChunkHashes[OldChunkIndex];
+ const uint64_t OldChunkSize = BaseContent.ChunkedContent.ChunkRawSizes[OldChunkIndex];
+ Result.ChunkedContent.ChunkHashes.push_back(ChunkHash);
+ Result.ChunkedContent.ChunkRawSizes.push_back(OldChunkSize);
+ Result.ChunkedContent.ChunkOrders.push_back(NewChunkIndex);
+ }
+ }
+ Result.ChunkedContent.SequenceRawHashes.push_back(RawHash);
}
}
}
@@ -726,6 +745,19 @@ DeletePathsFromChunkedContent(const ChunkedFolderContent& BaseContent, std::span
}
ChunkedFolderContent
+DeletePathsFromChunkedContent(const ChunkedFolderContent& BaseContent, std::span<const std::filesystem::path> DeletedPaths)
+{
+ ZEN_TRACE_CPU("DeletePathsFromChunkedContent");
+ ZEN_ASSERT(DeletedPaths.size() <= BaseContent.Paths.size());
+ if (DeletedPaths.size() == BaseContent.Paths.size())
+ {
+ return {};
+ }
+ const ChunkedContentLookup BaseLookup = BuildChunkedContentLookup(BaseContent);
+ return DeletePathsFromChunkedContent(BaseContent, BaseLookup, DeletedPaths);
+}
+
+ChunkedFolderContent
ChunkFolderContent(ChunkingStatistics& Stats,
WorkerThreadPool& WorkerPool,
const std::filesystem::path& RootPath,
@@ -815,8 +847,9 @@ BuildChunkedContentLookup(const ChunkedFolderContent& Content)
struct ChunkLocationReference
{
- uint32_t ChunkIndex = (uint32_t)-1;
- ChunkedContentLookup::ChunkSequenceLocation Location;
+ uint32_t ChunkIndex = (uint32_t)-1;
+ uint32_t SequenceIndex = (uint32_t)-1;
+ uint64_t Offset = (uint64_t)-1;
};
ChunkedContentLookup Result;
@@ -845,8 +878,7 @@ BuildChunkedContentLookup(const ChunkedFolderContent& Content)
{
uint32_t ChunkIndex = Content.ChunkedContent.ChunkOrders[OrderIndex];
- Locations.push_back(
- ChunkLocationReference{ChunkIndex, ChunkedContentLookup::ChunkSequenceLocation{SequenceIndex, LocationOffset}});
+ Locations.push_back(ChunkLocationReference{.ChunkIndex = ChunkIndex, .SequenceIndex = SequenceIndex, .Offset = LocationOffset});
LocationOffset += Content.ChunkedContent.ChunkRawSizes[ChunkIndex];
}
@@ -861,15 +893,15 @@ BuildChunkedContentLookup(const ChunkedFolderContent& Content)
{
return false;
}
- if (Lhs.Location.SequenceIndex < Rhs.Location.SequenceIndex)
+ if (Lhs.SequenceIndex < Rhs.SequenceIndex)
{
return true;
}
- if (Lhs.Location.SequenceIndex > Rhs.Location.SequenceIndex)
+ if (Lhs.SequenceIndex > Rhs.SequenceIndex)
{
return false;
}
- return Lhs.Location.Offset < Rhs.Location.Offset;
+ return Lhs.Offset < Rhs.Offset;
});
Result.ChunkSequenceLocations.reserve(Locations.size());
@@ -882,7 +914,10 @@ BuildChunkedContentLookup(const ChunkedFolderContent& Content)
uint32_t Count = 0;
while ((RangeOffset + Count < Locations.size()) && (Locations[RangeOffset + Count].ChunkIndex == ChunkIndex))
{
- Result.ChunkSequenceLocations.push_back(Locations[RangeOffset + Count].Location);
+ const ChunkLocationReference& LocationReference = Locations[RangeOffset + Count];
+ Result.ChunkSequenceLocations.push_back(
+ ChunkedContentLookup::ChunkSequenceLocation{.SequenceIndex = LocationReference.SequenceIndex,
+ .Offset = LocationReference.Offset});
Count++;
}
Result.ChunkSequenceLocationOffset.push_back(RangeOffset);
diff --git a/src/zenutil/include/zenutil/chunkedcontent.h b/src/zenutil/include/zenutil/chunkedcontent.h
index 03f52e5f6..225b1a3a5 100644
--- a/src/zenutil/include/zenutil/chunkedcontent.h
+++ b/src/zenutil/include/zenutil/chunkedcontent.h
@@ -94,10 +94,31 @@ struct ChunkedFolderContent
ChunkedContentData ChunkedContent;
};
+struct ChunkedContentLookup
+{
+ struct ChunkSequenceLocation
+ {
+ uint32_t SequenceIndex = (uint32_t)-1;
+ uint64_t Offset = (uint64_t)-1;
+ };
+ tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> ChunkHashToChunkIndex;
+ tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> RawHashToSequenceIndex;
+ std::vector<uint32_t> SequenceIndexChunkOrderOffset;
+ std::vector<ChunkSequenceLocation> ChunkSequenceLocations;
+ std::vector<size_t>
+ ChunkSequenceLocationOffset; // ChunkSequenceLocations[ChunkLocationOffset[ChunkIndex]] -> start of sources for ChunkIndex
+ std::vector<uint32_t> ChunkSequenceLocationCounts; // ChunkSequenceLocationCounts[ChunkIndex] count of chunk locations for ChunkIndex
+ std::vector<uint32_t> SequenceIndexFirstPathIndex; // SequenceIndexFirstPathIndex[SequenceIndex] -> first path index with that RawHash
+ std::vector<uint32_t> PathExtensionHash;
+};
+
void SaveChunkedFolderContentToCompactBinary(const ChunkedFolderContent& Content, CbWriter& Output);
ChunkedFolderContent LoadChunkedFolderContentToCompactBinary(CbObjectView Input);
ChunkedFolderContent MergeChunkedFolderContents(const ChunkedFolderContent& Base, std::span<const ChunkedFolderContent> Overlays);
+ChunkedFolderContent DeletePathsFromChunkedContent(const ChunkedFolderContent& Base,
+ const ChunkedContentLookup& BaseContentLookup,
+ std::span<const std::filesystem::path> DeletedPaths);
ChunkedFolderContent DeletePathsFromChunkedContent(const ChunkedFolderContent& Base, std::span<const std::filesystem::path> DeletedPaths);
struct ChunkingStatistics
@@ -120,24 +141,6 @@ ChunkedFolderContent ChunkFolderContent(ChunkingStatistics& Stats,
std::function<void(bool IsAborted, std::ptrdiff_t PendingWork)>&& UpdateCallback,
std::atomic<bool>& AbortFlag);
-struct ChunkedContentLookup
-{
- struct ChunkSequenceLocation
- {
- uint32_t SequenceIndex = (uint32_t)-1;
- uint64_t Offset = (uint64_t)-1;
- };
- tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> ChunkHashToChunkIndex;
- tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> RawHashToSequenceIndex;
- std::vector<uint32_t> SequenceIndexChunkOrderOffset;
- std::vector<ChunkSequenceLocation> ChunkSequenceLocations;
- std::vector<size_t>
- ChunkSequenceLocationOffset; // ChunkSequenceLocations[ChunkLocationOffset[ChunkIndex]] -> start of sources for ChunkIndex
- std::vector<uint32_t> ChunkSequenceLocationCounts; // ChunkSequenceLocationCounts[ChunkIndex] count of chunk locations for ChunkIndex
- std::vector<uint32_t> SequenceIndexFirstPathIndex; // SequenceIndexFirstPathIndex[SequenceIndex] -> first path index with that RawHash
- std::vector<uint32_t> PathExtensionHash;
-};
-
ChunkedContentLookup BuildChunkedContentLookup(const ChunkedFolderContent& Content);
inline std::pair<size_t, uint32_t>