diff options
| author | Dan Engelbrecht <[email protected]> | 2025-06-04 08:59:44 +0200 |
|---|---|---|
| committer | GitHub Enterprise <[email protected]> | 2025-06-04 08:59:44 +0200 |
| commit | 937510356143f83ecd15d0a9f58b611c7418ed61 (patch) | |
| tree | d0540f89c30a46f1fd3a041a20d7bed417fcb877 | |
| parent | fixed size chunking for encrypted files (#410) (diff) | |
| download | zen-937510356143f83ecd15d0a9f58b611c7418ed61.tar.xz zen-937510356143f83ecd15d0a9f58b611c7418ed61.zip | |
faster scavenge (#417)
- Improvement: Multithreaded scavenge pass for zen builds download
- Improvement: Optimized check for modified files when verifying state of scavenged paths
| -rw-r--r-- | CHANGELOG.md | 10 | ||||
| -rw-r--r-- | src/zen/cmds/admin_cmd.cpp | 10 | ||||
| -rw-r--r-- | src/zen/cmds/builds_cmd.cpp | 212 | ||||
| -rw-r--r-- | src/zenutil/chunkedcontent.cpp | 91 | ||||
| -rw-r--r-- | src/zenutil/include/zenutil/chunkedcontent.h | 39 |
5 files changed, 258 insertions, 104 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md index f6d7b651c..28ebf2015 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,11 @@ ## +- Improvement: Use fixed size block chunking for know encrypted/compressed file types +- Improvement: Skip trying to compress chunks that are sourced from files that are known to be encrypted/compressed +- Improvement: Add global open file cache for written files increasing throughput during download by reducing overhead of open/close of file by 80% +- Improvement: Multithreaded scavenge pass for zen builds download +- Improvement: Optimized check for modified files when verifying state of scavenged paths + +## 5.6.9 - Bugfix: Remove long running exclusive namespace wide locks when dropping buckets or namespaces - Bugfix: Flush the last block before closing the last new block written to during blockstore compact. UE-291196 - Bugfix: Fix stats for memcached entries in disk cache buckets @@ -8,9 +15,6 @@ - Improvement: `zen builds upload` now use the system temp directory for temporary files leaving the source folder untouched - Improvement: NoneDecoder::DecompressToStream and NoneDecoder::CompressToStream not uses direct disk I/O - Improvement: Add streaming upload from HttpClient to reduce I/O caused by excessive MMap usage -- Improvement: Use fixed size block chunking for know encrypted/compressed file types -- Improvement: Skip trying to compress chunks that are sourced from files that are known to be encrypted/compressed -- Improvement: Add global open file cache for written files increasing throughput during download by reducing overhead of open/close of file by 80% ## 5.6.8 - Feature: Add per bucket cache configuration (Lua options file only) diff --git a/src/zen/cmds/admin_cmd.cpp b/src/zen/cmds/admin_cmd.cpp index b3f8a990e..fe2bbbdc7 100644 --- a/src/zen/cmds/admin_cmd.cpp +++ b/src/zen/cmds/admin_cmd.cpp @@ -57,10 +57,7 @@ ScrubCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) } else if (int StatusCode = (int)Response.StatusCode) { - ZEN_ERROR("scrub start failed: {}: {} ({})", - StatusCode, - ReasonStringForHttpResultCode(StatusCode), - Response.ToText()); + ZEN_ERROR("scrub start failed: {}: {} ({})", StatusCode, ReasonStringForHttpResultCode(StatusCode), Response.ToText()); } else { @@ -645,10 +642,7 @@ FlushCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) } else if (int StatusCode = (int)Response.StatusCode) { - ZEN_ERROR("flush failed: {}: {} ({})", - StatusCode, - ReasonStringForHttpResultCode(StatusCode), - Response.ToText()); + ZEN_ERROR("flush failed: {}: {} ({})", StatusCode, ReasonStringForHttpResultCode(StatusCode), Response.ToText()); } else { diff --git a/src/zen/cmds/builds_cmd.cpp b/src/zen/cmds/builds_cmd.cpp index e13c90b4b..fbcb6b900 100644 --- a/src/zen/cmds/builds_cmd.cpp +++ b/src/zen/cmds/builds_cmd.cpp @@ -360,7 +360,7 @@ namespace { WriteByteCount += Size; })) { - throw std::runtime_error(fmt::format("Failed to copy scavanged file '{}' to '{}'", SourceFilePath, TargetFilePath)); + throw std::runtime_error(fmt::format("Failed to copy scavenged file '{}' to '{}'", SourceFilePath, TargetFilePath)); } } @@ -5342,9 +5342,10 @@ namespace { return HasLocalState; } - FolderContent GetValidFolderContent(GetFolderContentStatistics& LocalFolderScanStats, - const std::filesystem::path& Path, - std::span<const std::filesystem::path> PathsToCheck) + FolderContent GetValidFolderContent(GetFolderContentStatistics& LocalFolderScanStats, + const std::filesystem::path& Path, + std::span<const std::filesystem::path> PathsToCheck, + std::function<void(uint64_t PathCount, uint64_t CompletedPathCount)>&& ProgressCallback) { ZEN_TRACE_CPU("GetValidFolderContent"); FolderContent Result; @@ -5359,8 +5360,6 @@ namespace { Stopwatch Timer; auto _ = MakeGuard([&LocalFolderScanStats, &Timer]() { LocalFolderScanStats.ElapsedWallTimeUS = Timer.GetElapsedTimeUs(); }); - ProgressBar ProgressBar(ProgressMode, "Check Files"); - ParallelWork Work(AbortFlag); std::atomic<uint64_t> CompletedPathCount = 0; uint32_t PathIndex = 0; @@ -5393,18 +5392,11 @@ namespace { PathIndex += PathRangeCount; } Work.Wait(200, [&](bool, ptrdiff_t) { - // FilteredBytesHashed.Update(ChunkingStats.BytesHashed.load()); - std::string Details = fmt::format("{}/{} checked, {} found", - CompletedPathCount.load(), - PathCount, - LocalFolderScanStats.FoundFileCount.load()); - ProgressBar.UpdateState({.Task = "Checking files ", - .Details = Details, - .TotalCount = PathCount, - .RemainingCount = PathCount - CompletedPathCount.load()}, - false); + if (ProgressCallback) + { + ProgressCallback(PathCount, CompletedPathCount.load()); + } }); - ProgressBar.Finish(); } uint32_t WritePathIndex = 0; @@ -5675,40 +5667,153 @@ namespace { ScavengedContents.resize(ScavengePathCount); ScavengedLookups.resize(ScavengePathCount); ScavengedPaths.resize(ScavengePathCount); + + ProgressBar ScavengeProgressBar(ProgressMode, "Scavenging"); + ParallelWork Work(AbortFlag); + + std::atomic<uint64_t> PathsFound(0); + std::atomic<uint64_t> ChunksFound(0); + std::atomic<uint64_t> PathsScavenged(0); + for (size_t ScavengeIndex = 0; ScavengeIndex < ScavengePathCount; ScavengeIndex++) { - const ScavengeSource& Source = ScavengeSources[ScavengeIndex]; + Work.ScheduleWork( + GetIOWorkerPool(), + [&RemoteLookup, + &ScavengeSources, + &ScavengedContents, + &ScavengedPaths, + &ScavengedLookups, + &PathsFound, + &ChunksFound, + &PathsScavenged, + ScavengeIndex](std::atomic<bool>&) { + if (!AbortFlag) + { + const ScavengeSource& Source = ScavengeSources[ScavengeIndex]; - ChunkedFolderContent& ScavengedLocalContent = ScavengedContents[ScavengeIndex]; - std::filesystem::path& ScavengePath = ScavengedPaths[ScavengeIndex]; - FolderContent LocalFolderState; - if (ReadStateFile(Source.StateFilePath, LocalFolderState, ScavengedLocalContent)) - { - GetFolderContentStatistics ScavengedFolderScanStats; + ChunkedFolderContent& ScavengedLocalContent = ScavengedContents[ScavengeIndex]; + std::filesystem::path& ScavengePath = ScavengedPaths[ScavengeIndex]; - FolderContent ValidFolderContent = - GetValidFolderContent(ScavengedFolderScanStats, Source.Path, LocalFolderState.Paths); + FolderContent LocalFolderState; + if (ReadStateFile(Source.StateFilePath, LocalFolderState, ScavengedLocalContent)) + { + if (IsDir(Source.Path)) + { + ChunkedContentLookup& ScavengedLookup = ScavengedLookups[ScavengeIndex]; + ScavengedLookup = BuildChunkedContentLookup(ScavengedLocalContent); + + std::vector<uint32_t> PathIndexesToScavange; + uint32_t ScavengedStatePathCount = gsl::narrow<uint32_t>(ScavengedLocalContent.Paths.size()); + PathIndexesToScavange.reserve(ScavengedStatePathCount); + for (uint32_t ScavengedStatePathIndex = 0; ScavengedStatePathIndex < ScavengedStatePathCount; + ScavengedStatePathIndex++) + { + const IoHash& SequenceHash = ScavengedLocalContent.RawHashes[ScavengedStatePathIndex]; + if (auto ScavengeSequenceIt = ScavengedLookup.RawHashToSequenceIndex.find(SequenceHash); + ScavengeSequenceIt != ScavengedLookup.RawHashToSequenceIndex.end()) + { + const uint32_t ScavengeSequenceIndex = ScavengeSequenceIt->second; + if (RemoteLookup.RawHashToSequenceIndex.contains(SequenceHash)) + { + PathIndexesToScavange.push_back(ScavengedStatePathIndex); + } + else + { + const uint32_t ScavengeChunkCount = + ScavengedLocalContent.ChunkedContent.ChunkCounts[ScavengeSequenceIndex]; + for (uint32_t ScavengeChunkIndexOffset = 0; + ScavengeChunkIndexOffset < ScavengeChunkCount; + ScavengeChunkIndexOffset++) + { + const size_t ScavengeChunkOrderIndex = + ScavengedLookup.ChunkSequenceLocationOffset[ScavengeSequenceIndex] + + ScavengeChunkIndexOffset; + const uint32_t ScavengeChunkIndex = + ScavengedLocalContent.ChunkedContent.ChunkOrders[ScavengeChunkOrderIndex]; + const IoHash& ScavengeChunkHash = + ScavengedLocalContent.ChunkedContent.ChunkHashes[ScavengeChunkIndex]; + if (RemoteLookup.ChunkHashToChunkIndex.contains(ScavengeChunkHash)) + { + PathIndexesToScavange.push_back(ScavengedStatePathIndex); + break; + } + } + } + } + } - if (!LocalFolderState.AreKnownFilesEqual(ValidFolderContent)) - { - std::vector<std::filesystem::path> DeletedPaths; - FolderContent UpdatedContent = GetUpdatedContent(LocalFolderState, ValidFolderContent, DeletedPaths); + if (!PathIndexesToScavange.empty()) + { + std::vector<std::filesystem::path> PathsToScavenge; + PathsToScavenge.reserve(PathIndexesToScavange.size()); + for (uint32_t ScavengedStatePathIndex : PathIndexesToScavange) + { + PathsToScavenge.push_back(ScavengedLocalContent.Paths[ScavengedStatePathIndex]); + } - // If the files are modified since the state was saved we ignore the files since we don't want to incur the - // cost of scanning/hashing scavenged files - DeletedPaths.insert(DeletedPaths.end(), UpdatedContent.Paths.begin(), UpdatedContent.Paths.end()); - if (!DeletedPaths.empty()) - { - ScavengedLocalContent = DeletePathsFromChunkedContent(ScavengedLocalContent, DeletedPaths); + GetFolderContentStatistics ScavengedFolderScanStats; + + FolderContent ValidFolderContent = + GetValidFolderContent(ScavengedFolderScanStats, Source.Path, PathsToScavenge, {}); + + if (!LocalFolderState.AreKnownFilesEqual(ValidFolderContent)) + { + std::vector<std::filesystem::path> DeletedPaths; + FolderContent UpdatedContent = + GetUpdatedContent(LocalFolderState, ValidFolderContent, DeletedPaths); + + // If the files are modified since the state was saved we ignore the files since we don't + // want to incur the cost of scanning/hashing scavenged files + DeletedPaths.insert(DeletedPaths.end(), + UpdatedContent.Paths.begin(), + UpdatedContent.Paths.end()); + if (!DeletedPaths.empty()) + { + ScavengedLocalContent = + DeletePathsFromChunkedContent(ScavengedLocalContent, ScavengedLookup, DeletedPaths); + ScavengedLookup = BuildChunkedContentLookup(ScavengedLocalContent); + } + } + + if (!ScavengedLocalContent.Paths.empty()) + { + ScavengePath = Source.Path; + PathsFound += ScavengedLocalContent.Paths.size(); + ChunksFound += ScavengedLocalContent.ChunkedContent.ChunkHashes.size(); + } + } + + if (ScavengePath.empty()) + { + ScavengedLocalContent = {}; + ScavengedLookups[ScavengeIndex] = {}; + ScavengedPaths[ScavengeIndex].clear(); + } + } + } + PathsScavenged++; } - } + }); + } + { + ZEN_TRACE_CPU("ScavengeScan_Wait"); - if (!ScavengedLocalContent.Paths.empty()) - { - ScavengePath = Source.Path; - } - } + Work.Wait(GetUpdateDelayMS(ProgressMode), [&](bool IsAborted, std::ptrdiff_t PendingWork) { + ZEN_UNUSED(IsAborted, PendingWork); + std::string Details = fmt::format("{}/{} scanned. {} paths and {} chunks found for scavanging", + PathsScavenged.load(), + ScavengePathCount, + PathsFound.load(), + ChunksFound.load()); + ScavengeProgressBar.UpdateState({.Task = "Scavenging ", + .Details = Details, + .TotalCount = ScavengePathCount, + .RemainingCount = ScavengePathCount - PathsScavenged.load()}, + false); + }); } + ScavengeProgressBar.Finish(); for (uint32_t ScavengedContentIndex = 0; ScavengedContentIndex < ScavengedContents.size() && (!SequenceIndexesLeftToFindToRemoteIndex.empty()); @@ -5718,8 +5823,7 @@ namespace { if (!ScavengePath.empty()) { const ChunkedFolderContent& ScavengedLocalContent = ScavengedContents[ScavengedContentIndex]; - ChunkedContentLookup& ScavengedLookup = ScavengedLookups[ScavengedContentIndex]; - ScavengedLookup = BuildChunkedContentLookup(ScavengedLocalContent); + const ChunkedContentLookup& ScavengedLookup = ScavengedLookups[ScavengedContentIndex]; for (uint32_t ScavengedSequenceIndex = 0; ScavengedSequenceIndex < ScavengedLocalContent.ChunkedContent.SequenceRawHashes.size(); @@ -5980,7 +6084,7 @@ namespace { } if (CacheMappingStats.ScavengedPathsMatchingSequencesCount > 0 || CacheMappingStats.ScavengedChunkMatchingRemoteCount > 0) { - ZEN_CONSOLE("Scavenge of {} paths found {} ({}) chunk sequences, {} ({}) chunks in {}", + ZEN_CONSOLE("Scavenge of {} paths, found {} ({}) chunk sequences, {} ({}) chunks in {}", ScavengedPathsCount, CacheMappingStats.ScavengedPathsMatchingSequencesCount, NiceBytes(CacheMappingStats.ScavengedPathsMatchingSequencesByteCount), @@ -6408,7 +6512,7 @@ namespace { &DiskStats](std::atomic<bool>&) mutable { if (!AbortFlag) { - ZEN_TRACE_CPU("UpdateFolder_WriteScavanged"); + ZEN_TRACE_CPU("UpdateFolder_WriteScavenged"); FilteredWrittenBytesPerSecond.Start(); @@ -8574,7 +8678,21 @@ namespace { } } - OutLocalFolderContent = GetValidFolderContent(LocalFolderScanStats, Path, PathsToCheck); + ProgressBar ProgressBar(ProgressMode, "Check Files"); + OutLocalFolderContent = GetValidFolderContent( + LocalFolderScanStats, + Path, + PathsToCheck, + [&ProgressBar, &LocalFolderScanStats](uint64_t PathCount, uint64_t CompletedPathCount) { + std::string Details = + fmt::format("{}/{} checked, {} found", CompletedPathCount, PathCount, LocalFolderScanStats.FoundFileCount.load()); + ProgressBar.UpdateState({.Task = "Checking files ", + .Details = Details, + .TotalCount = PathCount, + .RemainingCount = PathCount - CompletedPathCount}, + false); + }); + ProgressBar.Finish(); } bool ScanContent = true; diff --git a/src/zenutil/chunkedcontent.cpp b/src/zenutil/chunkedcontent.cpp index 4bec4901a..c7532e098 100644 --- a/src/zenutil/chunkedcontent.cpp +++ b/src/zenutil/chunkedcontent.cpp @@ -662,7 +662,9 @@ MergeChunkedFolderContents(const ChunkedFolderContent& Base, std::span<const Chu } ChunkedFolderContent -DeletePathsFromChunkedContent(const ChunkedFolderContent& BaseContent, std::span<const std::filesystem::path> DeletedPaths) +DeletePathsFromChunkedContent(const ChunkedFolderContent& BaseContent, + const ChunkedContentLookup& BaseContentLookup, + std::span<const std::filesystem::path> DeletedPaths) { ZEN_TRACE_CPU("DeletePathsFromChunkedContent"); @@ -676,14 +678,18 @@ DeletePathsFromChunkedContent(const ChunkedFolderContent& BaseContent, std::span { DeletedPathSet.insert(PathCompareString(DeletedPath)); } - const ChunkedContentLookup BaseLookup = BuildChunkedContentLookup(BaseContent); - tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> ChunkHashToChunkIndex; - const size_t ExpectedCount = BaseContent.Paths.size() - DeletedPaths.size(); - Result.Paths.reserve(ExpectedCount); - Result.RawSizes.reserve(ExpectedCount); - Result.Attributes.reserve(ExpectedCount); - Result.RawHashes.reserve(ExpectedCount); + const size_t BaseChunkCount = BaseContent.ChunkedContent.ChunkHashes.size(); + std::vector<uint32_t> NewChunkIndexes(BaseChunkCount, (uint32_t)-1); + + const size_t ExpectedPathCount = BaseContent.Paths.size() - DeletedPaths.size(); + Result.Paths.reserve(ExpectedPathCount); + Result.RawSizes.reserve(ExpectedPathCount); + Result.Attributes.reserve(ExpectedPathCount); + Result.RawHashes.reserve(ExpectedPathCount); + + Result.ChunkedContent.ChunkHashes.reserve(BaseChunkCount); + Result.ChunkedContent.ChunkRawSizes.reserve(BaseChunkCount); tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> RawHashToSequenceRawHashIndex; for (uint32_t PathIndex = 0; PathIndex < BaseContent.Paths.size(); PathIndex++) @@ -703,20 +709,33 @@ DeletePathsFromChunkedContent(const ChunkedFolderContent& BaseContent, std::span { RawHashToSequenceRawHashIndex.insert( {RawHash, gsl::narrow<uint32_t>(Result.ChunkedContent.SequenceRawHashes.size())}); - const uint32_t SequenceRawHashIndex = BaseLookup.RawHashToSequenceIndex.at(RawHash); - const uint32_t OrderIndexOffset = BaseLookup.SequenceIndexChunkOrderOffset[SequenceRawHashIndex]; - const uint32_t ChunkCount = BaseContent.ChunkedContent.ChunkCounts[SequenceRawHashIndex]; - ChunkingStatistics Stats; + const uint32_t SequenceRawHashIndex = BaseContentLookup.RawHashToSequenceIndex.at(RawHash); + const uint32_t OrderIndexOffset = BaseContentLookup.SequenceIndexChunkOrderOffset[SequenceRawHashIndex]; + const uint32_t ChunkCount = BaseContent.ChunkedContent.ChunkCounts[SequenceRawHashIndex]; + std::span<const uint32_t> OriginalChunkOrder = std::span<const uint32_t>(BaseContent.ChunkedContent.ChunkOrders).subspan(OrderIndexOffset, ChunkCount); - AddChunkSequence(Stats, - Result.ChunkedContent, - ChunkHashToChunkIndex, - RawHash, - OriginalChunkOrder, - BaseContent.ChunkedContent.ChunkHashes, - BaseContent.ChunkedContent.ChunkRawSizes); - Stats.UniqueSequencesFound++; + + Result.ChunkedContent.ChunkCounts.push_back(gsl::narrow<uint32_t>(OriginalChunkOrder.size())); + + for (uint32_t OldChunkIndex : OriginalChunkOrder) + { + if (uint32_t FoundChunkIndex = NewChunkIndexes[OldChunkIndex]; FoundChunkIndex != (uint32_t)-1) + { + Result.ChunkedContent.ChunkOrders.push_back(FoundChunkIndex); + } + else + { + const uint32_t NewChunkIndex = gsl::narrow<uint32_t>(Result.ChunkedContent.ChunkHashes.size()); + NewChunkIndexes[OldChunkIndex] = NewChunkIndex; + const IoHash& ChunkHash = BaseContent.ChunkedContent.ChunkHashes[OldChunkIndex]; + const uint64_t OldChunkSize = BaseContent.ChunkedContent.ChunkRawSizes[OldChunkIndex]; + Result.ChunkedContent.ChunkHashes.push_back(ChunkHash); + Result.ChunkedContent.ChunkRawSizes.push_back(OldChunkSize); + Result.ChunkedContent.ChunkOrders.push_back(NewChunkIndex); + } + } + Result.ChunkedContent.SequenceRawHashes.push_back(RawHash); } } } @@ -726,6 +745,19 @@ DeletePathsFromChunkedContent(const ChunkedFolderContent& BaseContent, std::span } ChunkedFolderContent +DeletePathsFromChunkedContent(const ChunkedFolderContent& BaseContent, std::span<const std::filesystem::path> DeletedPaths) +{ + ZEN_TRACE_CPU("DeletePathsFromChunkedContent"); + ZEN_ASSERT(DeletedPaths.size() <= BaseContent.Paths.size()); + if (DeletedPaths.size() == BaseContent.Paths.size()) + { + return {}; + } + const ChunkedContentLookup BaseLookup = BuildChunkedContentLookup(BaseContent); + return DeletePathsFromChunkedContent(BaseContent, BaseLookup, DeletedPaths); +} + +ChunkedFolderContent ChunkFolderContent(ChunkingStatistics& Stats, WorkerThreadPool& WorkerPool, const std::filesystem::path& RootPath, @@ -815,8 +847,9 @@ BuildChunkedContentLookup(const ChunkedFolderContent& Content) struct ChunkLocationReference { - uint32_t ChunkIndex = (uint32_t)-1; - ChunkedContentLookup::ChunkSequenceLocation Location; + uint32_t ChunkIndex = (uint32_t)-1; + uint32_t SequenceIndex = (uint32_t)-1; + uint64_t Offset = (uint64_t)-1; }; ChunkedContentLookup Result; @@ -845,8 +878,7 @@ BuildChunkedContentLookup(const ChunkedFolderContent& Content) { uint32_t ChunkIndex = Content.ChunkedContent.ChunkOrders[OrderIndex]; - Locations.push_back( - ChunkLocationReference{ChunkIndex, ChunkedContentLookup::ChunkSequenceLocation{SequenceIndex, LocationOffset}}); + Locations.push_back(ChunkLocationReference{.ChunkIndex = ChunkIndex, .SequenceIndex = SequenceIndex, .Offset = LocationOffset}); LocationOffset += Content.ChunkedContent.ChunkRawSizes[ChunkIndex]; } @@ -861,15 +893,15 @@ BuildChunkedContentLookup(const ChunkedFolderContent& Content) { return false; } - if (Lhs.Location.SequenceIndex < Rhs.Location.SequenceIndex) + if (Lhs.SequenceIndex < Rhs.SequenceIndex) { return true; } - if (Lhs.Location.SequenceIndex > Rhs.Location.SequenceIndex) + if (Lhs.SequenceIndex > Rhs.SequenceIndex) { return false; } - return Lhs.Location.Offset < Rhs.Location.Offset; + return Lhs.Offset < Rhs.Offset; }); Result.ChunkSequenceLocations.reserve(Locations.size()); @@ -882,7 +914,10 @@ BuildChunkedContentLookup(const ChunkedFolderContent& Content) uint32_t Count = 0; while ((RangeOffset + Count < Locations.size()) && (Locations[RangeOffset + Count].ChunkIndex == ChunkIndex)) { - Result.ChunkSequenceLocations.push_back(Locations[RangeOffset + Count].Location); + const ChunkLocationReference& LocationReference = Locations[RangeOffset + Count]; + Result.ChunkSequenceLocations.push_back( + ChunkedContentLookup::ChunkSequenceLocation{.SequenceIndex = LocationReference.SequenceIndex, + .Offset = LocationReference.Offset}); Count++; } Result.ChunkSequenceLocationOffset.push_back(RangeOffset); diff --git a/src/zenutil/include/zenutil/chunkedcontent.h b/src/zenutil/include/zenutil/chunkedcontent.h index 03f52e5f6..225b1a3a5 100644 --- a/src/zenutil/include/zenutil/chunkedcontent.h +++ b/src/zenutil/include/zenutil/chunkedcontent.h @@ -94,10 +94,31 @@ struct ChunkedFolderContent ChunkedContentData ChunkedContent; }; +struct ChunkedContentLookup +{ + struct ChunkSequenceLocation + { + uint32_t SequenceIndex = (uint32_t)-1; + uint64_t Offset = (uint64_t)-1; + }; + tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> ChunkHashToChunkIndex; + tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> RawHashToSequenceIndex; + std::vector<uint32_t> SequenceIndexChunkOrderOffset; + std::vector<ChunkSequenceLocation> ChunkSequenceLocations; + std::vector<size_t> + ChunkSequenceLocationOffset; // ChunkSequenceLocations[ChunkLocationOffset[ChunkIndex]] -> start of sources for ChunkIndex + std::vector<uint32_t> ChunkSequenceLocationCounts; // ChunkSequenceLocationCounts[ChunkIndex] count of chunk locations for ChunkIndex + std::vector<uint32_t> SequenceIndexFirstPathIndex; // SequenceIndexFirstPathIndex[SequenceIndex] -> first path index with that RawHash + std::vector<uint32_t> PathExtensionHash; +}; + void SaveChunkedFolderContentToCompactBinary(const ChunkedFolderContent& Content, CbWriter& Output); ChunkedFolderContent LoadChunkedFolderContentToCompactBinary(CbObjectView Input); ChunkedFolderContent MergeChunkedFolderContents(const ChunkedFolderContent& Base, std::span<const ChunkedFolderContent> Overlays); +ChunkedFolderContent DeletePathsFromChunkedContent(const ChunkedFolderContent& Base, + const ChunkedContentLookup& BaseContentLookup, + std::span<const std::filesystem::path> DeletedPaths); ChunkedFolderContent DeletePathsFromChunkedContent(const ChunkedFolderContent& Base, std::span<const std::filesystem::path> DeletedPaths); struct ChunkingStatistics @@ -120,24 +141,6 @@ ChunkedFolderContent ChunkFolderContent(ChunkingStatistics& Stats, std::function<void(bool IsAborted, std::ptrdiff_t PendingWork)>&& UpdateCallback, std::atomic<bool>& AbortFlag); -struct ChunkedContentLookup -{ - struct ChunkSequenceLocation - { - uint32_t SequenceIndex = (uint32_t)-1; - uint64_t Offset = (uint64_t)-1; - }; - tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> ChunkHashToChunkIndex; - tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> RawHashToSequenceIndex; - std::vector<uint32_t> SequenceIndexChunkOrderOffset; - std::vector<ChunkSequenceLocation> ChunkSequenceLocations; - std::vector<size_t> - ChunkSequenceLocationOffset; // ChunkSequenceLocations[ChunkLocationOffset[ChunkIndex]] -> start of sources for ChunkIndex - std::vector<uint32_t> ChunkSequenceLocationCounts; // ChunkSequenceLocationCounts[ChunkIndex] count of chunk locations for ChunkIndex - std::vector<uint32_t> SequenceIndexFirstPathIndex; // SequenceIndexFirstPathIndex[SequenceIndex] -> first path index with that RawHash - std::vector<uint32_t> PathExtensionHash; -}; - ChunkedContentLookup BuildChunkedContentLookup(const ChunkedFolderContent& Content); inline std::pair<size_t, uint32_t> |