diff options
| author | Dan Engelbrecht <[email protected]> | 2025-12-19 16:30:03 +0100 |
|---|---|---|
| committer | GitHub Enterprise <[email protected]> | 2025-12-19 16:30:03 +0100 |
| commit | 0bf7531d530f12e0fa2edab70b6bf4693fb041db (patch) | |
| tree | 0f29a872019d5c5b6952ef5e8babde1b6c7cd555 /src | |
| parent | 5.7.15 (diff) | |
| download | zen-0bf7531d530f12e0fa2edab70b6bf4693fb041db.tar.xz zen-0bf7531d530f12e0fa2edab70b6bf4693fb041db.zip | |
optimize scavenge (#697)
* optimize FindScavengeContent
* optimize GetValidFolderContent
Diffstat (limited to 'src')
5 files changed, 260 insertions, 254 deletions
diff --git a/src/zen/cmds/builds_cmd.cpp b/src/zen/cmds/builds_cmd.cpp index a2b2fb0f6..25f66e0ee 100644 --- a/src/zen/cmds/builds_cmd.cpp +++ b/src/zen/cmds/builds_cmd.cpp @@ -875,87 +875,6 @@ namespace { } } - FolderContent GetValidFolderContent(TransferThreadWorkers& Workers, - GetFolderContentStatistics& LocalFolderScanStats, - const std::filesystem::path& Path, - std::span<const std::filesystem::path> PathsToCheck, - std::function<void(uint64_t PathCount, uint64_t CompletedPathCount)>&& ProgressCallback) - { - ZEN_TRACE_CPU("GetValidFolderContent"); - FolderContent Result; - const uint32_t PathCount = gsl::narrow<uint32_t>(PathsToCheck.size()); - - Result.Paths.resize(PathCount); - Result.RawSizes.resize(PathCount); - Result.Attributes.resize(PathCount); - Result.ModificationTicks.resize(PathCount); - - { - Stopwatch Timer; - auto _ = MakeGuard([&LocalFolderScanStats, &Timer]() { LocalFolderScanStats.ElapsedWallTimeUS = Timer.GetElapsedTimeUs(); }); - - ParallelWork Work(AbortFlag, PauseFlag, WorkerThreadPool::EMode::EnableBacklog); - std::atomic<uint64_t> CompletedPathCount = 0; - uint32_t PathIndex = 0; - - while (PathIndex < PathCount) - { - uint32_t PathRangeCount = Min(128u, PathCount - PathIndex); - Work.ScheduleWork(Workers.GetIOWorkerPool(), - [PathIndex, PathRangeCount, &PathsToCheck, &Path, &Result, &CompletedPathCount, &LocalFolderScanStats]( - std::atomic<bool>&) { - for (uint32_t PathRangeIndex = PathIndex; PathRangeIndex < PathIndex + PathRangeCount; - PathRangeIndex++) - { - const std::filesystem::path& FilePath = PathsToCheck[PathRangeIndex]; - std::filesystem::path LocalFilePath = (Path / FilePath).make_preferred(); - if (TryGetFileProperties(LocalFilePath, - Result.RawSizes[PathRangeIndex], - Result.ModificationTicks[PathRangeIndex], - Result.Attributes[PathRangeIndex])) - { - Result.Paths[PathRangeIndex] = std::move(FilePath); - LocalFolderScanStats.FoundFileCount++; - LocalFolderScanStats.FoundFileByteCount += Result.RawSizes[PathRangeIndex]; - LocalFolderScanStats.AcceptedFileCount++; - LocalFolderScanStats.AcceptedFileByteCount += Result.RawSizes[PathRangeIndex]; - } - CompletedPathCount++; - } - }); - PathIndex += PathRangeCount; - } - Work.Wait(200, [&](bool, bool, ptrdiff_t) { - if (ProgressCallback) - { - ProgressCallback(PathCount, CompletedPathCount.load()); - } - }); - } - - uint32_t WritePathIndex = 0; - for (uint32_t ReadPathIndex = 0; ReadPathIndex < PathCount; ReadPathIndex++) - { - if (!Result.Paths[ReadPathIndex].empty()) - { - if (WritePathIndex < ReadPathIndex) - { - Result.Paths[WritePathIndex] = std::move(Result.Paths[ReadPathIndex]); - Result.RawSizes[WritePathIndex] = Result.RawSizes[ReadPathIndex]; - Result.Attributes[WritePathIndex] = Result.Attributes[ReadPathIndex]; - Result.ModificationTicks[WritePathIndex] = Result.ModificationTicks[ReadPathIndex]; - } - WritePathIndex++; - } - } - - Result.Paths.resize(WritePathIndex); - Result.RawSizes.resize(WritePathIndex); - Result.Attributes.resize(WritePathIndex); - Result.ModificationTicks.resize(WritePathIndex); - return Result; - } - std::string GetCbObjectAsNiceString(CbObjectView Object, std::string_view Prefix, std::string_view Suffix) { ExtendableStringBuilder<512> SB; @@ -1380,7 +1299,7 @@ namespace { { ProgressBar ProgressBar(ProgressMode, "Check Files"); FolderState = GetValidFolderContent( - Workers, + Workers.GetIOWorkerPool(), LocalFolderScanStats, Path, PathsToCheck, @@ -1393,7 +1312,10 @@ namespace { .RemainingCount = PathCount - CompletedPathCount, .Status = ProgressBar::State::CalculateStatus(AbortFlag, PauseFlag)}, false); - }); + }, + GetUpdateDelayMS(ProgressMode), + AbortFlag, + PauseFlag); ProgressBar.Finish(); } @@ -1478,7 +1400,7 @@ namespace { { ProgressBar ProgressBar(ProgressMode, "Check Known Files"); CurrentLocalFolderState = GetValidFolderContent( - Workers, + Workers.GetIOWorkerPool(), LocalFolderScanStats, Path, SavedLocalState.FolderState.Paths, @@ -1491,7 +1413,10 @@ namespace { .RemainingCount = PathCount - CompletedPathCount, .Status = ProgressBar::State::CalculateStatus(AbortFlag, PauseFlag)}, false); - }); + }, + GetUpdateDelayMS(ProgressMode), + AbortFlag, + PauseFlag); ProgressBar.Finish(); } if (AbortFlag) @@ -2007,6 +1932,7 @@ namespace { ZEN_CONSOLE( "Downloaded build {}, parts:{} in {}\n" + " Scavenge: {} (Target: {}, Cache: {}, Others: {})\n" " Download: {} ({}) {}bits/s{}\n" " Write: {} ({}) {}B/s{}\n" " Clean: {}\n" @@ -2016,6 +1942,14 @@ namespace { BuildPartString.ToView(), NiceTimeSpanMs(DownloadTimeMs), + NiceTimeSpanMs((Updater.m_CacheMappingStats.CacheScanElapsedWallTimeUs + + Updater.m_CacheMappingStats.LocalScanElapsedWallTimeUs + + Updater.m_CacheMappingStats.ScavengeElapsedWallTimeUs) / + 1000), + NiceTimeSpanMs(Updater.m_CacheMappingStats.LocalScanElapsedWallTimeUs / 1000), + NiceTimeSpanMs(Updater.m_CacheMappingStats.CacheScanElapsedWallTimeUs / 1000), + NiceTimeSpanMs(Updater.m_CacheMappingStats.ScavengeElapsedWallTimeUs / 1000), + DownloadCount, NiceBytes(DownloadByteCount), NiceNum(GetBytesPerSecond(Updater.m_WriteChunkStats.DownloadTimeUs, DownloadByteCount * 8)), diff --git a/src/zenremotestore/builds/buildstorageoperations.cpp b/src/zenremotestore/builds/buildstorageoperations.cpp index f07f410ea..b9f5eb07a 100644 --- a/src/zenremotestore/builds/buildstorageoperations.cpp +++ b/src/zenremotestore/builds/buildstorageoperations.cpp @@ -2838,56 +2838,79 @@ BuildsOperationUpdateFolder::FindScavengeContent(const ScavengeSource& Source, return false; } - OutScavengedLookup = BuildChunkedContentLookup(OutScavengedLocalContent); + tsl::robin_set<uint32_t> PathIndexesToScavenge; + PathIndexesToScavenge.reserve(OutScavengedLocalContent.Paths.size()); + std::vector<uint32_t> ChunkOrderOffsets = BuildChunkOrderOffset(OutScavengedLocalContent.ChunkedContent.ChunkCounts); - std::vector<uint32_t> PathIndexesToScavange; - uint32_t ScavengedStatePathCount = gsl::narrow<uint32_t>(OutScavengedLocalContent.Paths.size()); - PathIndexesToScavange.reserve(ScavengedStatePathCount); - for (uint32_t ScavengedStatePathIndex = 0; ScavengedStatePathIndex < ScavengedStatePathCount; ScavengedStatePathIndex++) { - const IoHash& SequenceHash = OutScavengedLocalContent.RawHashes[ScavengedStatePathIndex]; - if (auto ScavengeSequenceIt = OutScavengedLookup.RawHashToSequenceIndex.find(SequenceHash); - ScavengeSequenceIt != OutScavengedLookup.RawHashToSequenceIndex.end()) + tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> RawHashToPathIndex; + + RawHashToPathIndex.reserve(OutScavengedLocalContent.Paths.size()); + for (uint32_t ScavengedPathIndex = 0; ScavengedPathIndex < OutScavengedLocalContent.RawHashes.size(); ScavengedPathIndex++) { - const uint32_t ScavengeSequenceIndex = ScavengeSequenceIt->second; - if (m_RemoteLookup.RawHashToSequenceIndex.contains(SequenceHash)) + if (!RawHashToPathIndex.contains(OutScavengedLocalContent.RawHashes[ScavengedPathIndex])) { - PathIndexesToScavange.push_back(ScavengedStatePathIndex); + RawHashToPathIndex.insert_or_assign(OutScavengedLocalContent.RawHashes[ScavengedPathIndex], ScavengedPathIndex); } - else + } + + for (uint32_t ScavengeSequenceIndex = 0; ScavengeSequenceIndex < OutScavengedLocalContent.ChunkedContent.SequenceRawHashes.size(); + ScavengeSequenceIndex++) + { + const IoHash& SequenceHash = OutScavengedLocalContent.ChunkedContent.SequenceRawHashes[ScavengeSequenceIndex]; + if (auto It = RawHashToPathIndex.find(SequenceHash); It != RawHashToPathIndex.end()) { - const uint32_t ScavengeChunkCount = OutScavengedLocalContent.ChunkedContent.ChunkCounts[ScavengeSequenceIndex]; - for (uint32_t ScavengeChunkIndexOffset = 0; ScavengeChunkIndexOffset < ScavengeChunkCount; ScavengeChunkIndexOffset++) + uint32_t PathIndex = It->second; + if (!PathIndexesToScavenge.contains(PathIndex)) { - const size_t ScavengeChunkOrderIndex = - OutScavengedLookup.ChunkSequenceLocationOffset[ScavengeSequenceIndex] + ScavengeChunkIndexOffset; - const uint32_t ScavengeChunkIndex = OutScavengedLocalContent.ChunkedContent.ChunkOrders[ScavengeChunkOrderIndex]; - const IoHash& ScavengeChunkHash = OutScavengedLocalContent.ChunkedContent.ChunkHashes[ScavengeChunkIndex]; - if (m_RemoteLookup.ChunkHashToChunkIndex.contains(ScavengeChunkHash)) + if (m_RemoteLookup.RawHashToSequenceIndex.contains(SequenceHash)) { - PathIndexesToScavange.push_back(ScavengedStatePathIndex); - break; + PathIndexesToScavenge.insert(PathIndex); + } + else + { + uint32_t ChunkOrderIndexStart = ChunkOrderOffsets[ScavengeSequenceIndex]; + const uint32_t ChunkCount = OutScavengedLocalContent.ChunkedContent.ChunkCounts[ScavengeSequenceIndex]; + for (uint32_t ChunkOrderIndex = 0; ChunkOrderIndex < ChunkCount; ChunkOrderIndex++) + { + const uint32_t ChunkIndex = + OutScavengedLocalContent.ChunkedContent.ChunkOrders[ChunkOrderIndexStart + ChunkOrderIndex]; + const IoHash& ChunkHash = OutScavengedLocalContent.ChunkedContent.ChunkHashes[ChunkIndex]; + if (m_RemoteLookup.ChunkHashToChunkIndex.contains(ChunkHash)) + { + PathIndexesToScavenge.insert(PathIndex); + break; + } + } } } } + else + { + ZEN_OPERATION_LOG_WARN(m_LogOutput, + "Scavenged state file at '{}' for '{}' is invalid, skipping scavenging for sequence {}", + Source.StateFilePath, + Source.Path, + SequenceHash); + } } } - if (PathIndexesToScavange.empty()) + if (PathIndexesToScavenge.empty()) { - OutScavengedLookup = {}; OutScavengedLocalContent = {}; return false; } std::vector<std::filesystem::path> PathsToScavenge; - PathsToScavenge.reserve(PathIndexesToScavange.size()); - for (uint32_t ScavengedStatePathIndex : PathIndexesToScavange) + PathsToScavenge.reserve(PathIndexesToScavenge.size()); + for (uint32_t ScavengedStatePathIndex : PathIndexesToScavenge) { PathsToScavenge.push_back(OutScavengedLocalContent.Paths[ScavengedStatePathIndex]); } - FolderContent ValidFolderContent = GetValidFolderContent(m_ScavengedFolderScanStats, Source.Path, PathsToScavenge, {}); + FolderContent ValidFolderContent = + GetValidFolderContent(m_IOWorkerPool, m_ScavengedFolderScanStats, Source.Path, PathsToScavenge, {}, 0, m_AbortFlag, m_PauseFlag); if (!LocalFolderState.AreKnownFilesEqual(ValidFolderContent)) { @@ -2899,18 +2922,22 @@ BuildsOperationUpdateFolder::FindScavengeContent(const ScavengeSource& Source, DeletedPaths.insert(DeletedPaths.end(), UpdatedContent.Paths.begin(), UpdatedContent.Paths.end()); if (!DeletedPaths.empty()) { - OutScavengedLocalContent = DeletePathsFromChunkedContent(OutScavengedLocalContent, OutScavengedLookup, DeletedPaths); - OutScavengedLookup = BuildChunkedContentLookup(OutScavengedLocalContent); + OutScavengedLocalContent = + DeletePathsFromChunkedContent(OutScavengedLocalContent, + BuildHashLookup(OutScavengedLocalContent.ChunkedContent.SequenceRawHashes), + ChunkOrderOffsets, + DeletedPaths); } } if (OutScavengedLocalContent.Paths.empty()) { - OutScavengedLookup = {}; OutScavengedLocalContent = {}; return false; } + OutScavengedLookup = BuildChunkedContentLookup(OutScavengedLocalContent); + return true; } @@ -2944,95 +2971,6 @@ BuildsOperationUpdateFolder::FindDownloadedChunk(const IoHash& ChunkHash) return {}; } -FolderContent -BuildsOperationUpdateFolder::GetValidFolderContent(GetFolderContentStatistics& FolderScanStats, - const std::filesystem::path& Path, - std::span<const std::filesystem::path> PathsToCheck, - std::function<void(uint64_t PathCount, uint64_t CompletedPathCount)>&& ProgressCallback) -{ - ZEN_TRACE_CPU("GetValidFolderContent"); - - FolderContent Result; - const uint32_t PathCount = gsl::narrow<uint32_t>(PathsToCheck.size()); - - Result.Paths.resize(PathCount); - Result.RawSizes.resize(PathCount); - Result.Attributes.resize(PathCount); - Result.ModificationTicks.resize(PathCount); - - { - Stopwatch Timer; - auto _ = MakeGuard([&FolderScanStats, &Timer]() { FolderScanStats.ElapsedWallTimeUS = Timer.GetElapsedTimeUs(); }); - - ParallelWork Work(m_AbortFlag, - m_PauseFlag, - ProgressCallback ? WorkerThreadPool::EMode::EnableBacklog : WorkerThreadPool::EMode::DisableBacklog); - std::atomic<uint64_t> CompletedPathCount = 0; - uint32_t PathIndex = 0; - - while (PathIndex < PathCount) - { - uint32_t PathRangeCount = Min(128u, PathCount - PathIndex); - Work.ScheduleWork(m_IOWorkerPool, - [PathIndex, PathRangeCount, &PathsToCheck, &Path, &Result, &CompletedPathCount, &FolderScanStats]( - std::atomic<bool>& AbortFlag) { - if (!AbortFlag) - { - ZEN_TRACE_CPU("Async_ValidateFiles"); - - for (uint32_t PathRangeIndex = PathIndex; PathRangeIndex < PathIndex + PathRangeCount; - PathRangeIndex++) - { - const std::filesystem::path& FilePath = PathsToCheck[PathRangeIndex]; - std::filesystem::path LocalFilePath = (Path / FilePath).make_preferred(); - if (TryGetFileProperties(LocalFilePath, - Result.RawSizes[PathRangeIndex], - Result.ModificationTicks[PathRangeIndex], - Result.Attributes[PathRangeIndex])) - { - Result.Paths[PathRangeIndex] = std::move(FilePath); - FolderScanStats.FoundFileCount++; - FolderScanStats.FoundFileByteCount += Result.RawSizes[PathRangeIndex]; - FolderScanStats.AcceptedFileCount++; - FolderScanStats.AcceptedFileByteCount += Result.RawSizes[PathRangeIndex]; - } - CompletedPathCount++; - } - } - }); - PathIndex += PathRangeCount; - } - Work.Wait(200, [&](bool, bool, ptrdiff_t) { - if (ProgressCallback) - { - ProgressCallback(PathCount, CompletedPathCount.load()); - } - }); - } - - uint32_t WritePathIndex = 0; - for (uint32_t ReadPathIndex = 0; ReadPathIndex < PathCount; ReadPathIndex++) - { - if (!Result.Paths[ReadPathIndex].empty()) - { - if (WritePathIndex < ReadPathIndex) - { - Result.Paths[WritePathIndex] = std::move(Result.Paths[ReadPathIndex]); - Result.RawSizes[WritePathIndex] = Result.RawSizes[ReadPathIndex]; - Result.Attributes[WritePathIndex] = Result.Attributes[ReadPathIndex]; - Result.ModificationTicks[WritePathIndex] = Result.ModificationTicks[ReadPathIndex]; - } - WritePathIndex++; - } - } - - Result.Paths.resize(WritePathIndex); - Result.RawSizes.resize(WritePathIndex); - Result.Attributes.resize(WritePathIndex); - Result.ModificationTicks.resize(WritePathIndex); - return Result; -} - std::vector<const ChunkedContentLookup::ChunkSequenceLocation*> BuildsOperationUpdateFolder::GetRemainingChunkTargets(std::span<const std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters, uint32_t ChunkIndex) diff --git a/src/zenremotestore/chunking/chunkedcontent.cpp b/src/zenremotestore/chunking/chunkedcontent.cpp index 5f1876908..e8187d348 100644 --- a/src/zenremotestore/chunking/chunkedcontent.cpp +++ b/src/zenremotestore/chunking/chunkedcontent.cpp @@ -177,31 +177,6 @@ namespace { std::string PathCompareString(const std::filesystem::path& Path) { return ToLower(Path.generic_string()); } - tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> BuildHashLookup(std::span<const IoHash> Hashes) - { - tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> Lookup; - Lookup.reserve(Hashes.size()); - for (uint32_t Index = 0; Index < Hashes.size(); Index++) - { - bool IsNew = Lookup.insert_or_assign(Hashes[Index], Index).second; - ZEN_ASSERT(IsNew); - } - return Lookup; - } - - std::vector<uint32_t> BuildChunkOrderOffset(std::span<const uint32_t> ChunkCounts) - { - std::vector<uint32_t> ChunkOffsets; - ChunkOffsets.reserve(ChunkCounts.size()); - uint32_t Offset = 0; - for (uint32_t SequenceIndex = 0; SequenceIndex < ChunkCounts.size(); SequenceIndex++) - { - ChunkOffsets.push_back(Offset); - Offset += ChunkCounts[SequenceIndex]; - } - return ChunkOffsets; - } - } // namespace std::string_view FolderContentSourcePlatformNames[(size_t)SourcePlatform::_Count] = {"Windows"sv, "Linux"sv, "MacOS"sv}; @@ -551,6 +526,134 @@ GetFolderContent(GetFolderContentStatistics& Stats, return OrderedContent; } +FolderContent +GetValidFolderContent(WorkerThreadPool& WorkerPool, + GetFolderContentStatistics& FolderScanStats, + const std::filesystem::path& Path, + std::span<const std::filesystem::path> PathsToCheck, + std::function<void(uint64_t PathCount, uint64_t CompletedPathCount)>&& ProgressCallback, + uint32_t ProgressUpdateDelayMS, + std::atomic<bool>& AbortFlag, + std::atomic<bool>& PauseFlag) +{ + ZEN_TRACE_CPU("GetValidFolderContent"); + + FolderContent Result; + const uint32_t PathCount = gsl::narrow<uint32_t>(PathsToCheck.size()); + + Result.Paths.resize(PathCount); + Result.RawSizes.resize(PathCount); + Result.Attributes.resize(PathCount); + Result.ModificationTicks.resize(PathCount); + + { + Stopwatch Timer; + auto _ = MakeGuard([&FolderScanStats, &Timer]() { FolderScanStats.ElapsedWallTimeUS = Timer.GetElapsedTimeUs(); }); + + tsl::robin_map<std::string, uint32_t> PathToPathIndex; + PathToPathIndex.reserve(PathsToCheck.size()); + std::vector<std::filesystem::path> DirectoriesToScan; + { + tsl::robin_set<std::string> DirectoriesFound; + for (size_t PathIndex = 0; PathIndex < PathsToCheck.size(); PathIndex++) + { + const std::filesystem::path PathToCheck = (Path / PathsToCheck[PathIndex]); + const std::string LookupPath = PathToCheck.generic_string(); + PathToPathIndex.insert_or_assign(LookupPath, PathIndex); + std::filesystem::path ParentDirectoryPath = PathToCheck.parent_path(); + const std::string Directory = ParentDirectoryPath.generic_string(); + if (DirectoriesFound.insert(Directory).second) + { + DirectoriesToScan.push_back(ParentDirectoryPath.make_preferred()); + } + } + } + + ParallelWork Work(AbortFlag, + PauseFlag, + ProgressCallback ? WorkerThreadPool::EMode::EnableBacklog : WorkerThreadPool::EMode::DisableBacklog); + + std::atomic<uint64_t> CompletedDirectoryCount = 0; + for (size_t DirectoryIndex = 0; DirectoryIndex < DirectoriesToScan.size(); DirectoryIndex++) + { + Work.ScheduleWork( + WorkerPool, + [&DirectoriesToScan, DirectoryIndex, &Result, &FolderScanStats, &PathsToCheck, &PathToPathIndex, &CompletedDirectoryCount]( + std::atomic<bool>& AbortFlag) { + if (!AbortFlag) + { + ZEN_TRACE_CPU("GetValidFolderContent_ScanDirectory"); + + const std::filesystem::path ParentDirectoryPath = DirectoriesToScan[DirectoryIndex]; + try + { + if (IsDir(ParentDirectoryPath)) + { + DirectoryContent DirContent; + GetDirectoryContent(ParentDirectoryPath, + DirectoryContentFlags::IncludeFiles | DirectoryContentFlags::IncludeFileSizes | + DirectoryContentFlags::IncludeModificationTick | + DirectoryContentFlags::IncludeAttributes, + DirContent); + for (size_t FoundIndex = 0; FoundIndex < DirContent.Files.size(); FoundIndex++) + { + const std::filesystem::path& FoundPath = DirContent.Files[FoundIndex]; + if (auto It = PathToPathIndex.find(FoundPath.generic_string()); It != PathToPathIndex.end()) + { + const size_t PathIndex = It->second; + + Result.Paths[PathIndex] = PathsToCheck[PathIndex]; + Result.RawSizes[PathIndex] = DirContent.FileSizes[FoundIndex]; + Result.ModificationTicks[PathIndex] = DirContent.FileModificationTicks[FoundIndex]; + Result.Attributes[PathIndex] = DirContent.FileAttributes[FoundIndex]; + + FolderScanStats.FoundFileCount++; + FolderScanStats.FoundFileByteCount += Result.RawSizes[PathIndex]; + FolderScanStats.AcceptedFileCount++; + FolderScanStats.AcceptedFileByteCount += Result.RawSizes[PathIndex]; + } + } + } + } + catch (const std::exception& Ex) + { + ZEN_WARN("Failed checking content of folder '{}', reason: {}", ParentDirectoryPath, Ex.what()); + } + } + CompletedDirectoryCount++; + }); + } + Work.Wait(ProgressUpdateDelayMS, [&](bool, bool, ptrdiff_t) { + if (ProgressCallback) + { + ProgressCallback(DirectoriesToScan.size(), CompletedDirectoryCount.load()); + } + }); + } + + uint32_t WritePathIndex = 0; + for (uint32_t ReadPathIndex = 0; ReadPathIndex < PathCount; ReadPathIndex++) + { + if (!Result.Paths[ReadPathIndex].empty()) + { + if (WritePathIndex < ReadPathIndex) + { + Result.Paths[WritePathIndex] = std::move(Result.Paths[ReadPathIndex]); + Result.RawSizes[WritePathIndex] = Result.RawSizes[ReadPathIndex]; + Result.Attributes[WritePathIndex] = Result.Attributes[ReadPathIndex]; + Result.ModificationTicks[WritePathIndex] = Result.ModificationTicks[ReadPathIndex]; + } + WritePathIndex++; + } + } + + Result.Paths.resize(WritePathIndex); + Result.RawSizes.resize(WritePathIndex); + Result.Attributes.resize(WritePathIndex); + Result.ModificationTicks.resize(WritePathIndex); + return Result; +} + void SaveChunkedFolderContentToCompactBinary(const ChunkedFolderContent& Content, CbWriter& Output) { @@ -726,9 +829,10 @@ MergeChunkedFolderContents(const ChunkedFolderContent& Base, std::span<const Chu } ChunkedFolderContent -DeletePathsFromChunkedContent(const ChunkedFolderContent& BaseContent, - const ChunkedContentLookup& BaseContentLookup, - std::span<const std::filesystem::path> DeletedPaths) +DeletePathsFromChunkedContent(const ChunkedFolderContent& BaseContent, + const tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& RawHashToSequenceIndex, + std::vector<uint32_t> SequenceIndexChunkOrderOffset, + std::span<const std::filesystem::path> DeletedPaths) { ZEN_TRACE_CPU("DeletePathsFromChunkedContent"); @@ -776,8 +880,8 @@ DeletePathsFromChunkedContent(const ChunkedFolderContent& BaseContent, { RawHashToSequenceRawHashIndex.insert( {RawHash, gsl::narrow<uint32_t>(Result.ChunkedContent.SequenceRawHashes.size())}); - const uint32_t SequenceRawHashIndex = BaseContentLookup.RawHashToSequenceIndex.at(RawHash); - const uint32_t OrderIndexOffset = BaseContentLookup.SequenceIndexChunkOrderOffset[SequenceRawHashIndex]; + const uint32_t SequenceRawHashIndex = RawHashToSequenceIndex.at(RawHash); + const uint32_t OrderIndexOffset = SequenceIndexChunkOrderOffset[SequenceRawHashIndex]; const uint32_t ChunkCount = BaseContent.ChunkedContent.ChunkCounts[SequenceRawHashIndex]; std::span<const uint32_t> OriginalChunkOrder = @@ -820,8 +924,12 @@ DeletePathsFromChunkedContent(const ChunkedFolderContent& BaseContent, std::span { return {}; } - const ChunkedContentLookup BaseLookup = BuildChunkedContentLookup(BaseContent); - return DeletePathsFromChunkedContent(BaseContent, BaseLookup, DeletedPaths); + + tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> BaseSequenceHashToSequenceIndex = + BuildHashLookup(BaseContent.ChunkedContent.SequenceRawHashes); + std::vector<uint32_t> BaseSequenceChunkOrderOffset = BuildChunkOrderOffset(BaseContent.ChunkedContent.ChunkCounts); + + return DeletePathsFromChunkedContent(BaseContent, BaseSequenceHashToSequenceIndex, BaseSequenceChunkOrderOffset, DeletedPaths); } bool @@ -1082,6 +1190,33 @@ ChunkFolderContent(ChunkingStatistics& Stats, return Result; } +tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> +BuildHashLookup(std::span<const IoHash> Hashes) +{ + tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> Lookup; + Lookup.reserve(Hashes.size()); + for (uint32_t Index = 0; Index < Hashes.size(); Index++) + { + bool IsNew = Lookup.insert_or_assign(Hashes[Index], Index).second; + ZEN_ASSERT(IsNew); + } + return Lookup; +} + +std::vector<uint32_t> +BuildChunkOrderOffset(std::span<const uint32_t> ChunkCounts) +{ + std::vector<uint32_t> ChunkOffsets; + ChunkOffsets.reserve(ChunkCounts.size()); + uint32_t Offset = 0; + for (uint32_t SequenceIndex = 0; SequenceIndex < ChunkCounts.size(); SequenceIndex++) + { + ChunkOffsets.push_back(Offset); + Offset += ChunkCounts[SequenceIndex]; + } + return ChunkOffsets; +} + ChunkedContentLookup BuildChunkedContentLookup(const ChunkedFolderContent& Content) { @@ -1096,17 +1231,8 @@ BuildChunkedContentLookup(const ChunkedFolderContent& Content) ChunkedContentLookup Result; { - const uint32_t SequenceRawHashesCount = gsl::narrow<uint32_t>(Content.ChunkedContent.SequenceRawHashes.size()); - Result.RawHashToSequenceIndex.reserve(SequenceRawHashesCount); - Result.SequenceIndexChunkOrderOffset.reserve(SequenceRawHashesCount); - uint32_t OrderOffset = 0; - for (uint32_t SequenceRawHashIndex = 0; SequenceRawHashIndex < Content.ChunkedContent.SequenceRawHashes.size(); - SequenceRawHashIndex++) - { - Result.RawHashToSequenceIndex.insert({Content.ChunkedContent.SequenceRawHashes[SequenceRawHashIndex], SequenceRawHashIndex}); - Result.SequenceIndexChunkOrderOffset.push_back(OrderOffset); - OrderOffset += Content.ChunkedContent.ChunkCounts[SequenceRawHashIndex]; - } + Result.SequenceIndexChunkOrderOffset = BuildChunkOrderOffset(Content.ChunkedContent.ChunkCounts); + Result.RawHashToSequenceIndex = BuildHashLookup(Content.ChunkedContent.SequenceRawHashes); } std::vector<ChunkLocationReference> Locations; diff --git a/src/zenremotestore/include/zenremotestore/builds/buildstorageoperations.h b/src/zenremotestore/include/zenremotestore/builds/buildstorageoperations.h index eca654223..d78ee29c1 100644 --- a/src/zenremotestore/include/zenremotestore/builds/buildstorageoperations.h +++ b/src/zenremotestore/include/zenremotestore/builds/buildstorageoperations.h @@ -259,11 +259,6 @@ private: std::filesystem::path FindDownloadedChunk(const IoHash& ChunkHash); - FolderContent GetValidFolderContent(GetFolderContentStatistics& LocalFolderScanStats, - const std::filesystem::path& Path, - std::span<const std::filesystem::path> PathsToCheck, - std::function<void(uint64_t PathCount, uint64_t CompletedPathCount)>&& ProgressCallback); - std::vector<const ChunkedContentLookup::ChunkSequenceLocation*> GetRemainingChunkTargets( std::span<const std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters, uint32_t ChunkIndex); diff --git a/src/zenremotestore/include/zenremotestore/chunking/chunkedcontent.h b/src/zenremotestore/include/zenremotestore/chunking/chunkedcontent.h index e4be7923a..78f20a727 100644 --- a/src/zenremotestore/include/zenremotestore/chunking/chunkedcontent.h +++ b/src/zenremotestore/include/zenremotestore/chunking/chunkedcontent.h @@ -71,6 +71,15 @@ FolderContent GetFolderContent(GetFolderContentStatistics& Stats, std::function<void(bool IsAborted, std::ptrdiff_t PendingWork)>&& UpdateCallback, std::atomic<bool>& AbortFlag); +FolderContent GetValidFolderContent(WorkerThreadPool& WorkerPool, + GetFolderContentStatistics& FolderScanStats, + const std::filesystem::path& Path, + std::span<const std::filesystem::path> PathsToCheck, + std::function<void(uint64_t PathCount, uint64_t CompletedPathCount)>&& ProgressCallback, + uint32_t ProgressUpdateDelayMS, + std::atomic<bool>& AbortFlag, + std::atomic<bool>& PauseFlag); + struct ChunkedContentData { // To describe one asset with a particular RawHash, find the index of the hash in SequenceRawHashes @@ -116,9 +125,10 @@ void SaveChunkedFolderContentToCompactBinary(const ChunkedFolderContent& Con ChunkedFolderContent LoadChunkedFolderContentFromCompactBinary(CbObjectView Input); ChunkedFolderContent MergeChunkedFolderContents(const ChunkedFolderContent& Base, std::span<const ChunkedFolderContent> Overlays); -ChunkedFolderContent DeletePathsFromChunkedContent(const ChunkedFolderContent& Base, - const ChunkedContentLookup& BaseContentLookup, - std::span<const std::filesystem::path> DeletedPaths); +ChunkedFolderContent DeletePathsFromChunkedContent(const ChunkedFolderContent& Base, + const tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& RawHashToSequenceIndex, + std::vector<uint32_t> SequenceIndexChunkOrderOffset, + std::span<const std::filesystem::path> DeletedPaths); ChunkedFolderContent DeletePathsFromChunkedContent(const ChunkedFolderContent& Base, std::span<const std::filesystem::path> DeletedPaths); bool CompareChunkedContent(const ChunkedFolderContent& Lhs, const ChunkedFolderContent& Rhs); @@ -161,6 +171,9 @@ ChunkedFolderContent ChunkFolderContent(ChunkingStatistics& Stats, std::atomic<bool>& AbortFlag, std::atomic<bool>& PauseFlag); +tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> BuildHashLookup(std::span<const IoHash> Hashes); +std::vector<uint32_t> BuildChunkOrderOffset(std::span<const uint32_t> ChunkCounts); + ChunkedContentLookup BuildChunkedContentLookup(const ChunkedFolderContent& Content); inline std::pair<size_t, uint32_t> |