diff options
| author | Dan Engelbrecht <[email protected]> | 2025-12-19 16:30:03 +0100 |
|---|---|---|
| committer | GitHub Enterprise <[email protected]> | 2025-12-19 16:30:03 +0100 |
| commit | 0bf7531d530f12e0fa2edab70b6bf4693fb041db (patch) | |
| tree | 0f29a872019d5c5b6952ef5e8babde1b6c7cd555 /src/zenremotestore/builds/buildstorageoperations.cpp | |
| parent | 5.7.15 (diff) | |
| download | zen-0bf7531d530f12e0fa2edab70b6bf4693fb041db.tar.xz zen-0bf7531d530f12e0fa2edab70b6bf4693fb041db.zip | |
optimize scavenge (#697)
* optimize FindScavengeContent
* optimize GetValidFolderContent
Diffstat (limited to 'src/zenremotestore/builds/buildstorageoperations.cpp')
| -rw-r--r-- | src/zenremotestore/builds/buildstorageoperations.cpp | 174 |
1 files changed, 56 insertions, 118 deletions
diff --git a/src/zenremotestore/builds/buildstorageoperations.cpp b/src/zenremotestore/builds/buildstorageoperations.cpp index f07f410ea..b9f5eb07a 100644 --- a/src/zenremotestore/builds/buildstorageoperations.cpp +++ b/src/zenremotestore/builds/buildstorageoperations.cpp @@ -2838,56 +2838,79 @@ BuildsOperationUpdateFolder::FindScavengeContent(const ScavengeSource& Source, return false; } - OutScavengedLookup = BuildChunkedContentLookup(OutScavengedLocalContent); + tsl::robin_set<uint32_t> PathIndexesToScavenge; + PathIndexesToScavenge.reserve(OutScavengedLocalContent.Paths.size()); + std::vector<uint32_t> ChunkOrderOffsets = BuildChunkOrderOffset(OutScavengedLocalContent.ChunkedContent.ChunkCounts); - std::vector<uint32_t> PathIndexesToScavange; - uint32_t ScavengedStatePathCount = gsl::narrow<uint32_t>(OutScavengedLocalContent.Paths.size()); - PathIndexesToScavange.reserve(ScavengedStatePathCount); - for (uint32_t ScavengedStatePathIndex = 0; ScavengedStatePathIndex < ScavengedStatePathCount; ScavengedStatePathIndex++) { - const IoHash& SequenceHash = OutScavengedLocalContent.RawHashes[ScavengedStatePathIndex]; - if (auto ScavengeSequenceIt = OutScavengedLookup.RawHashToSequenceIndex.find(SequenceHash); - ScavengeSequenceIt != OutScavengedLookup.RawHashToSequenceIndex.end()) + tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> RawHashToPathIndex; + + RawHashToPathIndex.reserve(OutScavengedLocalContent.Paths.size()); + for (uint32_t ScavengedPathIndex = 0; ScavengedPathIndex < OutScavengedLocalContent.RawHashes.size(); ScavengedPathIndex++) { - const uint32_t ScavengeSequenceIndex = ScavengeSequenceIt->second; - if (m_RemoteLookup.RawHashToSequenceIndex.contains(SequenceHash)) + if (!RawHashToPathIndex.contains(OutScavengedLocalContent.RawHashes[ScavengedPathIndex])) { - PathIndexesToScavange.push_back(ScavengedStatePathIndex); + RawHashToPathIndex.insert_or_assign(OutScavengedLocalContent.RawHashes[ScavengedPathIndex], ScavengedPathIndex); } - else + } + + for (uint32_t ScavengeSequenceIndex = 0; ScavengeSequenceIndex < OutScavengedLocalContent.ChunkedContent.SequenceRawHashes.size(); + ScavengeSequenceIndex++) + { + const IoHash& SequenceHash = OutScavengedLocalContent.ChunkedContent.SequenceRawHashes[ScavengeSequenceIndex]; + if (auto It = RawHashToPathIndex.find(SequenceHash); It != RawHashToPathIndex.end()) { - const uint32_t ScavengeChunkCount = OutScavengedLocalContent.ChunkedContent.ChunkCounts[ScavengeSequenceIndex]; - for (uint32_t ScavengeChunkIndexOffset = 0; ScavengeChunkIndexOffset < ScavengeChunkCount; ScavengeChunkIndexOffset++) + uint32_t PathIndex = It->second; + if (!PathIndexesToScavenge.contains(PathIndex)) { - const size_t ScavengeChunkOrderIndex = - OutScavengedLookup.ChunkSequenceLocationOffset[ScavengeSequenceIndex] + ScavengeChunkIndexOffset; - const uint32_t ScavengeChunkIndex = OutScavengedLocalContent.ChunkedContent.ChunkOrders[ScavengeChunkOrderIndex]; - const IoHash& ScavengeChunkHash = OutScavengedLocalContent.ChunkedContent.ChunkHashes[ScavengeChunkIndex]; - if (m_RemoteLookup.ChunkHashToChunkIndex.contains(ScavengeChunkHash)) + if (m_RemoteLookup.RawHashToSequenceIndex.contains(SequenceHash)) { - PathIndexesToScavange.push_back(ScavengedStatePathIndex); - break; + PathIndexesToScavenge.insert(PathIndex); + } + else + { + uint32_t ChunkOrderIndexStart = ChunkOrderOffsets[ScavengeSequenceIndex]; + const uint32_t ChunkCount = OutScavengedLocalContent.ChunkedContent.ChunkCounts[ScavengeSequenceIndex]; + for (uint32_t ChunkOrderIndex = 0; ChunkOrderIndex < ChunkCount; ChunkOrderIndex++) + { + const uint32_t ChunkIndex = + OutScavengedLocalContent.ChunkedContent.ChunkOrders[ChunkOrderIndexStart + ChunkOrderIndex]; + const IoHash& ChunkHash = OutScavengedLocalContent.ChunkedContent.ChunkHashes[ChunkIndex]; + if (m_RemoteLookup.ChunkHashToChunkIndex.contains(ChunkHash)) + { + PathIndexesToScavenge.insert(PathIndex); + break; + } + } } } } + else + { + ZEN_OPERATION_LOG_WARN(m_LogOutput, + "Scavenged state file at '{}' for '{}' is invalid, skipping scavenging for sequence {}", + Source.StateFilePath, + Source.Path, + SequenceHash); + } } } - if (PathIndexesToScavange.empty()) + if (PathIndexesToScavenge.empty()) { - OutScavengedLookup = {}; OutScavengedLocalContent = {}; return false; } std::vector<std::filesystem::path> PathsToScavenge; - PathsToScavenge.reserve(PathIndexesToScavange.size()); - for (uint32_t ScavengedStatePathIndex : PathIndexesToScavange) + PathsToScavenge.reserve(PathIndexesToScavenge.size()); + for (uint32_t ScavengedStatePathIndex : PathIndexesToScavenge) { PathsToScavenge.push_back(OutScavengedLocalContent.Paths[ScavengedStatePathIndex]); } - FolderContent ValidFolderContent = GetValidFolderContent(m_ScavengedFolderScanStats, Source.Path, PathsToScavenge, {}); + FolderContent ValidFolderContent = + GetValidFolderContent(m_IOWorkerPool, m_ScavengedFolderScanStats, Source.Path, PathsToScavenge, {}, 0, m_AbortFlag, m_PauseFlag); if (!LocalFolderState.AreKnownFilesEqual(ValidFolderContent)) { @@ -2899,18 +2922,22 @@ BuildsOperationUpdateFolder::FindScavengeContent(const ScavengeSource& Source, DeletedPaths.insert(DeletedPaths.end(), UpdatedContent.Paths.begin(), UpdatedContent.Paths.end()); if (!DeletedPaths.empty()) { - OutScavengedLocalContent = DeletePathsFromChunkedContent(OutScavengedLocalContent, OutScavengedLookup, DeletedPaths); - OutScavengedLookup = BuildChunkedContentLookup(OutScavengedLocalContent); + OutScavengedLocalContent = + DeletePathsFromChunkedContent(OutScavengedLocalContent, + BuildHashLookup(OutScavengedLocalContent.ChunkedContent.SequenceRawHashes), + ChunkOrderOffsets, + DeletedPaths); } } if (OutScavengedLocalContent.Paths.empty()) { - OutScavengedLookup = {}; OutScavengedLocalContent = {}; return false; } + OutScavengedLookup = BuildChunkedContentLookup(OutScavengedLocalContent); + return true; } @@ -2944,95 +2971,6 @@ BuildsOperationUpdateFolder::FindDownloadedChunk(const IoHash& ChunkHash) return {}; } -FolderContent -BuildsOperationUpdateFolder::GetValidFolderContent(GetFolderContentStatistics& FolderScanStats, - const std::filesystem::path& Path, - std::span<const std::filesystem::path> PathsToCheck, - std::function<void(uint64_t PathCount, uint64_t CompletedPathCount)>&& ProgressCallback) -{ - ZEN_TRACE_CPU("GetValidFolderContent"); - - FolderContent Result; - const uint32_t PathCount = gsl::narrow<uint32_t>(PathsToCheck.size()); - - Result.Paths.resize(PathCount); - Result.RawSizes.resize(PathCount); - Result.Attributes.resize(PathCount); - Result.ModificationTicks.resize(PathCount); - - { - Stopwatch Timer; - auto _ = MakeGuard([&FolderScanStats, &Timer]() { FolderScanStats.ElapsedWallTimeUS = Timer.GetElapsedTimeUs(); }); - - ParallelWork Work(m_AbortFlag, - m_PauseFlag, - ProgressCallback ? WorkerThreadPool::EMode::EnableBacklog : WorkerThreadPool::EMode::DisableBacklog); - std::atomic<uint64_t> CompletedPathCount = 0; - uint32_t PathIndex = 0; - - while (PathIndex < PathCount) - { - uint32_t PathRangeCount = Min(128u, PathCount - PathIndex); - Work.ScheduleWork(m_IOWorkerPool, - [PathIndex, PathRangeCount, &PathsToCheck, &Path, &Result, &CompletedPathCount, &FolderScanStats]( - std::atomic<bool>& AbortFlag) { - if (!AbortFlag) - { - ZEN_TRACE_CPU("Async_ValidateFiles"); - - for (uint32_t PathRangeIndex = PathIndex; PathRangeIndex < PathIndex + PathRangeCount; - PathRangeIndex++) - { - const std::filesystem::path& FilePath = PathsToCheck[PathRangeIndex]; - std::filesystem::path LocalFilePath = (Path / FilePath).make_preferred(); - if (TryGetFileProperties(LocalFilePath, - Result.RawSizes[PathRangeIndex], - Result.ModificationTicks[PathRangeIndex], - Result.Attributes[PathRangeIndex])) - { - Result.Paths[PathRangeIndex] = std::move(FilePath); - FolderScanStats.FoundFileCount++; - FolderScanStats.FoundFileByteCount += Result.RawSizes[PathRangeIndex]; - FolderScanStats.AcceptedFileCount++; - FolderScanStats.AcceptedFileByteCount += Result.RawSizes[PathRangeIndex]; - } - CompletedPathCount++; - } - } - }); - PathIndex += PathRangeCount; - } - Work.Wait(200, [&](bool, bool, ptrdiff_t) { - if (ProgressCallback) - { - ProgressCallback(PathCount, CompletedPathCount.load()); - } - }); - } - - uint32_t WritePathIndex = 0; - for (uint32_t ReadPathIndex = 0; ReadPathIndex < PathCount; ReadPathIndex++) - { - if (!Result.Paths[ReadPathIndex].empty()) - { - if (WritePathIndex < ReadPathIndex) - { - Result.Paths[WritePathIndex] = std::move(Result.Paths[ReadPathIndex]); - Result.RawSizes[WritePathIndex] = Result.RawSizes[ReadPathIndex]; - Result.Attributes[WritePathIndex] = Result.Attributes[ReadPathIndex]; - Result.ModificationTicks[WritePathIndex] = Result.ModificationTicks[ReadPathIndex]; - } - WritePathIndex++; - } - } - - Result.Paths.resize(WritePathIndex); - Result.RawSizes.resize(WritePathIndex); - Result.Attributes.resize(WritePathIndex); - Result.ModificationTicks.resize(WritePathIndex); - return Result; -} - std::vector<const ChunkedContentLookup::ChunkSequenceLocation*> BuildsOperationUpdateFolder::GetRemainingChunkTargets(std::span<const std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters, uint32_t ChunkIndex) |