aboutsummaryrefslogtreecommitdiff
path: root/src/zenremotestore/builds/buildstorageoperations.cpp
diff options
context:
space:
mode:
author: Dan Engelbrecht <[email protected]> 2025-12-19 16:30:03 +0100
committer: GitHub Enterprise <[email protected]> 2025-12-19 16:30:03 +0100
commit: 0bf7531d530f12e0fa2edab70b6bf4693fb041db (patch)
tree: 0f29a872019d5c5b6952ef5e8babde1b6c7cd555 /src/zenremotestore/builds/buildstorageoperations.cpp
parent: 5.7.15 (diff)
downloadzen-0bf7531d530f12e0fa2edab70b6bf4693fb041db.tar.xz
zen-0bf7531d530f12e0fa2edab70b6bf4693fb041db.zip
optimize scavenge (#697)

* optimize FindScavengeContent
* optimize GetValidFolderContent
Diffstat (limited to 'src/zenremotestore/builds/buildstorageoperations.cpp')
-rw-r--r-- src/zenremotestore/builds/buildstorageoperations.cpp 174
1 file changed, 56 insertions, 118 deletions
diff --git a/src/zenremotestore/builds/buildstorageoperations.cpp b/src/zenremotestore/builds/buildstorageoperations.cpp
index f07f410ea..b9f5eb07a 100644
--- a/src/zenremotestore/builds/buildstorageoperations.cpp
+++ b/src/zenremotestore/builds/buildstorageoperations.cpp
@@ -2838,56 +2838,79 @@ BuildsOperationUpdateFolder::FindScavengeContent(const ScavengeSource& Source,
return false;
}
- OutScavengedLookup = BuildChunkedContentLookup(OutScavengedLocalContent);
+ tsl::robin_set<uint32_t> PathIndexesToScavenge;
+ PathIndexesToScavenge.reserve(OutScavengedLocalContent.Paths.size());
+ std::vector<uint32_t> ChunkOrderOffsets = BuildChunkOrderOffset(OutScavengedLocalContent.ChunkedContent.ChunkCounts);
- std::vector<uint32_t> PathIndexesToScavange;
- uint32_t ScavengedStatePathCount = gsl::narrow<uint32_t>(OutScavengedLocalContent.Paths.size());
- PathIndexesToScavange.reserve(ScavengedStatePathCount);
- for (uint32_t ScavengedStatePathIndex = 0; ScavengedStatePathIndex < ScavengedStatePathCount; ScavengedStatePathIndex++)
{
- const IoHash& SequenceHash = OutScavengedLocalContent.RawHashes[ScavengedStatePathIndex];
- if (auto ScavengeSequenceIt = OutScavengedLookup.RawHashToSequenceIndex.find(SequenceHash);
- ScavengeSequenceIt != OutScavengedLookup.RawHashToSequenceIndex.end())
+ tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> RawHashToPathIndex;
+
+ RawHashToPathIndex.reserve(OutScavengedLocalContent.Paths.size());
+ for (uint32_t ScavengedPathIndex = 0; ScavengedPathIndex < OutScavengedLocalContent.RawHashes.size(); ScavengedPathIndex++)
{
- const uint32_t ScavengeSequenceIndex = ScavengeSequenceIt->second;
- if (m_RemoteLookup.RawHashToSequenceIndex.contains(SequenceHash))
+ if (!RawHashToPathIndex.contains(OutScavengedLocalContent.RawHashes[ScavengedPathIndex]))
{
- PathIndexesToScavange.push_back(ScavengedStatePathIndex);
+ RawHashToPathIndex.insert_or_assign(OutScavengedLocalContent.RawHashes[ScavengedPathIndex], ScavengedPathIndex);
}
- else
+ }
+
+ for (uint32_t ScavengeSequenceIndex = 0; ScavengeSequenceIndex < OutScavengedLocalContent.ChunkedContent.SequenceRawHashes.size();
+ ScavengeSequenceIndex++)
+ {
+ const IoHash& SequenceHash = OutScavengedLocalContent.ChunkedContent.SequenceRawHashes[ScavengeSequenceIndex];
+ if (auto It = RawHashToPathIndex.find(SequenceHash); It != RawHashToPathIndex.end())
{
- const uint32_t ScavengeChunkCount = OutScavengedLocalContent.ChunkedContent.ChunkCounts[ScavengeSequenceIndex];
- for (uint32_t ScavengeChunkIndexOffset = 0; ScavengeChunkIndexOffset < ScavengeChunkCount; ScavengeChunkIndexOffset++)
+ uint32_t PathIndex = It->second;
+ if (!PathIndexesToScavenge.contains(PathIndex))
{
- const size_t ScavengeChunkOrderIndex =
- OutScavengedLookup.ChunkSequenceLocationOffset[ScavengeSequenceIndex] + ScavengeChunkIndexOffset;
- const uint32_t ScavengeChunkIndex = OutScavengedLocalContent.ChunkedContent.ChunkOrders[ScavengeChunkOrderIndex];
- const IoHash& ScavengeChunkHash = OutScavengedLocalContent.ChunkedContent.ChunkHashes[ScavengeChunkIndex];
- if (m_RemoteLookup.ChunkHashToChunkIndex.contains(ScavengeChunkHash))
+ if (m_RemoteLookup.RawHashToSequenceIndex.contains(SequenceHash))
{
- PathIndexesToScavange.push_back(ScavengedStatePathIndex);
- break;
+ PathIndexesToScavenge.insert(PathIndex);
+ }
+ else
+ {
+ uint32_t ChunkOrderIndexStart = ChunkOrderOffsets[ScavengeSequenceIndex];
+ const uint32_t ChunkCount = OutScavengedLocalContent.ChunkedContent.ChunkCounts[ScavengeSequenceIndex];
+ for (uint32_t ChunkOrderIndex = 0; ChunkOrderIndex < ChunkCount; ChunkOrderIndex++)
+ {
+ const uint32_t ChunkIndex =
+ OutScavengedLocalContent.ChunkedContent.ChunkOrders[ChunkOrderIndexStart + ChunkOrderIndex];
+ const IoHash& ChunkHash = OutScavengedLocalContent.ChunkedContent.ChunkHashes[ChunkIndex];
+ if (m_RemoteLookup.ChunkHashToChunkIndex.contains(ChunkHash))
+ {
+ PathIndexesToScavenge.insert(PathIndex);
+ break;
+ }
+ }
}
}
}
+ else
+ {
+ ZEN_OPERATION_LOG_WARN(m_LogOutput,
+ "Scavenged state file at '{}' for '{}' is invalid, skipping scavenging for sequence {}",
+ Source.StateFilePath,
+ Source.Path,
+ SequenceHash);
+ }
}
}
- if (PathIndexesToScavange.empty())
+ if (PathIndexesToScavenge.empty())
{
- OutScavengedLookup = {};
OutScavengedLocalContent = {};
return false;
}
std::vector<std::filesystem::path> PathsToScavenge;
- PathsToScavenge.reserve(PathIndexesToScavange.size());
- for (uint32_t ScavengedStatePathIndex : PathIndexesToScavange)
+ PathsToScavenge.reserve(PathIndexesToScavenge.size());
+ for (uint32_t ScavengedStatePathIndex : PathIndexesToScavenge)
{
PathsToScavenge.push_back(OutScavengedLocalContent.Paths[ScavengedStatePathIndex]);
}
- FolderContent ValidFolderContent = GetValidFolderContent(m_ScavengedFolderScanStats, Source.Path, PathsToScavenge, {});
+ FolderContent ValidFolderContent =
+ GetValidFolderContent(m_IOWorkerPool, m_ScavengedFolderScanStats, Source.Path, PathsToScavenge, {}, 0, m_AbortFlag, m_PauseFlag);
if (!LocalFolderState.AreKnownFilesEqual(ValidFolderContent))
{
@@ -2899,18 +2922,22 @@ BuildsOperationUpdateFolder::FindScavengeContent(const ScavengeSource& Source,
DeletedPaths.insert(DeletedPaths.end(), UpdatedContent.Paths.begin(), UpdatedContent.Paths.end());
if (!DeletedPaths.empty())
{
- OutScavengedLocalContent = DeletePathsFromChunkedContent(OutScavengedLocalContent, OutScavengedLookup, DeletedPaths);
- OutScavengedLookup = BuildChunkedContentLookup(OutScavengedLocalContent);
+ OutScavengedLocalContent =
+ DeletePathsFromChunkedContent(OutScavengedLocalContent,
+ BuildHashLookup(OutScavengedLocalContent.ChunkedContent.SequenceRawHashes),
+ ChunkOrderOffsets,
+ DeletedPaths);
}
}
if (OutScavengedLocalContent.Paths.empty())
{
- OutScavengedLookup = {};
OutScavengedLocalContent = {};
return false;
}
+ OutScavengedLookup = BuildChunkedContentLookup(OutScavengedLocalContent);
+
return true;
}
@@ -2944,95 +2971,6 @@ BuildsOperationUpdateFolder::FindDownloadedChunk(const IoHash& ChunkHash)
return {};
}
-FolderContent
-BuildsOperationUpdateFolder::GetValidFolderContent(GetFolderContentStatistics& FolderScanStats,
- const std::filesystem::path& Path,
- std::span<const std::filesystem::path> PathsToCheck,
- std::function<void(uint64_t PathCount, uint64_t CompletedPathCount)>&& ProgressCallback)
-{
- ZEN_TRACE_CPU("GetValidFolderContent");
-
- FolderContent Result;
- const uint32_t PathCount = gsl::narrow<uint32_t>(PathsToCheck.size());
-
- Result.Paths.resize(PathCount);
- Result.RawSizes.resize(PathCount);
- Result.Attributes.resize(PathCount);
- Result.ModificationTicks.resize(PathCount);
-
- {
- Stopwatch Timer;
- auto _ = MakeGuard([&FolderScanStats, &Timer]() { FolderScanStats.ElapsedWallTimeUS = Timer.GetElapsedTimeUs(); });
-
- ParallelWork Work(m_AbortFlag,
- m_PauseFlag,
- ProgressCallback ? WorkerThreadPool::EMode::EnableBacklog : WorkerThreadPool::EMode::DisableBacklog);
- std::atomic<uint64_t> CompletedPathCount = 0;
- uint32_t PathIndex = 0;
-
- while (PathIndex < PathCount)
- {
- uint32_t PathRangeCount = Min(128u, PathCount - PathIndex);
- Work.ScheduleWork(m_IOWorkerPool,
- [PathIndex, PathRangeCount, &PathsToCheck, &Path, &Result, &CompletedPathCount, &FolderScanStats](
- std::atomic<bool>& AbortFlag) {
- if (!AbortFlag)
- {
- ZEN_TRACE_CPU("Async_ValidateFiles");
-
- for (uint32_t PathRangeIndex = PathIndex; PathRangeIndex < PathIndex + PathRangeCount;
- PathRangeIndex++)
- {
- const std::filesystem::path& FilePath = PathsToCheck[PathRangeIndex];
- std::filesystem::path LocalFilePath = (Path / FilePath).make_preferred();
- if (TryGetFileProperties(LocalFilePath,
- Result.RawSizes[PathRangeIndex],
- Result.ModificationTicks[PathRangeIndex],
- Result.Attributes[PathRangeIndex]))
- {
- Result.Paths[PathRangeIndex] = std::move(FilePath);
- FolderScanStats.FoundFileCount++;
- FolderScanStats.FoundFileByteCount += Result.RawSizes[PathRangeIndex];
- FolderScanStats.AcceptedFileCount++;
- FolderScanStats.AcceptedFileByteCount += Result.RawSizes[PathRangeIndex];
- }
- CompletedPathCount++;
- }
- }
- });
- PathIndex += PathRangeCount;
- }
- Work.Wait(200, [&](bool, bool, ptrdiff_t) {
- if (ProgressCallback)
- {
- ProgressCallback(PathCount, CompletedPathCount.load());
- }
- });
- }
-
- uint32_t WritePathIndex = 0;
- for (uint32_t ReadPathIndex = 0; ReadPathIndex < PathCount; ReadPathIndex++)
- {
- if (!Result.Paths[ReadPathIndex].empty())
- {
- if (WritePathIndex < ReadPathIndex)
- {
- Result.Paths[WritePathIndex] = std::move(Result.Paths[ReadPathIndex]);
- Result.RawSizes[WritePathIndex] = Result.RawSizes[ReadPathIndex];
- Result.Attributes[WritePathIndex] = Result.Attributes[ReadPathIndex];
- Result.ModificationTicks[WritePathIndex] = Result.ModificationTicks[ReadPathIndex];
- }
- WritePathIndex++;
- }
- }
-
- Result.Paths.resize(WritePathIndex);
- Result.RawSizes.resize(WritePathIndex);
- Result.Attributes.resize(WritePathIndex);
- Result.ModificationTicks.resize(WritePathIndex);
- return Result;
-}
-
std::vector<const ChunkedContentLookup::ChunkSequenceLocation*>
BuildsOperationUpdateFolder::GetRemainingChunkTargets(std::span<const std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters,
uint32_t ChunkIndex)