about | summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author Dan Engelbrecht <[email protected]> 2025-12-19 16:30:03 +0100
committer GitHub Enterprise <[email protected]> 2025-12-19 16:30:03 +0100
commit 0bf7531d530f12e0fa2edab70b6bf4693fb041db (patch)
tree 0f29a872019d5c5b6952ef5e8babde1b6c7cd555 /src
parent 5.7.15 (diff)
download zen-0bf7531d530f12e0fa2edab70b6bf4693fb041db.tar.xz
zen-0bf7531d530f12e0fa2edab70b6bf4693fb041db.zip
optimize scavenge (#697)
* optimize FindScavengeContent
* optimize GetValidFolderContent
Diffstat (limited to 'src')
-rw-r--r-- src/zen/cmds/builds_cmd.cpp 104
-rw-r--r-- src/zenremotestore/builds/buildstorageoperations.cpp 174
-rw-r--r-- src/zenremotestore/chunking/chunkedcontent.cpp 212
-rw-r--r-- src/zenremotestore/include/zenremotestore/builds/buildstorageoperations.h 5
-rw-r--r-- src/zenremotestore/include/zenremotestore/chunking/chunkedcontent.h 19
5 files changed, 260 insertions, 254 deletions
diff --git a/src/zen/cmds/builds_cmd.cpp b/src/zen/cmds/builds_cmd.cpp
index a2b2fb0f6..25f66e0ee 100644
--- a/src/zen/cmds/builds_cmd.cpp
+++ b/src/zen/cmds/builds_cmd.cpp
@@ -875,87 +875,6 @@ namespace {
}
}
- FolderContent GetValidFolderContent(TransferThreadWorkers& Workers,
- GetFolderContentStatistics& LocalFolderScanStats,
- const std::filesystem::path& Path,
- std::span<const std::filesystem::path> PathsToCheck,
- std::function<void(uint64_t PathCount, uint64_t CompletedPathCount)>&& ProgressCallback)
- {
- ZEN_TRACE_CPU("GetValidFolderContent");
- FolderContent Result;
- const uint32_t PathCount = gsl::narrow<uint32_t>(PathsToCheck.size());
-
- Result.Paths.resize(PathCount);
- Result.RawSizes.resize(PathCount);
- Result.Attributes.resize(PathCount);
- Result.ModificationTicks.resize(PathCount);
-
- {
- Stopwatch Timer;
- auto _ = MakeGuard([&LocalFolderScanStats, &Timer]() { LocalFolderScanStats.ElapsedWallTimeUS = Timer.GetElapsedTimeUs(); });
-
- ParallelWork Work(AbortFlag, PauseFlag, WorkerThreadPool::EMode::EnableBacklog);
- std::atomic<uint64_t> CompletedPathCount = 0;
- uint32_t PathIndex = 0;
-
- while (PathIndex < PathCount)
- {
- uint32_t PathRangeCount = Min(128u, PathCount - PathIndex);
- Work.ScheduleWork(Workers.GetIOWorkerPool(),
- [PathIndex, PathRangeCount, &PathsToCheck, &Path, &Result, &CompletedPathCount, &LocalFolderScanStats](
- std::atomic<bool>&) {
- for (uint32_t PathRangeIndex = PathIndex; PathRangeIndex < PathIndex + PathRangeCount;
- PathRangeIndex++)
- {
- const std::filesystem::path& FilePath = PathsToCheck[PathRangeIndex];
- std::filesystem::path LocalFilePath = (Path / FilePath).make_preferred();
- if (TryGetFileProperties(LocalFilePath,
- Result.RawSizes[PathRangeIndex],
- Result.ModificationTicks[PathRangeIndex],
- Result.Attributes[PathRangeIndex]))
- {
- Result.Paths[PathRangeIndex] = std::move(FilePath);
- LocalFolderScanStats.FoundFileCount++;
- LocalFolderScanStats.FoundFileByteCount += Result.RawSizes[PathRangeIndex];
- LocalFolderScanStats.AcceptedFileCount++;
- LocalFolderScanStats.AcceptedFileByteCount += Result.RawSizes[PathRangeIndex];
- }
- CompletedPathCount++;
- }
- });
- PathIndex += PathRangeCount;
- }
- Work.Wait(200, [&](bool, bool, ptrdiff_t) {
- if (ProgressCallback)
- {
- ProgressCallback(PathCount, CompletedPathCount.load());
- }
- });
- }
-
- uint32_t WritePathIndex = 0;
- for (uint32_t ReadPathIndex = 0; ReadPathIndex < PathCount; ReadPathIndex++)
- {
- if (!Result.Paths[ReadPathIndex].empty())
- {
- if (WritePathIndex < ReadPathIndex)
- {
- Result.Paths[WritePathIndex] = std::move(Result.Paths[ReadPathIndex]);
- Result.RawSizes[WritePathIndex] = Result.RawSizes[ReadPathIndex];
- Result.Attributes[WritePathIndex] = Result.Attributes[ReadPathIndex];
- Result.ModificationTicks[WritePathIndex] = Result.ModificationTicks[ReadPathIndex];
- }
- WritePathIndex++;
- }
- }
-
- Result.Paths.resize(WritePathIndex);
- Result.RawSizes.resize(WritePathIndex);
- Result.Attributes.resize(WritePathIndex);
- Result.ModificationTicks.resize(WritePathIndex);
- return Result;
- }
-
std::string GetCbObjectAsNiceString(CbObjectView Object, std::string_view Prefix, std::string_view Suffix)
{
ExtendableStringBuilder<512> SB;
@@ -1380,7 +1299,7 @@ namespace {
{
ProgressBar ProgressBar(ProgressMode, "Check Files");
FolderState = GetValidFolderContent(
- Workers,
+ Workers.GetIOWorkerPool(),
LocalFolderScanStats,
Path,
PathsToCheck,
@@ -1393,7 +1312,10 @@ namespace {
.RemainingCount = PathCount - CompletedPathCount,
.Status = ProgressBar::State::CalculateStatus(AbortFlag, PauseFlag)},
false);
- });
+ },
+ GetUpdateDelayMS(ProgressMode),
+ AbortFlag,
+ PauseFlag);
ProgressBar.Finish();
}
@@ -1478,7 +1400,7 @@ namespace {
{
ProgressBar ProgressBar(ProgressMode, "Check Known Files");
CurrentLocalFolderState = GetValidFolderContent(
- Workers,
+ Workers.GetIOWorkerPool(),
LocalFolderScanStats,
Path,
SavedLocalState.FolderState.Paths,
@@ -1491,7 +1413,10 @@ namespace {
.RemainingCount = PathCount - CompletedPathCount,
.Status = ProgressBar::State::CalculateStatus(AbortFlag, PauseFlag)},
false);
- });
+ },
+ GetUpdateDelayMS(ProgressMode),
+ AbortFlag,
+ PauseFlag);
ProgressBar.Finish();
}
if (AbortFlag)
@@ -2007,6 +1932,7 @@ namespace {
ZEN_CONSOLE(
"Downloaded build {}, parts:{} in {}\n"
+ " Scavenge: {} (Target: {}, Cache: {}, Others: {})\n"
" Download: {} ({}) {}bits/s{}\n"
" Write: {} ({}) {}B/s{}\n"
" Clean: {}\n"
@@ -2016,6 +1942,14 @@ namespace {
BuildPartString.ToView(),
NiceTimeSpanMs(DownloadTimeMs),
+ NiceTimeSpanMs((Updater.m_CacheMappingStats.CacheScanElapsedWallTimeUs +
+ Updater.m_CacheMappingStats.LocalScanElapsedWallTimeUs +
+ Updater.m_CacheMappingStats.ScavengeElapsedWallTimeUs) /
+ 1000),
+ NiceTimeSpanMs(Updater.m_CacheMappingStats.LocalScanElapsedWallTimeUs / 1000),
+ NiceTimeSpanMs(Updater.m_CacheMappingStats.CacheScanElapsedWallTimeUs / 1000),
+ NiceTimeSpanMs(Updater.m_CacheMappingStats.ScavengeElapsedWallTimeUs / 1000),
+
DownloadCount,
NiceBytes(DownloadByteCount),
NiceNum(GetBytesPerSecond(Updater.m_WriteChunkStats.DownloadTimeUs, DownloadByteCount * 8)),
diff --git a/src/zenremotestore/builds/buildstorageoperations.cpp b/src/zenremotestore/builds/buildstorageoperations.cpp
index f07f410ea..b9f5eb07a 100644
--- a/src/zenremotestore/builds/buildstorageoperations.cpp
+++ b/src/zenremotestore/builds/buildstorageoperations.cpp
@@ -2838,56 +2838,79 @@ BuildsOperationUpdateFolder::FindScavengeContent(const ScavengeSource& Source,
return false;
}
- OutScavengedLookup = BuildChunkedContentLookup(OutScavengedLocalContent);
+ tsl::robin_set<uint32_t> PathIndexesToScavenge;
+ PathIndexesToScavenge.reserve(OutScavengedLocalContent.Paths.size());
+ std::vector<uint32_t> ChunkOrderOffsets = BuildChunkOrderOffset(OutScavengedLocalContent.ChunkedContent.ChunkCounts);
- std::vector<uint32_t> PathIndexesToScavange;
- uint32_t ScavengedStatePathCount = gsl::narrow<uint32_t>(OutScavengedLocalContent.Paths.size());
- PathIndexesToScavange.reserve(ScavengedStatePathCount);
- for (uint32_t ScavengedStatePathIndex = 0; ScavengedStatePathIndex < ScavengedStatePathCount; ScavengedStatePathIndex++)
{
- const IoHash& SequenceHash = OutScavengedLocalContent.RawHashes[ScavengedStatePathIndex];
- if (auto ScavengeSequenceIt = OutScavengedLookup.RawHashToSequenceIndex.find(SequenceHash);
- ScavengeSequenceIt != OutScavengedLookup.RawHashToSequenceIndex.end())
+ tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> RawHashToPathIndex;
+
+ RawHashToPathIndex.reserve(OutScavengedLocalContent.Paths.size());
+ for (uint32_t ScavengedPathIndex = 0; ScavengedPathIndex < OutScavengedLocalContent.RawHashes.size(); ScavengedPathIndex++)
{
- const uint32_t ScavengeSequenceIndex = ScavengeSequenceIt->second;
- if (m_RemoteLookup.RawHashToSequenceIndex.contains(SequenceHash))
+ if (!RawHashToPathIndex.contains(OutScavengedLocalContent.RawHashes[ScavengedPathIndex]))
{
- PathIndexesToScavange.push_back(ScavengedStatePathIndex);
+ RawHashToPathIndex.insert_or_assign(OutScavengedLocalContent.RawHashes[ScavengedPathIndex], ScavengedPathIndex);
}
- else
+ }
+
+ for (uint32_t ScavengeSequenceIndex = 0; ScavengeSequenceIndex < OutScavengedLocalContent.ChunkedContent.SequenceRawHashes.size();
+ ScavengeSequenceIndex++)
+ {
+ const IoHash& SequenceHash = OutScavengedLocalContent.ChunkedContent.SequenceRawHashes[ScavengeSequenceIndex];
+ if (auto It = RawHashToPathIndex.find(SequenceHash); It != RawHashToPathIndex.end())
{
- const uint32_t ScavengeChunkCount = OutScavengedLocalContent.ChunkedContent.ChunkCounts[ScavengeSequenceIndex];
- for (uint32_t ScavengeChunkIndexOffset = 0; ScavengeChunkIndexOffset < ScavengeChunkCount; ScavengeChunkIndexOffset++)
+ uint32_t PathIndex = It->second;
+ if (!PathIndexesToScavenge.contains(PathIndex))
{
- const size_t ScavengeChunkOrderIndex =
- OutScavengedLookup.ChunkSequenceLocationOffset[ScavengeSequenceIndex] + ScavengeChunkIndexOffset;
- const uint32_t ScavengeChunkIndex = OutScavengedLocalContent.ChunkedContent.ChunkOrders[ScavengeChunkOrderIndex];
- const IoHash& ScavengeChunkHash = OutScavengedLocalContent.ChunkedContent.ChunkHashes[ScavengeChunkIndex];
- if (m_RemoteLookup.ChunkHashToChunkIndex.contains(ScavengeChunkHash))
+ if (m_RemoteLookup.RawHashToSequenceIndex.contains(SequenceHash))
{
- PathIndexesToScavange.push_back(ScavengedStatePathIndex);
- break;
+ PathIndexesToScavenge.insert(PathIndex);
+ }
+ else
+ {
+ uint32_t ChunkOrderIndexStart = ChunkOrderOffsets[ScavengeSequenceIndex];
+ const uint32_t ChunkCount = OutScavengedLocalContent.ChunkedContent.ChunkCounts[ScavengeSequenceIndex];
+ for (uint32_t ChunkOrderIndex = 0; ChunkOrderIndex < ChunkCount; ChunkOrderIndex++)
+ {
+ const uint32_t ChunkIndex =
+ OutScavengedLocalContent.ChunkedContent.ChunkOrders[ChunkOrderIndexStart + ChunkOrderIndex];
+ const IoHash& ChunkHash = OutScavengedLocalContent.ChunkedContent.ChunkHashes[ChunkIndex];
+ if (m_RemoteLookup.ChunkHashToChunkIndex.contains(ChunkHash))
+ {
+ PathIndexesToScavenge.insert(PathIndex);
+ break;
+ }
+ }
}
}
}
+ else
+ {
+ ZEN_OPERATION_LOG_WARN(m_LogOutput,
+ "Scavenged state file at '{}' for '{}' is invalid, skipping scavenging for sequence {}",
+ Source.StateFilePath,
+ Source.Path,
+ SequenceHash);
+ }
}
}
- if (PathIndexesToScavange.empty())
+ if (PathIndexesToScavenge.empty())
{
- OutScavengedLookup = {};
OutScavengedLocalContent = {};
return false;
}
std::vector<std::filesystem::path> PathsToScavenge;
- PathsToScavenge.reserve(PathIndexesToScavange.size());
- for (uint32_t ScavengedStatePathIndex : PathIndexesToScavange)
+ PathsToScavenge.reserve(PathIndexesToScavenge.size());
+ for (uint32_t ScavengedStatePathIndex : PathIndexesToScavenge)
{
PathsToScavenge.push_back(OutScavengedLocalContent.Paths[ScavengedStatePathIndex]);
}
- FolderContent ValidFolderContent = GetValidFolderContent(m_ScavengedFolderScanStats, Source.Path, PathsToScavenge, {});
+ FolderContent ValidFolderContent =
+ GetValidFolderContent(m_IOWorkerPool, m_ScavengedFolderScanStats, Source.Path, PathsToScavenge, {}, 0, m_AbortFlag, m_PauseFlag);
if (!LocalFolderState.AreKnownFilesEqual(ValidFolderContent))
{
@@ -2899,18 +2922,22 @@ BuildsOperationUpdateFolder::FindScavengeContent(const ScavengeSource& Source,
DeletedPaths.insert(DeletedPaths.end(), UpdatedContent.Paths.begin(), UpdatedContent.Paths.end());
if (!DeletedPaths.empty())
{
- OutScavengedLocalContent = DeletePathsFromChunkedContent(OutScavengedLocalContent, OutScavengedLookup, DeletedPaths);
- OutScavengedLookup = BuildChunkedContentLookup(OutScavengedLocalContent);
+ OutScavengedLocalContent =
+ DeletePathsFromChunkedContent(OutScavengedLocalContent,
+ BuildHashLookup(OutScavengedLocalContent.ChunkedContent.SequenceRawHashes),
+ ChunkOrderOffsets,
+ DeletedPaths);
}
}
if (OutScavengedLocalContent.Paths.empty())
{
- OutScavengedLookup = {};
OutScavengedLocalContent = {};
return false;
}
+ OutScavengedLookup = BuildChunkedContentLookup(OutScavengedLocalContent);
+
return true;
}
@@ -2944,95 +2971,6 @@ BuildsOperationUpdateFolder::FindDownloadedChunk(const IoHash& ChunkHash)
return {};
}
-FolderContent
-BuildsOperationUpdateFolder::GetValidFolderContent(GetFolderContentStatistics& FolderScanStats,
- const std::filesystem::path& Path,
- std::span<const std::filesystem::path> PathsToCheck,
- std::function<void(uint64_t PathCount, uint64_t CompletedPathCount)>&& ProgressCallback)
-{
- ZEN_TRACE_CPU("GetValidFolderContent");
-
- FolderContent Result;
- const uint32_t PathCount = gsl::narrow<uint32_t>(PathsToCheck.size());
-
- Result.Paths.resize(PathCount);
- Result.RawSizes.resize(PathCount);
- Result.Attributes.resize(PathCount);
- Result.ModificationTicks.resize(PathCount);
-
- {
- Stopwatch Timer;
- auto _ = MakeGuard([&FolderScanStats, &Timer]() { FolderScanStats.ElapsedWallTimeUS = Timer.GetElapsedTimeUs(); });
-
- ParallelWork Work(m_AbortFlag,
- m_PauseFlag,
- ProgressCallback ? WorkerThreadPool::EMode::EnableBacklog : WorkerThreadPool::EMode::DisableBacklog);
- std::atomic<uint64_t> CompletedPathCount = 0;
- uint32_t PathIndex = 0;
-
- while (PathIndex < PathCount)
- {
- uint32_t PathRangeCount = Min(128u, PathCount - PathIndex);
- Work.ScheduleWork(m_IOWorkerPool,
- [PathIndex, PathRangeCount, &PathsToCheck, &Path, &Result, &CompletedPathCount, &FolderScanStats](
- std::atomic<bool>& AbortFlag) {
- if (!AbortFlag)
- {
- ZEN_TRACE_CPU("Async_ValidateFiles");
-
- for (uint32_t PathRangeIndex = PathIndex; PathRangeIndex < PathIndex + PathRangeCount;
- PathRangeIndex++)
- {
- const std::filesystem::path& FilePath = PathsToCheck[PathRangeIndex];
- std::filesystem::path LocalFilePath = (Path / FilePath).make_preferred();
- if (TryGetFileProperties(LocalFilePath,
- Result.RawSizes[PathRangeIndex],
- Result.ModificationTicks[PathRangeIndex],
- Result.Attributes[PathRangeIndex]))
- {
- Result.Paths[PathRangeIndex] = std::move(FilePath);
- FolderScanStats.FoundFileCount++;
- FolderScanStats.FoundFileByteCount += Result.RawSizes[PathRangeIndex];
- FolderScanStats.AcceptedFileCount++;
- FolderScanStats.AcceptedFileByteCount += Result.RawSizes[PathRangeIndex];
- }
- CompletedPathCount++;
- }
- }
- });
- PathIndex += PathRangeCount;
- }
- Work.Wait(200, [&](bool, bool, ptrdiff_t) {
- if (ProgressCallback)
- {
- ProgressCallback(PathCount, CompletedPathCount.load());
- }
- });
- }
-
- uint32_t WritePathIndex = 0;
- for (uint32_t ReadPathIndex = 0; ReadPathIndex < PathCount; ReadPathIndex++)
- {
- if (!Result.Paths[ReadPathIndex].empty())
- {
- if (WritePathIndex < ReadPathIndex)
- {
- Result.Paths[WritePathIndex] = std::move(Result.Paths[ReadPathIndex]);
- Result.RawSizes[WritePathIndex] = Result.RawSizes[ReadPathIndex];
- Result.Attributes[WritePathIndex] = Result.Attributes[ReadPathIndex];
- Result.ModificationTicks[WritePathIndex] = Result.ModificationTicks[ReadPathIndex];
- }
- WritePathIndex++;
- }
- }
-
- Result.Paths.resize(WritePathIndex);
- Result.RawSizes.resize(WritePathIndex);
- Result.Attributes.resize(WritePathIndex);
- Result.ModificationTicks.resize(WritePathIndex);
- return Result;
-}
-
std::vector<const ChunkedContentLookup::ChunkSequenceLocation*>
BuildsOperationUpdateFolder::GetRemainingChunkTargets(std::span<const std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters,
uint32_t ChunkIndex)
diff --git a/src/zenremotestore/chunking/chunkedcontent.cpp b/src/zenremotestore/chunking/chunkedcontent.cpp
index 5f1876908..e8187d348 100644
--- a/src/zenremotestore/chunking/chunkedcontent.cpp
+++ b/src/zenremotestore/chunking/chunkedcontent.cpp
@@ -177,31 +177,6 @@ namespace {
std::string PathCompareString(const std::filesystem::path& Path) { return ToLower(Path.generic_string()); }
- tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> BuildHashLookup(std::span<const IoHash> Hashes)
- {
- tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> Lookup;
- Lookup.reserve(Hashes.size());
- for (uint32_t Index = 0; Index < Hashes.size(); Index++)
- {
- bool IsNew = Lookup.insert_or_assign(Hashes[Index], Index).second;
- ZEN_ASSERT(IsNew);
- }
- return Lookup;
- }
-
- std::vector<uint32_t> BuildChunkOrderOffset(std::span<const uint32_t> ChunkCounts)
- {
- std::vector<uint32_t> ChunkOffsets;
- ChunkOffsets.reserve(ChunkCounts.size());
- uint32_t Offset = 0;
- for (uint32_t SequenceIndex = 0; SequenceIndex < ChunkCounts.size(); SequenceIndex++)
- {
- ChunkOffsets.push_back(Offset);
- Offset += ChunkCounts[SequenceIndex];
- }
- return ChunkOffsets;
- }
-
} // namespace
std::string_view FolderContentSourcePlatformNames[(size_t)SourcePlatform::_Count] = {"Windows"sv, "Linux"sv, "MacOS"sv};
@@ -551,6 +526,134 @@ GetFolderContent(GetFolderContentStatistics& Stats,
return OrderedContent;
}
+FolderContent
+GetValidFolderContent(WorkerThreadPool& WorkerPool,
+ GetFolderContentStatistics& FolderScanStats,
+ const std::filesystem::path& Path,
+ std::span<const std::filesystem::path> PathsToCheck,
+ std::function<void(uint64_t PathCount, uint64_t CompletedPathCount)>&& ProgressCallback,
+ uint32_t ProgressUpdateDelayMS,
+ std::atomic<bool>& AbortFlag,
+ std::atomic<bool>& PauseFlag)
+{
+ ZEN_TRACE_CPU("GetValidFolderContent");
+
+ FolderContent Result;
+ const uint32_t PathCount = gsl::narrow<uint32_t>(PathsToCheck.size());
+
+ Result.Paths.resize(PathCount);
+ Result.RawSizes.resize(PathCount);
+ Result.Attributes.resize(PathCount);
+ Result.ModificationTicks.resize(PathCount);
+
+ {
+ Stopwatch Timer;
+ auto _ = MakeGuard([&FolderScanStats, &Timer]() { FolderScanStats.ElapsedWallTimeUS = Timer.GetElapsedTimeUs(); });
+
+ tsl::robin_map<std::string, uint32_t> PathToPathIndex;
+ PathToPathIndex.reserve(PathsToCheck.size());
+ std::vector<std::filesystem::path> DirectoriesToScan;
+ {
+ tsl::robin_set<std::string> DirectoriesFound;
+ for (size_t PathIndex = 0; PathIndex < PathsToCheck.size(); PathIndex++)
+ {
+ const std::filesystem::path PathToCheck = (Path / PathsToCheck[PathIndex]);
+ const std::string LookupPath = PathToCheck.generic_string();
+ PathToPathIndex.insert_or_assign(LookupPath, PathIndex);
+ std::filesystem::path ParentDirectoryPath = PathToCheck.parent_path();
+ const std::string Directory = ParentDirectoryPath.generic_string();
+ if (DirectoriesFound.insert(Directory).second)
+ {
+ DirectoriesToScan.push_back(ParentDirectoryPath.make_preferred());
+ }
+ }
+ }
+
+ ParallelWork Work(AbortFlag,
+ PauseFlag,
+ ProgressCallback ? WorkerThreadPool::EMode::EnableBacklog : WorkerThreadPool::EMode::DisableBacklog);
+
+ std::atomic<uint64_t> CompletedDirectoryCount = 0;
+ for (size_t DirectoryIndex = 0; DirectoryIndex < DirectoriesToScan.size(); DirectoryIndex++)
+ {
+ Work.ScheduleWork(
+ WorkerPool,
+ [&DirectoriesToScan, DirectoryIndex, &Result, &FolderScanStats, &PathsToCheck, &PathToPathIndex, &CompletedDirectoryCount](
+ std::atomic<bool>& AbortFlag) {
+ if (!AbortFlag)
+ {
+ ZEN_TRACE_CPU("GetValidFolderContent_ScanDirectory");
+
+ const std::filesystem::path ParentDirectoryPath = DirectoriesToScan[DirectoryIndex];
+ try
+ {
+ if (IsDir(ParentDirectoryPath))
+ {
+ DirectoryContent DirContent;
+ GetDirectoryContent(ParentDirectoryPath,
+ DirectoryContentFlags::IncludeFiles | DirectoryContentFlags::IncludeFileSizes |
+ DirectoryContentFlags::IncludeModificationTick |
+ DirectoryContentFlags::IncludeAttributes,
+ DirContent);
+ for (size_t FoundIndex = 0; FoundIndex < DirContent.Files.size(); FoundIndex++)
+ {
+ const std::filesystem::path& FoundPath = DirContent.Files[FoundIndex];
+ if (auto It = PathToPathIndex.find(FoundPath.generic_string()); It != PathToPathIndex.end())
+ {
+ const size_t PathIndex = It->second;
+
+ Result.Paths[PathIndex] = PathsToCheck[PathIndex];
+ Result.RawSizes[PathIndex] = DirContent.FileSizes[FoundIndex];
+ Result.ModificationTicks[PathIndex] = DirContent.FileModificationTicks[FoundIndex];
+ Result.Attributes[PathIndex] = DirContent.FileAttributes[FoundIndex];
+
+ FolderScanStats.FoundFileCount++;
+ FolderScanStats.FoundFileByteCount += Result.RawSizes[PathIndex];
+ FolderScanStats.AcceptedFileCount++;
+ FolderScanStats.AcceptedFileByteCount += Result.RawSizes[PathIndex];
+ }
+ }
+ }
+ }
+ catch (const std::exception& Ex)
+ {
+ ZEN_WARN("Failed checking content of folder '{}', reason: {}", ParentDirectoryPath, Ex.what());
+ }
+ }
+ CompletedDirectoryCount++;
+ });
+ }
+ Work.Wait(ProgressUpdateDelayMS, [&](bool, bool, ptrdiff_t) {
+ if (ProgressCallback)
+ {
+ ProgressCallback(DirectoriesToScan.size(), CompletedDirectoryCount.load());
+ }
+ });
+ }
+
+ uint32_t WritePathIndex = 0;
+ for (uint32_t ReadPathIndex = 0; ReadPathIndex < PathCount; ReadPathIndex++)
+ {
+ if (!Result.Paths[ReadPathIndex].empty())
+ {
+ if (WritePathIndex < ReadPathIndex)
+ {
+ Result.Paths[WritePathIndex] = std::move(Result.Paths[ReadPathIndex]);
+ Result.RawSizes[WritePathIndex] = Result.RawSizes[ReadPathIndex];
+ Result.Attributes[WritePathIndex] = Result.Attributes[ReadPathIndex];
+ Result.ModificationTicks[WritePathIndex] = Result.ModificationTicks[ReadPathIndex];
+ }
+ WritePathIndex++;
+ }
+ }
+
+ Result.Paths.resize(WritePathIndex);
+ Result.RawSizes.resize(WritePathIndex);
+ Result.Attributes.resize(WritePathIndex);
+ Result.ModificationTicks.resize(WritePathIndex);
+ return Result;
+}
+
void
SaveChunkedFolderContentToCompactBinary(const ChunkedFolderContent& Content, CbWriter& Output)
{
@@ -726,9 +829,10 @@ MergeChunkedFolderContents(const ChunkedFolderContent& Base, std::span<const Chu
}
ChunkedFolderContent
-DeletePathsFromChunkedContent(const ChunkedFolderContent& BaseContent,
- const ChunkedContentLookup& BaseContentLookup,
- std::span<const std::filesystem::path> DeletedPaths)
+DeletePathsFromChunkedContent(const ChunkedFolderContent& BaseContent,
+ const tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& RawHashToSequenceIndex,
+ std::vector<uint32_t> SequenceIndexChunkOrderOffset,
+ std::span<const std::filesystem::path> DeletedPaths)
{
ZEN_TRACE_CPU("DeletePathsFromChunkedContent");
@@ -776,8 +880,8 @@ DeletePathsFromChunkedContent(const ChunkedFolderContent& BaseContent,
{
RawHashToSequenceRawHashIndex.insert(
{RawHash, gsl::narrow<uint32_t>(Result.ChunkedContent.SequenceRawHashes.size())});
- const uint32_t SequenceRawHashIndex = BaseContentLookup.RawHashToSequenceIndex.at(RawHash);
- const uint32_t OrderIndexOffset = BaseContentLookup.SequenceIndexChunkOrderOffset[SequenceRawHashIndex];
+ const uint32_t SequenceRawHashIndex = RawHashToSequenceIndex.at(RawHash);
+ const uint32_t OrderIndexOffset = SequenceIndexChunkOrderOffset[SequenceRawHashIndex];
const uint32_t ChunkCount = BaseContent.ChunkedContent.ChunkCounts[SequenceRawHashIndex];
std::span<const uint32_t> OriginalChunkOrder =
@@ -820,8 +924,12 @@ DeletePathsFromChunkedContent(const ChunkedFolderContent& BaseContent, std::span
{
return {};
}
- const ChunkedContentLookup BaseLookup = BuildChunkedContentLookup(BaseContent);
- return DeletePathsFromChunkedContent(BaseContent, BaseLookup, DeletedPaths);
+
+ tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> BaseSequenceHashToSequenceIndex =
+ BuildHashLookup(BaseContent.ChunkedContent.SequenceRawHashes);
+ std::vector<uint32_t> BaseSequenceChunkOrderOffset = BuildChunkOrderOffset(BaseContent.ChunkedContent.ChunkCounts);
+
+ return DeletePathsFromChunkedContent(BaseContent, BaseSequenceHashToSequenceIndex, BaseSequenceChunkOrderOffset, DeletedPaths);
}
bool
@@ -1082,6 +1190,33 @@ ChunkFolderContent(ChunkingStatistics& Stats,
return Result;
}
+tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>
+BuildHashLookup(std::span<const IoHash> Hashes)
+{
+ tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> Lookup;
+ Lookup.reserve(Hashes.size());
+ for (uint32_t Index = 0; Index < Hashes.size(); Index++)
+ {
+ bool IsNew = Lookup.insert_or_assign(Hashes[Index], Index).second;
+ ZEN_ASSERT(IsNew);
+ }
+ return Lookup;
+}
+
+std::vector<uint32_t>
+BuildChunkOrderOffset(std::span<const uint32_t> ChunkCounts)
+{
+ std::vector<uint32_t> ChunkOffsets;
+ ChunkOffsets.reserve(ChunkCounts.size());
+ uint32_t Offset = 0;
+ for (uint32_t SequenceIndex = 0; SequenceIndex < ChunkCounts.size(); SequenceIndex++)
+ {
+ ChunkOffsets.push_back(Offset);
+ Offset += ChunkCounts[SequenceIndex];
+ }
+ return ChunkOffsets;
+}
+
ChunkedContentLookup
BuildChunkedContentLookup(const ChunkedFolderContent& Content)
{
@@ -1096,17 +1231,8 @@ BuildChunkedContentLookup(const ChunkedFolderContent& Content)
ChunkedContentLookup Result;
{
- const uint32_t SequenceRawHashesCount = gsl::narrow<uint32_t>(Content.ChunkedContent.SequenceRawHashes.size());
- Result.RawHashToSequenceIndex.reserve(SequenceRawHashesCount);
- Result.SequenceIndexChunkOrderOffset.reserve(SequenceRawHashesCount);
- uint32_t OrderOffset = 0;
- for (uint32_t SequenceRawHashIndex = 0; SequenceRawHashIndex < Content.ChunkedContent.SequenceRawHashes.size();
- SequenceRawHashIndex++)
- {
- Result.RawHashToSequenceIndex.insert({Content.ChunkedContent.SequenceRawHashes[SequenceRawHashIndex], SequenceRawHashIndex});
- Result.SequenceIndexChunkOrderOffset.push_back(OrderOffset);
- OrderOffset += Content.ChunkedContent.ChunkCounts[SequenceRawHashIndex];
- }
+ Result.SequenceIndexChunkOrderOffset = BuildChunkOrderOffset(Content.ChunkedContent.ChunkCounts);
+ Result.RawHashToSequenceIndex = BuildHashLookup(Content.ChunkedContent.SequenceRawHashes);
}
std::vector<ChunkLocationReference> Locations;
diff --git a/src/zenremotestore/include/zenremotestore/builds/buildstorageoperations.h b/src/zenremotestore/include/zenremotestore/builds/buildstorageoperations.h
index eca654223..d78ee29c1 100644
--- a/src/zenremotestore/include/zenremotestore/builds/buildstorageoperations.h
+++ b/src/zenremotestore/include/zenremotestore/builds/buildstorageoperations.h
@@ -259,11 +259,6 @@ private:
std::filesystem::path FindDownloadedChunk(const IoHash& ChunkHash);
- FolderContent GetValidFolderContent(GetFolderContentStatistics& LocalFolderScanStats,
- const std::filesystem::path& Path,
- std::span<const std::filesystem::path> PathsToCheck,
- std::function<void(uint64_t PathCount, uint64_t CompletedPathCount)>&& ProgressCallback);
-
std::vector<const ChunkedContentLookup::ChunkSequenceLocation*> GetRemainingChunkTargets(
std::span<const std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters,
uint32_t ChunkIndex);
diff --git a/src/zenremotestore/include/zenremotestore/chunking/chunkedcontent.h b/src/zenremotestore/include/zenremotestore/chunking/chunkedcontent.h
index e4be7923a..78f20a727 100644
--- a/src/zenremotestore/include/zenremotestore/chunking/chunkedcontent.h
+++ b/src/zenremotestore/include/zenremotestore/chunking/chunkedcontent.h
@@ -71,6 +71,15 @@ FolderContent GetFolderContent(GetFolderContentStatistics& Stats,
std::function<void(bool IsAborted, std::ptrdiff_t PendingWork)>&& UpdateCallback,
std::atomic<bool>& AbortFlag);
+FolderContent GetValidFolderContent(WorkerThreadPool& WorkerPool,
+ GetFolderContentStatistics& FolderScanStats,
+ const std::filesystem::path& Path,
+ std::span<const std::filesystem::path> PathsToCheck,
+ std::function<void(uint64_t PathCount, uint64_t CompletedPathCount)>&& ProgressCallback,
+ uint32_t ProgressUpdateDelayMS,
+ std::atomic<bool>& AbortFlag,
+ std::atomic<bool>& PauseFlag);
+
struct ChunkedContentData
{
// To describe one asset with a particular RawHash, find the index of the hash in SequenceRawHashes
@@ -116,9 +125,10 @@ void SaveChunkedFolderContentToCompactBinary(const ChunkedFolderContent& Con
ChunkedFolderContent LoadChunkedFolderContentFromCompactBinary(CbObjectView Input);
ChunkedFolderContent MergeChunkedFolderContents(const ChunkedFolderContent& Base, std::span<const ChunkedFolderContent> Overlays);
-ChunkedFolderContent DeletePathsFromChunkedContent(const ChunkedFolderContent& Base,
- const ChunkedContentLookup& BaseContentLookup,
- std::span<const std::filesystem::path> DeletedPaths);
+ChunkedFolderContent DeletePathsFromChunkedContent(const ChunkedFolderContent& Base,
+ const tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& RawHashToSequenceIndex,
+ std::vector<uint32_t> SequenceIndexChunkOrderOffset,
+ std::span<const std::filesystem::path> DeletedPaths);
ChunkedFolderContent DeletePathsFromChunkedContent(const ChunkedFolderContent& Base, std::span<const std::filesystem::path> DeletedPaths);
bool CompareChunkedContent(const ChunkedFolderContent& Lhs, const ChunkedFolderContent& Rhs);
@@ -161,6 +171,9 @@ ChunkedFolderContent ChunkFolderContent(ChunkingStatistics& Stats,
std::atomic<bool>& AbortFlag,
std::atomic<bool>& PauseFlag);
+tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> BuildHashLookup(std::span<const IoHash> Hashes);
+std::vector<uint32_t> BuildChunkOrderOffset(std::span<const uint32_t> ChunkCounts);
+
ChunkedContentLookup BuildChunkedContentLookup(const ChunkedFolderContent& Content);
inline std::pair<size_t, uint32_t>