about | summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: Dan Engelbrecht <[email protected]> 2025-08-11 15:16:49 +0200
committer: GitHub Enterprise <[email protected]> 2025-08-11 15:16:49 +0200
commit: 5199674c75e6ca8aac9e3eac53dd8e932154da22 (patch)
tree: 10b91d48e194207422e260cf4578fee06b0e18e8 /src
parent: list build part content (#462) (diff)
download: zen-5199674c75e6ca8aac9e3eac53dd8e932154da22.tar.xz
download: zen-5199674c75e6ca8aac9e3eac53dd8e932154da22.zip
add filtering to builds download (#463)
- Feature: Added wildcard options for `zen build download`
  - `--wildcard`: Windows-style wildcard (using `*` and `?`) to match file paths to include
  - `--exclude-wildcard`: Windows-style wildcard (using `*` and `?`) to match file paths to exclude. Applied after the `--wildcard` include filter
- Improvement: Removed the early wipe of the target folder for `zen download` to allow for scavenging useful data
Diffstat (limited to 'src')
-rw-r--r-- src/zen/cmds/builds_cmd.cpp 264
1 files changed, 210 insertions, 54 deletions
diff --git a/src/zen/cmds/builds_cmd.cpp b/src/zen/cmds/builds_cmd.cpp
index 8e17ba97c..aa6ae5ea4 100644
--- a/src/zen/cmds/builds_cmd.cpp
+++ b/src/zen/cmds/builds_cmd.cpp
@@ -6138,7 +6138,7 @@ namespace {
Work.Wait(GetUpdateDelayMS(ProgressMode), [&](bool IsAborted, bool IsPaused, std::ptrdiff_t PendingWork) {
ZEN_UNUSED(PendingWork);
- std::string Details = fmt::format("{}/{} scanned. {} paths and {} chunks found for scavanging",
+ std::string Details = fmt::format("{}/{} scanned. {} paths and {} chunks found for scavenging",
PathsScavenged.load(),
ScavengePathCount,
PathsFound.load(),
@@ -8676,6 +8676,8 @@ namespace {
ChunkedFolderContent GetRemoteContent(StorageInstance& Storage,
const Oid& BuildId,
const std::vector<std::pair<Oid, std::string>>& BuildParts,
+ std::string_view IncludeWildcard,
+ std::string_view ExcludeWildcard,
std::unique_ptr<ChunkingController>& OutChunkController,
std::vector<ChunkedFolderContent>& OutPartContents,
std::vector<ChunkBlockDescription>& OutBlockDescriptions,
@@ -8708,6 +8710,8 @@ namespace {
const Oid& BuildId,
const Oid& BuildPartId,
CbObject BuildPartManifest,
+ std::string_view IncludeWildcard,
+ std::string_view ExcludeWildcard,
ChunkedFolderContent& OutRemoteContent,
std::vector<ChunkBlockDescription>& OutBlockDescriptions,
std::vector<IoHash>& OutLooseChunkHashes) {
@@ -8912,6 +8916,36 @@ namespace {
OutRemoteContent.ChunkedContent.ChunkHashes,
OutRemoteContent.ChunkedContent.ChunkRawSizes,
OutRemoteContent.ChunkedContent.ChunkOrders);
+
+ {
+ std::vector<std::filesystem::path> DeletedPaths;
+ for (const std::filesystem::path& RemotePath : OutRemoteContent.Paths)
+ {
+ if (!IncludePath(IncludeWildcard, ExcludeWildcard, RemotePath))
+ {
+ DeletedPaths.push_back(RemotePath);
+ }
+ }
+
+ if (!DeletedPaths.empty())
+ {
+ OutRemoteContent = DeletePathsFromChunkedContent(OutRemoteContent, DeletedPaths);
+
+ tsl::robin_set<IoHash, IoHash::Hasher> UsedLooseChunkHashes;
+ UsedLooseChunkHashes.insert(OutRemoteContent.RawHashes.begin(), OutRemoteContent.RawHashes.end());
+ for (auto It = OutLooseChunkHashes.begin(); It != OutLooseChunkHashes.end();)
+ {
+ if (!UsedLooseChunkHashes.contains(*It))
+ {
+ It = OutLooseChunkHashes.erase(It);
+ }
+ else
+ {
+ It++;
+ }
+ }
+ }
+ }
};
OutPartContents.resize(1);
@@ -8919,6 +8953,8 @@ namespace {
BuildId,
BuildPartId,
BuildPartManifest,
+ IncludeWildcard,
+ ExcludeWildcard,
OutPartContents[0],
OutBlockDescriptions,
OutLooseChunkHashes);
@@ -8950,6 +8986,8 @@ namespace {
BuildId,
OverlayBuildPartId,
OverlayBuildPartManifest,
+ IncludeWildcard,
+ ExcludeWildcard,
OverlayPartContent,
OverlayPartBlockDescriptions,
OverlayPartLooseChunkHashes);
@@ -9004,6 +9042,8 @@ namespace {
const std::filesystem::path& StateFilePath,
ChunkingController& ChunkController,
std::span<const std::filesystem::path> ReferencePaths,
+ std::string_view IncludeWildcard,
+ std::string_view ExcludeWildcard,
FolderContent& OutLocalFolderContent)
{
FolderContent LocalFolderState;
@@ -9016,8 +9056,8 @@ namespace {
ZEN_CONSOLE("Read local state file {} in {}", StateFilePath, NiceTimeSpanMs(ReadStateTimer.GetElapsedTimeMs()));
}
{
- const uint32_t LocalPathCount = gsl::narrow<uint32_t>(ReferencePaths.size());
- const uint32_t RemotePathCount = gsl::narrow<uint32_t>(LocalFolderState.Paths.size());
+ const uint32_t LocalPathCount = gsl::narrow<uint32_t>(LocalFolderState.Paths.size());
+ const uint32_t RemotePathCount = gsl::narrow<uint32_t>(ReferencePaths.size());
std::vector<std::filesystem::path> PathsToCheck;
PathsToCheck.reserve(LocalPathCount + RemotePathCount);
@@ -9027,34 +9067,44 @@ namespace {
for (const std::filesystem::path& LocalPath : LocalFolderState.Paths)
{
- FileSet.insert(LocalPath.generic_string());
- PathsToCheck.push_back(LocalPath);
+ if (IncludePath(IncludeWildcard, ExcludeWildcard, LocalPath))
+ {
+ FileSet.insert(LocalPath.generic_string());
+ PathsToCheck.push_back(LocalPath);
+ }
}
for (const std::filesystem::path& RemotePath : ReferencePaths)
{
- if (FileSet.insert(RemotePath.generic_string()).second)
+ if (IncludePath(IncludeWildcard, ExcludeWildcard, RemotePath))
{
- PathsToCheck.push_back(RemotePath);
+ if (FileSet.insert(RemotePath.generic_string()).second)
+ {
+ PathsToCheck.push_back(RemotePath);
+ }
}
}
- ProgressBar ProgressBar(ProgressMode, "Check Files");
- OutLocalFolderContent = GetValidFolderContent(
- LocalFolderScanStats,
- Path,
- PathsToCheck,
- [&ProgressBar, &LocalFolderScanStats](uint64_t PathCount, uint64_t CompletedPathCount) {
- std::string Details =
- fmt::format("{}/{} checked, {} found", CompletedPathCount, PathCount, LocalFolderScanStats.FoundFileCount.load());
- ProgressBar.UpdateState({.Task = "Checking files ",
- .Details = Details,
- .TotalCount = PathCount,
- .RemainingCount = PathCount - CompletedPathCount,
- .Status = ProgressBar::State::CalculateStatus(AbortFlag, PauseFlag)},
- false);
- });
- ProgressBar.Finish();
+ {
+ ProgressBar ProgressBar(ProgressMode, "Check Files");
+ OutLocalFolderContent =
+ GetValidFolderContent(LocalFolderScanStats,
+ Path,
+ PathsToCheck,
+ [&ProgressBar, &LocalFolderScanStats](uint64_t PathCount, uint64_t CompletedPathCount) {
+ std::string Details = fmt::format("{}/{} checked, {} found",
+ CompletedPathCount,
+ PathCount,
+ LocalFolderScanStats.FoundFileCount.load());
+ ProgressBar.UpdateState({.Task = "Checking files ",
+ .Details = Details,
+ .TotalCount = PathCount,
+ .RemainingCount = PathCount - CompletedPathCount,
+ .Status = ProgressBar::State::CalculateStatus(AbortFlag, PauseFlag)},
+ false);
+ });
+ ProgressBar.Finish();
+ }
if (AbortFlag)
{
return {};
@@ -9067,7 +9117,7 @@ namespace {
{
if (!LocalFolderState.AreKnownFilesEqual(OutLocalFolderContent))
{
- const size_t LocaStatePathCount = LocalFolderState.Paths.size();
+ const size_t LocalStatePathCount = LocalFolderState.Paths.size();
std::vector<std::filesystem::path> DeletedPaths;
FolderContent UpdatedContent = GetUpdatedContent(LocalFolderState, OutLocalFolderContent, DeletedPaths);
if (!DeletedPaths.empty())
@@ -9080,7 +9130,7 @@ namespace {
ZEN_CONSOLE("Updating state, {} local files deleted and {} local files updated out of {}",
DeletedPaths.size(),
UpdatedContent.Paths.size(),
- LocaStatePathCount);
+ LocalStatePathCount);
}
if (UpdatedContent.Paths.size() > 0)
{
@@ -9150,6 +9200,85 @@ namespace {
LocalContent = DeletePathsFromChunkedContent(LocalContent, DeletedPaths);
}
}
+
+ // Check files that are present in current folder state that matches the ReferencePaths but not known to the local state
+ {
+ FolderContent UpdatedContent;
+
+ tsl::robin_set<std::string> LocalPathIndexLookup;
+ for (const std::filesystem::path& LocalPath : LocalContent.Paths)
+ {
+ LocalPathIndexLookup.insert(LocalPath.generic_string());
+ }
+
+ tsl::robin_set<std::string> RemotePathIndexLookup;
+ for (const std::filesystem::path& RemotePath : ReferencePaths)
+ {
+ RemotePathIndexLookup.insert(RemotePath.generic_string());
+ }
+
+ for (uint32_t LocalFolderPathIndex = 0; LocalFolderPathIndex < OutLocalFolderContent.Paths.size(); LocalFolderPathIndex++)
+ {
+ const std::filesystem::path& LocalFolderPath = OutLocalFolderContent.Paths[LocalFolderPathIndex];
+ const std::string GenericLocalFolderPath = LocalFolderPath.generic_string();
+ if (RemotePathIndexLookup.contains(GenericLocalFolderPath))
+ {
+ if (!LocalPathIndexLookup.contains(GenericLocalFolderPath))
+ {
+ UpdatedContent.Paths.push_back(LocalFolderPath);
+ UpdatedContent.RawSizes.push_back(OutLocalFolderContent.RawSizes[LocalFolderPathIndex]);
+ UpdatedContent.Attributes.push_back(OutLocalFolderContent.Attributes[LocalFolderPathIndex]);
+ UpdatedContent.ModificationTicks.push_back(OutLocalFolderContent.ModificationTicks[LocalFolderPathIndex]);
+ }
+ }
+ }
+
+ if (UpdatedContent.Paths.size() > 0)
+ {
+ uint64_t ByteCountToScan = 0;
+ for (const uint64_t RawSize : UpdatedContent.RawSizes)
+ {
+ ByteCountToScan += RawSize;
+ }
+ ProgressBar ProgressBar(ProgressMode, "Scan Files");
+ FilteredRate FilteredBytesHashed;
+ FilteredBytesHashed.Start();
+ ChunkedFolderContent UpdatedLocalContent = ChunkFolderContent(
+ ChunkingStats,
+ GetIOWorkerPool(),
+ Path,
+ UpdatedContent,
+ ChunkController,
+ GetUpdateDelayMS(ProgressMode),
+ [&](bool IsAborted, bool IsPaused, std::ptrdiff_t) {
+ FilteredBytesHashed.Update(ChunkingStats.BytesHashed.load());
+ std::string Details = fmt::format("{}/{} ({}/{}, {}B/s) scanned, {} ({}) chunks found",
+ ChunkingStats.FilesProcessed.load(),
+ UpdatedContent.Paths.size(),
+ NiceBytes(ChunkingStats.BytesHashed.load()),
+ NiceBytes(ByteCountToScan),
+ NiceNum(FilteredBytesHashed.GetCurrent()),
+ ChunkingStats.UniqueChunksFound.load(),
+ NiceBytes(ChunkingStats.UniqueBytesFound.load()));
+ ProgressBar.UpdateState({.Task = "Scanning files ",
+ .Details = Details,
+ .TotalCount = ByteCountToScan,
+ .RemainingCount = ByteCountToScan - ChunkingStats.BytesHashed.load(),
+ .Status = ProgressBar::State::CalculateStatus(IsAborted, IsPaused)},
+ false);
+ },
+ AbortFlag,
+ PauseFlag);
+
+ FilteredBytesHashed.Stop();
+ ProgressBar.Finish();
+ if (!AbortFlag)
+ {
+ LocalContent = MergeChunkedFolderContents(LocalContent, {{UpdatedLocalContent}});
+ }
+ }
+ }
+
ScanContent = false;
}
@@ -9233,6 +9362,8 @@ namespace {
ZenStateFilePath(Path / ZenFolderName),
ChunkController,
Content.Paths,
+ {},
+ {},
_);
const uint64_t TotalRawSize = std::accumulate(Result.RawSizes.begin(), Result.RawSizes.end(), std::uint64_t(0));
@@ -9265,7 +9396,9 @@ namespace {
bool WipeTargetFolder,
bool PostDownloadVerify,
bool PrimeCacheOnly,
- bool EnableScavenging)
+ bool EnableScavenging,
+ std::string_view IncludeWildcard,
+ std::string_view ExcludeWildcard)
{
ZEN_TRACE_CPU("DownloadFolder");
@@ -9311,8 +9444,15 @@ namespace {
ProgressBar::SetLogOperationProgress(ProgressMode, TaskSteps::CompareState, TaskSteps::StepCount);
- ChunkedFolderContent RemoteContent =
- GetRemoteContent(Storage, BuildId, AllBuildParts, ChunkController, PartContents, BlockDescriptions, LooseChunkHashes);
+ ChunkedFolderContent RemoteContent = GetRemoteContent(Storage,
+ BuildId,
+ AllBuildParts,
+ IncludeWildcard,
+ ExcludeWildcard,
+ ChunkController,
+ PartContents,
+ BlockDescriptions,
+ LooseChunkHashes);
const std::uint64_t LargeAttachmentSize = AllowMultiparts ? PreferredMultipartChunkSize * 4u : (std::uint64_t)-1;
GetFolderContentStatistics LocalFolderScanStats;
@@ -9323,22 +9463,21 @@ namespace {
{
if (IsDir(Path))
{
- if (!WipeTargetFolder)
+ if (!ChunkController && !IsQuiet)
{
- if (!ChunkController && !IsQuiet)
- {
- ZEN_CONSOLE("Warning: Unspecified chunking algorith, using default");
- ChunkController = CreateChunkingControllerWithFixedChunking(ChunkingControllerWithFixedChunkingSettings{});
- }
-
- LocalContent = GetLocalContent(LocalFolderScanStats,
- ChunkingStats,
- Path,
- ZenStateFilePath(ZenFolderPath),
- *ChunkController,
- RemoteContent.Paths,
- LocalFolderContent);
+ ZEN_CONSOLE("Warning: Unspecified chunking algorith, using default");
+ ChunkController = CreateChunkingControllerWithFixedChunking(ChunkingControllerWithFixedChunkingSettings{});
}
+
+ LocalContent = GetLocalContent(LocalFolderScanStats,
+ ChunkingStats,
+ Path,
+ ZenStateFilePath(ZenFolderPath),
+ *ChunkController,
+ RemoteContent.Paths,
+ IncludeWildcard,
+ ExcludeWildcard,
+ LocalFolderContent);
}
else
{
@@ -9349,7 +9488,6 @@ namespace {
{
return;
}
-
auto CompareContent = [](const ChunkedFolderContent& Lhs, const ChunkedFolderContent& Rhs) {
tsl::robin_map<std::string, size_t> RhsPathToIndex;
const size_t RhsPathCount = Rhs.Paths.size();
@@ -9392,7 +9530,7 @@ namespace {
return true;
};
- if (CompareContent(RemoteContent, LocalContent))
+ if (CompareContent(RemoteContent, LocalContent) && !WipeTargetFolder)
{
if (!IsQuiet)
{
@@ -10133,7 +10271,7 @@ BuildsCommand::BuildsCommand()
AddCacheOptions(m_DownloadOptions);
AddZenFolderOptions(m_DownloadOptions);
AddWorkerOptions(m_DownloadOptions);
- // TODO: AddWildcardOptions(m_DownloadOptions);
+ AddWildcardOptions(m_DownloadOptions);
m_DownloadOptions.add_option("cache",
"",
@@ -11348,7 +11486,9 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv)
m_Clean,
m_PostDownloadVerify,
m_PrimeCacheOnly,
- m_EnableScavenging);
+ m_EnableScavenging,
+ m_IncludeWildcard,
+ m_ExcludeWildcard);
return AbortFlag ? 11 : 0;
}
@@ -11552,7 +11692,9 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv)
BuildIdString == m_BuildIds.front(),
true,
false,
- m_EnableScavenging);
+ m_EnableScavenging,
+ ""sv,
+ ""sv);
if (AbortFlag)
{
ZEN_CONSOLE("Download cancelled");
@@ -11728,7 +11870,9 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv)
true,
true,
false,
- m_EnableScavenging);
+ m_EnableScavenging,
+ ""sv,
+ ""sv);
if (AbortFlag)
{
ZEN_CONSOLE("Download failed.");
@@ -11752,7 +11896,9 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv)
false,
true,
false,
- m_EnableScavenging);
+ m_EnableScavenging,
+ ""sv,
+ ""sv);
if (AbortFlag)
{
ZEN_CONSOLE("Re-download failed. (identical target)");
@@ -11870,7 +12016,9 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv)
false,
true,
false,
- m_EnableScavenging);
+ m_EnableScavenging,
+ ""sv,
+ ""sv);
if (AbortFlag)
{
ZEN_CONSOLE("Re-download failed. (scrambled target)");
@@ -11922,7 +12070,9 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv)
false,
true,
false,
- m_EnableScavenging);
+ m_EnableScavenging,
+ ""sv,
+ ""sv);
if (AbortFlag)
{
ZEN_CONSOLE("Re-download failed.");
@@ -11942,7 +12092,9 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv)
false,
true,
false,
- m_EnableScavenging);
+ m_EnableScavenging,
+ ""sv,
+ ""sv);
if (AbortFlag)
{
ZEN_CONSOLE("Re-download failed.");
@@ -11962,7 +12114,9 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv)
false,
true,
false,
- m_EnableScavenging);
+ m_EnableScavenging,
+ ""sv,
+ ""sv);
if (AbortFlag)
{
ZEN_CONSOLE("Re-download failed.");
@@ -11982,7 +12136,9 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv)
false,
true,
false,
- m_EnableScavenging);
+ m_EnableScavenging,
+ ""sv,
+ ""sv);
if (AbortFlag)
{
ZEN_CONSOLE("Re-download failed.");