diff options
| author | Dan Engelbrecht <[email protected]> | 2025-08-11 15:16:49 +0200 |
|---|---|---|
| committer | GitHub Enterprise <[email protected]> | 2025-08-11 15:16:49 +0200 |
| commit | 5199674c75e6ca8aac9e3eac53dd8e932154da22 (patch) | |
| tree | 10b91d48e194207422e260cf4578fee06b0e18e8 /src | |
| parent | list build part content (#462) (diff) | |
| download | zen-5199674c75e6ca8aac9e3eac53dd8e932154da22.tar.xz zen-5199674c75e6ca8aac9e3eac53dd8e932154da22.zip | |
add filtering to builds download (#463)
- Feature: Added wildcard options for `zen build download`
- `--wildcard` Windows-style wildcard (using `*` and `?`) to match file paths to include
- `--exclude-wildcard` Windows-style wildcard (using `*` and `?`) to match file paths to exclude. Applied after the `--wildcard` include filter
- Improvement: Remove early wipe of target folder for `zen download` to allow for scavenging useful data
Diffstat (limited to 'src')
| -rw-r--r-- | src/zen/cmds/builds_cmd.cpp | 264 |
1 files changed, 210 insertions, 54 deletions
diff --git a/src/zen/cmds/builds_cmd.cpp b/src/zen/cmds/builds_cmd.cpp index 8e17ba97c..aa6ae5ea4 100644 --- a/src/zen/cmds/builds_cmd.cpp +++ b/src/zen/cmds/builds_cmd.cpp @@ -6138,7 +6138,7 @@ namespace { Work.Wait(GetUpdateDelayMS(ProgressMode), [&](bool IsAborted, bool IsPaused, std::ptrdiff_t PendingWork) { ZEN_UNUSED(PendingWork); - std::string Details = fmt::format("{}/{} scanned. {} paths and {} chunks found for scavanging", + std::string Details = fmt::format("{}/{} scanned. {} paths and {} chunks found for scavenging", PathsScavenged.load(), ScavengePathCount, PathsFound.load(), @@ -8676,6 +8676,8 @@ namespace { ChunkedFolderContent GetRemoteContent(StorageInstance& Storage, const Oid& BuildId, const std::vector<std::pair<Oid, std::string>>& BuildParts, + std::string_view IncludeWildcard, + std::string_view ExcludeWildcard, std::unique_ptr<ChunkingController>& OutChunkController, std::vector<ChunkedFolderContent>& OutPartContents, std::vector<ChunkBlockDescription>& OutBlockDescriptions, @@ -8708,6 +8710,8 @@ namespace { const Oid& BuildId, const Oid& BuildPartId, CbObject BuildPartManifest, + std::string_view IncludeWildcard, + std::string_view ExcludeWildcard, ChunkedFolderContent& OutRemoteContent, std::vector<ChunkBlockDescription>& OutBlockDescriptions, std::vector<IoHash>& OutLooseChunkHashes) { @@ -8912,6 +8916,36 @@ namespace { OutRemoteContent.ChunkedContent.ChunkHashes, OutRemoteContent.ChunkedContent.ChunkRawSizes, OutRemoteContent.ChunkedContent.ChunkOrders); + + { + std::vector<std::filesystem::path> DeletedPaths; + for (const std::filesystem::path& RemotePath : OutRemoteContent.Paths) + { + if (!IncludePath(IncludeWildcard, ExcludeWildcard, RemotePath)) + { + DeletedPaths.push_back(RemotePath); + } + } + + if (!DeletedPaths.empty()) + { + OutRemoteContent = DeletePathsFromChunkedContent(OutRemoteContent, DeletedPaths); + + tsl::robin_set<IoHash, IoHash::Hasher> UsedLooseChunkHashes; + 
UsedLooseChunkHashes.insert(OutRemoteContent.RawHashes.begin(), OutRemoteContent.RawHashes.end()); + for (auto It = OutLooseChunkHashes.begin(); It != OutLooseChunkHashes.end();) + { + if (!UsedLooseChunkHashes.contains(*It)) + { + It = OutLooseChunkHashes.erase(It); + } + else + { + It++; + } + } + } + } }; OutPartContents.resize(1); @@ -8919,6 +8953,8 @@ namespace { BuildId, BuildPartId, BuildPartManifest, + IncludeWildcard, + ExcludeWildcard, OutPartContents[0], OutBlockDescriptions, OutLooseChunkHashes); @@ -8950,6 +8986,8 @@ namespace { BuildId, OverlayBuildPartId, OverlayBuildPartManifest, + IncludeWildcard, + ExcludeWildcard, OverlayPartContent, OverlayPartBlockDescriptions, OverlayPartLooseChunkHashes); @@ -9004,6 +9042,8 @@ namespace { const std::filesystem::path& StateFilePath, ChunkingController& ChunkController, std::span<const std::filesystem::path> ReferencePaths, + std::string_view IncludeWildcard, + std::string_view ExcludeWildcard, FolderContent& OutLocalFolderContent) { FolderContent LocalFolderState; @@ -9016,8 +9056,8 @@ namespace { ZEN_CONSOLE("Read local state file {} in {}", StateFilePath, NiceTimeSpanMs(ReadStateTimer.GetElapsedTimeMs())); } { - const uint32_t LocalPathCount = gsl::narrow<uint32_t>(ReferencePaths.size()); - const uint32_t RemotePathCount = gsl::narrow<uint32_t>(LocalFolderState.Paths.size()); + const uint32_t LocalPathCount = gsl::narrow<uint32_t>(LocalFolderState.Paths.size()); + const uint32_t RemotePathCount = gsl::narrow<uint32_t>(ReferencePaths.size()); std::vector<std::filesystem::path> PathsToCheck; PathsToCheck.reserve(LocalPathCount + RemotePathCount); @@ -9027,34 +9067,44 @@ namespace { for (const std::filesystem::path& LocalPath : LocalFolderState.Paths) { - FileSet.insert(LocalPath.generic_string()); - PathsToCheck.push_back(LocalPath); + if (IncludePath(IncludeWildcard, ExcludeWildcard, LocalPath)) + { + FileSet.insert(LocalPath.generic_string()); + PathsToCheck.push_back(LocalPath); + } } for (const 
std::filesystem::path& RemotePath : ReferencePaths) { - if (FileSet.insert(RemotePath.generic_string()).second) + if (IncludePath(IncludeWildcard, ExcludeWildcard, RemotePath)) { - PathsToCheck.push_back(RemotePath); + if (FileSet.insert(RemotePath.generic_string()).second) + { + PathsToCheck.push_back(RemotePath); + } } } - ProgressBar ProgressBar(ProgressMode, "Check Files"); - OutLocalFolderContent = GetValidFolderContent( - LocalFolderScanStats, - Path, - PathsToCheck, - [&ProgressBar, &LocalFolderScanStats](uint64_t PathCount, uint64_t CompletedPathCount) { - std::string Details = - fmt::format("{}/{} checked, {} found", CompletedPathCount, PathCount, LocalFolderScanStats.FoundFileCount.load()); - ProgressBar.UpdateState({.Task = "Checking files ", - .Details = Details, - .TotalCount = PathCount, - .RemainingCount = PathCount - CompletedPathCount, - .Status = ProgressBar::State::CalculateStatus(AbortFlag, PauseFlag)}, - false); - }); - ProgressBar.Finish(); + { + ProgressBar ProgressBar(ProgressMode, "Check Files"); + OutLocalFolderContent = + GetValidFolderContent(LocalFolderScanStats, + Path, + PathsToCheck, + [&ProgressBar, &LocalFolderScanStats](uint64_t PathCount, uint64_t CompletedPathCount) { + std::string Details = fmt::format("{}/{} checked, {} found", + CompletedPathCount, + PathCount, + LocalFolderScanStats.FoundFileCount.load()); + ProgressBar.UpdateState({.Task = "Checking files ", + .Details = Details, + .TotalCount = PathCount, + .RemainingCount = PathCount - CompletedPathCount, + .Status = ProgressBar::State::CalculateStatus(AbortFlag, PauseFlag)}, + false); + }); + ProgressBar.Finish(); + } if (AbortFlag) { return {}; @@ -9067,7 +9117,7 @@ namespace { { if (!LocalFolderState.AreKnownFilesEqual(OutLocalFolderContent)) { - const size_t LocaStatePathCount = LocalFolderState.Paths.size(); + const size_t LocalStatePathCount = LocalFolderState.Paths.size(); std::vector<std::filesystem::path> DeletedPaths; FolderContent UpdatedContent = 
GetUpdatedContent(LocalFolderState, OutLocalFolderContent, DeletedPaths); if (!DeletedPaths.empty()) @@ -9080,7 +9130,7 @@ namespace { ZEN_CONSOLE("Updating state, {} local files deleted and {} local files updated out of {}", DeletedPaths.size(), UpdatedContent.Paths.size(), - LocaStatePathCount); + LocalStatePathCount); } if (UpdatedContent.Paths.size() > 0) { @@ -9150,6 +9200,85 @@ namespace { LocalContent = DeletePathsFromChunkedContent(LocalContent, DeletedPaths); } } + + // Check files that are present in current folder state that matches the ReferencePaths but not known to the local state + { + FolderContent UpdatedContent; + + tsl::robin_set<std::string> LocalPathIndexLookup; + for (const std::filesystem::path& LocalPath : LocalContent.Paths) + { + LocalPathIndexLookup.insert(LocalPath.generic_string()); + } + + tsl::robin_set<std::string> RemotePathIndexLookup; + for (const std::filesystem::path& RemotePath : ReferencePaths) + { + RemotePathIndexLookup.insert(RemotePath.generic_string()); + } + + for (uint32_t LocalFolderPathIndex = 0; LocalFolderPathIndex < OutLocalFolderContent.Paths.size(); LocalFolderPathIndex++) + { + const std::filesystem::path& LocalFolderPath = OutLocalFolderContent.Paths[LocalFolderPathIndex]; + const std::string GenericLocalFolderPath = LocalFolderPath.generic_string(); + if (RemotePathIndexLookup.contains(GenericLocalFolderPath)) + { + if (!LocalPathIndexLookup.contains(GenericLocalFolderPath)) + { + UpdatedContent.Paths.push_back(LocalFolderPath); + UpdatedContent.RawSizes.push_back(OutLocalFolderContent.RawSizes[LocalFolderPathIndex]); + UpdatedContent.Attributes.push_back(OutLocalFolderContent.Attributes[LocalFolderPathIndex]); + UpdatedContent.ModificationTicks.push_back(OutLocalFolderContent.ModificationTicks[LocalFolderPathIndex]); + } + } + } + + if (UpdatedContent.Paths.size() > 0) + { + uint64_t ByteCountToScan = 0; + for (const uint64_t RawSize : UpdatedContent.RawSizes) + { + ByteCountToScan += RawSize; + } + 
ProgressBar ProgressBar(ProgressMode, "Scan Files"); + FilteredRate FilteredBytesHashed; + FilteredBytesHashed.Start(); + ChunkedFolderContent UpdatedLocalContent = ChunkFolderContent( + ChunkingStats, + GetIOWorkerPool(), + Path, + UpdatedContent, + ChunkController, + GetUpdateDelayMS(ProgressMode), + [&](bool IsAborted, bool IsPaused, std::ptrdiff_t) { + FilteredBytesHashed.Update(ChunkingStats.BytesHashed.load()); + std::string Details = fmt::format("{}/{} ({}/{}, {}B/s) scanned, {} ({}) chunks found", + ChunkingStats.FilesProcessed.load(), + UpdatedContent.Paths.size(), + NiceBytes(ChunkingStats.BytesHashed.load()), + NiceBytes(ByteCountToScan), + NiceNum(FilteredBytesHashed.GetCurrent()), + ChunkingStats.UniqueChunksFound.load(), + NiceBytes(ChunkingStats.UniqueBytesFound.load())); + ProgressBar.UpdateState({.Task = "Scanning files ", + .Details = Details, + .TotalCount = ByteCountToScan, + .RemainingCount = ByteCountToScan - ChunkingStats.BytesHashed.load(), + .Status = ProgressBar::State::CalculateStatus(IsAborted, IsPaused)}, + false); + }, + AbortFlag, + PauseFlag); + + FilteredBytesHashed.Stop(); + ProgressBar.Finish(); + if (!AbortFlag) + { + LocalContent = MergeChunkedFolderContents(LocalContent, {{UpdatedLocalContent}}); + } + } + } + ScanContent = false; } @@ -9233,6 +9362,8 @@ namespace { ZenStateFilePath(Path / ZenFolderName), ChunkController, Content.Paths, + {}, + {}, _); const uint64_t TotalRawSize = std::accumulate(Result.RawSizes.begin(), Result.RawSizes.end(), std::uint64_t(0)); @@ -9265,7 +9396,9 @@ namespace { bool WipeTargetFolder, bool PostDownloadVerify, bool PrimeCacheOnly, - bool EnableScavenging) + bool EnableScavenging, + std::string_view IncludeWildcard, + std::string_view ExcludeWildcard) { ZEN_TRACE_CPU("DownloadFolder"); @@ -9311,8 +9444,15 @@ namespace { ProgressBar::SetLogOperationProgress(ProgressMode, TaskSteps::CompareState, TaskSteps::StepCount); - ChunkedFolderContent RemoteContent = - GetRemoteContent(Storage, BuildId, 
AllBuildParts, ChunkController, PartContents, BlockDescriptions, LooseChunkHashes); + ChunkedFolderContent RemoteContent = GetRemoteContent(Storage, + BuildId, + AllBuildParts, + IncludeWildcard, + ExcludeWildcard, + ChunkController, + PartContents, + BlockDescriptions, + LooseChunkHashes); const std::uint64_t LargeAttachmentSize = AllowMultiparts ? PreferredMultipartChunkSize * 4u : (std::uint64_t)-1; GetFolderContentStatistics LocalFolderScanStats; @@ -9323,22 +9463,21 @@ namespace { { if (IsDir(Path)) { - if (!WipeTargetFolder) + if (!ChunkController && !IsQuiet) { - if (!ChunkController && !IsQuiet) - { - ZEN_CONSOLE("Warning: Unspecified chunking algorith, using default"); - ChunkController = CreateChunkingControllerWithFixedChunking(ChunkingControllerWithFixedChunkingSettings{}); - } - - LocalContent = GetLocalContent(LocalFolderScanStats, - ChunkingStats, - Path, - ZenStateFilePath(ZenFolderPath), - *ChunkController, - RemoteContent.Paths, - LocalFolderContent); + ZEN_CONSOLE("Warning: Unspecified chunking algorith, using default"); + ChunkController = CreateChunkingControllerWithFixedChunking(ChunkingControllerWithFixedChunkingSettings{}); } + + LocalContent = GetLocalContent(LocalFolderScanStats, + ChunkingStats, + Path, + ZenStateFilePath(ZenFolderPath), + *ChunkController, + RemoteContent.Paths, + IncludeWildcard, + ExcludeWildcard, + LocalFolderContent); } else { @@ -9349,7 +9488,6 @@ namespace { { return; } - auto CompareContent = [](const ChunkedFolderContent& Lhs, const ChunkedFolderContent& Rhs) { tsl::robin_map<std::string, size_t> RhsPathToIndex; const size_t RhsPathCount = Rhs.Paths.size(); @@ -9392,7 +9530,7 @@ namespace { return true; }; - if (CompareContent(RemoteContent, LocalContent)) + if (CompareContent(RemoteContent, LocalContent) && !WipeTargetFolder) { if (!IsQuiet) { @@ -10133,7 +10271,7 @@ BuildsCommand::BuildsCommand() AddCacheOptions(m_DownloadOptions); AddZenFolderOptions(m_DownloadOptions); AddWorkerOptions(m_DownloadOptions); - 
// TODO: AddWildcardOptions(m_DownloadOptions); + AddWildcardOptions(m_DownloadOptions); m_DownloadOptions.add_option("cache", "", @@ -11348,7 +11486,9 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) m_Clean, m_PostDownloadVerify, m_PrimeCacheOnly, - m_EnableScavenging); + m_EnableScavenging, + m_IncludeWildcard, + m_ExcludeWildcard); return AbortFlag ? 11 : 0; } @@ -11552,7 +11692,9 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) BuildIdString == m_BuildIds.front(), true, false, - m_EnableScavenging); + m_EnableScavenging, + ""sv, + ""sv); if (AbortFlag) { ZEN_CONSOLE("Download cancelled"); @@ -11728,7 +11870,9 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) true, true, false, - m_EnableScavenging); + m_EnableScavenging, + ""sv, + ""sv); if (AbortFlag) { ZEN_CONSOLE("Download failed."); @@ -11752,7 +11896,9 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) false, true, false, - m_EnableScavenging); + m_EnableScavenging, + ""sv, + ""sv); if (AbortFlag) { ZEN_CONSOLE("Re-download failed. (identical target)"); @@ -11870,7 +12016,9 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) false, true, false, - m_EnableScavenging); + m_EnableScavenging, + ""sv, + ""sv); if (AbortFlag) { ZEN_CONSOLE("Re-download failed. 
(scrambled target)"); @@ -11922,7 +12070,9 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) false, true, false, - m_EnableScavenging); + m_EnableScavenging, + ""sv, + ""sv); if (AbortFlag) { ZEN_CONSOLE("Re-download failed."); @@ -11942,7 +12092,9 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) false, true, false, - m_EnableScavenging); + m_EnableScavenging, + ""sv, + ""sv); if (AbortFlag) { ZEN_CONSOLE("Re-download failed."); @@ -11962,7 +12114,9 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) false, true, false, - m_EnableScavenging); + m_EnableScavenging, + ""sv, + ""sv); if (AbortFlag) { ZEN_CONSOLE("Re-download failed."); @@ -11982,7 +12136,9 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) false, true, false, - m_EnableScavenging); + m_EnableScavenging, + ""sv, + ""sv); if (AbortFlag) { ZEN_CONSOLE("Re-download failed."); |