diff options
| author | Dan Engelbrecht <[email protected]> | 2025-04-08 18:57:25 +0200 |
|---|---|---|
| committer | GitHub Enterprise <[email protected]> | 2025-04-08 18:57:25 +0200 |
| commit | 1ca32ca4718dad5bf1e2f381fe93b47d8159807b (patch) | |
| tree | 8fa6a00132de012b7d28e8f17835f1cd9d1ce8f1 /src | |
| parent | 5.6.4-pre0 (diff) | |
| download | zen-1ca32ca4718dad5bf1e2f381fe93b47d8159807b.tar.xz zen-1ca32ca4718dad5bf1e2f381fe93b47d8159807b.zip | |
scavenge builds (#352)
- Improvement: `zen builds` now scavenges previous download locations for reusable data to reduce download size. Enabled by default; disable with `--enable-scavenge=false`
- Bugfix: Renaming a file during download sometimes reported an error even though the rename succeeded on retry
Diffstat (limited to 'src')
| -rw-r--r-- | src/zen/cmds/builds_cmd.cpp | 851 | ||||
| -rw-r--r-- | src/zen/cmds/builds_cmd.h | 1 | ||||
| -rw-r--r-- | src/zenutil/chunkedcontent.cpp | 4 |
3 files changed, 669 insertions, 187 deletions
diff --git a/src/zen/cmds/builds_cmd.cpp b/src/zen/cmds/builds_cmd.cpp index 0d2601bd8..b33ec659d 100644 --- a/src/zen/cmds/builds_cmd.cpp +++ b/src/zen/cmds/builds_cmd.cpp @@ -189,6 +189,7 @@ namespace { for (size_t Retries = 0; Ec && Retries < 3; Retries++) { Sleep(100 + int(Retries * 50)); + Ec.clear(); RenameFile(SourcePath, TargetPath, Ec); } if (Ec) @@ -197,6 +198,23 @@ namespace { } } + bool IsFileWithRetry(const std::filesystem::path& Path) + { + std::error_code Ec; + bool Result = IsFile(Path, Ec); + for (size_t Retries = 0; Ec && Retries < 3; Retries++) + { + Sleep(100 + int(Retries * 50)); + Ec.clear(); + Result = IsFile(Path, Ec); + } + if (Ec) + { + zen::ThrowSystemError(Ec.value(), Ec.message()); + } + return Result; + } + bool SetFileReadOnlyWithRetry(const std::filesystem::path& Path, bool ReadOnly) { std::error_code Ec; @@ -204,7 +222,7 @@ namespace { for (size_t Retries = 0; Ec && Retries < 3; Retries++) { Sleep(100 + int(Retries * 50)); - if (!IsFile(Path)) + if (!IsFileWithRetry(Path)) { return false; } @@ -225,7 +243,7 @@ namespace { for (size_t Retries = 0; Ec && Retries < 3; Retries++) { Sleep(100 + int(Retries * 50)); - if (!IsFile(Path)) + if (!IsFileWithRetry(Path)) { return; } @@ -833,11 +851,23 @@ namespace { uint64_t CacheSequenceHashesCount = 0; uint64_t CacheSequenceHashesByteCount = 0; + uint64_t CacheScanElapsedWallTimeUs = 0; + uint32_t LocalPathsMatchingSequencesCount = 0; uint64_t LocalPathsMatchingSequencesByteCount = 0; uint64_t LocalChunkMatchingRemoteCount = 0; uint64_t LocalChunkMatchingRemoteByteCount = 0; + + uint64_t LocalScanElapsedWallTimeUs = 0; + + uint32_t ScavengedPathsMatchingSequencesCount = 0; + uint64_t ScavengedPathsMatchingSequencesByteCount = 0; + + uint64_t ScavengedChunkMatchingRemoteCount = 0; + uint64_t ScavengedChunkMatchingRemoteByteCount = 0; + + uint64_t ScavengeElapsedWallTimeUs = 0; }; struct DownloadStatistics @@ -1295,9 +1325,11 @@ namespace { CbObject CreateStateObject(const Oid& BuildId, const 
std::vector<std::pair<Oid, std::string>>& AllBuildParts, std::span<const ChunkedFolderContent> PartContents, - const FolderContent& LocalFolderState) + const FolderContent& LocalFolderState, + const std::filesystem::path& LocalPath) { CbObjectWriter CurrentStateWriter; + CurrentStateWriter.AddString("path", (const char*)LocalPath.u8string().c_str()); CurrentStateWriter.BeginArray("builds"sv); { CurrentStateWriter.BeginObject(); @@ -1379,6 +1411,64 @@ namespace { TemporaryFile::SafeWriteFile(WritePath, JsonPayload); } + struct ScavengeSource + { + std::filesystem::path StateFilePath; + std::filesystem::path Path; + }; + + std::vector<ScavengeSource> GetDownloadedStatePaths(const std::filesystem::path& SystemRootDir) + { + std::vector<ScavengeSource> Result; + DirectoryContent Content; + GetDirectoryContent(SystemRootDir / "builds" / "downloads", DirectoryContentFlags::IncludeFiles, Content); + for (const std::filesystem::path& EntryPath : Content.Files) + { + bool DeleteEntry = false; + IoHash EntryPathHash; + if (IoHash::TryParse(EntryPath.stem().string(), EntryPathHash)) + { + // Read state and verify that it is valid + IoBuffer MetaDataJson = ReadFile(EntryPath).Flatten(); + std::string_view Json(reinterpret_cast<const char*>(MetaDataJson.GetData()), MetaDataJson.GetSize()); + std::string JsonError; + CbObject DownloadInfo = LoadCompactBinaryFromJson(Json, JsonError).AsObject(); + if (JsonError.empty()) + { + std::filesystem::path StateFilePath = DownloadInfo["statePath"].AsU8String(); + if (IsFile(StateFilePath)) + { + std::filesystem::path Path = DownloadInfo["path"].AsU8String(); + if (IsDir(Path)) + { + Result.push_back({.StateFilePath = std::move(StateFilePath), .Path = std::move(Path)}); + } + else + { + DeleteEntry = true; + } + } + else + { + DeleteEntry = true; + } + } + else + { + ZEN_WARN("Invalid download state file at {}. 
'{}'", EntryPath, JsonError); + DeleteEntry = true; + } + } + + if (DeleteEntry) + { + std::error_code DummyEc; + std::filesystem::remove(EntryPath, DummyEc); + } + } + return Result; + } + class BufferedOpenFile { public: @@ -4842,6 +4932,7 @@ namespace { IoBuffer&& CompressedPart, DiskStatistics& DiskStats) { + ZEN_TRACE_CPU("WriteCompressedChunk"); auto ChunkHashToChunkIndexIt = RemoteLookup.ChunkHashToChunkIndex.find(ChunkHash); ZEN_ASSERT(ChunkHashToChunkIndexIt != RemoteLookup.ChunkHashToChunkIndex.end()); const uint32_t ChunkIndex = ChunkHashToChunkIndexIt->second; @@ -5000,7 +5091,143 @@ namespace { Work.DefaultErrorFunction()); }; - void UpdateFolder(StorageInstance& Storage, + bool ReadStateFile(const std::filesystem::path& StateFilePath, + FolderContent& OutLocalFolderState, + ChunkedFolderContent& OutLocalContent) + { + ZEN_TRACE_CPU("ReadStateFile"); + bool HasLocalState = false; + try + { + CbObject CurrentStateObject = LoadCompactBinaryObject(StateFilePath).Object; + if (CurrentStateObject) + { + Oid CurrentBuildId; + std::vector<Oid> SavedBuildPartIds; + std::vector<std::string> SavedBuildPartsNames; + std::vector<ChunkedFolderContent> SavedPartContents; + if (ReadStateObject(CurrentStateObject, + CurrentBuildId, + SavedBuildPartIds, + SavedBuildPartsNames, + SavedPartContents, + OutLocalFolderState)) + { + if (!SavedPartContents.empty()) + { + if (SavedPartContents.size() == 1) + { + OutLocalContent = std::move(SavedPartContents[0]); + } + else + { + OutLocalContent = + MergeChunkedFolderContents(SavedPartContents[0], + std::span<const ChunkedFolderContent>(SavedPartContents).subspan(1)); + } + HasLocalState = true; + } + } + } + } + catch (const std::exception& Ex) + { + ZEN_CONSOLE("Failed reading state file {}, falling back to scannning. 
Reason: {}", StateFilePath, Ex.what()); + } + return HasLocalState; + } + + FolderContent GetValidFolderContent(GetFolderContentStatistics& LocalFolderScanStats, + const std::filesystem::path& Path, + std::span<const std::filesystem::path> PathsToCheck) + { + ZEN_TRACE_CPU("GetValidFolderContent"); + FolderContent Result; + const uint32_t PathCount = gsl::narrow<uint32_t>(PathsToCheck.size()); + + Result.Paths.resize(PathCount); + Result.RawSizes.resize(PathCount); + Result.Attributes.resize(PathCount); + Result.ModificationTicks.resize(PathCount); + + { + Stopwatch Timer; + auto _ = MakeGuard([&LocalFolderScanStats, &Timer]() { LocalFolderScanStats.ElapsedWallTimeUS = Timer.GetElapsedTimeUs(); }); + + ProgressBar ProgressBar(UsePlainProgress); + + ParallellWork Work(AbortFlag); + std::atomic<uint64_t> CompletedPathCount = 0; + uint32_t PathIndex = 0; + + while (PathIndex < PathCount) + { + uint32_t PathRangeCount = Min(128u, PathCount - PathIndex); + Work.ScheduleWork( + GetIOWorkerPool(), + [PathIndex, PathRangeCount, &PathsToCheck, &Path, &Result, &CompletedPathCount, &LocalFolderScanStats]( + std::atomic<bool>&) { + for (uint32_t PathRangeIndex = PathIndex; PathRangeIndex < PathIndex + PathRangeCount; PathRangeIndex++) + { + const std::filesystem::path& FilePath = PathsToCheck[PathRangeIndex]; + std::filesystem::path LocalFilePath = (Path / FilePath).make_preferred(); + if (TryGetFileProperties(LocalFilePath, + Result.RawSizes[PathRangeIndex], + Result.ModificationTicks[PathRangeIndex], + Result.Attributes[PathRangeIndex])) + { + Result.Paths[PathRangeIndex] = std::move(FilePath); + LocalFolderScanStats.FoundFileCount++; + LocalFolderScanStats.FoundFileByteCount += Result.RawSizes[PathRangeIndex]; + LocalFolderScanStats.AcceptedFileCount++; + LocalFolderScanStats.AcceptedFileByteCount += Result.RawSizes[PathRangeIndex]; + } + CompletedPathCount++; + } + }, + Work.DefaultErrorFunction()); + PathIndex += PathRangeCount; + } + Work.Wait(200, [&](bool, ptrdiff_t) { 
+ // FilteredBytesHashed.Update(ChunkingStats.BytesHashed.load()); + std::string Details = fmt::format("{}/{} checked, {} found", + CompletedPathCount.load(), + PathCount, + LocalFolderScanStats.FoundFileCount.load()); + ProgressBar.UpdateState({.Task = "Checking files ", + .Details = Details, + .TotalCount = PathCount, + .RemainingCount = PathCount - CompletedPathCount.load()}, + false); + }); + ProgressBar.Finish(); + } + + uint32_t WritePathIndex = 0; + for (uint32_t ReadPathIndex = 0; ReadPathIndex < PathCount; ReadPathIndex++) + { + if (!Result.Paths[ReadPathIndex].empty()) + { + if (WritePathIndex < ReadPathIndex) + { + Result.Paths[WritePathIndex] = std::move(Result.Paths[ReadPathIndex]); + Result.RawSizes[WritePathIndex] = Result.RawSizes[ReadPathIndex]; + Result.Attributes[WritePathIndex] = Result.Attributes[ReadPathIndex]; + Result.ModificationTicks[WritePathIndex] = Result.ModificationTicks[ReadPathIndex]; + } + WritePathIndex++; + } + } + + Result.Paths.resize(WritePathIndex); + Result.RawSizes.resize(WritePathIndex); + Result.Attributes.resize(WritePathIndex); + Result.ModificationTicks.resize(WritePathIndex); + return Result; + } + + void UpdateFolder(const std::filesystem::path& SystemRootDir, + StorageInstance& Storage, const Oid& BuildId, const std::filesystem::path& Path, const std::filesystem::path& ZenFolderPath, @@ -5013,6 +5240,7 @@ namespace { bool AllowPartialBlockRequests, bool WipeTargetFolder, bool PrimeCacheOnly, + bool EnableScavenging, FolderContent& OutLocalFolderState, DiskStatistics& DiskStats, CacheMappingStatistics& CacheMappingStats, @@ -5046,6 +5274,8 @@ namespace { { ZEN_TRACE_CPU("UpdateFolder_CheckChunkCache"); + Stopwatch CacheTimer; + DirectoryContent CacheDirContent; GetDirectoryContent(CacheFolderPath, DirectoryContentFlags::IncludeFiles | DirectoryContentFlags::IncludeFileSizes, @@ -5091,6 +5321,7 @@ namespace { } RemoveFileWithRetry(CacheDirContent.Files[Index]); } + CacheMappingStats.CacheScanElapsedWallTimeUs += 
CacheTimer.GetElapsedTimeUs(); } tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> CachedBlocksFound; @@ -5098,6 +5329,8 @@ namespace { { ZEN_TRACE_CPU("UpdateFolder_CheckBlockCache"); + Stopwatch CacheTimer; + tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> AllBlockSizes; AllBlockSizes.reserve(BlockDescriptions.size()); for (uint32_t BlockIndex = 0; BlockIndex < BlockDescriptions.size(); BlockIndex++) @@ -5137,54 +5370,62 @@ namespace { } RemoveFileWithRetry(BlockDirContent.Files[Index]); } + + CacheMappingStats.CacheScanElapsedWallTimeUs += CacheTimer.GetElapsedTimeUs(); } - std::vector<uint32_t> LocalPathIndexesMatchingSequenceIndexes; + std::vector<uint32_t> LocalPathIndexesMatchingSequenceIndexes; + tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> SequenceIndexesLeftToFindToRemoteIndex; if (!PrimeCacheOnly) { // Pick up all whole files we can use from current local state - ZEN_TRACE_CPU("UpdateFolder_CheckLocalChunks"); + ZEN_TRACE_CPU("UpdateFolder_GetLocalSequences"); + + Stopwatch LocalTimer; + for (uint32_t RemoteSequenceIndex = 0; RemoteSequenceIndex < RemoteContent.ChunkedContent.SequenceRawHashes.size(); RemoteSequenceIndex++) { - const IoHash& RemoteSequenceRawHash = RemoteContent.ChunkedContent.SequenceRawHashes[RemoteSequenceIndex]; + const IoHash& RemoteSequenceRawHash = RemoteContent.ChunkedContent.SequenceRawHashes[RemoteSequenceIndex]; + const uint32_t RemotePathIndex = GetFirstPathIndexForSeqeuenceIndex(RemoteLookup, RemoteSequenceIndex); + const uint64_t RemoteRawSize = RemoteContent.RawSizes[RemotePathIndex]; if (auto CacheSequenceIt = CachedSequenceHashesFound.find(RemoteSequenceRawHash); CacheSequenceIt != CachedSequenceHashesFound.end()) { - // const uint32_t RemoteSequenceIndex = CacheSequenceIt->second; - // const uint32_t RemotePathIndex = GetFirstPathIndexForSeqeuenceIndex(RemoteLookup, RemoteSequenceIndex); - // RemoteSequenceByteCountFoundInCache += RemoteContent.RawSizes[RemotePathIndex]; const std::filesystem::path CacheFilePath = 
GetFinalChunkedSequenceFileName(CacheFolderPath, RemoteSequenceRawHash); ZEN_ASSERT_SLOW(IsFile(CacheFilePath)); + ZEN_CONSOLE_VERBOSE("Found sequence {} at {} ({})", RemoteSequenceRawHash, CacheFilePath, NiceBytes(RemoteRawSize)); } else if (auto CacheChunkIt = CachedChunkHashesFound.find(RemoteSequenceRawHash); CacheChunkIt != CachedChunkHashesFound.end()) { - // const uint32_t RemoteChunkIndex = CacheChunkIt->second; - // const uint32_t RemotePathIndex = GetFirstPathIndexForSeqeuenceIndex(RemoteLookup, RemoteSequenceIndex); - // RemoteSequenceByteCountFoundInCache += RemoteContent.RawSizes[RemotePathIndex]; const std::filesystem::path CacheFilePath = GetFinalChunkedSequenceFileName(CacheFolderPath, RemoteSequenceRawHash); ZEN_ASSERT_SLOW(IsFile(CacheFilePath)); + ZEN_CONSOLE_VERBOSE("Found chunk {} at {} ({})", RemoteSequenceRawHash, CacheFilePath, NiceBytes(RemoteRawSize)); } else if (auto It = LocalLookup.RawHashToSequenceIndex.find(RemoteSequenceRawHash); It != LocalLookup.RawHashToSequenceIndex.end()) { - const uint32_t LocalSequenceIndex = It->second; - const uint32_t LocalPathIndex = GetFirstPathIndexForSeqeuenceIndex(LocalLookup, LocalSequenceIndex); - ZEN_ASSERT_SLOW(IsFile((Path / LocalContent.Paths[LocalPathIndex]).make_preferred())); - uint64_t RawSize = LocalContent.RawSizes[LocalPathIndex]; + const uint32_t LocalSequenceIndex = It->second; + const uint32_t LocalPathIndex = GetFirstPathIndexForSeqeuenceIndex(LocalLookup, LocalSequenceIndex); + const std::filesystem::path LocalFilePath = (Path / LocalContent.Paths[LocalPathIndex]).make_preferred(); + ZEN_ASSERT_SLOW(IsFile(LocalFilePath)); LocalPathIndexesMatchingSequenceIndexes.push_back(LocalPathIndex); CacheMappingStats.LocalPathsMatchingSequencesCount++; - CacheMappingStats.LocalPathsMatchingSequencesByteCount += RawSize; + CacheMappingStats.LocalPathsMatchingSequencesByteCount += RemoteRawSize; + ZEN_CONSOLE_VERBOSE("Found sequence {} at {} ({})", RemoteSequenceRawHash, LocalFilePath, 
NiceBytes(RemoteRawSize)); } else { // We must write the sequence const uint32_t ChunkCount = RemoteContent.ChunkedContent.ChunkCounts[RemoteSequenceIndex]; SequenceIndexChunksLeftToWriteCounters[RemoteSequenceIndex] = ChunkCount; + SequenceIndexesLeftToFindToRemoteIndex.insert({RemoteSequenceRawHash, RemoteSequenceIndex}); } } + + CacheMappingStats.LocalScanElapsedWallTimeUs += LocalTimer.GetElapsedTimeUs(); } else { @@ -5195,10 +5436,138 @@ namespace { SequenceIndexChunksLeftToWriteCounters[RemoteSequenceIndex] = ChunkCount; } } + + std::vector<ChunkedFolderContent> ScavengedContents; + std::vector<ChunkedContentLookup> ScavengedLookups; + std::vector<std::filesystem::path> ScavengedPaths; + + struct ScavengeCopyOperation + { + uint32_t ScavengedContentIndex = (uint32_t)-1; + uint32_t ScavengedPathIndex = (uint32_t)-1; + uint32_t RemoteSequenceIndex = (uint32_t)-1; + uint64_t RawSize = (uint32_t)-1; + }; + + std::vector<ScavengeCopyOperation> ScavengeCopyOperations; + uint64_t ScavengedPathsCount = 0; + + if (!PrimeCacheOnly && EnableScavenging) + { + ZEN_TRACE_CPU("UpdateFolder_GetScavengedSequences"); + + Stopwatch ScavengeTimer; + + if (!SequenceIndexesLeftToFindToRemoteIndex.empty()) + { + std::vector<ScavengeSource> ScavengeSources = GetDownloadedStatePaths(SystemRootDir); + auto EraseIt = std::remove_if(ScavengeSources.begin(), ScavengeSources.end(), [&Path](const ScavengeSource& Source) { + return Source.Path == Path; + }); + ScavengeSources.erase(EraseIt, ScavengeSources.end()); + + const size_t ScavengePathCount = ScavengeSources.size(); + + ScavengedContents.resize(ScavengePathCount); + ScavengedLookups.resize(ScavengePathCount); + ScavengedPaths.resize(ScavengePathCount); + for (size_t ScavengeIndex = 0; ScavengeIndex < ScavengePathCount; ScavengeIndex++) + { + const ScavengeSource& Source = ScavengeSources[ScavengeIndex]; + + ChunkedFolderContent& ScavengedLocalContent = ScavengedContents[ScavengeIndex]; + std::filesystem::path& ScavengePath = 
ScavengedPaths[ScavengeIndex]; + FolderContent LocalFolderState; + if (ReadStateFile(Source.StateFilePath, LocalFolderState, ScavengedLocalContent)) + { + GetFolderContentStatistics ScavengedFolderScanStats; + + FolderContent ValidFolderContent = + GetValidFolderContent(ScavengedFolderScanStats, Source.Path, LocalFolderState.Paths); + + if (!LocalFolderState.AreKnownFilesEqual(ValidFolderContent)) + { + std::vector<std::filesystem::path> DeletedPaths; + FolderContent UpdatedContent = GetUpdatedContent(LocalFolderState, ValidFolderContent, DeletedPaths); + + // If the files are modified since the state was saved we ignore the files since we don't want to incur the + // cost of scanning/hashing scavenged files + DeletedPaths.insert(DeletedPaths.end(), UpdatedContent.Paths.begin(), UpdatedContent.Paths.end()); + if (!DeletedPaths.empty()) + { + ScavengedLocalContent = DeletePathsFromChunkedContent(ScavengedLocalContent, DeletedPaths); + } + } + + if (!ScavengedLocalContent.Paths.empty()) + { + ScavengePath = Source.Path; + } + } + } + + for (uint32_t ScavengedContentIndex = 0; + ScavengedContentIndex < ScavengedContents.size() && (!SequenceIndexesLeftToFindToRemoteIndex.empty()); + ScavengedContentIndex++) + { + const std::filesystem::path& ScavengePath = ScavengedPaths[ScavengedContentIndex]; + if (!ScavengePath.empty()) + { + const ChunkedFolderContent& ScavengedLocalContent = ScavengedContents[ScavengedContentIndex]; + ChunkedContentLookup& ScavengedLookup = ScavengedLookups[ScavengedContentIndex]; + ScavengedLookup = BuildChunkedContentLookup(ScavengedLocalContent); + + for (uint32_t ScavengedSequenceIndex = 0; + ScavengedSequenceIndex < ScavengedLocalContent.ChunkedContent.SequenceRawHashes.size(); + ScavengedSequenceIndex++) + { + const IoHash& SequenceRawHash = ScavengedLocalContent.ChunkedContent.SequenceRawHashes[ScavengedSequenceIndex]; + if (auto It = SequenceIndexesLeftToFindToRemoteIndex.find(SequenceRawHash); + It != 
SequenceIndexesLeftToFindToRemoteIndex.end()) + { + const uint32_t RemoteSequenceIndex = It->second; + const uint64_t RawSize = + RemoteContent.RawSizes[RemoteLookup.SequenceIndexFirstPathIndex[RemoteSequenceIndex]]; + ZEN_ASSERT(RawSize > 0); + + const uint32_t ScavengedPathIndex = ScavengedLookup.SequenceIndexFirstPathIndex[ScavengedSequenceIndex]; + ZEN_ASSERT_SLOW(IsFile((ScavengePath / ScavengedLocalContent.Paths[ScavengedPathIndex]).make_preferred())); + + ScavengeCopyOperations.push_back({.ScavengedContentIndex = ScavengedContentIndex, + .ScavengedPathIndex = ScavengedPathIndex, + .RemoteSequenceIndex = RemoteSequenceIndex, + .RawSize = RawSize}); + + SequenceIndexesLeftToFindToRemoteIndex.erase(SequenceRawHash); + SequenceIndexChunksLeftToWriteCounters[RemoteSequenceIndex] = 0; + + CacheMappingStats.ScavengedPathsMatchingSequencesCount++; + CacheMappingStats.ScavengedPathsMatchingSequencesByteCount += RawSize; + } + } + ScavengedPathsCount++; + } + } + } + CacheMappingStats.ScavengeElapsedWallTimeUs += ScavengeTimer.GetElapsedTimeUs(); + } + + uint32_t RemainingChunkCount = 0; + for (uint32_t RemoteChunkIndex = 0; RemoteChunkIndex < RemoteContent.ChunkedContent.ChunkHashes.size(); RemoteChunkIndex++) + { + uint64_t ChunkWriteCount = GetChunkWriteCount(SequenceIndexChunksLeftToWriteCounters, RemoteLookup, RemoteChunkIndex); + if (ChunkWriteCount > 0) + { + RemainingChunkCount++; + } + } + // Pick up all chunks in current local state + // TODO: Rename to LocalStateCopyData struct CacheCopyData { - uint32_t LocalSequenceIndex = (uint32_t)-1; + uint32_t ScavengeSourceIndex = (uint32_t)-1; + uint32_t SourceSequenceIndex = (uint32_t)-1; std::vector<const ChunkedContentLookup::ChunkSequenceLocation*> TargetChunkLocationPtrs; struct ChunkTarget { @@ -5216,7 +5585,10 @@ namespace { { ZEN_TRACE_CPU("UpdateFolder_GetLocalChunks"); - for (uint32_t LocalSequenceIndex = 0; LocalSequenceIndex < LocalContent.ChunkedContent.SequenceRawHashes.size(); + Stopwatch LocalTimer; 
+ + for (uint32_t LocalSequenceIndex = 0; + LocalSequenceIndex < LocalContent.ChunkedContent.SequenceRawHashes.size() && (RemainingChunkCount > 0); LocalSequenceIndex++) { const IoHash& LocalSequenceRawHash = LocalContent.ChunkedContent.SequenceRawHashes[LocalSequenceIndex]; @@ -5254,7 +5626,8 @@ namespace { { RawHashToCacheCopyDataIndex.insert_or_assign(LocalSequenceRawHash, CacheCopyDatas.size()); CacheCopyDatas.push_back( - CacheCopyData{.LocalSequenceIndex = LocalSequenceIndex, + CacheCopyData{.ScavengeSourceIndex = (uint32_t)-1, + .SourceSequenceIndex = LocalSequenceIndex, .TargetChunkLocationPtrs = ChunkTargetPtrs, .ChunkTargets = std::vector<CacheCopyData::ChunkTarget>{Target}}); } @@ -5270,13 +5643,15 @@ namespace { { RawHashToCacheCopyDataIndex.insert_or_assign(LocalSequenceRawHash, CacheCopyDatas.size()); CacheCopyDatas.push_back( - CacheCopyData{.LocalSequenceIndex = LocalSequenceIndex, + CacheCopyData{.ScavengeSourceIndex = (uint32_t)-1, + .SourceSequenceIndex = LocalSequenceIndex, .TargetChunkLocationPtrs = ChunkTargetPtrs, .ChunkTargets = std::vector<CacheCopyData::ChunkTarget>{Target}}); } CacheMappingStats.LocalChunkMatchingRemoteCount++; CacheMappingStats.LocalChunkMatchingRemoteByteCount += LocalChunkRawSize; RemoteChunkIndexNeedsCopyFromLocalFileFlags[RemoteChunkIndex] = true; + RemainingChunkCount--; } } } @@ -5284,26 +5659,130 @@ namespace { } } } + CacheMappingStats.LocalScanElapsedWallTimeUs += LocalTimer.GetElapsedTimeUs(); + } + + if (!PrimeCacheOnly) + { + ZEN_TRACE_CPU("UpdateFolder_GetScavengeChunks"); + + Stopwatch ScavengeTimer; + + for (uint32_t ScavengedContentIndex = 0; ScavengedContentIndex < ScavengedContents.size() && (RemainingChunkCount > 0); + ScavengedContentIndex++) + { + const ChunkedFolderContent& ScavengedContent = ScavengedContents[ScavengedContentIndex]; + // const std::filesystem::path& ScavengedPath = ScavengedPaths[ScavengedContentIndex]; + const ChunkedContentLookup& ScavengedLookup = 
ScavengedLookups[ScavengedContentIndex]; + + for (uint32_t ScavengedSequenceIndex = 0; + ScavengedSequenceIndex < ScavengedContent.ChunkedContent.SequenceRawHashes.size() && (RemainingChunkCount > 0); + ScavengedSequenceIndex++) + { + const IoHash& ScavengedSequenceRawHash = ScavengedContent.ChunkedContent.SequenceRawHashes[ScavengedSequenceIndex]; + const uint32_t ScavengedOrderOffset = ScavengedLookup.SequenceIndexChunkOrderOffset[ScavengedSequenceIndex]; + + { + uint64_t SourceOffset = 0; + const uint32_t ScavengedChunkCount = ScavengedContent.ChunkedContent.ChunkCounts[ScavengedSequenceIndex]; + for (uint32_t ScavengedOrderIndex = 0; ScavengedOrderIndex < ScavengedChunkCount; ScavengedOrderIndex++) + { + const uint32_t ScavengedChunkIndex = + ScavengedContent.ChunkedContent.ChunkOrders[ScavengedOrderOffset + ScavengedOrderIndex]; + const IoHash& ScavengedChunkHash = ScavengedContent.ChunkedContent.ChunkHashes[ScavengedChunkIndex]; + const uint64_t ScavengedChunkRawSize = ScavengedContent.ChunkedContent.ChunkRawSizes[ScavengedChunkIndex]; + + if (auto RemoteChunkIt = RemoteLookup.ChunkHashToChunkIndex.find(ScavengedChunkHash); + RemoteChunkIt != RemoteLookup.ChunkHashToChunkIndex.end()) + { + const uint32_t RemoteChunkIndex = RemoteChunkIt->second; + if (!RemoteChunkIndexNeedsCopyFromLocalFileFlags[RemoteChunkIndex]) + { + std::vector<const ChunkedContentLookup::ChunkSequenceLocation*> ChunkTargetPtrs = + GetRemainingChunkTargets(SequenceIndexChunksLeftToWriteCounters, RemoteLookup, RemoteChunkIndex); + + if (!ChunkTargetPtrs.empty()) + { + CacheCopyData::ChunkTarget Target = { + .TargetChunkLocationCount = gsl::narrow<uint32_t>(ChunkTargetPtrs.size()), + .RemoteChunkIndex = RemoteChunkIndex, + .CacheFileOffset = SourceOffset}; + if (auto CopySourceIt = RawHashToCacheCopyDataIndex.find(ScavengedSequenceRawHash); + CopySourceIt != RawHashToCacheCopyDataIndex.end()) + { + CacheCopyData& Data = CacheCopyDatas[CopySourceIt->second]; + if 
(Data.TargetChunkLocationPtrs.size() > 1024) + { + RawHashToCacheCopyDataIndex.insert_or_assign(ScavengedSequenceRawHash, + CacheCopyDatas.size()); + CacheCopyDatas.push_back( + CacheCopyData{.ScavengeSourceIndex = ScavengedContentIndex, + .SourceSequenceIndex = ScavengedSequenceIndex, + .TargetChunkLocationPtrs = ChunkTargetPtrs, + .ChunkTargets = std::vector<CacheCopyData::ChunkTarget>{Target}}); + } + else + { + Data.TargetChunkLocationPtrs.insert(Data.TargetChunkLocationPtrs.end(), + ChunkTargetPtrs.begin(), + ChunkTargetPtrs.end()); + Data.ChunkTargets.push_back(Target); + } + } + else + { + RawHashToCacheCopyDataIndex.insert_or_assign(ScavengedSequenceRawHash, CacheCopyDatas.size()); + CacheCopyDatas.push_back( + CacheCopyData{.ScavengeSourceIndex = ScavengedContentIndex, + .SourceSequenceIndex = ScavengedSequenceIndex, + .TargetChunkLocationPtrs = ChunkTargetPtrs, + .ChunkTargets = std::vector<CacheCopyData::ChunkTarget>{Target}}); + } + CacheMappingStats.ScavengedChunkMatchingRemoteCount++; + CacheMappingStats.ScavengedChunkMatchingRemoteByteCount += ScavengedChunkRawSize; + RemoteChunkIndexNeedsCopyFromLocalFileFlags[RemoteChunkIndex] = true; + RemainingChunkCount--; + } + } + } + SourceOffset += ScavengedChunkRawSize; + } + } + } + } + CacheMappingStats.ScavengeElapsedWallTimeUs += ScavengeTimer.GetElapsedTimeUs(); } if (!CachedSequenceHashesFound.empty() || !CachedChunkHashesFound.empty() || !CachedBlocksFound.empty()) { - ZEN_CONSOLE("Download cache: Found {} ({}) chunk sequences, {} ({}) chunks, {} ({}) blocks.", + ZEN_CONSOLE("Download cache: Found {} ({}) chunk sequences, {} ({}) chunks, {} ({}) blocks in {}", CachedSequenceHashesFound.size(), NiceBytes(CacheMappingStats.CacheSequenceHashesByteCount), CachedChunkHashesFound.size(), NiceBytes(CacheMappingStats.CacheChunkByteCount), CachedBlocksFound.size(), - NiceBytes(CacheMappingStats.CacheBlocksByteCount)); + NiceBytes(CacheMappingStats.CacheBlocksByteCount), + 
NiceTimeSpanMs(CacheMappingStats.CacheScanElapsedWallTimeUs / 1000)); } if (!LocalPathIndexesMatchingSequenceIndexes.empty() || CacheMappingStats.LocalChunkMatchingRemoteCount > 0) { - ZEN_CONSOLE("Local state : Found {} ({}) chunk sequences, {} ({}) chunks", + ZEN_CONSOLE("Local state : Found {} ({}) chunk sequences, {} ({}) chunks in {}", LocalPathIndexesMatchingSequenceIndexes.size(), NiceBytes(CacheMappingStats.LocalPathsMatchingSequencesByteCount), CacheMappingStats.LocalChunkMatchingRemoteCount, - NiceBytes(CacheMappingStats.LocalChunkMatchingRemoteByteCount)); + NiceBytes(CacheMappingStats.LocalChunkMatchingRemoteByteCount), + NiceTimeSpanMs(CacheMappingStats.LocalScanElapsedWallTimeUs / 1000)); + } + if (CacheMappingStats.ScavengedPathsMatchingSequencesCount > 0 || CacheMappingStats.ScavengedChunkMatchingRemoteCount > 0) + { + ZEN_CONSOLE("Scavenge of {} paths found {} ({}) chunk sequences, {} ({}) chunks in {}", + ScavengedPathsCount, + CacheMappingStats.ScavengedPathsMatchingSequencesCount, + NiceBytes(CacheMappingStats.ScavengedPathsMatchingSequencesByteCount), + CacheMappingStats.ScavengedChunkMatchingRemoteCount, + NiceBytes(CacheMappingStats.ScavengedChunkMatchingRemoteByteCount), + NiceTimeSpanMs(CacheMappingStats.ScavengeElapsedWallTimeUs / 1000)); } uint64_t BytesToWrite = 0; @@ -5321,6 +5800,11 @@ namespace { } } + for (const ScavengeCopyOperation& ScavengeCopyOp : ScavengeCopyOperations) + { + BytesToWrite += ScavengeCopyOp.RawSize; + } + uint64_t TotalRequestCount = 0; uint64_t TotalPartWriteCount = 0; std::atomic<uint64_t> WritePartsComplete = 0; @@ -5347,6 +5831,7 @@ namespace { std::vector<LooseChunkHashWorkData> LooseChunkHashWorks; TotalPartWriteCount += CacheCopyDatas.size(); + TotalPartWriteCount += ScavengeCopyOperations.size(); for (const IoHash ChunkHash : LooseChunkHashes) { @@ -5694,6 +6179,53 @@ namespace { } } } + + for (uint32_t ScavengeOpIndex = 0; ScavengeOpIndex < ScavengeCopyOperations.size(); ScavengeOpIndex++) + { + if 
(AbortFlag) + { + break; + } + if (!PrimeCacheOnly) + { + Work.ScheduleWork( + WritePool, + [&, ScavengeOpIndex](std::atomic<bool>&) mutable { + if (!AbortFlag) + { + const ScavengeCopyOperation& ScavengeOp = ScavengeCopyOperations[ScavengeOpIndex]; + const ChunkedFolderContent& ScavengedContent = ScavengedContents[ScavengeOp.ScavengedContentIndex]; + const std::filesystem::path ScavengedPath = ScavengedContent.Paths[ScavengeOp.ScavengedPathIndex]; + + const std::filesystem::path ScavengedFilePath = + (ScavengedPaths[ScavengeOp.ScavengedContentIndex] / ScavengedPath).make_preferred(); + ZEN_ASSERT_SLOW(FileSizeFromPath(ScavengedFilePath) == ScavengeOp.RawSize); + + const IoHash& RemoteSequenceRawHash = + RemoteContent.ChunkedContent.SequenceRawHashes[ScavengeOp.RemoteSequenceIndex]; + const std::filesystem::path TempFilePath = + GetTempChunkedSequenceFileName(CacheFolderPath, RemoteSequenceRawHash); + + CopyFile(ScavengedFilePath, TempFilePath, {.EnableClone = false}); + + DiskStats.WriteCount++; + DiskStats.WriteByteCount += ScavengeOp.RawSize; + + const std::filesystem::path CacheFilePath = + GetFinalChunkedSequenceFileName(CacheFolderPath, RemoteSequenceRawHash); + RenameFile(TempFilePath, CacheFilePath); + + WritePartsComplete++; + if (WritePartsComplete == TotalPartWriteCount) + { + FilteredWrittenBytesPerSecond.Stop(); + } + } + }, + Work.DefaultErrorFunction()); + } + } + for (uint32_t LooseChunkHashWorkIndex = 0; LooseChunkHashWorkIndex < LooseChunkHashWorks.size(); LooseChunkHashWorkIndex++) { if (AbortFlag) @@ -6011,9 +6543,25 @@ namespace { ZEN_TRACE_CPU("UpdateFolder_CopyLocal"); FilteredWrittenBytesPerSecond.Start(); - const CacheCopyData& CopyData = CacheCopyDatas[CopyDataIndex]; - const uint32_t LocalPathIndex = LocalLookup.SequenceIndexFirstPathIndex[CopyData.LocalSequenceIndex]; - const std::filesystem::path LocalFilePath = (Path / LocalContent.Paths[LocalPathIndex]).make_preferred(); + const CacheCopyData& CopyData = CacheCopyDatas[CopyDataIndex]; 
+ + std::filesystem::path SourceFilePath; + + if (CopyData.ScavengeSourceIndex == (uint32_t)-1) + { + const uint32_t LocalPathIndex = LocalLookup.SequenceIndexFirstPathIndex[CopyData.SourceSequenceIndex]; + SourceFilePath = (Path / LocalContent.Paths[LocalPathIndex]).make_preferred(); + } + else + { + const ChunkedFolderContent& ScavengedContent = ScavengedContents[CopyData.ScavengeSourceIndex]; + const ChunkedContentLookup& ScavengedLookup = ScavengedLookups[CopyData.ScavengeSourceIndex]; + const std::filesystem::path ScavengedPath = ScavengedPaths[CopyData.ScavengeSourceIndex]; + const uint32_t ScavengedPathIndex = + ScavengedLookup.SequenceIndexFirstPathIndex[CopyData.SourceSequenceIndex]; + SourceFilePath = (ScavengedPath / ScavengedContent.Paths[ScavengedPathIndex]).make_preferred(); + } + ZEN_ASSERT_SLOW(IsFile(SourceFilePath)); ZEN_ASSERT(!CopyData.TargetChunkLocationPtrs.empty()); uint64_t CacheLocalFileBytesRead = 0; @@ -6071,7 +6619,7 @@ namespace { tsl::robin_set<uint32_t> ChunkIndexesWritten; - BufferedOpenFile SourceFile(LocalFilePath, DiskStats); + BufferedOpenFile SourceFile(SourceFilePath, DiskStats); WriteFileCache OpenFileCache(DiskStats); for (size_t WriteOpIndex = 0; WriteOpIndex < WriteOps.size();) { @@ -6155,9 +6703,7 @@ namespace { CompletedChunkSequences, Work, WritePool); - ZEN_CONSOLE_VERBOSE("Copied {} from {}", - NiceBytes(CacheLocalFileBytesRead), - LocalContent.Paths[LocalPathIndex]); + ZEN_CONSOLE_VERBOSE("Copied {} from {}", NiceBytes(CacheLocalFileBytesRead), SourceFilePath); } WritePartsComplete++; if (WritePartsComplete == TotalPartWriteCount) @@ -6781,7 +7327,7 @@ namespace { const IoHash& RawHash = LocalContent.RawHashes[LocalPathIndex]; const std::filesystem::path& LocalPath = LocalContent.Paths[LocalPathIndex]; const std::filesystem::path CacheFilePath = GetFinalChunkedSequenceFileName(CacheFolderPath, RawHash); - ZEN_ASSERT_SLOW(!IsFile(CacheFilePath)); + ZEN_ASSERT_SLOW(!IsFileWithRetry(CacheFilePath)); const 
std::filesystem::path LocalFilePath = (Path / LocalPath).make_preferred(); RenameFileWithRetry(LocalFilePath, CacheFilePath); CachedCount++; @@ -6942,7 +7488,7 @@ namespace { std::filesystem::path TargetFilePath = (Path / TargetPath).make_preferred(); if (!RemotePathIndexToLocalPathIndex[RemotePathIndex]) { - if (IsFile(TargetFilePath)) + if (IsFileWithRetry(TargetFilePath)) { SetFileReadOnlyWithRetry(TargetFilePath, false); } @@ -6979,11 +7525,11 @@ namespace { if (auto InPlaceIt = RemotePathIndexToLocalPathIndex.find(FirstRemotePathIndex); InPlaceIt != RemotePathIndexToLocalPathIndex.end()) { - ZEN_ASSERT_SLOW(IsFile(FirstTargetFilePath)); + ZEN_ASSERT_SLOW(IsFileWithRetry(FirstTargetFilePath)); } else { - if (IsFile(FirstTargetFilePath)) + if (IsFileWithRetry(FirstTargetFilePath)) { SetFileReadOnlyWithRetry(FirstTargetFilePath, false); } @@ -6999,7 +7545,7 @@ namespace { const uint32_t LocalPathIndex = InplaceIt->second; const std::filesystem::path& SourcePath = LocalContent.Paths[LocalPathIndex]; std::filesystem::path SourceFilePath = (Path / SourcePath).make_preferred(); - ZEN_ASSERT_SLOW(IsFile(SourceFilePath)); + ZEN_ASSERT_SLOW(IsFileWithRetry(SourceFilePath)); ZEN_DEBUG("Copying from '{}' -> '{}'", SourceFilePath, FirstTargetFilePath); CopyFile(SourceFilePath, FirstTargetFilePath, {.EnableClone = false}); @@ -7010,7 +7556,7 @@ namespace { ZEN_TRACE_CPU("Rename"); const std::filesystem::path CacheFilePath = GetFinalChunkedSequenceFileName(CacheFolderPath, RawHash); - ZEN_ASSERT_SLOW(IsFile(CacheFilePath)); + ZEN_ASSERT_SLOW(IsFileWithRetry(CacheFilePath)); RenameFileWithRetry(CacheFilePath, FirstTargetFilePath); @@ -7043,12 +7589,12 @@ namespace { if (auto InPlaceIt = RemotePathIndexToLocalPathIndex.find(RemotePathIndex); InPlaceIt != RemotePathIndexToLocalPathIndex.end()) { - ZEN_ASSERT_SLOW(IsFile(TargetFilePath)); + ZEN_ASSERT_SLOW(IsFileWithRetry(TargetFilePath)); } else { ZEN_TRACE_CPU("Copy"); - if (IsFile(TargetFilePath)) + if 
(IsFileWithRetry(TargetFilePath)) { SetFileReadOnlyWithRetry(TargetFilePath, false); } @@ -7057,7 +7603,7 @@ namespace { CreateDirectories(TargetFilePath.parent_path()); } - ZEN_ASSERT_SLOW(IsFile(FirstTargetFilePath)); + ZEN_ASSERT_SLOW(IsFileWithRetry(FirstTargetFilePath)); ZEN_DEBUG("Copying from '{}' -> '{}'", FirstTargetFilePath, TargetFilePath); CopyFile(FirstTargetFilePath, TargetFilePath, {.EnableClone = false}); RebuildFolderStateStats.FinalizeTreeFilesCopiedCount++; @@ -7620,7 +8166,7 @@ namespace { ChunkedFolderContent GetLocalContent(GetFolderContentStatistics& LocalFolderScanStats, ChunkingStatistics& ChunkingStats, const std::filesystem::path& Path, - const std::filesystem::path& ZenFolderPath, + const std::filesystem::path& StateFilePath, ChunkingController& ChunkController, const ChunkedFolderContent& ReferenceContent, FolderContent& OutLocalFolderContent) @@ -7628,50 +8174,12 @@ namespace { FolderContent LocalFolderState; ChunkedFolderContent LocalContent; - bool HasLocalState = false; - if (IsFile(ZenStateFilePath(ZenFolderPath))) + Stopwatch ReadStateTimer; + const bool HasLocalState = IsFile(StateFilePath) && ReadStateFile(StateFilePath, LocalFolderState, LocalContent); + if (HasLocalState) { - try - { - Stopwatch ReadStateTimer; - CbObject CurrentStateObject = LoadCompactBinaryObject(ZenStateFilePath(ZenFolderPath)).Object; - if (CurrentStateObject) - { - Oid CurrentBuildId; - std::vector<Oid> SavedBuildPartIds; - std::vector<std::string> SavedBuildPartsNames; - std::vector<ChunkedFolderContent> SavedPartContents; - if (ReadStateObject(CurrentStateObject, - CurrentBuildId, - SavedBuildPartIds, - SavedBuildPartsNames, - SavedPartContents, - LocalFolderState)) - { - if (!SavedPartContents.empty()) - { - if (SavedPartContents.size() == 1) - { - LocalContent = std::move(SavedPartContents[0]); - } - else - { - LocalContent = - MergeChunkedFolderContents(SavedPartContents[0], - std::span<const ChunkedFolderContent>(SavedPartContents).subspan(1)); - } 
- HasLocalState = true; - } - } - } - ZEN_CONSOLE("Read local state in {}", NiceTimeSpanMs(ReadStateTimer.GetElapsedTimeMs())); - } - catch (const std::exception& Ex) - { - ZEN_CONSOLE("Failed reading state file, falling back to scannning. Reason: {}", Ex.what()); - } + ZEN_CONSOLE("Read local state file {} in {}", StateFilePath, NiceTimeSpanMs(ReadStateTimer.GetElapsedTimeMs())); } - { const uint32_t LocalPathCount = gsl::narrow<uint32_t>(ReferenceContent.Paths.size()); const uint32_t RemotePathCount = gsl::narrow<uint32_t>(LocalFolderState.Paths.size()); @@ -7696,92 +8204,7 @@ namespace { } } - const uint32_t PathCount = gsl::narrow<uint32_t>(PathsToCheck.size()); - - OutLocalFolderContent.Paths.resize(PathCount); - OutLocalFolderContent.RawSizes.resize(PathCount); - OutLocalFolderContent.Attributes.resize(PathCount); - OutLocalFolderContent.ModificationTicks.resize(PathCount); - - { - Stopwatch Timer; - auto _ = - MakeGuard([&LocalFolderScanStats, &Timer]() { LocalFolderScanStats.ElapsedWallTimeUS = Timer.GetElapsedTimeUs(); }); - - ProgressBar ProgressBar(UsePlainProgress); - - ParallellWork Work(AbortFlag); - std::atomic<uint64_t> CompletedPathCount = 0; - uint32_t PathIndex = 0; - - while (PathIndex < PathCount) - { - uint32_t PathRangeCount = Min(128u, PathCount - PathIndex); - Work.ScheduleWork( - GetIOWorkerPool(), - [PathIndex, - PathRangeCount, - &PathsToCheck, - &Path, - &OutLocalFolderContent, - &CompletedPathCount, - &LocalFolderScanStats](std::atomic<bool>&) { - for (uint32_t PathRangeIndex = PathIndex; PathRangeIndex < PathIndex + PathRangeCount; PathRangeIndex++) - { - const std::filesystem::path& FilePath = PathsToCheck[PathRangeIndex]; - std::filesystem::path LocalFilePath = (Path / FilePath).make_preferred(); - if (TryGetFileProperties(LocalFilePath, - OutLocalFolderContent.RawSizes[PathRangeIndex], - OutLocalFolderContent.ModificationTicks[PathRangeIndex], - OutLocalFolderContent.Attributes[PathRangeIndex])) - { - 
OutLocalFolderContent.Paths[PathRangeIndex] = std::move(FilePath); - LocalFolderScanStats.FoundFileCount++; - LocalFolderScanStats.FoundFileByteCount += OutLocalFolderContent.RawSizes[PathRangeIndex]; - LocalFolderScanStats.AcceptedFileCount++; - LocalFolderScanStats.AcceptedFileByteCount += OutLocalFolderContent.RawSizes[PathRangeIndex]; - } - CompletedPathCount++; - } - }, - Work.DefaultErrorFunction()); - PathIndex += PathRangeCount; - } - Work.Wait(200, [&](bool, ptrdiff_t) { - // FilteredBytesHashed.Update(ChunkingStats.BytesHashed.load()); - std::string Details = fmt::format("{}/{} checked, {} found", - CompletedPathCount.load(), - PathCount, - LocalFolderScanStats.FoundFileCount.load()); - ProgressBar.UpdateState({.Task = "Checking files ", - .Details = Details, - .TotalCount = PathCount, - .RemainingCount = PathCount - CompletedPathCount.load()}, - false); - }); - ProgressBar.Finish(); - } - - uint32_t WritePathIndex = 0; - for (uint32_t ReadPathIndex = 0; ReadPathIndex < PathCount; ReadPathIndex++) - { - if (!OutLocalFolderContent.Paths[ReadPathIndex].empty()) - { - if (WritePathIndex < ReadPathIndex) - { - OutLocalFolderContent.Paths[WritePathIndex] = std::move(OutLocalFolderContent.Paths[ReadPathIndex]); - OutLocalFolderContent.RawSizes[WritePathIndex] = OutLocalFolderContent.RawSizes[ReadPathIndex]; - OutLocalFolderContent.Attributes[WritePathIndex] = OutLocalFolderContent.Attributes[ReadPathIndex]; - OutLocalFolderContent.ModificationTicks[WritePathIndex] = OutLocalFolderContent.ModificationTicks[ReadPathIndex]; - } - WritePathIndex++; - } - } - - OutLocalFolderContent.Paths.resize(WritePathIndex); - OutLocalFolderContent.RawSizes.resize(WritePathIndex); - OutLocalFolderContent.Attributes.resize(WritePathIndex); - OutLocalFolderContent.ModificationTicks.resize(WritePathIndex); + OutLocalFolderContent = GetValidFolderContent(LocalFolderScanStats, Path, PathsToCheck); } bool ScanContent = true; @@ -7927,7 +8350,8 @@ namespace { bool 
AllowPartialBlockRequests, bool WipeTargetFolder, bool PostDownloadVerify, - bool PrimeCacheOnly) + bool PrimeCacheOnly, + bool EnableScavenging) { ZEN_TRACE_CPU("DownloadFolder"); @@ -7977,7 +8401,7 @@ namespace { LocalContent = GetLocalContent(LocalFolderScanStats, ChunkingStats, Path, - ZenFolderPath, + ZenStateFilePath(ZenFolderPath), *ChunkController, RemoteContent, LocalFolderContent); @@ -8041,7 +8465,7 @@ namespace { NiceTimeSpanMs(DownloadTimer.GetElapsedTimeMs())); Stopwatch WriteStateTimer; - CbObject StateObject = CreateStateObject(BuildId, AllBuildParts, PartContents, LocalFolderContent); + CbObject StateObject = CreateStateObject(BuildId, AllBuildParts, PartContents, LocalFolderContent, Path); CreateDirectories(ZenStateFilePath(ZenFolderPath).parent_path()); TemporaryFile::SafeWriteFile(ZenStateFilePath(ZenFolderPath), StateObject.GetView()); ZEN_CONSOLE("Wrote local state in {}", NiceTimeSpanMs(WriteStateTimer.GetElapsedTimeMs())); @@ -8065,7 +8489,8 @@ namespace { RebuildFolderStateStatistics RebuildFolderStateStats; VerifyFolderStatistics VerifyFolderStats; - UpdateFolder(Storage, + UpdateFolder(SystemRootDir, + Storage, BuildId, Path, ZenFolderPath, @@ -8078,6 +8503,7 @@ namespace { AllowPartialBlockRequests, WipeTargetFolder, PrimeCacheOnly, + EnableScavenging, LocalFolderState, DiskStats, CacheMappingStats, @@ -8092,7 +8518,7 @@ namespace { VerifyFolder(RemoteContent, Path, PostDownloadVerify, VerifyFolderStats); Stopwatch WriteStateTimer; - CbObject StateObject = CreateStateObject(BuildId, AllBuildParts, PartContents, LocalFolderState); + CbObject StateObject = CreateStateObject(BuildId, AllBuildParts, PartContents, LocalFolderState, Path); CreateDirectories(ZenStateFilePath(ZenFolderPath).parent_path()); TemporaryFile::SafeWriteFile(ZenStateFilePath(ZenFolderPath), StateObject.GetView()); @@ -8618,6 +9044,12 @@ BuildsCommand::BuildsCommand() m_DownloadOptions .add_option("", "", "verify", "Enable post download verify of all tracked files", 
cxxopts::value(m_PostDownloadVerify), "<verify>"); + m_DownloadOptions.add_option("", + "", + "enable-scavenge", + "Enable scavenging of data from previouse download locations", + cxxopts::value(m_EnableScavenging), + "<scavenge>"); m_DownloadOptions.parse_positional({"local-path", "build-id", "build-part-name"}); m_DownloadOptions.positional_help("local-path build-id build-part-name"); @@ -8655,6 +9087,12 @@ BuildsCommand::BuildsCommand() "Allow request for partial chunk blocks. Defaults to true.", cxxopts::value(m_AllowPartialBlockRequests), "<allowpartialblockrequests>"); + m_TestOptions.add_option("", + "", + "enable-scavenge", + "Enable scavenging of data from previouse download locations", + cxxopts::value(m_EnableScavenging), + "<scavenge>"); m_TestOptions.parse_positional({"local-path"}); m_TestOptions.positional_help("local-path"); @@ -8702,6 +9140,12 @@ BuildsCommand::BuildsCommand() m_MultiTestDownloadOptions .add_option("", "l", "local-path", "Root file system folder used as base", cxxopts::value(m_Path), "<local-path>"); m_MultiTestDownloadOptions.add_option("", "", "build-ids", "Build Ids list separated by ','", cxxopts::value(m_BuildIds), "<ids>"); + m_MultiTestDownloadOptions.add_option("", + "", + "enable-scavenge", + "Enable scavenging of data from previouse download locations", + cxxopts::value(m_EnableScavenging), + "<scavenge>"); m_MultiTestDownloadOptions.parse_positional({"local-path"}); m_MultiTestDownloadOptions.positional_help("local-path"); } @@ -9451,7 +9895,8 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) m_AllowPartialBlockRequests && !m_PrimeCacheOnly, m_Clean, m_PostDownloadVerify, - m_PrimeCacheOnly); + m_PrimeCacheOnly, + m_EnableScavenging); if (false) { @@ -9639,7 +10084,8 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) m_AllowPartialBlockRequests, BuildIdString == m_BuildIds.front(), true, - false); + false, + m_EnableScavenging); if (AbortFlag) { 
ZEN_CONSOLE("Download cancelled"); @@ -9692,7 +10138,16 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) BuildStorage::Statistics StorageStats; BuildStorageCache::Statistics StorageCacheStats; - const std::filesystem::path DownloadPath = Path.parent_path() / (m_BuildPartName + "_test"); + const std::filesystem::path DownloadPath = Path.parent_path() / (m_BuildPartName + "_test"); + const std::filesystem::path DownloadPath2 = Path.parent_path() / (m_BuildPartName + "_test2"); + + auto ___ = MakeGuard([DownloadPath, DownloadPath2]() { + CleanDirectory(DownloadPath, true); + DeleteDirectories(DownloadPath); + CleanDirectory(DownloadPath2, true); + DeleteDirectories(DownloadPath2); + }); + const std::filesystem::path ZenFolderPath = m_ZenFolderPath.empty() ? DownloadPath / ZenFolderName : MakeSafeAbsolutePath(m_ZenFolderPath); @@ -9751,7 +10206,8 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) m_AllowPartialBlockRequests, true, true, - false); + false, + m_EnableScavenging); if (AbortFlag) { ZEN_CONSOLE("Download failed."); @@ -9774,7 +10230,8 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) m_AllowPartialBlockRequests, false, true, - false); + false, + m_EnableScavenging); if (AbortFlag) { ZEN_CONSOLE("Re-download failed. (identical target)"); @@ -9892,7 +10349,8 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) m_AllowPartialBlockRequests, false, true, - false); + false, + m_EnableScavenging); if (AbortFlag) { ZEN_CONSOLE("Re-download failed. 
(scrambled target)"); @@ -9943,7 +10401,8 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) m_AllowPartialBlockRequests, false, true, - false); + false, + m_EnableScavenging); if (AbortFlag) { ZEN_CONSOLE("Re-download failed."); @@ -9962,7 +10421,8 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) m_AllowPartialBlockRequests, false, true, - false); + false, + m_EnableScavenging); if (AbortFlag) { ZEN_CONSOLE("Re-download failed."); @@ -9981,7 +10441,28 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) m_AllowPartialBlockRequests, false, true, - false); + false, + m_EnableScavenging); + if (AbortFlag) + { + ZEN_CONSOLE("Re-download failed."); + return 11; + } + + ZEN_CONSOLE("\nDownload Build {}, Part {} ({}) to '{}' (original)", BuildId, BuildPartId, m_BuildPartName, DownloadPath2); + DownloadFolder(Storage, + BuildId, + {BuildPartId}, + {}, + DownloadPath2, + ZenFolderPath, + SystemRootDir, + m_AllowMultiparts, + m_AllowPartialBlockRequests, + false, + true, + false, + m_EnableScavenging); if (AbortFlag) { ZEN_CONSOLE("Re-download failed."); diff --git a/src/zen/cmds/builds_cmd.h b/src/zen/cmds/builds_cmd.h index 535d2b1d2..7e1e7d0ca 100644 --- a/src/zen/cmds/builds_cmd.h +++ b/src/zen/cmds/builds_cmd.h @@ -98,6 +98,7 @@ private: std::vector<std::string> m_BuildPartNames; std::vector<std::string> m_BuildPartIds; bool m_PostDownloadVerify = false; + bool m_EnableScavenging = true; cxxopts::Options m_DiffOptions{"diff", "Compare two local folders"}; std::string m_DiffPath; diff --git a/src/zenutil/chunkedcontent.cpp b/src/zenutil/chunkedcontent.cpp index 32ae2d94a..17b348f8d 100644 --- a/src/zenutil/chunkedcontent.cpp +++ b/src/zenutil/chunkedcontent.cpp @@ -305,7 +305,7 @@ FolderContent::UpdateState(const FolderContent& Rhs, std::vector<uint32_t>& OutP } FolderContent -GetUpdatedContent(const FolderContent& Old, const FolderContent& New, std::vector<std::filesystem::path>& 
OutDeletedPathIndexes) +GetUpdatedContent(const FolderContent& Old, const FolderContent& New, std::vector<std::filesystem::path>& OutDeletedPaths) { ZEN_TRACE_CPU("FolderContent::GetUpdatedContent"); @@ -342,7 +342,7 @@ GetUpdatedContent(const FolderContent& Old, const FolderContent& New, std::vecto } else { - OutDeletedPathIndexes.push_back(Old.Paths[OldPathIndex]); + OutDeletedPaths.push_back(Old.Paths[OldPathIndex]); } } return Result; |