aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorDan Engelbrecht <[email protected]>2025-04-08 18:57:25 +0200
committerGitHub Enterprise <[email protected]>2025-04-08 18:57:25 +0200
commit1ca32ca4718dad5bf1e2f381fe93b47d8159807b (patch)
tree8fa6a00132de012b7d28e8f17835f1cd9d1ce8f1 /src
parent5.6.4-pre0 (diff)
downloadzen-1ca32ca4718dad5bf1e2f381fe93b47d8159807b.tar.xz
zen-1ca32ca4718dad5bf1e2f381fe93b47d8159807b.zip
scavenge builds (#352)
- Improvement: `zen builds` now scavenges previous download locations for data to reduce download size, enabled by default, disable with `--enable-scavenge=false` - Bugfix: A file rename during download that failed at first but succeeded on a retry was sometimes still reported as an error
Diffstat (limited to 'src')
-rw-r--r--src/zen/cmds/builds_cmd.cpp851
-rw-r--r--src/zen/cmds/builds_cmd.h1
-rw-r--r--src/zenutil/chunkedcontent.cpp4
3 files changed, 669 insertions, 187 deletions
diff --git a/src/zen/cmds/builds_cmd.cpp b/src/zen/cmds/builds_cmd.cpp
index 0d2601bd8..b33ec659d 100644
--- a/src/zen/cmds/builds_cmd.cpp
+++ b/src/zen/cmds/builds_cmd.cpp
@@ -189,6 +189,7 @@ namespace {
for (size_t Retries = 0; Ec && Retries < 3; Retries++)
{
Sleep(100 + int(Retries * 50));
+ Ec.clear();
RenameFile(SourcePath, TargetPath, Ec);
}
if (Ec)
@@ -197,6 +198,23 @@ namespace {
}
}
+ bool IsFileWithRetry(const std::filesystem::path& Path)
+ {
+ std::error_code Ec;
+ bool Result = IsFile(Path, Ec);
+ for (size_t Retries = 0; Ec && Retries < 3; Retries++)
+ {
+ Sleep(100 + int(Retries * 50));
+ Ec.clear();
+ Result = IsFile(Path, Ec);
+ }
+ if (Ec)
+ {
+ zen::ThrowSystemError(Ec.value(), Ec.message());
+ }
+ return Result;
+ }
+
bool SetFileReadOnlyWithRetry(const std::filesystem::path& Path, bool ReadOnly)
{
std::error_code Ec;
@@ -204,7 +222,7 @@ namespace {
for (size_t Retries = 0; Ec && Retries < 3; Retries++)
{
Sleep(100 + int(Retries * 50));
- if (!IsFile(Path))
+ if (!IsFileWithRetry(Path))
{
return false;
}
@@ -225,7 +243,7 @@ namespace {
for (size_t Retries = 0; Ec && Retries < 3; Retries++)
{
Sleep(100 + int(Retries * 50));
- if (!IsFile(Path))
+ if (!IsFileWithRetry(Path))
{
return;
}
@@ -833,11 +851,23 @@ namespace {
uint64_t CacheSequenceHashesCount = 0;
uint64_t CacheSequenceHashesByteCount = 0;
+ uint64_t CacheScanElapsedWallTimeUs = 0;
+
uint32_t LocalPathsMatchingSequencesCount = 0;
uint64_t LocalPathsMatchingSequencesByteCount = 0;
uint64_t LocalChunkMatchingRemoteCount = 0;
uint64_t LocalChunkMatchingRemoteByteCount = 0;
+
+ uint64_t LocalScanElapsedWallTimeUs = 0;
+
+ uint32_t ScavengedPathsMatchingSequencesCount = 0;
+ uint64_t ScavengedPathsMatchingSequencesByteCount = 0;
+
+ uint64_t ScavengedChunkMatchingRemoteCount = 0;
+ uint64_t ScavengedChunkMatchingRemoteByteCount = 0;
+
+ uint64_t ScavengeElapsedWallTimeUs = 0;
};
struct DownloadStatistics
@@ -1295,9 +1325,11 @@ namespace {
CbObject CreateStateObject(const Oid& BuildId,
const std::vector<std::pair<Oid, std::string>>& AllBuildParts,
std::span<const ChunkedFolderContent> PartContents,
- const FolderContent& LocalFolderState)
+ const FolderContent& LocalFolderState,
+ const std::filesystem::path& LocalPath)
{
CbObjectWriter CurrentStateWriter;
+ CurrentStateWriter.AddString("path", (const char*)LocalPath.u8string().c_str());
CurrentStateWriter.BeginArray("builds"sv);
{
CurrentStateWriter.BeginObject();
@@ -1379,6 +1411,64 @@ namespace {
TemporaryFile::SafeWriteFile(WritePath, JsonPayload);
}
+ // A previously recorded download location that may be scavenged for data.
+ // Filled in by GetDownloadedStatePaths from the "statePath" and "path" fields of a download info file.
+ struct ScavengeSource
+ {
+ // Location of the saved state file; GetDownloadedStatePaths only returns entries where this passes IsFile
+ std::filesystem::path StateFilePath;
+ // Folder the download was written to; GetDownloadedStatePaths only returns entries where this passes IsDir
+ std::filesystem::path Path;
+ };
+
+ // Collects the download locations recorded under SystemRootDir/builds/downloads.
+ //
+ // Files whose name stem does not parse as an IoHash are skipped and left in place. Every other
+ // file is read as JSON; it is returned as a ScavengeSource only if the JSON parses, its
+ // "statePath" field passes IsFile and its "path" field passes IsDir. A hash-named file failing
+ // any of these checks is removed from disk (errors from the removal are ignored), and a warning
+ // is logged when the failure was a JSON parse error.
+ std::vector<ScavengeSource> GetDownloadedStatePaths(const std::filesystem::path& SystemRootDir)
+ {
+     std::vector<ScavengeSource> Sources;
+
+     DirectoryContent DownloadsDir;
+     GetDirectoryContent(SystemRootDir / "builds" / "downloads", DirectoryContentFlags::IncludeFiles, DownloadsDir);
+
+     for (const std::filesystem::path& InfoFilePath : DownloadsDir.Files)
+     {
+         IoHash InfoFileHash;
+         if (!IoHash::TryParse(InfoFilePath.stem().string(), InfoFileHash))
+         {
+             continue;
+         }
+
+         // Validation runs in its own scope so the file buffer and parsed object are released
+         // before the info file is (possibly) removed below
+         const bool KeepInfoFile = [&]() -> bool {
+             IoBuffer         InfoJsonBuffer = ReadFile(InfoFilePath).Flatten();
+             std::string_view InfoJson(reinterpret_cast<const char*>(InfoJsonBuffer.GetData()), InfoJsonBuffer.GetSize());
+             std::string      ParseError;
+             CbObject         DownloadInfo = LoadCompactBinaryFromJson(InfoJson, ParseError).AsObject();
+             if (!ParseError.empty())
+             {
+                 ZEN_WARN("Invalid download state file at {}. '{}'", InfoFilePath, ParseError);
+                 return false;
+             }
+
+             std::filesystem::path StateFilePath = DownloadInfo["statePath"].AsU8String();
+             if (!IsFile(StateFilePath))
+             {
+                 return false;
+             }
+
+             std::filesystem::path DownloadPath = DownloadInfo["path"].AsU8String();
+             if (!IsDir(DownloadPath))
+             {
+                 return false;
+             }
+
+             Sources.push_back({.StateFilePath = std::move(StateFilePath), .Path = std::move(DownloadPath)});
+             return true;
+         }();
+
+         if (!KeepInfoFile)
+         {
+             std::error_code IgnoredEc;
+             std::filesystem::remove(InfoFilePath, IgnoredEc);
+         }
+     }
+     return Sources;
+ }
+
class BufferedOpenFile
{
public:
@@ -4842,6 +4932,7 @@ namespace {
IoBuffer&& CompressedPart,
DiskStatistics& DiskStats)
{
+ ZEN_TRACE_CPU("WriteCompressedChunk");
auto ChunkHashToChunkIndexIt = RemoteLookup.ChunkHashToChunkIndex.find(ChunkHash);
ZEN_ASSERT(ChunkHashToChunkIndexIt != RemoteLookup.ChunkHashToChunkIndex.end());
const uint32_t ChunkIndex = ChunkHashToChunkIndexIt->second;
@@ -5000,7 +5091,143 @@ namespace {
Work.DefaultErrorFunction());
};
- void UpdateFolder(StorageInstance& Storage,
+ // Loads a saved state file and extracts the folder state and chunked content it describes.
+ //
+ // StateFilePath        - compact binary state file to load
+ // OutLocalFolderState  - passed to ReadStateObject, which fills it in
+ // OutLocalContent      - receives the single saved part content, or the merge of all saved
+ //                        part contents when the state holds more than one
+ //
+ // Returns true only when the state object loaded, ReadStateObject accepted it and at least one
+ // part content was present. Exceptions are caught and reported on the console, and the function
+ // then returns false. OutLocalFolderState is handed to ReadStateObject before the final checks,
+ // so it may have been written even when false is returned.
+ bool ReadStateFile(const std::filesystem::path& StateFilePath,
+                    FolderContent& OutLocalFolderState,
+                    ChunkedFolderContent& OutLocalContent)
+ {
+     ZEN_TRACE_CPU("ReadStateFile");
+     try
+     {
+         CbObject CurrentStateObject = LoadCompactBinaryObject(StateFilePath).Object;
+         if (!CurrentStateObject)
+         {
+             return false;
+         }
+
+         Oid                               CurrentBuildId;
+         std::vector<Oid>                  SavedBuildPartIds;
+         std::vector<std::string>          SavedBuildPartsNames;
+         std::vector<ChunkedFolderContent> SavedPartContents;
+         if (!ReadStateObject(CurrentStateObject,
+                              CurrentBuildId,
+                              SavedBuildPartIds,
+                              SavedBuildPartsNames,
+                              SavedPartContents,
+                              OutLocalFolderState))
+         {
+             return false;
+         }
+
+         if (SavedPartContents.empty())
+         {
+             return false;
+         }
+
+         if (SavedPartContents.size() == 1)
+         {
+             OutLocalContent = std::move(SavedPartContents[0]);
+         }
+         else
+         {
+             OutLocalContent =
+                 MergeChunkedFolderContents(SavedPartContents[0],
+                                            std::span<const ChunkedFolderContent>(SavedPartContents).subspan(1));
+         }
+         return true;
+     }
+     catch (const std::exception& Ex)
+     {
+         ZEN_CONSOLE("Failed reading state file {}, falling back to scanning. Reason: {}", StateFilePath, Ex.what());
+     }
+     return false;
+ }
+
+ // Probes each relative path in PathsToCheck under the root folder Path and returns a
+ // FolderContent containing only the entries for which TryGetFileProperties succeeded, in the
+ // same relative order as PathsToCheck.
+ //
+ // LocalFolderScanStats - Found/Accepted file and byte counters are incremented for every path
+ //                        that was found; ElapsedWallTimeUS is set when the scan scope exits
+ // Path                 - root folder each entry of PathsToCheck is appended to
+ // PathsToCheck         - relative paths to probe
+ //
+ // The probing is scheduled on GetIOWorkerPool() in ranges of up to 128 paths while a progress
+ // bar is updated from the waiting thread. Each work item only writes the result slots of its
+ // own index range.
+ FolderContent GetValidFolderContent(GetFolderContentStatistics& LocalFolderScanStats,
+                                     const std::filesystem::path& Path,
+                                     std::span<const std::filesystem::path> PathsToCheck)
+ {
+     ZEN_TRACE_CPU("GetValidFolderContent");
+     FolderContent Result;
+     const uint32_t PathCount = gsl::narrow<uint32_t>(PathsToCheck.size());
+
+     // One slot per candidate; slots of paths that are not found keep an empty path and are
+     // compacted away after the scan
+     Result.Paths.resize(PathCount);
+     Result.RawSizes.resize(PathCount);
+     Result.Attributes.resize(PathCount);
+     Result.ModificationTicks.resize(PathCount);
+
+     {
+         Stopwatch Timer;
+         auto _ = MakeGuard([&LocalFolderScanStats, &Timer]() { LocalFolderScanStats.ElapsedWallTimeUS = Timer.GetElapsedTimeUs(); });
+
+         ProgressBar ProgressBar(UsePlainProgress);
+
+         ParallellWork Work(AbortFlag);
+         std::atomic<uint64_t> CompletedPathCount = 0;
+         uint32_t PathIndex = 0;
+
+         while (PathIndex < PathCount)
+         {
+             uint32_t PathRangeCount = Min(128u, PathCount - PathIndex);
+             Work.ScheduleWork(
+                 GetIOWorkerPool(),
+                 [PathIndex, PathRangeCount, &PathsToCheck, &Path, &Result, &CompletedPathCount, &LocalFolderScanStats](
+                     std::atomic<bool>&) {
+                     for (uint32_t PathRangeIndex = PathIndex; PathRangeIndex < PathIndex + PathRangeCount; PathRangeIndex++)
+                     {
+                         const std::filesystem::path& FilePath = PathsToCheck[PathRangeIndex];
+                         std::filesystem::path LocalFilePath = (Path / FilePath).make_preferred();
+                         if (TryGetFileProperties(LocalFilePath,
+                                                  Result.RawSizes[PathRangeIndex],
+                                                  Result.ModificationTicks[PathRangeIndex],
+                                                  Result.Attributes[PathRangeIndex]))
+                         {
+                             // FilePath refers to a const element of the caller's span, so it is copied
+                             Result.Paths[PathRangeIndex] = FilePath;
+                             LocalFolderScanStats.FoundFileCount++;
+                             LocalFolderScanStats.FoundFileByteCount += Result.RawSizes[PathRangeIndex];
+                             LocalFolderScanStats.AcceptedFileCount++;
+                             LocalFolderScanStats.AcceptedFileByteCount += Result.RawSizes[PathRangeIndex];
+                         }
+                         CompletedPathCount++;
+                     }
+                 },
+                 Work.DefaultErrorFunction());
+             PathIndex += PathRangeCount;
+         }
+         Work.Wait(200, [&](bool, ptrdiff_t) {
+             std::string Details = fmt::format("{}/{} checked, {} found",
+                                               CompletedPathCount.load(),
+                                               PathCount,
+                                               LocalFolderScanStats.FoundFileCount.load());
+             ProgressBar.UpdateState({.Task = "Checking files ",
+                                      .Details = Details,
+                                      .TotalCount = PathCount,
+                                      .RemainingCount = PathCount - CompletedPathCount.load()},
+                                     false);
+         });
+         ProgressBar.Finish();
+     }
+
+     // Compact the found entries to the front, preserving their order. An empty path marks a
+     // slot that was not found (an empty entry in PathsToCheck is therefore dropped as well).
+     uint32_t WritePathIndex = 0;
+     for (uint32_t ReadPathIndex = 0; ReadPathIndex < PathCount; ReadPathIndex++)
+     {
+         if (!Result.Paths[ReadPathIndex].empty())
+         {
+             if (WritePathIndex < ReadPathIndex)
+             {
+                 Result.Paths[WritePathIndex] = std::move(Result.Paths[ReadPathIndex]);
+                 Result.RawSizes[WritePathIndex] = Result.RawSizes[ReadPathIndex];
+                 Result.Attributes[WritePathIndex] = Result.Attributes[ReadPathIndex];
+                 Result.ModificationTicks[WritePathIndex] = Result.ModificationTicks[ReadPathIndex];
+             }
+             WritePathIndex++;
+         }
+     }
+
+     Result.Paths.resize(WritePathIndex);
+     Result.RawSizes.resize(WritePathIndex);
+     Result.Attributes.resize(WritePathIndex);
+     Result.ModificationTicks.resize(WritePathIndex);
+     return Result;
+ }
+
+ void UpdateFolder(const std::filesystem::path& SystemRootDir,
+ StorageInstance& Storage,
const Oid& BuildId,
const std::filesystem::path& Path,
const std::filesystem::path& ZenFolderPath,
@@ -5013,6 +5240,7 @@ namespace {
bool AllowPartialBlockRequests,
bool WipeTargetFolder,
bool PrimeCacheOnly,
+ bool EnableScavenging,
FolderContent& OutLocalFolderState,
DiskStatistics& DiskStats,
CacheMappingStatistics& CacheMappingStats,
@@ -5046,6 +5274,8 @@ namespace {
{
ZEN_TRACE_CPU("UpdateFolder_CheckChunkCache");
+ Stopwatch CacheTimer;
+
DirectoryContent CacheDirContent;
GetDirectoryContent(CacheFolderPath,
DirectoryContentFlags::IncludeFiles | DirectoryContentFlags::IncludeFileSizes,
@@ -5091,6 +5321,7 @@ namespace {
}
RemoveFileWithRetry(CacheDirContent.Files[Index]);
}
+ CacheMappingStats.CacheScanElapsedWallTimeUs += CacheTimer.GetElapsedTimeUs();
}
tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> CachedBlocksFound;
@@ -5098,6 +5329,8 @@ namespace {
{
ZEN_TRACE_CPU("UpdateFolder_CheckBlockCache");
+ Stopwatch CacheTimer;
+
tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> AllBlockSizes;
AllBlockSizes.reserve(BlockDescriptions.size());
for (uint32_t BlockIndex = 0; BlockIndex < BlockDescriptions.size(); BlockIndex++)
@@ -5137,54 +5370,62 @@ namespace {
}
RemoveFileWithRetry(BlockDirContent.Files[Index]);
}
+
+ CacheMappingStats.CacheScanElapsedWallTimeUs += CacheTimer.GetElapsedTimeUs();
}
- std::vector<uint32_t> LocalPathIndexesMatchingSequenceIndexes;
+ std::vector<uint32_t> LocalPathIndexesMatchingSequenceIndexes;
+ tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> SequenceIndexesLeftToFindToRemoteIndex;
if (!PrimeCacheOnly)
{
// Pick up all whole files we can use from current local state
- ZEN_TRACE_CPU("UpdateFolder_CheckLocalChunks");
+ ZEN_TRACE_CPU("UpdateFolder_GetLocalSequences");
+
+ Stopwatch LocalTimer;
+
for (uint32_t RemoteSequenceIndex = 0; RemoteSequenceIndex < RemoteContent.ChunkedContent.SequenceRawHashes.size();
RemoteSequenceIndex++)
{
- const IoHash& RemoteSequenceRawHash = RemoteContent.ChunkedContent.SequenceRawHashes[RemoteSequenceIndex];
+ const IoHash& RemoteSequenceRawHash = RemoteContent.ChunkedContent.SequenceRawHashes[RemoteSequenceIndex];
+ const uint32_t RemotePathIndex = GetFirstPathIndexForSeqeuenceIndex(RemoteLookup, RemoteSequenceIndex);
+ const uint64_t RemoteRawSize = RemoteContent.RawSizes[RemotePathIndex];
if (auto CacheSequenceIt = CachedSequenceHashesFound.find(RemoteSequenceRawHash);
CacheSequenceIt != CachedSequenceHashesFound.end())
{
- // const uint32_t RemoteSequenceIndex = CacheSequenceIt->second;
- // const uint32_t RemotePathIndex = GetFirstPathIndexForSeqeuenceIndex(RemoteLookup, RemoteSequenceIndex);
- // RemoteSequenceByteCountFoundInCache += RemoteContent.RawSizes[RemotePathIndex];
const std::filesystem::path CacheFilePath = GetFinalChunkedSequenceFileName(CacheFolderPath, RemoteSequenceRawHash);
ZEN_ASSERT_SLOW(IsFile(CacheFilePath));
+ ZEN_CONSOLE_VERBOSE("Found sequence {} at {} ({})", RemoteSequenceRawHash, CacheFilePath, NiceBytes(RemoteRawSize));
}
else if (auto CacheChunkIt = CachedChunkHashesFound.find(RemoteSequenceRawHash);
CacheChunkIt != CachedChunkHashesFound.end())
{
- // const uint32_t RemoteChunkIndex = CacheChunkIt->second;
- // const uint32_t RemotePathIndex = GetFirstPathIndexForSeqeuenceIndex(RemoteLookup, RemoteSequenceIndex);
- // RemoteSequenceByteCountFoundInCache += RemoteContent.RawSizes[RemotePathIndex];
const std::filesystem::path CacheFilePath = GetFinalChunkedSequenceFileName(CacheFolderPath, RemoteSequenceRawHash);
ZEN_ASSERT_SLOW(IsFile(CacheFilePath));
+ ZEN_CONSOLE_VERBOSE("Found chunk {} at {} ({})", RemoteSequenceRawHash, CacheFilePath, NiceBytes(RemoteRawSize));
}
else if (auto It = LocalLookup.RawHashToSequenceIndex.find(RemoteSequenceRawHash);
It != LocalLookup.RawHashToSequenceIndex.end())
{
- const uint32_t LocalSequenceIndex = It->second;
- const uint32_t LocalPathIndex = GetFirstPathIndexForSeqeuenceIndex(LocalLookup, LocalSequenceIndex);
- ZEN_ASSERT_SLOW(IsFile((Path / LocalContent.Paths[LocalPathIndex]).make_preferred()));
- uint64_t RawSize = LocalContent.RawSizes[LocalPathIndex];
+ const uint32_t LocalSequenceIndex = It->second;
+ const uint32_t LocalPathIndex = GetFirstPathIndexForSeqeuenceIndex(LocalLookup, LocalSequenceIndex);
+ const std::filesystem::path LocalFilePath = (Path / LocalContent.Paths[LocalPathIndex]).make_preferred();
+ ZEN_ASSERT_SLOW(IsFile(LocalFilePath));
LocalPathIndexesMatchingSequenceIndexes.push_back(LocalPathIndex);
CacheMappingStats.LocalPathsMatchingSequencesCount++;
- CacheMappingStats.LocalPathsMatchingSequencesByteCount += RawSize;
+ CacheMappingStats.LocalPathsMatchingSequencesByteCount += RemoteRawSize;
+ ZEN_CONSOLE_VERBOSE("Found sequence {} at {} ({})", RemoteSequenceRawHash, LocalFilePath, NiceBytes(RemoteRawSize));
}
else
{
// We must write the sequence
const uint32_t ChunkCount = RemoteContent.ChunkedContent.ChunkCounts[RemoteSequenceIndex];
SequenceIndexChunksLeftToWriteCounters[RemoteSequenceIndex] = ChunkCount;
+ SequenceIndexesLeftToFindToRemoteIndex.insert({RemoteSequenceRawHash, RemoteSequenceIndex});
}
}
+
+ CacheMappingStats.LocalScanElapsedWallTimeUs += LocalTimer.GetElapsedTimeUs();
}
else
{
@@ -5195,10 +5436,138 @@ namespace {
SequenceIndexChunksLeftToWriteCounters[RemoteSequenceIndex] = ChunkCount;
}
}
+
+ std::vector<ChunkedFolderContent> ScavengedContents;
+ std::vector<ChunkedContentLookup> ScavengedLookups;
+ std::vector<std::filesystem::path> ScavengedPaths;
+
+ // One whole-file copy planned from a scavenged download location.
+ // Every field defaults to an all-ones "unset" sentinel of its own width.
+ struct ScavengeCopyOperation
+ {
+     // Index into ScavengedContents / ScavengedPaths identifying the scavenged location
+     uint32_t ScavengedContentIndex = (uint32_t)-1;
+     // Index into that scavenged content's Paths identifying the source file
+     uint32_t ScavengedPathIndex = (uint32_t)-1;
+     // Index into RemoteContent.ChunkedContent.SequenceRawHashes of the sequence the file provides
+     uint32_t RemoteSequenceIndex = (uint32_t)-1;
+     // Size of the sequence in bytes; added to BytesToWrite and the disk write statistics.
+     // The sentinel is cast at the field's own 64-bit width; (uint32_t)-1 would give 0xFFFFFFFF.
+     uint64_t RawSize = (uint64_t)-1;
+ };
+
+ std::vector<ScavengeCopyOperation> ScavengeCopyOperations;
+ uint64_t ScavengedPathsCount = 0;
+
+ if (!PrimeCacheOnly && EnableScavenging)
+ {
+ ZEN_TRACE_CPU("UpdateFolder_GetScavengedSequences");
+
+ Stopwatch ScavengeTimer;
+
+ if (!SequenceIndexesLeftToFindToRemoteIndex.empty())
+ {
+ std::vector<ScavengeSource> ScavengeSources = GetDownloadedStatePaths(SystemRootDir);
+ auto EraseIt = std::remove_if(ScavengeSources.begin(), ScavengeSources.end(), [&Path](const ScavengeSource& Source) {
+ return Source.Path == Path;
+ });
+ ScavengeSources.erase(EraseIt, ScavengeSources.end());
+
+ const size_t ScavengePathCount = ScavengeSources.size();
+
+ ScavengedContents.resize(ScavengePathCount);
+ ScavengedLookups.resize(ScavengePathCount);
+ ScavengedPaths.resize(ScavengePathCount);
+ for (size_t ScavengeIndex = 0; ScavengeIndex < ScavengePathCount; ScavengeIndex++)
+ {
+ const ScavengeSource& Source = ScavengeSources[ScavengeIndex];
+
+ ChunkedFolderContent& ScavengedLocalContent = ScavengedContents[ScavengeIndex];
+ std::filesystem::path& ScavengePath = ScavengedPaths[ScavengeIndex];
+ FolderContent LocalFolderState;
+ if (ReadStateFile(Source.StateFilePath, LocalFolderState, ScavengedLocalContent))
+ {
+ GetFolderContentStatistics ScavengedFolderScanStats;
+
+ FolderContent ValidFolderContent =
+ GetValidFolderContent(ScavengedFolderScanStats, Source.Path, LocalFolderState.Paths);
+
+ if (!LocalFolderState.AreKnownFilesEqual(ValidFolderContent))
+ {
+ std::vector<std::filesystem::path> DeletedPaths;
+ FolderContent UpdatedContent = GetUpdatedContent(LocalFolderState, ValidFolderContent, DeletedPaths);
+
+ // If the files are modified since the state was saved we ignore the files since we don't want to incur the
+ // cost of scanning/hashing scavenged files
+ DeletedPaths.insert(DeletedPaths.end(), UpdatedContent.Paths.begin(), UpdatedContent.Paths.end());
+ if (!DeletedPaths.empty())
+ {
+ ScavengedLocalContent = DeletePathsFromChunkedContent(ScavengedLocalContent, DeletedPaths);
+ }
+ }
+
+ if (!ScavengedLocalContent.Paths.empty())
+ {
+ ScavengePath = Source.Path;
+ }
+ }
+ }
+
+ for (uint32_t ScavengedContentIndex = 0;
+ ScavengedContentIndex < ScavengedContents.size() && (!SequenceIndexesLeftToFindToRemoteIndex.empty());
+ ScavengedContentIndex++)
+ {
+ const std::filesystem::path& ScavengePath = ScavengedPaths[ScavengedContentIndex];
+ if (!ScavengePath.empty())
+ {
+ const ChunkedFolderContent& ScavengedLocalContent = ScavengedContents[ScavengedContentIndex];
+ ChunkedContentLookup& ScavengedLookup = ScavengedLookups[ScavengedContentIndex];
+ ScavengedLookup = BuildChunkedContentLookup(ScavengedLocalContent);
+
+ for (uint32_t ScavengedSequenceIndex = 0;
+ ScavengedSequenceIndex < ScavengedLocalContent.ChunkedContent.SequenceRawHashes.size();
+ ScavengedSequenceIndex++)
+ {
+ const IoHash& SequenceRawHash = ScavengedLocalContent.ChunkedContent.SequenceRawHashes[ScavengedSequenceIndex];
+ if (auto It = SequenceIndexesLeftToFindToRemoteIndex.find(SequenceRawHash);
+ It != SequenceIndexesLeftToFindToRemoteIndex.end())
+ {
+ const uint32_t RemoteSequenceIndex = It->second;
+ const uint64_t RawSize =
+ RemoteContent.RawSizes[RemoteLookup.SequenceIndexFirstPathIndex[RemoteSequenceIndex]];
+ ZEN_ASSERT(RawSize > 0);
+
+ const uint32_t ScavengedPathIndex = ScavengedLookup.SequenceIndexFirstPathIndex[ScavengedSequenceIndex];
+ ZEN_ASSERT_SLOW(IsFile((ScavengePath / ScavengedLocalContent.Paths[ScavengedPathIndex]).make_preferred()));
+
+ ScavengeCopyOperations.push_back({.ScavengedContentIndex = ScavengedContentIndex,
+ .ScavengedPathIndex = ScavengedPathIndex,
+ .RemoteSequenceIndex = RemoteSequenceIndex,
+ .RawSize = RawSize});
+
+ SequenceIndexesLeftToFindToRemoteIndex.erase(SequenceRawHash);
+ SequenceIndexChunksLeftToWriteCounters[RemoteSequenceIndex] = 0;
+
+ CacheMappingStats.ScavengedPathsMatchingSequencesCount++;
+ CacheMappingStats.ScavengedPathsMatchingSequencesByteCount += RawSize;
+ }
+ }
+ ScavengedPathsCount++;
+ }
+ }
+ }
+ CacheMappingStats.ScavengeElapsedWallTimeUs += ScavengeTimer.GetElapsedTimeUs();
+ }
+
+ uint32_t RemainingChunkCount = 0;
+ for (uint32_t RemoteChunkIndex = 0; RemoteChunkIndex < RemoteContent.ChunkedContent.ChunkHashes.size(); RemoteChunkIndex++)
+ {
+ uint64_t ChunkWriteCount = GetChunkWriteCount(SequenceIndexChunksLeftToWriteCounters, RemoteLookup, RemoteChunkIndex);
+ if (ChunkWriteCount > 0)
+ {
+ RemainingChunkCount++;
+ }
+ }
+
// Pick up all chunks in current local state
+ // TODO: Rename to LocalStateCopyData
struct CacheCopyData
{
- uint32_t LocalSequenceIndex = (uint32_t)-1;
+ uint32_t ScavengeSourceIndex = (uint32_t)-1;
+ uint32_t SourceSequenceIndex = (uint32_t)-1;
std::vector<const ChunkedContentLookup::ChunkSequenceLocation*> TargetChunkLocationPtrs;
struct ChunkTarget
{
@@ -5216,7 +5585,10 @@ namespace {
{
ZEN_TRACE_CPU("UpdateFolder_GetLocalChunks");
- for (uint32_t LocalSequenceIndex = 0; LocalSequenceIndex < LocalContent.ChunkedContent.SequenceRawHashes.size();
+ Stopwatch LocalTimer;
+
+ for (uint32_t LocalSequenceIndex = 0;
+ LocalSequenceIndex < LocalContent.ChunkedContent.SequenceRawHashes.size() && (RemainingChunkCount > 0);
LocalSequenceIndex++)
{
const IoHash& LocalSequenceRawHash = LocalContent.ChunkedContent.SequenceRawHashes[LocalSequenceIndex];
@@ -5254,7 +5626,8 @@ namespace {
{
RawHashToCacheCopyDataIndex.insert_or_assign(LocalSequenceRawHash, CacheCopyDatas.size());
CacheCopyDatas.push_back(
- CacheCopyData{.LocalSequenceIndex = LocalSequenceIndex,
+ CacheCopyData{.ScavengeSourceIndex = (uint32_t)-1,
+ .SourceSequenceIndex = LocalSequenceIndex,
.TargetChunkLocationPtrs = ChunkTargetPtrs,
.ChunkTargets = std::vector<CacheCopyData::ChunkTarget>{Target}});
}
@@ -5270,13 +5643,15 @@ namespace {
{
RawHashToCacheCopyDataIndex.insert_or_assign(LocalSequenceRawHash, CacheCopyDatas.size());
CacheCopyDatas.push_back(
- CacheCopyData{.LocalSequenceIndex = LocalSequenceIndex,
+ CacheCopyData{.ScavengeSourceIndex = (uint32_t)-1,
+ .SourceSequenceIndex = LocalSequenceIndex,
.TargetChunkLocationPtrs = ChunkTargetPtrs,
.ChunkTargets = std::vector<CacheCopyData::ChunkTarget>{Target}});
}
CacheMappingStats.LocalChunkMatchingRemoteCount++;
CacheMappingStats.LocalChunkMatchingRemoteByteCount += LocalChunkRawSize;
RemoteChunkIndexNeedsCopyFromLocalFileFlags[RemoteChunkIndex] = true;
+ RemainingChunkCount--;
}
}
}
@@ -5284,26 +5659,130 @@ namespace {
}
}
}
+ CacheMappingStats.LocalScanElapsedWallTimeUs += LocalTimer.GetElapsedTimeUs();
+ }
+
+ if (!PrimeCacheOnly)
+ {
+ ZEN_TRACE_CPU("UpdateFolder_GetScavengeChunks");
+
+ Stopwatch ScavengeTimer;
+
+ for (uint32_t ScavengedContentIndex = 0; ScavengedContentIndex < ScavengedContents.size() && (RemainingChunkCount > 0);
+ ScavengedContentIndex++)
+ {
+ const ChunkedFolderContent& ScavengedContent = ScavengedContents[ScavengedContentIndex];
+ // const std::filesystem::path& ScavengedPath = ScavengedPaths[ScavengedContentIndex];
+ const ChunkedContentLookup& ScavengedLookup = ScavengedLookups[ScavengedContentIndex];
+
+ for (uint32_t ScavengedSequenceIndex = 0;
+ ScavengedSequenceIndex < ScavengedContent.ChunkedContent.SequenceRawHashes.size() && (RemainingChunkCount > 0);
+ ScavengedSequenceIndex++)
+ {
+ const IoHash& ScavengedSequenceRawHash = ScavengedContent.ChunkedContent.SequenceRawHashes[ScavengedSequenceIndex];
+ const uint32_t ScavengedOrderOffset = ScavengedLookup.SequenceIndexChunkOrderOffset[ScavengedSequenceIndex];
+
+ {
+ uint64_t SourceOffset = 0;
+ const uint32_t ScavengedChunkCount = ScavengedContent.ChunkedContent.ChunkCounts[ScavengedSequenceIndex];
+ for (uint32_t ScavengedOrderIndex = 0; ScavengedOrderIndex < ScavengedChunkCount; ScavengedOrderIndex++)
+ {
+ const uint32_t ScavengedChunkIndex =
+ ScavengedContent.ChunkedContent.ChunkOrders[ScavengedOrderOffset + ScavengedOrderIndex];
+ const IoHash& ScavengedChunkHash = ScavengedContent.ChunkedContent.ChunkHashes[ScavengedChunkIndex];
+ const uint64_t ScavengedChunkRawSize = ScavengedContent.ChunkedContent.ChunkRawSizes[ScavengedChunkIndex];
+
+ if (auto RemoteChunkIt = RemoteLookup.ChunkHashToChunkIndex.find(ScavengedChunkHash);
+ RemoteChunkIt != RemoteLookup.ChunkHashToChunkIndex.end())
+ {
+ const uint32_t RemoteChunkIndex = RemoteChunkIt->second;
+ if (!RemoteChunkIndexNeedsCopyFromLocalFileFlags[RemoteChunkIndex])
+ {
+ std::vector<const ChunkedContentLookup::ChunkSequenceLocation*> ChunkTargetPtrs =
+ GetRemainingChunkTargets(SequenceIndexChunksLeftToWriteCounters, RemoteLookup, RemoteChunkIndex);
+
+ if (!ChunkTargetPtrs.empty())
+ {
+ CacheCopyData::ChunkTarget Target = {
+ .TargetChunkLocationCount = gsl::narrow<uint32_t>(ChunkTargetPtrs.size()),
+ .RemoteChunkIndex = RemoteChunkIndex,
+ .CacheFileOffset = SourceOffset};
+ if (auto CopySourceIt = RawHashToCacheCopyDataIndex.find(ScavengedSequenceRawHash);
+ CopySourceIt != RawHashToCacheCopyDataIndex.end())
+ {
+ CacheCopyData& Data = CacheCopyDatas[CopySourceIt->second];
+ if (Data.TargetChunkLocationPtrs.size() > 1024)
+ {
+ RawHashToCacheCopyDataIndex.insert_or_assign(ScavengedSequenceRawHash,
+ CacheCopyDatas.size());
+ CacheCopyDatas.push_back(
+ CacheCopyData{.ScavengeSourceIndex = ScavengedContentIndex,
+ .SourceSequenceIndex = ScavengedSequenceIndex,
+ .TargetChunkLocationPtrs = ChunkTargetPtrs,
+ .ChunkTargets = std::vector<CacheCopyData::ChunkTarget>{Target}});
+ }
+ else
+ {
+ Data.TargetChunkLocationPtrs.insert(Data.TargetChunkLocationPtrs.end(),
+ ChunkTargetPtrs.begin(),
+ ChunkTargetPtrs.end());
+ Data.ChunkTargets.push_back(Target);
+ }
+ }
+ else
+ {
+ RawHashToCacheCopyDataIndex.insert_or_assign(ScavengedSequenceRawHash, CacheCopyDatas.size());
+ CacheCopyDatas.push_back(
+ CacheCopyData{.ScavengeSourceIndex = ScavengedContentIndex,
+ .SourceSequenceIndex = ScavengedSequenceIndex,
+ .TargetChunkLocationPtrs = ChunkTargetPtrs,
+ .ChunkTargets = std::vector<CacheCopyData::ChunkTarget>{Target}});
+ }
+ CacheMappingStats.ScavengedChunkMatchingRemoteCount++;
+ CacheMappingStats.ScavengedChunkMatchingRemoteByteCount += ScavengedChunkRawSize;
+ RemoteChunkIndexNeedsCopyFromLocalFileFlags[RemoteChunkIndex] = true;
+ RemainingChunkCount--;
+ }
+ }
+ }
+ SourceOffset += ScavengedChunkRawSize;
+ }
+ }
+ }
+ }
+ CacheMappingStats.ScavengeElapsedWallTimeUs += ScavengeTimer.GetElapsedTimeUs();
}
if (!CachedSequenceHashesFound.empty() || !CachedChunkHashesFound.empty() || !CachedBlocksFound.empty())
{
- ZEN_CONSOLE("Download cache: Found {} ({}) chunk sequences, {} ({}) chunks, {} ({}) blocks.",
+ ZEN_CONSOLE("Download cache: Found {} ({}) chunk sequences, {} ({}) chunks, {} ({}) blocks in {}",
CachedSequenceHashesFound.size(),
NiceBytes(CacheMappingStats.CacheSequenceHashesByteCount),
CachedChunkHashesFound.size(),
NiceBytes(CacheMappingStats.CacheChunkByteCount),
CachedBlocksFound.size(),
- NiceBytes(CacheMappingStats.CacheBlocksByteCount));
+ NiceBytes(CacheMappingStats.CacheBlocksByteCount),
+ NiceTimeSpanMs(CacheMappingStats.CacheScanElapsedWallTimeUs / 1000));
}
if (!LocalPathIndexesMatchingSequenceIndexes.empty() || CacheMappingStats.LocalChunkMatchingRemoteCount > 0)
{
- ZEN_CONSOLE("Local state : Found {} ({}) chunk sequences, {} ({}) chunks",
+ ZEN_CONSOLE("Local state : Found {} ({}) chunk sequences, {} ({}) chunks in {}",
LocalPathIndexesMatchingSequenceIndexes.size(),
NiceBytes(CacheMappingStats.LocalPathsMatchingSequencesByteCount),
CacheMappingStats.LocalChunkMatchingRemoteCount,
- NiceBytes(CacheMappingStats.LocalChunkMatchingRemoteByteCount));
+ NiceBytes(CacheMappingStats.LocalChunkMatchingRemoteByteCount),
+ NiceTimeSpanMs(CacheMappingStats.LocalScanElapsedWallTimeUs / 1000));
+ }
+ if (CacheMappingStats.ScavengedPathsMatchingSequencesCount > 0 || CacheMappingStats.ScavengedChunkMatchingRemoteCount > 0)
+ {
+ ZEN_CONSOLE("Scavenge of {} paths found {} ({}) chunk sequences, {} ({}) chunks in {}",
+ ScavengedPathsCount,
+ CacheMappingStats.ScavengedPathsMatchingSequencesCount,
+ NiceBytes(CacheMappingStats.ScavengedPathsMatchingSequencesByteCount),
+ CacheMappingStats.ScavengedChunkMatchingRemoteCount,
+ NiceBytes(CacheMappingStats.ScavengedChunkMatchingRemoteByteCount),
+ NiceTimeSpanMs(CacheMappingStats.ScavengeElapsedWallTimeUs / 1000));
}
uint64_t BytesToWrite = 0;
@@ -5321,6 +5800,11 @@ namespace {
}
}
+ for (const ScavengeCopyOperation& ScavengeCopyOp : ScavengeCopyOperations)
+ {
+ BytesToWrite += ScavengeCopyOp.RawSize;
+ }
+
uint64_t TotalRequestCount = 0;
uint64_t TotalPartWriteCount = 0;
std::atomic<uint64_t> WritePartsComplete = 0;
@@ -5347,6 +5831,7 @@ namespace {
std::vector<LooseChunkHashWorkData> LooseChunkHashWorks;
TotalPartWriteCount += CacheCopyDatas.size();
+ TotalPartWriteCount += ScavengeCopyOperations.size();
for (const IoHash ChunkHash : LooseChunkHashes)
{
@@ -5694,6 +6179,53 @@ namespace {
}
}
}
+
+ for (uint32_t ScavengeOpIndex = 0; ScavengeOpIndex < ScavengeCopyOperations.size(); ScavengeOpIndex++)
+ {
+ if (AbortFlag)
+ {
+ break;
+ }
+ if (!PrimeCacheOnly)
+ {
+ Work.ScheduleWork(
+ WritePool,
+ [&, ScavengeOpIndex](std::atomic<bool>&) mutable {
+ if (!AbortFlag)
+ {
+ const ScavengeCopyOperation& ScavengeOp = ScavengeCopyOperations[ScavengeOpIndex];
+ const ChunkedFolderContent& ScavengedContent = ScavengedContents[ScavengeOp.ScavengedContentIndex];
+ const std::filesystem::path ScavengedPath = ScavengedContent.Paths[ScavengeOp.ScavengedPathIndex];
+
+ const std::filesystem::path ScavengedFilePath =
+ (ScavengedPaths[ScavengeOp.ScavengedContentIndex] / ScavengedPath).make_preferred();
+ ZEN_ASSERT_SLOW(FileSizeFromPath(ScavengedFilePath) == ScavengeOp.RawSize);
+
+ const IoHash& RemoteSequenceRawHash =
+ RemoteContent.ChunkedContent.SequenceRawHashes[ScavengeOp.RemoteSequenceIndex];
+ const std::filesystem::path TempFilePath =
+ GetTempChunkedSequenceFileName(CacheFolderPath, RemoteSequenceRawHash);
+
+ CopyFile(ScavengedFilePath, TempFilePath, {.EnableClone = false});
+
+ DiskStats.WriteCount++;
+ DiskStats.WriteByteCount += ScavengeOp.RawSize;
+
+ const std::filesystem::path CacheFilePath =
+ GetFinalChunkedSequenceFileName(CacheFolderPath, RemoteSequenceRawHash);
+ RenameFile(TempFilePath, CacheFilePath);
+
+ WritePartsComplete++;
+ if (WritePartsComplete == TotalPartWriteCount)
+ {
+ FilteredWrittenBytesPerSecond.Stop();
+ }
+ }
+ },
+ Work.DefaultErrorFunction());
+ }
+ }
+
for (uint32_t LooseChunkHashWorkIndex = 0; LooseChunkHashWorkIndex < LooseChunkHashWorks.size(); LooseChunkHashWorkIndex++)
{
if (AbortFlag)
@@ -6011,9 +6543,25 @@ namespace {
ZEN_TRACE_CPU("UpdateFolder_CopyLocal");
FilteredWrittenBytesPerSecond.Start();
- const CacheCopyData& CopyData = CacheCopyDatas[CopyDataIndex];
- const uint32_t LocalPathIndex = LocalLookup.SequenceIndexFirstPathIndex[CopyData.LocalSequenceIndex];
- const std::filesystem::path LocalFilePath = (Path / LocalContent.Paths[LocalPathIndex]).make_preferred();
+ const CacheCopyData& CopyData = CacheCopyDatas[CopyDataIndex];
+
+ std::filesystem::path SourceFilePath;
+
+ if (CopyData.ScavengeSourceIndex == (uint32_t)-1)
+ {
+ const uint32_t LocalPathIndex = LocalLookup.SequenceIndexFirstPathIndex[CopyData.SourceSequenceIndex];
+ SourceFilePath = (Path / LocalContent.Paths[LocalPathIndex]).make_preferred();
+ }
+ else
+ {
+ const ChunkedFolderContent& ScavengedContent = ScavengedContents[CopyData.ScavengeSourceIndex];
+ const ChunkedContentLookup& ScavengedLookup = ScavengedLookups[CopyData.ScavengeSourceIndex];
+ const std::filesystem::path ScavengedPath = ScavengedPaths[CopyData.ScavengeSourceIndex];
+ const uint32_t ScavengedPathIndex =
+ ScavengedLookup.SequenceIndexFirstPathIndex[CopyData.SourceSequenceIndex];
+ SourceFilePath = (ScavengedPath / ScavengedContent.Paths[ScavengedPathIndex]).make_preferred();
+ }
+ ZEN_ASSERT_SLOW(IsFile(SourceFilePath));
ZEN_ASSERT(!CopyData.TargetChunkLocationPtrs.empty());
uint64_t CacheLocalFileBytesRead = 0;
@@ -6071,7 +6619,7 @@ namespace {
tsl::robin_set<uint32_t> ChunkIndexesWritten;
- BufferedOpenFile SourceFile(LocalFilePath, DiskStats);
+ BufferedOpenFile SourceFile(SourceFilePath, DiskStats);
WriteFileCache OpenFileCache(DiskStats);
for (size_t WriteOpIndex = 0; WriteOpIndex < WriteOps.size();)
{
@@ -6155,9 +6703,7 @@ namespace {
CompletedChunkSequences,
Work,
WritePool);
- ZEN_CONSOLE_VERBOSE("Copied {} from {}",
- NiceBytes(CacheLocalFileBytesRead),
- LocalContent.Paths[LocalPathIndex]);
+ ZEN_CONSOLE_VERBOSE("Copied {} from {}", NiceBytes(CacheLocalFileBytesRead), SourceFilePath);
}
WritePartsComplete++;
if (WritePartsComplete == TotalPartWriteCount)
@@ -6781,7 +7327,7 @@ namespace {
const IoHash& RawHash = LocalContent.RawHashes[LocalPathIndex];
const std::filesystem::path& LocalPath = LocalContent.Paths[LocalPathIndex];
const std::filesystem::path CacheFilePath = GetFinalChunkedSequenceFileName(CacheFolderPath, RawHash);
- ZEN_ASSERT_SLOW(!IsFile(CacheFilePath));
+ ZEN_ASSERT_SLOW(!IsFileWithRetry(CacheFilePath));
const std::filesystem::path LocalFilePath = (Path / LocalPath).make_preferred();
RenameFileWithRetry(LocalFilePath, CacheFilePath);
CachedCount++;
@@ -6942,7 +7488,7 @@ namespace {
std::filesystem::path TargetFilePath = (Path / TargetPath).make_preferred();
if (!RemotePathIndexToLocalPathIndex[RemotePathIndex])
{
- if (IsFile(TargetFilePath))
+ if (IsFileWithRetry(TargetFilePath))
{
SetFileReadOnlyWithRetry(TargetFilePath, false);
}
@@ -6979,11 +7525,11 @@ namespace {
if (auto InPlaceIt = RemotePathIndexToLocalPathIndex.find(FirstRemotePathIndex);
InPlaceIt != RemotePathIndexToLocalPathIndex.end())
{
- ZEN_ASSERT_SLOW(IsFile(FirstTargetFilePath));
+ ZEN_ASSERT_SLOW(IsFileWithRetry(FirstTargetFilePath));
}
else
{
- if (IsFile(FirstTargetFilePath))
+ if (IsFileWithRetry(FirstTargetFilePath))
{
SetFileReadOnlyWithRetry(FirstTargetFilePath, false);
}
@@ -6999,7 +7545,7 @@ namespace {
const uint32_t LocalPathIndex = InplaceIt->second;
const std::filesystem::path& SourcePath = LocalContent.Paths[LocalPathIndex];
std::filesystem::path SourceFilePath = (Path / SourcePath).make_preferred();
- ZEN_ASSERT_SLOW(IsFile(SourceFilePath));
+ ZEN_ASSERT_SLOW(IsFileWithRetry(SourceFilePath));
ZEN_DEBUG("Copying from '{}' -> '{}'", SourceFilePath, FirstTargetFilePath);
CopyFile(SourceFilePath, FirstTargetFilePath, {.EnableClone = false});
@@ -7010,7 +7556,7 @@ namespace {
ZEN_TRACE_CPU("Rename");
const std::filesystem::path CacheFilePath =
GetFinalChunkedSequenceFileName(CacheFolderPath, RawHash);
- ZEN_ASSERT_SLOW(IsFile(CacheFilePath));
+ ZEN_ASSERT_SLOW(IsFileWithRetry(CacheFilePath));
RenameFileWithRetry(CacheFilePath, FirstTargetFilePath);
@@ -7043,12 +7589,12 @@ namespace {
if (auto InPlaceIt = RemotePathIndexToLocalPathIndex.find(RemotePathIndex);
InPlaceIt != RemotePathIndexToLocalPathIndex.end())
{
- ZEN_ASSERT_SLOW(IsFile(TargetFilePath));
+ ZEN_ASSERT_SLOW(IsFileWithRetry(TargetFilePath));
}
else
{
ZEN_TRACE_CPU("Copy");
- if (IsFile(TargetFilePath))
+ if (IsFileWithRetry(TargetFilePath))
{
SetFileReadOnlyWithRetry(TargetFilePath, false);
}
@@ -7057,7 +7603,7 @@ namespace {
CreateDirectories(TargetFilePath.parent_path());
}
- ZEN_ASSERT_SLOW(IsFile(FirstTargetFilePath));
+ ZEN_ASSERT_SLOW(IsFileWithRetry(FirstTargetFilePath));
ZEN_DEBUG("Copying from '{}' -> '{}'", FirstTargetFilePath, TargetFilePath);
CopyFile(FirstTargetFilePath, TargetFilePath, {.EnableClone = false});
RebuildFolderStateStats.FinalizeTreeFilesCopiedCount++;
@@ -7620,7 +8166,7 @@ namespace {
ChunkedFolderContent GetLocalContent(GetFolderContentStatistics& LocalFolderScanStats,
ChunkingStatistics& ChunkingStats,
const std::filesystem::path& Path,
- const std::filesystem::path& ZenFolderPath,
+ const std::filesystem::path& StateFilePath,
ChunkingController& ChunkController,
const ChunkedFolderContent& ReferenceContent,
FolderContent& OutLocalFolderContent)
@@ -7628,50 +8174,12 @@ namespace {
FolderContent LocalFolderState;
ChunkedFolderContent LocalContent;
- bool HasLocalState = false;
- if (IsFile(ZenStateFilePath(ZenFolderPath)))
+ Stopwatch ReadStateTimer;
+ const bool HasLocalState = IsFile(StateFilePath) && ReadStateFile(StateFilePath, LocalFolderState, LocalContent);
+ if (HasLocalState)
{
- try
- {
- Stopwatch ReadStateTimer;
- CbObject CurrentStateObject = LoadCompactBinaryObject(ZenStateFilePath(ZenFolderPath)).Object;
- if (CurrentStateObject)
- {
- Oid CurrentBuildId;
- std::vector<Oid> SavedBuildPartIds;
- std::vector<std::string> SavedBuildPartsNames;
- std::vector<ChunkedFolderContent> SavedPartContents;
- if (ReadStateObject(CurrentStateObject,
- CurrentBuildId,
- SavedBuildPartIds,
- SavedBuildPartsNames,
- SavedPartContents,
- LocalFolderState))
- {
- if (!SavedPartContents.empty())
- {
- if (SavedPartContents.size() == 1)
- {
- LocalContent = std::move(SavedPartContents[0]);
- }
- else
- {
- LocalContent =
- MergeChunkedFolderContents(SavedPartContents[0],
- std::span<const ChunkedFolderContent>(SavedPartContents).subspan(1));
- }
- HasLocalState = true;
- }
- }
- }
- ZEN_CONSOLE("Read local state in {}", NiceTimeSpanMs(ReadStateTimer.GetElapsedTimeMs()));
- }
- catch (const std::exception& Ex)
- {
- ZEN_CONSOLE("Failed reading state file, falling back to scannning. Reason: {}", Ex.what());
- }
+ ZEN_CONSOLE("Read local state file {} in {}", StateFilePath, NiceTimeSpanMs(ReadStateTimer.GetElapsedTimeMs()));
}
-
{
const uint32_t LocalPathCount = gsl::narrow<uint32_t>(ReferenceContent.Paths.size());
const uint32_t RemotePathCount = gsl::narrow<uint32_t>(LocalFolderState.Paths.size());
@@ -7696,92 +8204,7 @@ namespace {
}
}
- const uint32_t PathCount = gsl::narrow<uint32_t>(PathsToCheck.size());
-
- OutLocalFolderContent.Paths.resize(PathCount);
- OutLocalFolderContent.RawSizes.resize(PathCount);
- OutLocalFolderContent.Attributes.resize(PathCount);
- OutLocalFolderContent.ModificationTicks.resize(PathCount);
-
- {
- Stopwatch Timer;
- auto _ =
- MakeGuard([&LocalFolderScanStats, &Timer]() { LocalFolderScanStats.ElapsedWallTimeUS = Timer.GetElapsedTimeUs(); });
-
- ProgressBar ProgressBar(UsePlainProgress);
-
- ParallellWork Work(AbortFlag);
- std::atomic<uint64_t> CompletedPathCount = 0;
- uint32_t PathIndex = 0;
-
- while (PathIndex < PathCount)
- {
- uint32_t PathRangeCount = Min(128u, PathCount - PathIndex);
- Work.ScheduleWork(
- GetIOWorkerPool(),
- [PathIndex,
- PathRangeCount,
- &PathsToCheck,
- &Path,
- &OutLocalFolderContent,
- &CompletedPathCount,
- &LocalFolderScanStats](std::atomic<bool>&) {
- for (uint32_t PathRangeIndex = PathIndex; PathRangeIndex < PathIndex + PathRangeCount; PathRangeIndex++)
- {
- const std::filesystem::path& FilePath = PathsToCheck[PathRangeIndex];
- std::filesystem::path LocalFilePath = (Path / FilePath).make_preferred();
- if (TryGetFileProperties(LocalFilePath,
- OutLocalFolderContent.RawSizes[PathRangeIndex],
- OutLocalFolderContent.ModificationTicks[PathRangeIndex],
- OutLocalFolderContent.Attributes[PathRangeIndex]))
- {
- OutLocalFolderContent.Paths[PathRangeIndex] = std::move(FilePath);
- LocalFolderScanStats.FoundFileCount++;
- LocalFolderScanStats.FoundFileByteCount += OutLocalFolderContent.RawSizes[PathRangeIndex];
- LocalFolderScanStats.AcceptedFileCount++;
- LocalFolderScanStats.AcceptedFileByteCount += OutLocalFolderContent.RawSizes[PathRangeIndex];
- }
- CompletedPathCount++;
- }
- },
- Work.DefaultErrorFunction());
- PathIndex += PathRangeCount;
- }
- Work.Wait(200, [&](bool, ptrdiff_t) {
- // FilteredBytesHashed.Update(ChunkingStats.BytesHashed.load());
- std::string Details = fmt::format("{}/{} checked, {} found",
- CompletedPathCount.load(),
- PathCount,
- LocalFolderScanStats.FoundFileCount.load());
- ProgressBar.UpdateState({.Task = "Checking files ",
- .Details = Details,
- .TotalCount = PathCount,
- .RemainingCount = PathCount - CompletedPathCount.load()},
- false);
- });
- ProgressBar.Finish();
- }
-
- uint32_t WritePathIndex = 0;
- for (uint32_t ReadPathIndex = 0; ReadPathIndex < PathCount; ReadPathIndex++)
- {
- if (!OutLocalFolderContent.Paths[ReadPathIndex].empty())
- {
- if (WritePathIndex < ReadPathIndex)
- {
- OutLocalFolderContent.Paths[WritePathIndex] = std::move(OutLocalFolderContent.Paths[ReadPathIndex]);
- OutLocalFolderContent.RawSizes[WritePathIndex] = OutLocalFolderContent.RawSizes[ReadPathIndex];
- OutLocalFolderContent.Attributes[WritePathIndex] = OutLocalFolderContent.Attributes[ReadPathIndex];
- OutLocalFolderContent.ModificationTicks[WritePathIndex] = OutLocalFolderContent.ModificationTicks[ReadPathIndex];
- }
- WritePathIndex++;
- }
- }
-
- OutLocalFolderContent.Paths.resize(WritePathIndex);
- OutLocalFolderContent.RawSizes.resize(WritePathIndex);
- OutLocalFolderContent.Attributes.resize(WritePathIndex);
- OutLocalFolderContent.ModificationTicks.resize(WritePathIndex);
+ OutLocalFolderContent = GetValidFolderContent(LocalFolderScanStats, Path, PathsToCheck);
}
bool ScanContent = true;
@@ -7927,7 +8350,8 @@ namespace {
bool AllowPartialBlockRequests,
bool WipeTargetFolder,
bool PostDownloadVerify,
- bool PrimeCacheOnly)
+ bool PrimeCacheOnly,
+ bool EnableScavenging)
{
ZEN_TRACE_CPU("DownloadFolder");
@@ -7977,7 +8401,7 @@ namespace {
LocalContent = GetLocalContent(LocalFolderScanStats,
ChunkingStats,
Path,
- ZenFolderPath,
+ ZenStateFilePath(ZenFolderPath),
*ChunkController,
RemoteContent,
LocalFolderContent);
@@ -8041,7 +8465,7 @@ namespace {
NiceTimeSpanMs(DownloadTimer.GetElapsedTimeMs()));
Stopwatch WriteStateTimer;
- CbObject StateObject = CreateStateObject(BuildId, AllBuildParts, PartContents, LocalFolderContent);
+ CbObject StateObject = CreateStateObject(BuildId, AllBuildParts, PartContents, LocalFolderContent, Path);
CreateDirectories(ZenStateFilePath(ZenFolderPath).parent_path());
TemporaryFile::SafeWriteFile(ZenStateFilePath(ZenFolderPath), StateObject.GetView());
ZEN_CONSOLE("Wrote local state in {}", NiceTimeSpanMs(WriteStateTimer.GetElapsedTimeMs()));
@@ -8065,7 +8489,8 @@ namespace {
RebuildFolderStateStatistics RebuildFolderStateStats;
VerifyFolderStatistics VerifyFolderStats;
- UpdateFolder(Storage,
+ UpdateFolder(SystemRootDir,
+ Storage,
BuildId,
Path,
ZenFolderPath,
@@ -8078,6 +8503,7 @@ namespace {
AllowPartialBlockRequests,
WipeTargetFolder,
PrimeCacheOnly,
+ EnableScavenging,
LocalFolderState,
DiskStats,
CacheMappingStats,
@@ -8092,7 +8518,7 @@ namespace {
VerifyFolder(RemoteContent, Path, PostDownloadVerify, VerifyFolderStats);
Stopwatch WriteStateTimer;
- CbObject StateObject = CreateStateObject(BuildId, AllBuildParts, PartContents, LocalFolderState);
+ CbObject StateObject = CreateStateObject(BuildId, AllBuildParts, PartContents, LocalFolderState, Path);
CreateDirectories(ZenStateFilePath(ZenFolderPath).parent_path());
TemporaryFile::SafeWriteFile(ZenStateFilePath(ZenFolderPath), StateObject.GetView());
@@ -8618,6 +9044,12 @@ BuildsCommand::BuildsCommand()
m_DownloadOptions
.add_option("", "", "verify", "Enable post download verify of all tracked files", cxxopts::value(m_PostDownloadVerify), "<verify>");
+ m_DownloadOptions.add_option("",
+ "",
+ "enable-scavenge",
+ "Enable scavenging of data from previous download locations",
+ cxxopts::value(m_EnableScavenging),
+ "<scavenge>");
m_DownloadOptions.parse_positional({"local-path", "build-id", "build-part-name"});
m_DownloadOptions.positional_help("local-path build-id build-part-name");
@@ -8655,6 +9087,12 @@ BuildsCommand::BuildsCommand()
"Allow request for partial chunk blocks. Defaults to true.",
cxxopts::value(m_AllowPartialBlockRequests),
"<allowpartialblockrequests>");
+ m_TestOptions.add_option("",
+ "",
+ "enable-scavenge",
+ "Enable scavenging of data from previous download locations",
+ cxxopts::value(m_EnableScavenging),
+ "<scavenge>");
m_TestOptions.parse_positional({"local-path"});
m_TestOptions.positional_help("local-path");
@@ -8702,6 +9140,12 @@ BuildsCommand::BuildsCommand()
m_MultiTestDownloadOptions
.add_option("", "l", "local-path", "Root file system folder used as base", cxxopts::value(m_Path), "<local-path>");
m_MultiTestDownloadOptions.add_option("", "", "build-ids", "Build Ids list separated by ','", cxxopts::value(m_BuildIds), "<ids>");
+ m_MultiTestDownloadOptions.add_option("",
+ "",
+ "enable-scavenge",
+ "Enable scavenging of data from previous download locations",
+ cxxopts::value(m_EnableScavenging),
+ "<scavenge>");
m_MultiTestDownloadOptions.parse_positional({"local-path"});
m_MultiTestDownloadOptions.positional_help("local-path");
}
@@ -9451,7 +9895,8 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv)
m_AllowPartialBlockRequests && !m_PrimeCacheOnly,
m_Clean,
m_PostDownloadVerify,
- m_PrimeCacheOnly);
+ m_PrimeCacheOnly,
+ m_EnableScavenging);
if (false)
{
@@ -9639,7 +10084,8 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv)
m_AllowPartialBlockRequests,
BuildIdString == m_BuildIds.front(),
true,
- false);
+ false,
+ m_EnableScavenging);
if (AbortFlag)
{
ZEN_CONSOLE("Download cancelled");
@@ -9692,7 +10138,16 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv)
BuildStorage::Statistics StorageStats;
BuildStorageCache::Statistics StorageCacheStats;
- const std::filesystem::path DownloadPath = Path.parent_path() / (m_BuildPartName + "_test");
+ const std::filesystem::path DownloadPath = Path.parent_path() / (m_BuildPartName + "_test");
+ const std::filesystem::path DownloadPath2 = Path.parent_path() / (m_BuildPartName + "_test2");
+
+ auto ___ = MakeGuard([DownloadPath, DownloadPath2]() {
+ CleanDirectory(DownloadPath, true);
+ DeleteDirectories(DownloadPath);
+ CleanDirectory(DownloadPath2, true);
+ DeleteDirectories(DownloadPath2);
+ });
+
const std::filesystem::path ZenFolderPath =
m_ZenFolderPath.empty() ? DownloadPath / ZenFolderName : MakeSafeAbsolutePath(m_ZenFolderPath);
@@ -9751,7 +10206,8 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv)
m_AllowPartialBlockRequests,
true,
true,
- false);
+ false,
+ m_EnableScavenging);
if (AbortFlag)
{
ZEN_CONSOLE("Download failed.");
@@ -9774,7 +10230,8 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv)
m_AllowPartialBlockRequests,
false,
true,
- false);
+ false,
+ m_EnableScavenging);
if (AbortFlag)
{
ZEN_CONSOLE("Re-download failed. (identical target)");
@@ -9892,7 +10349,8 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv)
m_AllowPartialBlockRequests,
false,
true,
- false);
+ false,
+ m_EnableScavenging);
if (AbortFlag)
{
ZEN_CONSOLE("Re-download failed. (scrambled target)");
@@ -9943,7 +10401,8 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv)
m_AllowPartialBlockRequests,
false,
true,
- false);
+ false,
+ m_EnableScavenging);
if (AbortFlag)
{
ZEN_CONSOLE("Re-download failed.");
@@ -9962,7 +10421,8 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv)
m_AllowPartialBlockRequests,
false,
true,
- false);
+ false,
+ m_EnableScavenging);
if (AbortFlag)
{
ZEN_CONSOLE("Re-download failed.");
@@ -9981,7 +10441,28 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv)
m_AllowPartialBlockRequests,
false,
true,
- false);
+ false,
+ m_EnableScavenging);
+ if (AbortFlag)
+ {
+ ZEN_CONSOLE("Re-download failed.");
+ return 11;
+ }
+
+ ZEN_CONSOLE("\nDownload Build {}, Part {} ({}) to '{}' (original)", BuildId, BuildPartId, m_BuildPartName, DownloadPath2);
+ DownloadFolder(Storage,
+ BuildId,
+ {BuildPartId},
+ {},
+ DownloadPath2,
+ ZenFolderPath,
+ SystemRootDir,
+ m_AllowMultiparts,
+ m_AllowPartialBlockRequests,
+ false,
+ true,
+ false,
+ m_EnableScavenging);
if (AbortFlag)
{
ZEN_CONSOLE("Re-download failed.");
diff --git a/src/zen/cmds/builds_cmd.h b/src/zen/cmds/builds_cmd.h
index 535d2b1d2..7e1e7d0ca 100644
--- a/src/zen/cmds/builds_cmd.h
+++ b/src/zen/cmds/builds_cmd.h
@@ -98,6 +98,7 @@ private:
std::vector<std::string> m_BuildPartNames;
std::vector<std::string> m_BuildPartIds;
bool m_PostDownloadVerify = false;
+ bool m_EnableScavenging = true;
cxxopts::Options m_DiffOptions{"diff", "Compare two local folders"};
std::string m_DiffPath;
diff --git a/src/zenutil/chunkedcontent.cpp b/src/zenutil/chunkedcontent.cpp
index 32ae2d94a..17b348f8d 100644
--- a/src/zenutil/chunkedcontent.cpp
+++ b/src/zenutil/chunkedcontent.cpp
@@ -305,7 +305,7 @@ FolderContent::UpdateState(const FolderContent& Rhs, std::vector<uint32_t>& OutP
}
FolderContent
-GetUpdatedContent(const FolderContent& Old, const FolderContent& New, std::vector<std::filesystem::path>& OutDeletedPathIndexes)
+GetUpdatedContent(const FolderContent& Old, const FolderContent& New, std::vector<std::filesystem::path>& OutDeletedPaths)
{
ZEN_TRACE_CPU("FolderContent::GetUpdatedContent");
@@ -342,7 +342,7 @@ GetUpdatedContent(const FolderContent& Old, const FolderContent& New, std::vecto
}
else
{
- OutDeletedPathIndexes.push_back(Old.Paths[OldPathIndex]);
+ OutDeletedPaths.push_back(Old.Paths[OldPathIndex]);
}
}
return Result;