aboutsummaryrefslogtreecommitdiff
path: root/src/zenutil/chunkedcontent.cpp
diff options
context:
space:
mode:
authorDan Engelbrecht <[email protected]>2025-10-03 11:49:14 +0200
committerGitHub Enterprise <[email protected]>2025-10-03 11:49:14 +0200
commitfaf0b7c9b6a08b095f8dc895904f4f7d3f30dcde (patch)
tree2bcd09fe17af6f25108fd05578e7eda6a827d8ec /src/zenutil/chunkedcontent.cpp
parentcache RPC replay fixes (minor) (#544) (diff)
downloadzen-faf0b7c9b6a08b095f8dc895904f4f7d3f30dcde.tar.xz
zen-faf0b7c9b6a08b095f8dc895904f4f7d3f30dcde.zip
move chunking code to zenremotestore lib (#545)
Diffstat (limited to 'src/zenutil/chunkedcontent.cpp')
-rw-r--r--src/zenutil/chunkedcontent.cpp953
1 files changed, 0 insertions, 953 deletions
diff --git a/src/zenutil/chunkedcontent.cpp b/src/zenutil/chunkedcontent.cpp
deleted file mode 100644
index 757bcfae5..000000000
--- a/src/zenutil/chunkedcontent.cpp
+++ /dev/null
@@ -1,953 +0,0 @@
-// Copyright Epic Games, Inc. All Rights Reserved.
-
-#include <zenutil/chunkedcontent.h>
-
-#include <zencore/filesystem.h>
-#include <zencore/fmtutils.h>
-#include <zencore/logging.h>
-#include <zencore/scopeguard.h>
-#include <zencore/timer.h>
-#include <zencore/trace.h>
-
-#include <zenutil/chunkedfile.h>
-#include <zenutil/chunkingcontroller.h>
-#include <zenutil/parallelwork.h>
-#include <zenutil/workerpools.h>
-
-ZEN_THIRD_PARTY_INCLUDES_START
-#include <tsl/robin_set.h>
-#include <gsl/gsl-lite.hpp>
-ZEN_THIRD_PARTY_INCLUDES_END
-
-namespace zen {
-
-using namespace std::literals;
-
-namespace {
- void AddChunkSequence(ChunkingStatistics& Stats,
- ChunkedContentData& InOutChunkedContent,
- tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& ChunkHashToChunkIndex,
- const IoHash& RawHash,
- std::span<const uint32_t> ChunkSequence,
- std::span<const IoHash> ChunkHashes,
- std::span<const uint64_t> ChunkRawSizes)
- {
- ZEN_ASSERT(ChunkHashes.size() == ChunkRawSizes.size());
- InOutChunkedContent.ChunkCounts.push_back(gsl::narrow<uint32_t>(ChunkSequence.size()));
- InOutChunkedContent.ChunkOrders.reserve(InOutChunkedContent.ChunkOrders.size() + ChunkSequence.size());
-
- for (uint32_t ChunkedSequenceIndex : ChunkSequence)
- {
- const IoHash& ChunkHash = ChunkHashes[ChunkedSequenceIndex];
- if (auto It = ChunkHashToChunkIndex.find(ChunkHash); It != ChunkHashToChunkIndex.end())
- {
- uint32_t ChunkIndex = gsl::narrow<uint32_t>(It->second);
- InOutChunkedContent.ChunkOrders.push_back(ChunkIndex);
- }
- else
- {
- uint32_t ChunkIndex = gsl::narrow<uint32_t>(InOutChunkedContent.ChunkHashes.size());
- ChunkHashToChunkIndex.insert_or_assign(ChunkHash, ChunkIndex);
- InOutChunkedContent.ChunkHashes.push_back(ChunkHash);
- InOutChunkedContent.ChunkRawSizes.push_back(ChunkRawSizes[ChunkedSequenceIndex]);
- InOutChunkedContent.ChunkOrders.push_back(ChunkIndex);
- Stats.UniqueChunksFound++;
- Stats.UniqueBytesFound += ChunkRawSizes[ChunkedSequenceIndex];
- }
- }
- InOutChunkedContent.SequenceRawHashes.push_back(RawHash);
- Stats.UniqueSequencesFound++;
- }
-
- void AddChunkSequence(ChunkingStatistics& Stats,
- ChunkedContentData& InOutChunkedContent,
- tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& ChunkHashToChunkIndex,
- const IoHash& RawHash,
- const uint64_t RawSize)
- {
- InOutChunkedContent.ChunkCounts.push_back(1);
-
- if (auto It = ChunkHashToChunkIndex.find(RawHash); It != ChunkHashToChunkIndex.end())
- {
- uint32_t ChunkIndex = gsl::narrow<uint32_t>(It->second);
- InOutChunkedContent.ChunkOrders.push_back(ChunkIndex);
- }
- else
- {
- uint32_t ChunkIndex = gsl::narrow<uint32_t>(InOutChunkedContent.ChunkHashes.size());
- ChunkHashToChunkIndex.insert_or_assign(RawHash, ChunkIndex);
- InOutChunkedContent.ChunkHashes.push_back(RawHash);
- InOutChunkedContent.ChunkRawSizes.push_back(RawSize);
- InOutChunkedContent.ChunkOrders.push_back(ChunkIndex);
- Stats.UniqueChunksFound++;
- Stats.UniqueBytesFound += RawSize;
- }
- InOutChunkedContent.SequenceRawHashes.push_back(RawHash);
- Stats.UniqueSequencesFound++;
- }
-
- IoHash HashOneFile(ChunkingStatistics& Stats,
- const ChunkingController& InChunkingController,
- ChunkedFolderContent& OutChunkedContent,
- tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& ChunkHashToChunkIndex,
- tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& RawHashToSequenceRawHashIndex,
- RwLock& Lock,
- const std::filesystem::path& FolderPath,
- uint32_t PathIndex,
- std::atomic<bool>& AbortFlag)
- {
- ZEN_TRACE_CPU("ChunkFolderContent");
-
- const uint64_t RawSize = OutChunkedContent.RawSizes[PathIndex];
- const std::filesystem::path& Path = OutChunkedContent.Paths[PathIndex];
-
- if (RawSize == 0)
- {
- return IoHash::Zero;
- }
- else
- {
- ChunkedInfoWithSource Chunked;
- const bool DidChunking =
- InChunkingController.ProcessFile((FolderPath / Path).make_preferred(), RawSize, Chunked, Stats.BytesHashed, AbortFlag);
- if (DidChunking)
- {
- Lock.WithExclusiveLock([&]() {
- if (!RawHashToSequenceRawHashIndex.contains(Chunked.Info.RawHash))
- {
- RawHashToSequenceRawHashIndex.insert(
- {Chunked.Info.RawHash, gsl::narrow<uint32_t>(OutChunkedContent.ChunkedContent.SequenceRawHashes.size())});
- std::vector<uint64_t> ChunkSizes;
- ChunkSizes.reserve(Chunked.ChunkSources.size());
- for (const ChunkSource& Source : Chunked.ChunkSources)
- {
- ChunkSizes.push_back(Source.Size);
- }
- AddChunkSequence(Stats,
- OutChunkedContent.ChunkedContent,
- ChunkHashToChunkIndex,
- Chunked.Info.RawHash,
- Chunked.Info.ChunkSequence,
- Chunked.Info.ChunkHashes,
- ChunkSizes);
- Stats.UniqueSequencesFound++;
- }
- });
- Stats.FilesChunked++;
- return Chunked.Info.RawHash;
- }
- else
- {
- ZEN_TRACE_CPU("HashOnly");
-
- IoBuffer Buffer = IoBufferBuilder::MakeFromFile((FolderPath / Path).make_preferred());
- if (Buffer.GetSize() != RawSize)
- {
- throw std::runtime_error(fmt::format("Failed opening file '{}' for hashing", FolderPath / Path));
- }
- const IoHash Hash = IoHash::HashBuffer(Buffer, &Stats.BytesHashed);
-
- Lock.WithExclusiveLock([&]() {
- if (!RawHashToSequenceRawHashIndex.contains(Hash))
- {
- RawHashToSequenceRawHashIndex.insert(
- {Hash, gsl::narrow<uint32_t>(OutChunkedContent.ChunkedContent.SequenceRawHashes.size())});
- AddChunkSequence(Stats, OutChunkedContent.ChunkedContent, ChunkHashToChunkIndex, Hash, RawSize);
- Stats.UniqueSequencesFound++;
- }
- });
- return Hash;
- }
- }
- }
-
- std::string PathCompareString(const std::filesystem::path& Path) { return ToLower(Path.generic_string()); }
-
-} // namespace
-
-std::string_view FolderContentSourcePlatformNames[(size_t)SourcePlatform::_Count] = {"Windows"sv, "Linux"sv, "MacOS"sv};
-
-std::string_view
-ToString(SourcePlatform Platform)
-{
- return FolderContentSourcePlatformNames[(size_t)Platform];
-}
-
-SourcePlatform
-FromString(std::string_view Platform, SourcePlatform Default)
-{
- for (size_t Index = 0; Index < (size_t)SourcePlatform::_Count; Index++)
- {
- if (Platform == FolderContentSourcePlatformNames[Index])
- {
- return (SourcePlatform)Index;
- }
- }
- return Default;
-}
-
-SourcePlatform
-GetSourceCurrentPlatform()
-{
-#if ZEN_PLATFORM_WINDOWS
- return SourcePlatform::Windows;
-#endif
-#if ZEN_PLATFORM_MAC
- return SourcePlatform::MacOS;
-#endif
-#if ZEN_PLATFORM_LINUX
- return SourcePlatform::Linux;
-#endif
-}
-
-bool
-FolderContent::AreFileAttributesEqual(const uint32_t Lhs, const uint32_t Rhs)
-{
-#if ZEN_PLATFORM_WINDOWS
- return (Lhs & 0xff) == (Rhs & 0xff);
-#endif
-#if ZEN_PLATFORM_MAC
- return Lhs == Rhs;
-#endif
-#if ZEN_PLATFORM_LINUX
- return Lhs == Rhs;
-#endif
-}
-
-bool
-FolderContent::operator==(const FolderContent& Rhs) const
-{
- if ((Platform == Rhs.Platform) && (RawSizes == Rhs.RawSizes) && (Attributes == Rhs.Attributes) &&
- (ModificationTicks == Rhs.ModificationTicks) && (Paths.size() == Rhs.Paths.size()))
- {
- size_t PathCount = 0;
- for (size_t PathIndex = 0; PathIndex < PathCount; PathIndex++)
- {
- if (Paths[PathIndex].generic_string() != Rhs.Paths[PathIndex].generic_string())
- {
- return false;
- }
- }
- return true;
- }
- return false;
-}
-
-bool
-FolderContent::AreKnownFilesEqual(const FolderContent& Rhs) const
-{
- ZEN_TRACE_CPU("FolderContent::AreKnownFilesEqual");
- tsl::robin_map<std::string, size_t> RhsPathToIndex;
- const size_t RhsPathCount = Rhs.Paths.size();
- RhsPathToIndex.reserve(RhsPathCount);
- for (size_t RhsPathIndex = 0; RhsPathIndex < RhsPathCount; RhsPathIndex++)
- {
- RhsPathToIndex.insert({Rhs.Paths[RhsPathIndex].generic_string(), RhsPathIndex});
- }
- const size_t PathCount = Paths.size();
- for (size_t PathIndex = 0; PathIndex < PathCount; PathIndex++)
- {
- if (auto It = RhsPathToIndex.find(Paths[PathIndex].generic_string()); It != RhsPathToIndex.end())
- {
- const size_t RhsPathIndex = It->second;
- if ((RawSizes[PathIndex] != Rhs.RawSizes[RhsPathIndex]) ||
- (!AreFileAttributesEqual(Attributes[PathIndex], Rhs.Attributes[RhsPathIndex])) ||
- (ModificationTicks[PathIndex] != Rhs.ModificationTicks[RhsPathIndex]))
- {
- return false;
- }
- }
- else
- {
- return false;
- }
- }
- return true;
-}
-
-void
-FolderContent::UpdateState(const FolderContent& Rhs, std::vector<uint32_t>& OutPathIndexesOufOfDate)
-{
- ZEN_TRACE_CPU("FolderContent::UpdateState");
- tsl::robin_map<std::string, uint32_t> RhsPathToIndex;
- const uint32_t RhsPathCount = gsl::narrow<uint32_t>(Rhs.Paths.size());
- RhsPathToIndex.reserve(RhsPathCount);
- for (uint32_t RhsPathIndex = 0; RhsPathIndex < RhsPathCount; RhsPathIndex++)
- {
- RhsPathToIndex.insert({Rhs.Paths[RhsPathIndex].generic_string(), RhsPathIndex});
- }
- uint32_t PathCount = gsl::narrow<uint32_t>(Paths.size());
- for (uint32_t PathIndex = 0; PathIndex < PathCount;)
- {
- if (auto It = RhsPathToIndex.find(Paths[PathIndex].generic_string()); It != RhsPathToIndex.end())
- {
- const uint32_t RhsPathIndex = It->second;
-
- if ((RawSizes[PathIndex] != Rhs.RawSizes[RhsPathIndex]) ||
- (ModificationTicks[PathIndex] != Rhs.ModificationTicks[RhsPathIndex]))
- {
- RawSizes[PathIndex] = Rhs.RawSizes[RhsPathIndex];
- ModificationTicks[PathIndex] = Rhs.ModificationTicks[RhsPathIndex];
- OutPathIndexesOufOfDate.push_back(PathIndex);
- }
- Attributes[PathIndex] = Rhs.Attributes[RhsPathIndex];
- PathIndex++;
- }
- else
- {
- Paths.erase(Paths.begin() + PathIndex);
- RawSizes.erase(RawSizes.begin() + PathIndex);
- Attributes.erase(Attributes.begin() + PathIndex);
- ModificationTicks.erase(ModificationTicks.begin() + PathIndex);
- PathCount--;
- }
- }
-}
-
-FolderContent
-GetUpdatedContent(const FolderContent& Old, const FolderContent& New, std::vector<std::filesystem::path>& OutDeletedPaths)
-{
- ZEN_TRACE_CPU("FolderContent::GetUpdatedContent");
-
- const uint32_t NewPathCount = gsl::narrow<uint32_t>(New.Paths.size());
-
- FolderContent Result = {.Platform = Old.Platform};
- Result.Paths.reserve(NewPathCount);
- Result.RawSizes.reserve(NewPathCount);
- Result.Attributes.reserve(NewPathCount);
- Result.ModificationTicks.reserve(NewPathCount);
-
- tsl::robin_map<std::string, uint32_t> NewPathToIndex;
- NewPathToIndex.reserve(NewPathCount);
- for (uint32_t NewPathIndex = 0; NewPathIndex < NewPathCount; NewPathIndex++)
- {
- NewPathToIndex.insert({New.Paths[NewPathIndex].generic_string(), NewPathIndex});
- }
-
- uint32_t OldPathCount = gsl::narrow<uint32_t>(Old.Paths.size());
- for (uint32_t OldPathIndex = 0; OldPathIndex < OldPathCount; OldPathIndex++)
- {
- if (auto It = NewPathToIndex.find(Old.Paths[OldPathIndex].generic_string()); It != NewPathToIndex.end())
- {
- const uint32_t NewPathIndex = It->second;
-
- if ((Old.RawSizes[OldPathIndex] != New.RawSizes[NewPathIndex]) ||
- (Old.ModificationTicks[OldPathIndex] != New.ModificationTicks[NewPathIndex]))
- {
- Result.Paths.push_back(New.Paths[NewPathIndex]);
- Result.RawSizes.push_back(New.RawSizes[NewPathIndex]);
- Result.Attributes.push_back(New.Attributes[NewPathIndex]);
- Result.ModificationTicks.push_back(New.ModificationTicks[NewPathIndex]);
- }
- }
- else
- {
- OutDeletedPaths.push_back(Old.Paths[OldPathIndex]);
- }
- }
- return Result;
-}
-
-void
-SaveFolderContentToCompactBinary(const FolderContent& Content, CbWriter& Output)
-{
- ZEN_TRACE_CPU("SaveFolderContentToCompactBinary");
- Output.AddString("platform"sv, ToString(Content.Platform));
- compactbinary_helpers::WriteArray(Content.Paths, "paths"sv, Output);
- compactbinary_helpers::WriteArray(Content.RawSizes, "rawSizes"sv, Output);
- compactbinary_helpers::WriteArray(Content.Attributes, "attributes"sv, Output);
- compactbinary_helpers::WriteArray(Content.ModificationTicks, "modificationTimes"sv, Output);
-}
-
-FolderContent
-LoadFolderContentToCompactBinary(CbObjectView Input)
-{
- ZEN_TRACE_CPU("LoadFolderContentToCompactBinary");
- FolderContent Content;
- Content.Platform = FromString(Input["platform"sv].AsString(), GetSourceCurrentPlatform());
- compactbinary_helpers::ReadArray("paths"sv, Input, Content.Paths);
- compactbinary_helpers::ReadArray("rawSizes"sv, Input, Content.RawSizes);
- compactbinary_helpers::ReadArray("attributes"sv, Input, Content.Attributes);
- compactbinary_helpers::ReadArray("modificationTimes"sv, Input, Content.ModificationTicks);
- return Content;
-}
-
-FolderContent
-GetFolderContent(GetFolderContentStatistics& Stats,
- const std::filesystem::path& RootPath,
- std::function<bool(const std::string_view& RelativePath)>&& AcceptDirectory,
- std::function<bool(std::string_view RelativePath, uint64_t Size, uint32_t Attributes)>&& AcceptFile,
- WorkerThreadPool& WorkerPool,
- int32_t UpdateIntervalMS,
- std::function<void(bool IsAborted, std::ptrdiff_t PendingWork)>&& UpdateCallback,
- std::atomic<bool>& AbortFlag)
-{
- ZEN_TRACE_CPU("GetFolderContent");
-
- Stopwatch Timer;
- auto _ = MakeGuard([&Stats, &Timer]() { Stats.ElapsedWallTimeUS = Timer.GetElapsedTimeUs(); });
-
- FolderContent Content;
- struct AsyncVisitor : public GetDirectoryContentVisitor
- {
- AsyncVisitor(GetFolderContentStatistics& Stats,
- std::atomic<bool>& AbortFlag,
- FolderContent& Content,
- std::function<bool(const std::string_view& RelativePath)>&& AcceptDirectory,
- std::function<bool(std::string_view RelativePath, uint64_t Size, uint32_t Attributes)>&& AcceptFile)
- : m_Stats(Stats)
- , m_AbortFlag(AbortFlag)
- , m_FoundContent(Content)
- , m_AcceptDirectory(std::move(AcceptDirectory))
- , m_AcceptFile(std::move(AcceptFile))
- {
- }
- virtual void AsyncVisitDirectory(const std::filesystem::path& RelativeRoot, DirectoryContent&& Content) override
- {
- if (!m_AbortFlag)
- {
- m_Stats.FoundFileCount += Content.FileNames.size();
- for (uint64_t FileSize : Content.FileSizes)
- {
- m_Stats.FoundFileByteCount += FileSize;
- }
- std::string RelativeDirectoryPath = RelativeRoot.generic_string();
- if (m_AcceptDirectory(RelativeDirectoryPath))
- {
- std::vector<std::filesystem::path> Paths;
- std::vector<uint64_t> RawSizes;
- std::vector<uint32_t> Attributes;
- std::vector<uint64_t> ModificatonTicks;
- Paths.reserve(Content.FileNames.size());
- RawSizes.reserve(Content.FileNames.size());
- Attributes.reserve(Content.FileNames.size());
- ModificatonTicks.reserve(Content.FileModificationTicks.size());
-
- for (size_t FileIndex = 0; FileIndex < Content.FileNames.size(); FileIndex++)
- {
- const std::filesystem::path& FileName = Content.FileNames[FileIndex];
- std::string RelativePath = (RelativeRoot / FileName).generic_string();
- std::replace(RelativePath.begin(), RelativePath.end(), '\\', '/');
- if (m_AcceptFile(RelativePath, Content.FileSizes[FileIndex], Content.FileAttributes[FileIndex]))
- {
- Paths.emplace_back(std::move(RelativePath));
- RawSizes.emplace_back(Content.FileSizes[FileIndex]);
- Attributes.emplace_back(Content.FileAttributes[FileIndex]);
- ModificatonTicks.emplace_back(Content.FileModificationTicks[FileIndex]);
-
- m_Stats.AcceptedFileCount++;
- m_Stats.AcceptedFileByteCount += Content.FileSizes[FileIndex];
- }
- }
- m_Lock.WithExclusiveLock([&]() {
- m_FoundContent.Paths.insert(m_FoundContent.Paths.end(), Paths.begin(), Paths.end());
- m_FoundContent.RawSizes.insert(m_FoundContent.RawSizes.end(), RawSizes.begin(), RawSizes.end());
- m_FoundContent.Attributes.insert(m_FoundContent.Attributes.end(), Attributes.begin(), Attributes.end());
- m_FoundContent.ModificationTicks.insert(m_FoundContent.ModificationTicks.end(),
- ModificatonTicks.begin(),
- ModificatonTicks.end());
- });
- }
- }
- }
-
- GetFolderContentStatistics& m_Stats;
- std::atomic<bool>& m_AbortFlag;
- RwLock m_Lock;
- FolderContent& m_FoundContent;
- std::function<bool(const std::string_view& RelativePath)> m_AcceptDirectory;
- std::function<bool(std::string_view RelativePath, uint64_t Size, uint32_t Attributes)> m_AcceptFile;
- } Visitor(Stats, AbortFlag, Content, std::move(AcceptDirectory), std::move(AcceptFile));
-
- Latch PendingWork(1);
- GetDirectoryContent(RootPath,
- DirectoryContentFlags::IncludeFiles | DirectoryContentFlags::Recursive | DirectoryContentFlags::IncludeFileSizes |
- DirectoryContentFlags::IncludeAttributes | DirectoryContentFlags::IncludeModificationTick,
- Visitor,
- WorkerPool,
- PendingWork);
- PendingWork.CountDown();
- while (!PendingWork.Wait(UpdateIntervalMS))
- {
- UpdateCallback(AbortFlag.load(), PendingWork.Remaining());
- }
- std::vector<size_t> Order;
- size_t PathCount = Content.Paths.size();
- Order.resize(Content.Paths.size());
- std::vector<std::string> Parents;
- Parents.reserve(PathCount);
- std::vector<std::string> Filenames;
- Filenames.reserve(PathCount);
- for (size_t OrderIndex = 0; OrderIndex < PathCount; OrderIndex++)
- {
- Order[OrderIndex] = OrderIndex;
- Parents.emplace_back(Content.Paths[OrderIndex].parent_path().generic_string());
- Filenames.emplace_back(Content.Paths[OrderIndex].filename().generic_string());
- }
- std::sort(Order.begin(), Order.end(), [&Parents, &Filenames](size_t Lhs, size_t Rhs) {
- const std::string& LhsParent = Parents[Lhs];
- const std::string& RhsParent = Parents[Rhs];
- if (LhsParent < RhsParent)
- {
- return true;
- }
- else if (LhsParent > RhsParent)
- {
- return false;
- }
- return Filenames[Lhs] < Filenames[Rhs];
- });
- FolderContent OrderedContent;
- OrderedContent.Paths.reserve(PathCount);
- OrderedContent.RawSizes.reserve(PathCount);
- OrderedContent.Attributes.reserve(PathCount);
- OrderedContent.ModificationTicks.reserve(PathCount);
- for (size_t OrderIndex : Order)
- {
- OrderedContent.Paths.emplace_back(std::move(Content.Paths[OrderIndex]));
- OrderedContent.RawSizes.emplace_back(Content.RawSizes[OrderIndex]);
- OrderedContent.Attributes.emplace_back(Content.Attributes[OrderIndex]);
- OrderedContent.ModificationTicks.emplace_back(Content.ModificationTicks[OrderIndex]);
- }
- return OrderedContent;
-}
-
-void
-SaveChunkedFolderContentToCompactBinary(const ChunkedFolderContent& Content, CbWriter& Output)
-{
- ZEN_TRACE_CPU("SaveChunkedFolderContentToCompactBinary");
- Output.AddString("platform"sv, ToString(Content.Platform));
- compactbinary_helpers::WriteArray(Content.Paths, "paths"sv, Output);
- compactbinary_helpers::WriteArray(Content.RawSizes, "rawSizes"sv, Output);
- compactbinary_helpers::WriteArray(Content.Attributes, "attributes"sv, Output);
- compactbinary_helpers::WriteArray(Content.RawHashes, "rawHashes"sv, Output);
-
- Output.BeginObject("chunkedContent");
- compactbinary_helpers::WriteArray(Content.ChunkedContent.SequenceRawHashes, "sequenceRawHashes"sv, Output);
- compactbinary_helpers::WriteArray(Content.ChunkedContent.ChunkCounts, "chunkCounts"sv, Output);
- compactbinary_helpers::WriteArray(Content.ChunkedContent.ChunkOrders, "chunkOrders"sv, Output);
- compactbinary_helpers::WriteArray(Content.ChunkedContent.ChunkHashes, "chunkHashes"sv, Output);
- compactbinary_helpers::WriteArray(Content.ChunkedContent.ChunkRawSizes, "chunkRawSizes"sv, Output);
- Output.EndObject(); // chunkedContent
-}
-
-ChunkedFolderContent
-LoadChunkedFolderContentToCompactBinary(CbObjectView Input)
-{
- ZEN_TRACE_CPU("LoadChunkedFolderContentToCompactBinary");
- ChunkedFolderContent Content;
- Content.Platform = FromString(Input["platform"sv].AsString(), GetSourceCurrentPlatform());
- compactbinary_helpers::ReadArray("paths"sv, Input, Content.Paths);
- compactbinary_helpers::ReadArray("rawSizes"sv, Input, Content.RawSizes);
- compactbinary_helpers::ReadArray("attributes"sv, Input, Content.Attributes);
- compactbinary_helpers::ReadArray("rawHashes"sv, Input, Content.RawHashes);
-
- CbObjectView ChunkedContentView = Input["chunkedContent"sv].AsObjectView();
- compactbinary_helpers::ReadArray("sequenceRawHashes"sv, ChunkedContentView, Content.ChunkedContent.SequenceRawHashes);
- compactbinary_helpers::ReadArray("chunkCounts"sv, ChunkedContentView, Content.ChunkedContent.ChunkCounts);
- compactbinary_helpers::ReadArray("chunkOrders"sv, ChunkedContentView, Content.ChunkedContent.ChunkOrders);
- compactbinary_helpers::ReadArray("chunkHashes"sv, ChunkedContentView, Content.ChunkedContent.ChunkHashes);
- compactbinary_helpers::ReadArray("chunkRawSizes"sv, ChunkedContentView, Content.ChunkedContent.ChunkRawSizes);
- return Content;
-}
-
-ChunkedFolderContent
-MergeChunkedFolderContents(const ChunkedFolderContent& Base, std::span<const ChunkedFolderContent> Overlays)
-{
- ZEN_TRACE_CPU("MergeChunkedFolderContents");
-
- ZEN_ASSERT(!Overlays.empty());
-
- ChunkedFolderContent Result;
- const size_t BasePathCount = Base.Paths.size();
- Result.Paths.reserve(BasePathCount);
- Result.RawSizes.reserve(BasePathCount);
- Result.Attributes.reserve(BasePathCount);
- Result.RawHashes.reserve(BasePathCount);
-
- const size_t BaseChunkCount = Base.ChunkedContent.ChunkHashes.size();
- Result.ChunkedContent.SequenceRawHashes.reserve(Base.ChunkedContent.SequenceRawHashes.size());
- Result.ChunkedContent.ChunkCounts.reserve(BaseChunkCount);
- Result.ChunkedContent.ChunkHashes.reserve(BaseChunkCount);
- Result.ChunkedContent.ChunkRawSizes.reserve(BaseChunkCount);
- Result.ChunkedContent.ChunkOrders.reserve(Base.ChunkedContent.ChunkOrders.size());
-
- tsl::robin_map<std::string, std::filesystem::path> GenericPathToActualPath;
- for (const std::filesystem::path& Path : Base.Paths)
- {
- GenericPathToActualPath.insert({PathCompareString(Path), Path});
- }
- for (const ChunkedFolderContent& Overlay : Overlays)
- {
- for (const std::filesystem::path& Path : Overlay.Paths)
- {
- GenericPathToActualPath.insert({PathCompareString(Path), Path});
- }
- }
-
- tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> RawHashToSequenceRawHashIndex;
-
- auto BuildOverlayPaths = [](std::span<const ChunkedFolderContent> Overlays) -> tsl::robin_set<std::string> {
- tsl::robin_set<std::string> Result;
- for (const ChunkedFolderContent& OverlayContent : Overlays)
- {
- for (const std::filesystem::path& Path : OverlayContent.Paths)
- {
- Result.insert(PathCompareString(Path));
- }
- }
- return Result;
- };
-
- auto AddContent = [&BuildOverlayPaths](ChunkedFolderContent& Result,
- const ChunkedFolderContent& OverlayContent,
- tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& ChunkHashToChunkIndex,
- tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& RawHashToSequenceRawHashIndex,
- const tsl::robin_map<std::string, std::filesystem::path>& GenericPathToActualPath,
- std::span<const ChunkedFolderContent> Overlays) {
- const ChunkedContentLookup OverlayLookup = BuildChunkedContentLookup(OverlayContent);
- tsl::robin_set<std::string> BaseOverlayPaths = BuildOverlayPaths(Overlays);
- for (uint32_t PathIndex = 0; PathIndex < OverlayContent.Paths.size(); PathIndex++)
- {
- std::string GenericPath = PathCompareString(OverlayContent.Paths[PathIndex]);
- if (!BaseOverlayPaths.contains(GenericPath))
- {
- // This asset will not be overridden by a later layer - add it
-
- const std::filesystem::path OriginalPath = GenericPathToActualPath.at(GenericPath);
- Result.Paths.push_back(OriginalPath);
- const IoHash& RawHash = OverlayContent.RawHashes[PathIndex];
- Result.RawSizes.push_back(OverlayContent.RawSizes[PathIndex]);
- Result.Attributes.push_back(OverlayContent.Attributes[PathIndex]);
- Result.RawHashes.push_back(RawHash);
-
- if (OverlayContent.RawSizes[PathIndex] > 0)
- {
- if (!RawHashToSequenceRawHashIndex.contains(RawHash))
- {
- RawHashToSequenceRawHashIndex.insert(
- {RawHash, gsl::narrow<uint32_t>(Result.ChunkedContent.SequenceRawHashes.size())});
- const uint32_t SequenceRawHashIndex = OverlayLookup.RawHashToSequenceIndex.at(RawHash);
- const uint32_t OrderIndexOffset = OverlayLookup.SequenceIndexChunkOrderOffset[SequenceRawHashIndex];
- const uint32_t ChunkCount = OverlayContent.ChunkedContent.ChunkCounts[SequenceRawHashIndex];
- ChunkingStatistics Stats;
- std::span<const uint32_t> OriginalChunkOrder =
- std::span<const uint32_t>(OverlayContent.ChunkedContent.ChunkOrders).subspan(OrderIndexOffset, ChunkCount);
- AddChunkSequence(Stats,
- Result.ChunkedContent,
- ChunkHashToChunkIndex,
- RawHash,
- OriginalChunkOrder,
- OverlayContent.ChunkedContent.ChunkHashes,
- OverlayContent.ChunkedContent.ChunkRawSizes);
- Stats.UniqueSequencesFound++;
- }
- }
- }
- }
- };
-
- tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> MergedChunkHashToChunkIndex;
- AddContent(Result, Base, MergedChunkHashToChunkIndex, RawHashToSequenceRawHashIndex, GenericPathToActualPath, Overlays);
- for (uint32_t OverlayIndex = 0; OverlayIndex < Overlays.size(); OverlayIndex++)
- {
- AddContent(Result,
- Overlays[OverlayIndex],
- MergedChunkHashToChunkIndex,
- RawHashToSequenceRawHashIndex,
- GenericPathToActualPath,
- Overlays.subspan(OverlayIndex + 1));
- }
- return Result;
-}
-
-ChunkedFolderContent
-DeletePathsFromChunkedContent(const ChunkedFolderContent& BaseContent,
- const ChunkedContentLookup& BaseContentLookup,
- std::span<const std::filesystem::path> DeletedPaths)
-{
- ZEN_TRACE_CPU("DeletePathsFromChunkedContent");
-
- ZEN_ASSERT(DeletedPaths.size() <= BaseContent.Paths.size());
- ChunkedFolderContent Result = {.Platform = BaseContent.Platform};
- if (DeletedPaths.size() < BaseContent.Paths.size())
- {
- tsl::robin_set<std::string> DeletedPathSet;
- DeletedPathSet.reserve(DeletedPaths.size());
- for (const std::filesystem::path& DeletedPath : DeletedPaths)
- {
- DeletedPathSet.insert(PathCompareString(DeletedPath));
- }
-
- const size_t BaseChunkCount = BaseContent.ChunkedContent.ChunkHashes.size();
- std::vector<uint32_t> NewChunkIndexes(BaseChunkCount, (uint32_t)-1);
-
- const size_t ExpectedPathCount = BaseContent.Paths.size() - DeletedPaths.size();
- Result.Paths.reserve(ExpectedPathCount);
- Result.RawSizes.reserve(ExpectedPathCount);
- Result.Attributes.reserve(ExpectedPathCount);
- Result.RawHashes.reserve(ExpectedPathCount);
-
- Result.ChunkedContent.ChunkHashes.reserve(BaseChunkCount);
- Result.ChunkedContent.ChunkRawSizes.reserve(BaseChunkCount);
-
- tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> RawHashToSequenceRawHashIndex;
- for (uint32_t PathIndex = 0; PathIndex < BaseContent.Paths.size(); PathIndex++)
- {
- const std::filesystem::path& Path = BaseContent.Paths[PathIndex];
- if (!DeletedPathSet.contains(PathCompareString(Path)))
- {
- const IoHash& RawHash = BaseContent.RawHashes[PathIndex];
- const uint64_t RawSize = BaseContent.RawSizes[PathIndex];
- Result.Paths.push_back(Path);
- Result.RawSizes.push_back(RawSize);
- Result.Attributes.push_back(BaseContent.Attributes[PathIndex]);
- Result.RawHashes.push_back(RawHash);
- if (RawSize > 0)
- {
- if (!RawHashToSequenceRawHashIndex.contains(RawHash))
- {
- RawHashToSequenceRawHashIndex.insert(
- {RawHash, gsl::narrow<uint32_t>(Result.ChunkedContent.SequenceRawHashes.size())});
- const uint32_t SequenceRawHashIndex = BaseContentLookup.RawHashToSequenceIndex.at(RawHash);
- const uint32_t OrderIndexOffset = BaseContentLookup.SequenceIndexChunkOrderOffset[SequenceRawHashIndex];
- const uint32_t ChunkCount = BaseContent.ChunkedContent.ChunkCounts[SequenceRawHashIndex];
-
- std::span<const uint32_t> OriginalChunkOrder =
- std::span<const uint32_t>(BaseContent.ChunkedContent.ChunkOrders).subspan(OrderIndexOffset, ChunkCount);
-
- Result.ChunkedContent.ChunkCounts.push_back(gsl::narrow<uint32_t>(OriginalChunkOrder.size()));
-
- for (uint32_t OldChunkIndex : OriginalChunkOrder)
- {
- if (uint32_t FoundChunkIndex = NewChunkIndexes[OldChunkIndex]; FoundChunkIndex != (uint32_t)-1)
- {
- Result.ChunkedContent.ChunkOrders.push_back(FoundChunkIndex);
- }
- else
- {
- const uint32_t NewChunkIndex = gsl::narrow<uint32_t>(Result.ChunkedContent.ChunkHashes.size());
- NewChunkIndexes[OldChunkIndex] = NewChunkIndex;
- const IoHash& ChunkHash = BaseContent.ChunkedContent.ChunkHashes[OldChunkIndex];
- const uint64_t OldChunkSize = BaseContent.ChunkedContent.ChunkRawSizes[OldChunkIndex];
- Result.ChunkedContent.ChunkHashes.push_back(ChunkHash);
- Result.ChunkedContent.ChunkRawSizes.push_back(OldChunkSize);
- Result.ChunkedContent.ChunkOrders.push_back(NewChunkIndex);
- }
- }
- Result.ChunkedContent.SequenceRawHashes.push_back(RawHash);
- }
- }
- }
- }
- }
- return Result;
-}
-
-ChunkedFolderContent
-DeletePathsFromChunkedContent(const ChunkedFolderContent& BaseContent, std::span<const std::filesystem::path> DeletedPaths)
-{
- ZEN_TRACE_CPU("DeletePathsFromChunkedContent");
- ZEN_ASSERT(DeletedPaths.size() <= BaseContent.Paths.size());
- if (DeletedPaths.size() == BaseContent.Paths.size())
- {
- return {};
- }
- const ChunkedContentLookup BaseLookup = BuildChunkedContentLookup(BaseContent);
- return DeletePathsFromChunkedContent(BaseContent, BaseLookup, DeletedPaths);
-}
-
-ChunkedFolderContent
-ChunkFolderContent(ChunkingStatistics& Stats,
- WorkerThreadPool& WorkerPool,
- const std::filesystem::path& RootPath,
- const FolderContent& Content,
- const ChunkingController& InChunkingController,
- int32_t UpdateIntervalMS,
- std::function<void(bool IsAborted, bool IsPaused, std::ptrdiff_t PendingWork)>&& UpdateCallback,
- std::atomic<bool>& AbortFlag,
- std::atomic<bool>& PauseFlag)
-{
- ZEN_TRACE_CPU("ChunkFolderContent");
-
- Stopwatch Timer;
- auto _ = MakeGuard([&Stats, &Timer]() { Stats.ElapsedWallTimeUS = Timer.GetElapsedTimeUs(); });
-
- ChunkedFolderContent Result = {.Platform = Content.Platform,
- .Paths = Content.Paths,
- .RawSizes = Content.RawSizes,
- .Attributes = Content.Attributes};
- const size_t ItemCount = Result.Paths.size();
- Result.RawHashes.resize(ItemCount, IoHash::Zero);
- Result.ChunkedContent.SequenceRawHashes.reserve(ItemCount); // Up to 1 per file, maybe less
- Result.ChunkedContent.ChunkCounts.reserve(ItemCount); // Up to one per file
- Result.ChunkedContent.ChunkOrders.reserve(ItemCount); // At least 1 per file, maybe more
- Result.ChunkedContent.ChunkHashes.reserve(ItemCount); // At least 1 per file, maybe more
- Result.ChunkedContent.ChunkRawSizes.reserve(ItemCount); // At least 1 per file, maybe more
- tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> ChunkHashToChunkIndex;
- tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> RawHashToChunkSequenceIndex;
- RawHashToChunkSequenceIndex.reserve(ItemCount);
- ChunkHashToChunkIndex.reserve(ItemCount);
- {
- std::vector<uint32_t> Order;
- Order.resize(ItemCount);
- for (uint32_t I = 0; I < ItemCount; I++)
- {
- Order[I] = I;
- }
-
- // Handle the biggest files first so we don't end up with one straggling large file at the end
- // std::sort(Order.begin(), Order.end(), [&](uint32_t Lhs, uint32_t Rhs) { return Result.RawSizes[Lhs] > Result.RawSizes[Rhs];
- //});
-
- tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> RawHashToSequenceRawHashIndex;
- RawHashToSequenceRawHashIndex.reserve(ItemCount);
-
- RwLock Lock;
-
- ParallelWork Work(AbortFlag, PauseFlag, WorkerThreadPool::EMode::EnableBacklog);
-
- for (uint32_t PathIndex : Order)
- {
- if (Work.IsAborted())
- {
- break;
- }
- Work.ScheduleWork(WorkerPool, // GetSyncWorkerPool()
- [&, PathIndex](std::atomic<bool>& AbortFlag) {
- if (!AbortFlag)
- {
- IoHash RawHash = HashOneFile(Stats,
- InChunkingController,
- Result,
- ChunkHashToChunkIndex,
- RawHashToSequenceRawHashIndex,
- Lock,
- RootPath,
- PathIndex,
- AbortFlag);
- Lock.WithExclusiveLock([&]() { Result.RawHashes[PathIndex] = RawHash; });
- Stats.FilesProcessed++;
- }
- });
- }
-
- Work.Wait(UpdateIntervalMS, [&](bool IsAborted, bool IsPaused, std::ptrdiff_t PendingWork) {
- ZEN_UNUSED(PendingWork);
- UpdateCallback(IsAborted, IsPaused, Work.PendingWork().Remaining());
- });
- }
- return Result;
-}
-
-ChunkedContentLookup
-BuildChunkedContentLookup(const ChunkedFolderContent& Content)
-{
- ZEN_TRACE_CPU("BuildChunkedContentLookup");
-
- struct ChunkLocationReference
- {
- uint32_t ChunkIndex = (uint32_t)-1;
- uint32_t SequenceIndex = (uint32_t)-1;
- uint64_t Offset = (uint64_t)-1;
- };
-
- ChunkedContentLookup Result;
- {
- const uint32_t SequenceRawHashesCount = gsl::narrow<uint32_t>(Content.ChunkedContent.SequenceRawHashes.size());
- Result.RawHashToSequenceIndex.reserve(SequenceRawHashesCount);
- Result.SequenceIndexChunkOrderOffset.reserve(SequenceRawHashesCount);
- uint32_t OrderOffset = 0;
- for (uint32_t SequenceRawHashIndex = 0; SequenceRawHashIndex < Content.ChunkedContent.SequenceRawHashes.size();
- SequenceRawHashIndex++)
- {
- Result.RawHashToSequenceIndex.insert({Content.ChunkedContent.SequenceRawHashes[SequenceRawHashIndex], SequenceRawHashIndex});
- Result.SequenceIndexChunkOrderOffset.push_back(OrderOffset);
- OrderOffset += Content.ChunkedContent.ChunkCounts[SequenceRawHashIndex];
- }
- }
-
- std::vector<ChunkLocationReference> Locations;
- Locations.reserve(Content.ChunkedContent.ChunkOrders.size());
- for (uint32_t SequenceIndex = 0; SequenceIndex < Content.ChunkedContent.SequenceRawHashes.size(); SequenceIndex++)
- {
- const uint32_t OrderOffset = Result.SequenceIndexChunkOrderOffset[SequenceIndex];
- const uint32_t ChunkCount = Content.ChunkedContent.ChunkCounts[SequenceIndex];
- uint64_t LocationOffset = 0;
- for (size_t OrderIndex = OrderOffset; OrderIndex < OrderOffset + ChunkCount; OrderIndex++)
- {
- uint32_t ChunkIndex = Content.ChunkedContent.ChunkOrders[OrderIndex];
-
- Locations.push_back(ChunkLocationReference{.ChunkIndex = ChunkIndex, .SequenceIndex = SequenceIndex, .Offset = LocationOffset});
-
- LocationOffset += Content.ChunkedContent.ChunkRawSizes[ChunkIndex];
- }
- }
-
- std::sort(Locations.begin(), Locations.end(), [](const ChunkLocationReference& Lhs, const ChunkLocationReference& Rhs) {
- if (Lhs.ChunkIndex < Rhs.ChunkIndex)
- {
- return true;
- }
- if (Lhs.ChunkIndex > Rhs.ChunkIndex)
- {
- return false;
- }
- if (Lhs.SequenceIndex < Rhs.SequenceIndex)
- {
- return true;
- }
- if (Lhs.SequenceIndex > Rhs.SequenceIndex)
- {
- return false;
- }
- return Lhs.Offset < Rhs.Offset;
- });
-
- Result.ChunkSequenceLocations.reserve(Locations.size());
- const uint32_t ChunkCount = gsl::narrow<uint32_t>(Content.ChunkedContent.ChunkHashes.size());
- Result.ChunkHashToChunkIndex.reserve(ChunkCount);
- size_t RangeOffset = 0;
- for (uint32_t ChunkIndex = 0; ChunkIndex < ChunkCount; ChunkIndex++)
- {
- Result.ChunkHashToChunkIndex.insert({Content.ChunkedContent.ChunkHashes[ChunkIndex], ChunkIndex});
- uint32_t Count = 0;
- while ((RangeOffset + Count < Locations.size()) && (Locations[RangeOffset + Count].ChunkIndex == ChunkIndex))
- {
- const ChunkLocationReference& LocationReference = Locations[RangeOffset + Count];
- Result.ChunkSequenceLocations.push_back(
- ChunkedContentLookup::ChunkSequenceLocation{.SequenceIndex = LocationReference.SequenceIndex,
- .Offset = LocationReference.Offset});
- Count++;
- }
- Result.ChunkSequenceLocationOffset.push_back(RangeOffset);
- Result.ChunkSequenceLocationCounts.push_back(Count);
- RangeOffset += Count;
- }
-
- Result.SequenceIndexFirstPathIndex.resize(Content.ChunkedContent.SequenceRawHashes.size(), (uint32_t)-1);
- Result.PathExtensionHash.resize(Content.Paths.size());
- for (uint32_t PathIndex = 0; PathIndex < Content.Paths.size(); PathIndex++)
- {
- std::string LowercaseExtension = Content.Paths[PathIndex].extension().string();
- std::transform(LowercaseExtension.begin(), LowercaseExtension.end(), LowercaseExtension.begin(), [](char c) {
- return (char)::tolower(c);
- });
- Result.PathExtensionHash[PathIndex] = HashStringDjb2(LowercaseExtension);
- if (Content.RawSizes[PathIndex] > 0)
- {
- const IoHash& RawHash = Content.RawHashes[PathIndex];
- auto SequenceIndexIt = Result.RawHashToSequenceIndex.find(RawHash);
- ZEN_ASSERT(SequenceIndexIt != Result.RawHashToSequenceIndex.end());
- const uint32_t SequenceIndex = SequenceIndexIt->second;
- if (Result.SequenceIndexFirstPathIndex[SequenceIndex] == (uint32_t)-1)
- {
- Result.SequenceIndexFirstPathIndex[SequenceIndex] = PathIndex;
- }
- }
- }
-
- return Result;
-}
-
-} // namespace zen