diff options
Diffstat (limited to 'src/zenutil/chunkedcontent.cpp')
| -rw-r--r-- | src/zenutil/chunkedcontent.cpp | 185 |
1 files changed, 120 insertions, 65 deletions
diff --git a/src/zenutil/chunkedcontent.cpp b/src/zenutil/chunkedcontent.cpp index bb1ee5183..cd1bf7dd7 100644 --- a/src/zenutil/chunkedcontent.cpp +++ b/src/zenutil/chunkedcontent.cpp @@ -11,7 +11,7 @@ #include <zenutil/chunkedfile.h> #include <zenutil/chunkingcontroller.h> -#include <zenutil/parallellwork.h> +#include <zenutil/parallelwork.h> #include <zenutil/workerpools.h> ZEN_THIRD_PARTY_INCLUDES_START @@ -140,8 +140,12 @@ namespace { { ZEN_TRACE_CPU("HashOnly"); - IoBuffer Buffer = IoBufferBuilder::MakeFromFile((FolderPath / Path).make_preferred()); - const IoHash Hash = IoHash::HashBuffer(Buffer, &Stats.BytesHashed); + IoBuffer Buffer = IoBufferBuilder::MakeFromFile((FolderPath / Path).make_preferred()); + if (Buffer.GetSize() != RawSize) + { + throw std::runtime_error(fmt::format("Failed opening file '{}' for hashing", FolderPath / Path)); + } + const IoHash Hash = IoHash::HashBuffer(Buffer, &Stats.BytesHashed); Lock.WithExclusiveLock([&]() { if (!RawHashToSequenceRawHashIndex.contains(Hash)) @@ -301,17 +305,25 @@ FolderContent::UpdateState(const FolderContent& Rhs, std::vector<uint32_t>& OutP } FolderContent -GetUpdatedContent(const FolderContent& Old, const FolderContent& New, std::vector<std::filesystem::path>& OutDeletedPathIndexes) +GetUpdatedContent(const FolderContent& Old, const FolderContent& New, std::vector<std::filesystem::path>& OutDeletedPaths) { ZEN_TRACE_CPU("FolderContent::GetUpdatedContent"); - FolderContent Result = {.Platform = Old.Platform}; + + const uint32_t NewPathCount = gsl::narrow<uint32_t>(New.Paths.size()); + + FolderContent Result = {.Platform = Old.Platform}; + Result.Paths.reserve(NewPathCount); + Result.RawSizes.reserve(NewPathCount); + Result.Attributes.reserve(NewPathCount); + Result.ModificationTicks.reserve(NewPathCount); + tsl::robin_map<std::string, uint32_t> NewPathToIndex; - const uint32_t NewPathCount = gsl::narrow<uint32_t>(New.Paths.size()); NewPathToIndex.reserve(NewPathCount); for (uint32_t NewPathIndex = 0; NewPathIndex < NewPathCount; NewPathIndex++) { NewPathToIndex.insert({New.Paths[NewPathIndex].generic_string(), NewPathIndex}); } + uint32_t OldPathCount = gsl::narrow<uint32_t>(Old.Paths.size()); for (uint32_t OldPathIndex = 0; OldPathIndex < OldPathCount; OldPathIndex++) { @@ -330,7 +342,7 @@ GetUpdatedContent(const FolderContent& Old, const FolderContent& New, std::vecto } else { - OutDeletedPathIndexes.push_back(Old.Paths[OldPathIndex]); + OutDeletedPaths.push_back(Old.Paths[OldPathIndex]); } } return Result; @@ -366,7 +378,7 @@ GetFolderContent(GetFolderContentStatistics& Stats, std::function<bool(const std::string_view& RelativePath)>&& AcceptDirectory, std::function<bool(std::string_view RelativePath, uint64_t Size, uint32_t Attributes)>&& AcceptFile, WorkerThreadPool& WorkerPool, - int32_t UpdateInteralMS, + int32_t UpdateIntervalMS, std::function<void(bool IsAborted, std::ptrdiff_t PendingWork)>&& UpdateCallback, std::atomic<bool>& AbortFlag) { @@ -455,7 +467,7 @@ GetFolderContent(GetFolderContentStatistics& Stats, WorkerPool, PendingWork); PendingWork.CountDown(); - while (!PendingWork.Wait(UpdateInteralMS)) + while (!PendingWork.Wait(UpdateIntervalMS)) { UpdateCallback(AbortFlag.load(), PendingWork.Remaining()); } @@ -650,7 +662,9 @@ MergeChunkedFolderContents(const ChunkedFolderContent& Base, std::span<const Chu } ChunkedFolderContent -DeletePathsFromChunkedContent(const ChunkedFolderContent& BaseContent, std::span<const std::filesystem::path> DeletedPaths) +DeletePathsFromChunkedContent(const ChunkedFolderContent& BaseContent, + const ChunkedContentLookup& BaseContentLookup, + std::span<const std::filesystem::path> DeletedPaths) { ZEN_TRACE_CPU("DeletePathsFromChunkedContent"); @@ -664,8 +678,18 @@ DeletePathsFromChunkedContent(const ChunkedFolderContent& BaseContent, std::span { DeletedPathSet.insert(PathCompareString(DeletedPath)); } - const ChunkedContentLookup BaseLookup = BuildChunkedContentLookup(BaseContent); - tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> ChunkHashToChunkIndex; + + const size_t BaseChunkCount = BaseContent.ChunkedContent.ChunkHashes.size(); + std::vector<uint32_t> NewChunkIndexes(BaseChunkCount, (uint32_t)-1); + + const size_t ExpectedPathCount = BaseContent.Paths.size() - DeletedPaths.size(); + Result.Paths.reserve(ExpectedPathCount); + Result.RawSizes.reserve(ExpectedPathCount); + Result.Attributes.reserve(ExpectedPathCount); + Result.RawHashes.reserve(ExpectedPathCount); + + Result.ChunkedContent.ChunkHashes.reserve(BaseChunkCount); + Result.ChunkedContent.ChunkRawSizes.reserve(BaseChunkCount); tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> RawHashToSequenceRawHashIndex; for (uint32_t PathIndex = 0; PathIndex < BaseContent.Paths.size(); PathIndex++) @@ -685,20 +709,33 @@ DeletePathsFromChunkedContent(const ChunkedFolderContent& BaseContent, std::span { RawHashToSequenceRawHashIndex.insert( {RawHash, gsl::narrow<uint32_t>(Result.ChunkedContent.SequenceRawHashes.size())}); - const uint32_t SequenceRawHashIndex = BaseLookup.RawHashToSequenceIndex.at(RawHash); - const uint32_t OrderIndexOffset = BaseLookup.SequenceIndexChunkOrderOffset[SequenceRawHashIndex]; - const uint32_t ChunkCount = BaseContent.ChunkedContent.ChunkCounts[SequenceRawHashIndex]; - ChunkingStatistics Stats; + const uint32_t SequenceRawHashIndex = BaseContentLookup.RawHashToSequenceIndex.at(RawHash); + const uint32_t OrderIndexOffset = BaseContentLookup.SequenceIndexChunkOrderOffset[SequenceRawHashIndex]; + const uint32_t ChunkCount = BaseContent.ChunkedContent.ChunkCounts[SequenceRawHashIndex]; + std::span<const uint32_t> OriginalChunkOrder = std::span<const uint32_t>(BaseContent.ChunkedContent.ChunkOrders).subspan(OrderIndexOffset, ChunkCount); - AddChunkSequence(Stats, - Result.ChunkedContent, - ChunkHashToChunkIndex, - RawHash, - OriginalChunkOrder, - BaseContent.ChunkedContent.ChunkHashes, - BaseContent.ChunkedContent.ChunkRawSizes); - Stats.UniqueSequencesFound++; + + Result.ChunkedContent.ChunkCounts.push_back(gsl::narrow<uint32_t>(OriginalChunkOrder.size())); + + for (uint32_t OldChunkIndex : OriginalChunkOrder) + { + if (uint32_t FoundChunkIndex = NewChunkIndexes[OldChunkIndex]; FoundChunkIndex != (uint32_t)-1) + { + Result.ChunkedContent.ChunkOrders.push_back(FoundChunkIndex); + } + else + { + const uint32_t NewChunkIndex = gsl::narrow<uint32_t>(Result.ChunkedContent.ChunkHashes.size()); + NewChunkIndexes[OldChunkIndex] = NewChunkIndex; + const IoHash& ChunkHash = BaseContent.ChunkedContent.ChunkHashes[OldChunkIndex]; + const uint64_t OldChunkSize = BaseContent.ChunkedContent.ChunkRawSizes[OldChunkIndex]; + Result.ChunkedContent.ChunkHashes.push_back(ChunkHash); + Result.ChunkedContent.ChunkRawSizes.push_back(OldChunkSize); + Result.ChunkedContent.ChunkOrders.push_back(NewChunkIndex); + } + } + Result.ChunkedContent.SequenceRawHashes.push_back(RawHash); } } } @@ -708,14 +745,28 @@ DeletePathsFromChunkedContent(const ChunkedFolderContent& BaseContent, std::span } ChunkedFolderContent -ChunkFolderContent(ChunkingStatistics& Stats, - WorkerThreadPool& WorkerPool, - const std::filesystem::path& RootPath, - const FolderContent& Content, - const ChunkingController& InChunkingController, - int32_t UpdateInteralMS, - std::function<void(bool IsAborted, std::ptrdiff_t PendingWork)>&& UpdateCallback, - std::atomic<bool>& AbortFlag) +DeletePathsFromChunkedContent(const ChunkedFolderContent& BaseContent, std::span<const std::filesystem::path> DeletedPaths) +{ + ZEN_TRACE_CPU("DeletePathsFromChunkedContent"); + ZEN_ASSERT(DeletedPaths.size() <= BaseContent.Paths.size()); + if (DeletedPaths.size() == BaseContent.Paths.size()) + { + return {}; + } + const ChunkedContentLookup BaseLookup = BuildChunkedContentLookup(BaseContent); + return DeletePathsFromChunkedContent(BaseContent, BaseLookup, DeletedPaths); +} + +ChunkedFolderContent +ChunkFolderContent(ChunkingStatistics& Stats, + WorkerThreadPool& WorkerPool, + const std::filesystem::path& RootPath, + const FolderContent& Content, + const ChunkingController& InChunkingController, + int32_t UpdateIntervalMS, + std::function<void(bool IsAborted, bool IsPaused, std::ptrdiff_t PendingWork)>&& UpdateCallback, + std::atomic<bool>& AbortFlag, + std::atomic<bool>& PauseFlag) { ZEN_TRACE_CPU("ChunkFolderContent"); @@ -754,7 +805,7 @@ ChunkFolderContent(ChunkingStatistics& Stats, RwLock Lock; - ParallellWork Work(AbortFlag); + ParallelWork Work(AbortFlag, PauseFlag); for (uint32_t PathIndex : Order) { @@ -762,31 +813,28 @@ ChunkFolderContent(ChunkingStatistics& Stats, { break; } - Work.ScheduleWork( - WorkerPool, // GetSyncWorkerPool() - [&, PathIndex](std::atomic<bool>& AbortFlag) { - if (!AbortFlag) - { - IoHash RawHash = HashOneFile(Stats, - InChunkingController, - Result, - ChunkHashToChunkIndex, - RawHashToSequenceRawHashIndex, - Lock, - RootPath, - PathIndex, - AbortFlag); - Lock.WithExclusiveLock([&]() { Result.RawHashes[PathIndex] = RawHash; }); - Stats.FilesProcessed++; - } - }, - Work.DefaultErrorFunction()); - } - - Work.Wait(UpdateInteralMS, [&](bool IsAborted, std::ptrdiff_t PendingWork) { - ZEN_UNUSED(IsAborted); + Work.ScheduleWork(WorkerPool, // GetSyncWorkerPool() + [&, PathIndex](std::atomic<bool>& AbortFlag) { + if (!AbortFlag) + { + IoHash RawHash = HashOneFile(Stats, + InChunkingController, + Result, + ChunkHashToChunkIndex, + RawHashToSequenceRawHashIndex, + Lock, + RootPath, + PathIndex, + AbortFlag); + Lock.WithExclusiveLock([&]() { Result.RawHashes[PathIndex] = RawHash; }); + Stats.FilesProcessed++; + } + }); + } + + Work.Wait(UpdateIntervalMS, [&](bool IsAborted, bool IsPaused, std::ptrdiff_t PendingWork) { ZEN_UNUSED(PendingWork); - UpdateCallback(Work.IsAborted(), Work.PendingWork().Remaining()); + UpdateCallback(IsAborted, IsPaused, Work.PendingWork().Remaining()); }); } return Result; @@ -799,8 +847,9 @@ BuildChunkedContentLookup(const ChunkedFolderContent& Content) struct ChunkLocationReference { - uint32_t ChunkIndex = (uint32_t)-1; - ChunkedContentLookup::ChunkSequenceLocation Location; + uint32_t ChunkIndex = (uint32_t)-1; + uint32_t SequenceIndex = (uint32_t)-1; + uint64_t Offset = (uint64_t)-1; }; ChunkedContentLookup Result; @@ -829,8 +878,7 @@ BuildChunkedContentLookup(const ChunkedFolderContent& Content) { uint32_t ChunkIndex = Content.ChunkedContent.ChunkOrders[OrderIndex]; - Locations.push_back( - ChunkLocationReference{ChunkIndex, ChunkedContentLookup::ChunkSequenceLocation{SequenceIndex, LocationOffset}}); + Locations.push_back(ChunkLocationReference{.ChunkIndex = ChunkIndex, .SequenceIndex = SequenceIndex, .Offset = LocationOffset}); LocationOffset += Content.ChunkedContent.ChunkRawSizes[ChunkIndex]; } @@ -845,15 +893,15 @@ BuildChunkedContentLookup(const ChunkedFolderContent& Content) { return false; } - if (Lhs.Location.SequenceIndex < Rhs.Location.SequenceIndex) + if (Lhs.SequenceIndex < Rhs.SequenceIndex) { return true; } - if (Lhs.Location.SequenceIndex > Rhs.Location.SequenceIndex) + if (Lhs.SequenceIndex > Rhs.SequenceIndex) { return false; } - return Lhs.Location.Offset < Rhs.Location.Offset; + return Lhs.Offset < Rhs.Offset; }); Result.ChunkSequenceLocations.reserve(Locations.size()); @@ -866,7 +914,10 @@ BuildChunkedContentLookup(const ChunkedFolderContent& Content) uint32_t Count = 0; while ((RangeOffset + Count < Locations.size()) && (Locations[RangeOffset + Count].ChunkIndex == ChunkIndex)) { - Result.ChunkSequenceLocations.push_back(Locations[RangeOffset + Count].Location); + const ChunkLocationReference& LocationReference = Locations[RangeOffset + Count]; + Result.ChunkSequenceLocations.push_back( + ChunkedContentLookup::ChunkSequenceLocation{.SequenceIndex = LocationReference.SequenceIndex, + .Offset = LocationReference.Offset}); Count++; } Result.ChunkSequenceLocationOffset.push_back(RangeOffset); @@ -875,8 +926,12 @@ BuildChunkedContentLookup(const ChunkedFolderContent& Content) } Result.SequenceIndexFirstPathIndex.resize(Content.ChunkedContent.SequenceRawHashes.size(), (uint32_t)-1); + Result.PathExtensionHash.resize(Content.Paths.size()); for (uint32_t PathIndex = 0; PathIndex < Content.Paths.size(); PathIndex++) { + std::string LowercaseExtension = Content.Paths[PathIndex].extension().string(); + std::transform(LowercaseExtension.begin(), LowercaseExtension.end(), LowercaseExtension.begin(), ::tolower); + Result.PathExtensionHash[PathIndex] = HashStringDjb2(LowercaseExtension); if (Content.RawSizes[PathIndex] > 0) { const IoHash& RawHash = Content.RawHashes[PathIndex]; |