diff options
Diffstat (limited to 'src/zenremotestore/chunking/chunkedcontent.cpp')
| -rw-r--r-- | src/zenremotestore/chunking/chunkedcontent.cpp | 97 |
1 file changed, 62 insertions, 35 deletions
diff --git a/src/zenremotestore/chunking/chunkedcontent.cpp b/src/zenremotestore/chunking/chunkedcontent.cpp index fda01aa56..26d179f14 100644 --- a/src/zenremotestore/chunking/chunkedcontent.cpp +++ b/src/zenremotestore/chunking/chunkedcontent.cpp @@ -13,6 +13,7 @@ #include <zencore/trace.h> #include <zenremotestore/chunking/chunkblock.h> #include <zenremotestore/chunking/chunkedfile.h> +#include <zenremotestore/chunking/chunkingcache.h> #include <zenremotestore/chunking/chunkingcontroller.h> #include <zenutil/wildcard.h> @@ -100,6 +101,8 @@ namespace { IoHash HashOneFile(ChunkingStatistics& Stats, const ChunkingController& InChunkingController, + ChunkingCache& InChunkingCache, + std::span<const uint64_t> ModificationTicks, ChunkedFolderContent& OutChunkedContent, tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& ChunkHashToChunkIndex, tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& RawHashToSequenceRawHashIndex, @@ -110,8 +113,9 @@ namespace { { ZEN_TRACE_CPU("HashOneFile"); - const uint64_t RawSize = OutChunkedContent.RawSizes[PathIndex]; - const std::filesystem::path& Path = OutChunkedContent.Paths[PathIndex]; + const std::filesystem::path& Path = OutChunkedContent.Paths[PathIndex]; + const uint64_t RawSize = OutChunkedContent.RawSizes[PathIndex]; + const uint64_t ModificationTick = ModificationTicks[PathIndex]; if (RawSize == 0) { @@ -119,16 +123,53 @@ namespace { } else { + std::filesystem::path FullPath = FolderPath / Path; + FullPath.make_preferred(); + ChunkedInfoWithSource Chunked; - const bool DidChunking = - InChunkingController.ProcessFile((FolderPath / Path).make_preferred(), RawSize, Chunked, Stats.BytesHashed, AbortFlag); - if (DidChunking) + + if (!InChunkingCache.GetCachedFile(FullPath, RawSize, ModificationTick, Chunked)) { - Lock.WithExclusiveLock([&]() { - if (!RawHashToSequenceRawHashIndex.contains(Chunked.Info.RawHash)) + const bool DidChunking = InChunkingController.ProcessFile(FullPath, RawSize, Chunked, Stats.BytesHashed, AbortFlag); + 
if (!DidChunking) + { + ZEN_TRACE_CPU("HashOnly"); + + IoBuffer Buffer = IoBufferBuilder::MakeFromFile(FullPath); + if (Buffer.GetSize() != RawSize) + { + throw std::runtime_error(fmt::format("Failed opening file '{}' for hashing", FolderPath / Path)); + } + + Chunked.Info.RawSize = RawSize; + Chunked.Info.RawHash = IoHash::HashBuffer(Buffer, &Stats.BytesHashed); + } + if (InChunkingCache.PutCachedFile(FullPath, ModificationTick, Chunked)) + { + Stats.FilesStoredInCache++; + Stats.ChunksStoredInCache += Chunked.Info.ChunkSequence.empty() ? 1 : Chunked.Info.ChunkHashes.size(); + Stats.BytesStoredInCache += RawSize; + } + } + else + { + Stats.FilesFoundInCache++; + Stats.ChunksFoundInCache += Chunked.Info.ChunkSequence.empty() ? 1 : Chunked.Info.ChunkHashes.size(); + Stats.BytesFoundInCache += RawSize; + } + Lock.WithExclusiveLock([&]() { + if (!RawHashToSequenceRawHashIndex.contains(Chunked.Info.RawHash)) + { + RawHashToSequenceRawHashIndex.insert( + {Chunked.Info.RawHash, gsl::narrow<uint32_t>(OutChunkedContent.ChunkedContent.SequenceRawHashes.size())}); + + if (Chunked.Info.ChunkSequence.empty()) + { + AddChunkSequence(Stats, OutChunkedContent.ChunkedContent, ChunkHashToChunkIndex, Chunked.Info.RawHash, RawSize); + Stats.UniqueSequencesFound++; + } + else { - RawHashToSequenceRawHashIndex.insert( - {Chunked.Info.RawHash, gsl::narrow<uint32_t>(OutChunkedContent.ChunkedContent.SequenceRawHashes.size())}); std::vector<uint64_t> ChunkSizes; ChunkSizes.reserve(Chunked.ChunkSources.size()); for (const ChunkSource& Source : Chunked.ChunkSources) @@ -144,34 +185,12 @@ namespace { Chunked.Info.ChunkSequence, Chunked.Info.ChunkHashes, ChunkSizes); - Stats.UniqueSequencesFound++; } - }); - Stats.FilesChunked++; - return Chunked.Info.RawHash; - } - else - { - ZEN_TRACE_CPU("HashOnly"); - - IoBuffer Buffer = IoBufferBuilder::MakeFromFile((FolderPath / Path).make_preferred()); - if (Buffer.GetSize() != RawSize) - { - throw std::runtime_error(fmt::format("Failed opening file 
'{}' for hashing", FolderPath / Path)); + Stats.UniqueSequencesFound++; } - const IoHash Hash = IoHash::HashBuffer(Buffer, &Stats.BytesHashed); - - Lock.WithExclusiveLock([&]() { - if (!RawHashToSequenceRawHashIndex.contains(Hash)) - { - RawHashToSequenceRawHashIndex.insert( - {Hash, gsl::narrow<uint32_t>(OutChunkedContent.ChunkedContent.SequenceRawHashes.size())}); - AddChunkSequence(Stats, OutChunkedContent.ChunkedContent, ChunkHashToChunkIndex, Hash, RawSize); - Stats.UniqueSequencesFound++; - } - }); - return Hash; - } + }); + Stats.FilesChunked++; + return Chunked.Info.RawHash; } } @@ -1113,6 +1132,7 @@ ChunkFolderContent(ChunkingStatistics& Stats, const std::filesystem::path& RootPath, const FolderContent& Content, const ChunkingController& InChunkingController, + ChunkingCache& InChunkingCache, int32_t UpdateIntervalMS, std::function<void(bool IsAborted, bool IsPaused, std::ptrdiff_t PendingWork)>&& UpdateCallback, std::atomic<bool>& AbortFlag, @@ -1123,6 +1143,10 @@ ChunkFolderContent(ChunkingStatistics& Stats, Stopwatch Timer; auto _ = MakeGuard([&Stats, &Timer]() { Stats.ElapsedWallTimeUS = Timer.GetElapsedTimeUs(); }); + ZEN_ASSERT(Content.ModificationTicks.size() == Content.Paths.size()); + ZEN_ASSERT(Content.RawSizes.size() == Content.Paths.size()); + ZEN_ASSERT(Content.Attributes.size() == Content.Paths.size()); + ChunkedFolderContent Result = {.Platform = Content.Platform, .Paths = Content.Paths, .RawSizes = Content.RawSizes, @@ -1163,12 +1187,15 @@ ChunkFolderContent(ChunkingStatistics& Stats, { break; } + Work.ScheduleWork(WorkerPool, // GetSyncWorkerPool() [&, PathIndex](std::atomic<bool>& AbortFlag) { if (!AbortFlag) { IoHash RawHash = HashOneFile(Stats, InChunkingController, + InChunkingCache, + Content.ModificationTicks, Result, ChunkHashToChunkIndex, RawHashToSequenceRawHashIndex, |