about | summary | refs | log | tree | commit | diff
path: root/src/zenremotestore/chunking/chunkedcontent.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/zenremotestore/chunking/chunkedcontent.cpp')
-rw-r--r-- src/zenremotestore/chunking/chunkedcontent.cpp 97
1 files changed, 62 insertions, 35 deletions
diff --git a/src/zenremotestore/chunking/chunkedcontent.cpp b/src/zenremotestore/chunking/chunkedcontent.cpp
index fda01aa56..26d179f14 100644
--- a/src/zenremotestore/chunking/chunkedcontent.cpp
+++ b/src/zenremotestore/chunking/chunkedcontent.cpp
@@ -13,6 +13,7 @@
#include <zencore/trace.h>
#include <zenremotestore/chunking/chunkblock.h>
#include <zenremotestore/chunking/chunkedfile.h>
+#include <zenremotestore/chunking/chunkingcache.h>
#include <zenremotestore/chunking/chunkingcontroller.h>
#include <zenutil/wildcard.h>
@@ -100,6 +101,8 @@ namespace {
IoHash HashOneFile(ChunkingStatistics& Stats,
const ChunkingController& InChunkingController,
+ ChunkingCache& InChunkingCache,
+ std::span<const uint64_t> ModificationTicks,
ChunkedFolderContent& OutChunkedContent,
tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& ChunkHashToChunkIndex,
tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& RawHashToSequenceRawHashIndex,
@@ -110,8 +113,9 @@ namespace {
{
ZEN_TRACE_CPU("HashOneFile");
- const uint64_t RawSize = OutChunkedContent.RawSizes[PathIndex];
- const std::filesystem::path& Path = OutChunkedContent.Paths[PathIndex];
+ const std::filesystem::path& Path = OutChunkedContent.Paths[PathIndex];
+ const uint64_t RawSize = OutChunkedContent.RawSizes[PathIndex];
+ const uint64_t ModificationTick = ModificationTicks[PathIndex];
if (RawSize == 0)
{
@@ -119,16 +123,53 @@ namespace {
}
else
{
+ std::filesystem::path FullPath = FolderPath / Path;
+ FullPath.make_preferred();
+
ChunkedInfoWithSource Chunked;
- const bool DidChunking =
- InChunkingController.ProcessFile((FolderPath / Path).make_preferred(), RawSize, Chunked, Stats.BytesHashed, AbortFlag);
- if (DidChunking)
+
+ if (!InChunkingCache.GetCachedFile(FullPath, RawSize, ModificationTick, Chunked))
{
- Lock.WithExclusiveLock([&]() {
- if (!RawHashToSequenceRawHashIndex.contains(Chunked.Info.RawHash))
+ const bool DidChunking = InChunkingController.ProcessFile(FullPath, RawSize, Chunked, Stats.BytesHashed, AbortFlag);
+ if (!DidChunking)
+ {
+ ZEN_TRACE_CPU("HashOnly");
+
+ IoBuffer Buffer = IoBufferBuilder::MakeFromFile(FullPath);
+ if (Buffer.GetSize() != RawSize)
+ {
+ throw std::runtime_error(fmt::format("Failed opening file '{}' for hashing", FolderPath / Path));
+ }
+
+ Chunked.Info.RawSize = RawSize;
+ Chunked.Info.RawHash = IoHash::HashBuffer(Buffer, &Stats.BytesHashed);
+ }
+ if (InChunkingCache.PutCachedFile(FullPath, ModificationTick, Chunked))
+ {
+ Stats.FilesStoredInCache++;
+ Stats.ChunksStoredInCache += Chunked.Info.ChunkSequence.empty() ? 1 : Chunked.Info.ChunkHashes.size();
+ Stats.BytesStoredInCache += RawSize;
+ }
+ }
+ else
+ {
+ Stats.FilesFoundInCache++;
+ Stats.ChunksFoundInCache += Chunked.Info.ChunkSequence.empty() ? 1 : Chunked.Info.ChunkHashes.size();
+ Stats.BytesFoundInCache += RawSize;
+ }
+ Lock.WithExclusiveLock([&]() {
+ if (!RawHashToSequenceRawHashIndex.contains(Chunked.Info.RawHash))
+ {
+ RawHashToSequenceRawHashIndex.insert(
+ {Chunked.Info.RawHash, gsl::narrow<uint32_t>(OutChunkedContent.ChunkedContent.SequenceRawHashes.size())});
+
+ if (Chunked.Info.ChunkSequence.empty())
+ {
+ AddChunkSequence(Stats, OutChunkedContent.ChunkedContent, ChunkHashToChunkIndex, Chunked.Info.RawHash, RawSize);
+ Stats.UniqueSequencesFound++;
+ }
+ else
{
- RawHashToSequenceRawHashIndex.insert(
- {Chunked.Info.RawHash, gsl::narrow<uint32_t>(OutChunkedContent.ChunkedContent.SequenceRawHashes.size())});
std::vector<uint64_t> ChunkSizes;
ChunkSizes.reserve(Chunked.ChunkSources.size());
for (const ChunkSource& Source : Chunked.ChunkSources)
@@ -144,34 +185,12 @@ namespace {
Chunked.Info.ChunkSequence,
Chunked.Info.ChunkHashes,
ChunkSizes);
- Stats.UniqueSequencesFound++;
}
- });
- Stats.FilesChunked++;
- return Chunked.Info.RawHash;
- }
- else
- {
- ZEN_TRACE_CPU("HashOnly");
-
- IoBuffer Buffer = IoBufferBuilder::MakeFromFile((FolderPath / Path).make_preferred());
- if (Buffer.GetSize() != RawSize)
- {
- throw std::runtime_error(fmt::format("Failed opening file '{}' for hashing", FolderPath / Path));
+ Stats.UniqueSequencesFound++;
}
- const IoHash Hash = IoHash::HashBuffer(Buffer, &Stats.BytesHashed);
-
- Lock.WithExclusiveLock([&]() {
- if (!RawHashToSequenceRawHashIndex.contains(Hash))
- {
- RawHashToSequenceRawHashIndex.insert(
- {Hash, gsl::narrow<uint32_t>(OutChunkedContent.ChunkedContent.SequenceRawHashes.size())});
- AddChunkSequence(Stats, OutChunkedContent.ChunkedContent, ChunkHashToChunkIndex, Hash, RawSize);
- Stats.UniqueSequencesFound++;
- }
- });
- return Hash;
- }
+ });
+ Stats.FilesChunked++;
+ return Chunked.Info.RawHash;
}
}
@@ -1113,6 +1132,7 @@ ChunkFolderContent(ChunkingStatistics& Stats,
const std::filesystem::path& RootPath,
const FolderContent& Content,
const ChunkingController& InChunkingController,
+ ChunkingCache& InChunkingCache,
int32_t UpdateIntervalMS,
std::function<void(bool IsAborted, bool IsPaused, std::ptrdiff_t PendingWork)>&& UpdateCallback,
std::atomic<bool>& AbortFlag,
@@ -1123,6 +1143,10 @@ ChunkFolderContent(ChunkingStatistics& Stats,
Stopwatch Timer;
auto _ = MakeGuard([&Stats, &Timer]() { Stats.ElapsedWallTimeUS = Timer.GetElapsedTimeUs(); });
+ ZEN_ASSERT(Content.ModificationTicks.size() == Content.Paths.size());
+ ZEN_ASSERT(Content.RawSizes.size() == Content.Paths.size());
+ ZEN_ASSERT(Content.Attributes.size() == Content.Paths.size());
+
ChunkedFolderContent Result = {.Platform = Content.Platform,
.Paths = Content.Paths,
.RawSizes = Content.RawSizes,
@@ -1163,12 +1187,15 @@ ChunkFolderContent(ChunkingStatistics& Stats,
{
break;
}
+
Work.ScheduleWork(WorkerPool, // GetSyncWorkerPool()
[&, PathIndex](std::atomic<bool>& AbortFlag) {
if (!AbortFlag)
{
IoHash RawHash = HashOneFile(Stats,
InChunkingController,
+ InChunkingCache,
+ Content.ModificationTicks,
Result,
ChunkHashToChunkIndex,
RawHashToSequenceRawHashIndex,