aboutsummaryrefslogtreecommitdiff
path: root/src/zenutil/chunkedcontent.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/zenutil/chunkedcontent.cpp')
-rw-r--r--src/zenutil/chunkedcontent.cpp185
1 files changed, 120 insertions, 65 deletions
diff --git a/src/zenutil/chunkedcontent.cpp b/src/zenutil/chunkedcontent.cpp
index bb1ee5183..cd1bf7dd7 100644
--- a/src/zenutil/chunkedcontent.cpp
+++ b/src/zenutil/chunkedcontent.cpp
@@ -11,7 +11,7 @@
#include <zenutil/chunkedfile.h>
#include <zenutil/chunkingcontroller.h>
-#include <zenutil/parallellwork.h>
+#include <zenutil/parallelwork.h>
#include <zenutil/workerpools.h>
ZEN_THIRD_PARTY_INCLUDES_START
@@ -140,8 +140,12 @@ namespace {
{
ZEN_TRACE_CPU("HashOnly");
- IoBuffer Buffer = IoBufferBuilder::MakeFromFile((FolderPath / Path).make_preferred());
- const IoHash Hash = IoHash::HashBuffer(Buffer, &Stats.BytesHashed);
+ IoBuffer Buffer = IoBufferBuilder::MakeFromFile((FolderPath / Path).make_preferred());
+ if (Buffer.GetSize() != RawSize)
+ {
+ throw std::runtime_error(fmt::format("Failed opening file '{}' for hashing", FolderPath / Path));
+ }
+ const IoHash Hash = IoHash::HashBuffer(Buffer, &Stats.BytesHashed);
Lock.WithExclusiveLock([&]() {
if (!RawHashToSequenceRawHashIndex.contains(Hash))
@@ -301,17 +305,25 @@ FolderContent::UpdateState(const FolderContent& Rhs, std::vector<uint32_t>& OutP
}
FolderContent
-GetUpdatedContent(const FolderContent& Old, const FolderContent& New, std::vector<std::filesystem::path>& OutDeletedPathIndexes)
+GetUpdatedContent(const FolderContent& Old, const FolderContent& New, std::vector<std::filesystem::path>& OutDeletedPaths)
{
ZEN_TRACE_CPU("FolderContent::GetUpdatedContent");
- FolderContent Result = {.Platform = Old.Platform};
+
+ const uint32_t NewPathCount = gsl::narrow<uint32_t>(New.Paths.size());
+
+ FolderContent Result = {.Platform = Old.Platform};
+ Result.Paths.reserve(NewPathCount);
+ Result.RawSizes.reserve(NewPathCount);
+ Result.Attributes.reserve(NewPathCount);
+ Result.ModificationTicks.reserve(NewPathCount);
+
tsl::robin_map<std::string, uint32_t> NewPathToIndex;
- const uint32_t NewPathCount = gsl::narrow<uint32_t>(New.Paths.size());
NewPathToIndex.reserve(NewPathCount);
for (uint32_t NewPathIndex = 0; NewPathIndex < NewPathCount; NewPathIndex++)
{
NewPathToIndex.insert({New.Paths[NewPathIndex].generic_string(), NewPathIndex});
}
+
uint32_t OldPathCount = gsl::narrow<uint32_t>(Old.Paths.size());
for (uint32_t OldPathIndex = 0; OldPathIndex < OldPathCount; OldPathIndex++)
{
@@ -330,7 +342,7 @@ GetUpdatedContent(const FolderContent& Old, const FolderContent& New, std::vecto
}
else
{
- OutDeletedPathIndexes.push_back(Old.Paths[OldPathIndex]);
+ OutDeletedPaths.push_back(Old.Paths[OldPathIndex]);
}
}
return Result;
@@ -366,7 +378,7 @@ GetFolderContent(GetFolderContentStatistics& Stats,
std::function<bool(const std::string_view& RelativePath)>&& AcceptDirectory,
std::function<bool(std::string_view RelativePath, uint64_t Size, uint32_t Attributes)>&& AcceptFile,
WorkerThreadPool& WorkerPool,
- int32_t UpdateInteralMS,
+ int32_t UpdateIntervalMS,
std::function<void(bool IsAborted, std::ptrdiff_t PendingWork)>&& UpdateCallback,
std::atomic<bool>& AbortFlag)
{
@@ -455,7 +467,7 @@ GetFolderContent(GetFolderContentStatistics& Stats,
WorkerPool,
PendingWork);
PendingWork.CountDown();
- while (!PendingWork.Wait(UpdateInteralMS))
+ while (!PendingWork.Wait(UpdateIntervalMS))
{
UpdateCallback(AbortFlag.load(), PendingWork.Remaining());
}
@@ -650,7 +662,9 @@ MergeChunkedFolderContents(const ChunkedFolderContent& Base, std::span<const Chu
}
ChunkedFolderContent
-DeletePathsFromChunkedContent(const ChunkedFolderContent& BaseContent, std::span<const std::filesystem::path> DeletedPaths)
+DeletePathsFromChunkedContent(const ChunkedFolderContent& BaseContent,
+ const ChunkedContentLookup& BaseContentLookup,
+ std::span<const std::filesystem::path> DeletedPaths)
{
ZEN_TRACE_CPU("DeletePathsFromChunkedContent");
@@ -664,8 +678,18 @@ DeletePathsFromChunkedContent(const ChunkedFolderContent& BaseContent, std::span
{
DeletedPathSet.insert(PathCompareString(DeletedPath));
}
- const ChunkedContentLookup BaseLookup = BuildChunkedContentLookup(BaseContent);
- tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> ChunkHashToChunkIndex;
+
+ const size_t BaseChunkCount = BaseContent.ChunkedContent.ChunkHashes.size();
+ std::vector<uint32_t> NewChunkIndexes(BaseChunkCount, (uint32_t)-1);
+
+ const size_t ExpectedPathCount = BaseContent.Paths.size() - DeletedPaths.size();
+ Result.Paths.reserve(ExpectedPathCount);
+ Result.RawSizes.reserve(ExpectedPathCount);
+ Result.Attributes.reserve(ExpectedPathCount);
+ Result.RawHashes.reserve(ExpectedPathCount);
+
+ Result.ChunkedContent.ChunkHashes.reserve(BaseChunkCount);
+ Result.ChunkedContent.ChunkRawSizes.reserve(BaseChunkCount);
tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> RawHashToSequenceRawHashIndex;
for (uint32_t PathIndex = 0; PathIndex < BaseContent.Paths.size(); PathIndex++)
@@ -685,20 +709,33 @@ DeletePathsFromChunkedContent(const ChunkedFolderContent& BaseContent, std::span
{
RawHashToSequenceRawHashIndex.insert(
{RawHash, gsl::narrow<uint32_t>(Result.ChunkedContent.SequenceRawHashes.size())});
- const uint32_t SequenceRawHashIndex = BaseLookup.RawHashToSequenceIndex.at(RawHash);
- const uint32_t OrderIndexOffset = BaseLookup.SequenceIndexChunkOrderOffset[SequenceRawHashIndex];
- const uint32_t ChunkCount = BaseContent.ChunkedContent.ChunkCounts[SequenceRawHashIndex];
- ChunkingStatistics Stats;
+ const uint32_t SequenceRawHashIndex = BaseContentLookup.RawHashToSequenceIndex.at(RawHash);
+ const uint32_t OrderIndexOffset = BaseContentLookup.SequenceIndexChunkOrderOffset[SequenceRawHashIndex];
+ const uint32_t ChunkCount = BaseContent.ChunkedContent.ChunkCounts[SequenceRawHashIndex];
+
std::span<const uint32_t> OriginalChunkOrder =
std::span<const uint32_t>(BaseContent.ChunkedContent.ChunkOrders).subspan(OrderIndexOffset, ChunkCount);
- AddChunkSequence(Stats,
- Result.ChunkedContent,
- ChunkHashToChunkIndex,
- RawHash,
- OriginalChunkOrder,
- BaseContent.ChunkedContent.ChunkHashes,
- BaseContent.ChunkedContent.ChunkRawSizes);
- Stats.UniqueSequencesFound++;
+
+ Result.ChunkedContent.ChunkCounts.push_back(gsl::narrow<uint32_t>(OriginalChunkOrder.size()));
+
+ for (uint32_t OldChunkIndex : OriginalChunkOrder)
+ {
+ if (uint32_t FoundChunkIndex = NewChunkIndexes[OldChunkIndex]; FoundChunkIndex != (uint32_t)-1)
+ {
+ Result.ChunkedContent.ChunkOrders.push_back(FoundChunkIndex);
+ }
+ else
+ {
+ const uint32_t NewChunkIndex = gsl::narrow<uint32_t>(Result.ChunkedContent.ChunkHashes.size());
+ NewChunkIndexes[OldChunkIndex] = NewChunkIndex;
+ const IoHash& ChunkHash = BaseContent.ChunkedContent.ChunkHashes[OldChunkIndex];
+ const uint64_t OldChunkSize = BaseContent.ChunkedContent.ChunkRawSizes[OldChunkIndex];
+ Result.ChunkedContent.ChunkHashes.push_back(ChunkHash);
+ Result.ChunkedContent.ChunkRawSizes.push_back(OldChunkSize);
+ Result.ChunkedContent.ChunkOrders.push_back(NewChunkIndex);
+ }
+ }
+ Result.ChunkedContent.SequenceRawHashes.push_back(RawHash);
}
}
}
@@ -708,14 +745,28 @@ DeletePathsFromChunkedContent(const ChunkedFolderContent& BaseContent, std::span
}
ChunkedFolderContent
-ChunkFolderContent(ChunkingStatistics& Stats,
- WorkerThreadPool& WorkerPool,
- const std::filesystem::path& RootPath,
- const FolderContent& Content,
- const ChunkingController& InChunkingController,
- int32_t UpdateInteralMS,
- std::function<void(bool IsAborted, std::ptrdiff_t PendingWork)>&& UpdateCallback,
- std::atomic<bool>& AbortFlag)
+DeletePathsFromChunkedContent(const ChunkedFolderContent& BaseContent, std::span<const std::filesystem::path> DeletedPaths)
+{  // Convenience overload: builds the lookup for BaseContent itself, then forwards to the lookup-taking overload.
+ ZEN_TRACE_CPU("DeletePathsFromChunkedContent");
+ ZEN_ASSERT(DeletedPaths.size() <= BaseContent.Paths.size());  // more deleted paths than base paths is treated as a caller error
+ if (DeletedPaths.size() == BaseContent.Paths.size())  // same count as the base; presumably every path is being deleted (assumes unique, existing paths - confirm)
+ {
+ return {};  // empty content; also avoids building the lookup below
+ }
+ const ChunkedContentLookup BaseLookup = BuildChunkedContentLookup(BaseContent);  // built once per call; callers that already hold a lookup can pass it to the other overload
+ return DeletePathsFromChunkedContent(BaseContent, BaseLookup, DeletedPaths);
+}
+
+ChunkedFolderContent
+ChunkFolderContent(ChunkingStatistics& Stats,
+ WorkerThreadPool& WorkerPool,
+ const std::filesystem::path& RootPath,
+ const FolderContent& Content,
+ const ChunkingController& InChunkingController,
+ int32_t UpdateIntervalMS,
+ std::function<void(bool IsAborted, bool IsPaused, std::ptrdiff_t PendingWork)>&& UpdateCallback,
+ std::atomic<bool>& AbortFlag,
+ std::atomic<bool>& PauseFlag)
{
ZEN_TRACE_CPU("ChunkFolderContent");
@@ -754,7 +805,7 @@ ChunkFolderContent(ChunkingStatistics& Stats,
RwLock Lock;
- ParallellWork Work(AbortFlag);
+ ParallelWork Work(AbortFlag, PauseFlag);
for (uint32_t PathIndex : Order)
{
@@ -762,31 +813,28 @@ ChunkFolderContent(ChunkingStatistics& Stats,
{
break;
}
- Work.ScheduleWork(
- WorkerPool, // GetSyncWorkerPool()
- [&, PathIndex](std::atomic<bool>& AbortFlag) {
- if (!AbortFlag)
- {
- IoHash RawHash = HashOneFile(Stats,
- InChunkingController,
- Result,
- ChunkHashToChunkIndex,
- RawHashToSequenceRawHashIndex,
- Lock,
- RootPath,
- PathIndex,
- AbortFlag);
- Lock.WithExclusiveLock([&]() { Result.RawHashes[PathIndex] = RawHash; });
- Stats.FilesProcessed++;
- }
- },
- Work.DefaultErrorFunction());
- }
-
- Work.Wait(UpdateInteralMS, [&](bool IsAborted, std::ptrdiff_t PendingWork) {
- ZEN_UNUSED(IsAborted);
+ Work.ScheduleWork(WorkerPool, // GetSyncWorkerPool()
+ [&, PathIndex](std::atomic<bool>& AbortFlag) {
+ if (!AbortFlag)
+ {
+ IoHash RawHash = HashOneFile(Stats,
+ InChunkingController,
+ Result,
+ ChunkHashToChunkIndex,
+ RawHashToSequenceRawHashIndex,
+ Lock,
+ RootPath,
+ PathIndex,
+ AbortFlag);
+ Lock.WithExclusiveLock([&]() { Result.RawHashes[PathIndex] = RawHash; });
+ Stats.FilesProcessed++;
+ }
+ });
+ }
+
+ Work.Wait(UpdateIntervalMS, [&](bool IsAborted, bool IsPaused, std::ptrdiff_t PendingWork) {
ZEN_UNUSED(PendingWork);
- UpdateCallback(Work.IsAborted(), Work.PendingWork().Remaining());
+ UpdateCallback(IsAborted, IsPaused, Work.PendingWork().Remaining());
});
}
return Result;
@@ -799,8 +847,9 @@ BuildChunkedContentLookup(const ChunkedFolderContent& Content)
struct ChunkLocationReference  // temporary flat record; turned into a ChunkedContentLookup::ChunkSequenceLocation when results are emitted
{
- uint32_t ChunkIndex = (uint32_t)-1;
- ChunkedContentLookup::ChunkSequenceLocation Location;
+ uint32_t ChunkIndex = (uint32_t)-1;  // index taken from ChunkOrders; consecutive records with the same value form one range. (uint32_t)-1 is the unset default
+ uint32_t SequenceIndex = (uint32_t)-1;  // sequence the chunk occurrence belongs to; compared before Offset when ordering records
+ uint64_t Offset = (uint64_t)-1;  // set from a running offset that advances by each chunk's raw size (presumably the byte offset within the sequence - confirm)
};
ChunkedContentLookup Result;
@@ -829,8 +878,7 @@ BuildChunkedContentLookup(const ChunkedFolderContent& Content)
{
uint32_t ChunkIndex = Content.ChunkedContent.ChunkOrders[OrderIndex];
- Locations.push_back(
- ChunkLocationReference{ChunkIndex, ChunkedContentLookup::ChunkSequenceLocation{SequenceIndex, LocationOffset}});
+ Locations.push_back(ChunkLocationReference{.ChunkIndex = ChunkIndex, .SequenceIndex = SequenceIndex, .Offset = LocationOffset});
LocationOffset += Content.ChunkedContent.ChunkRawSizes[ChunkIndex];
}
@@ -845,15 +893,15 @@ BuildChunkedContentLookup(const ChunkedFolderContent& Content)
{
return false;
}
- if (Lhs.Location.SequenceIndex < Rhs.Location.SequenceIndex)
+ if (Lhs.SequenceIndex < Rhs.SequenceIndex)
{
return true;
}
- if (Lhs.Location.SequenceIndex > Rhs.Location.SequenceIndex)
+ if (Lhs.SequenceIndex > Rhs.SequenceIndex)
{
return false;
}
- return Lhs.Location.Offset < Rhs.Location.Offset;
+ return Lhs.Offset < Rhs.Offset;
});
Result.ChunkSequenceLocations.reserve(Locations.size());
@@ -866,7 +914,10 @@ BuildChunkedContentLookup(const ChunkedFolderContent& Content)
uint32_t Count = 0;
while ((RangeOffset + Count < Locations.size()) && (Locations[RangeOffset + Count].ChunkIndex == ChunkIndex))
{
- Result.ChunkSequenceLocations.push_back(Locations[RangeOffset + Count].Location);
+ const ChunkLocationReference& LocationReference = Locations[RangeOffset + Count];
+ Result.ChunkSequenceLocations.push_back(
+ ChunkedContentLookup::ChunkSequenceLocation{.SequenceIndex = LocationReference.SequenceIndex,
+ .Offset = LocationReference.Offset});
Count++;
}
Result.ChunkSequenceLocationOffset.push_back(RangeOffset);
@@ -875,8 +926,12 @@ BuildChunkedContentLookup(const ChunkedFolderContent& Content)
}
Result.SequenceIndexFirstPathIndex.resize(Content.ChunkedContent.SequenceRawHashes.size(), (uint32_t)-1);
+ Result.PathExtensionHash.resize(Content.Paths.size());
for (uint32_t PathIndex = 0; PathIndex < Content.Paths.size(); PathIndex++)
{
+ std::string LowercaseExtension = Content.Paths[PathIndex].extension().string();
+ std::transform(LowercaseExtension.begin(), LowercaseExtension.end(), LowercaseExtension.begin(), ::tolower);
+ Result.PathExtensionHash[PathIndex] = HashStringDjb2(LowercaseExtension);
if (Content.RawSizes[PathIndex] > 0)
{
const IoHash& RawHash = Content.RawHashes[PathIndex];