aboutsummaryrefslogtreecommitdiff
path: root/src/zenutil
diff options
context:
space:
mode:
authorDan Engelbrecht <[email protected]>2025-06-03 16:21:01 +0200
committerGitHub Enterprise <[email protected]>2025-06-03 16:21:01 +0200
commita0b10b046095d57ffbdb46c83084601a832f4562 (patch)
treefe015645ea07d83c2784e3e28d0e976a37054859 /src/zenutil
parentminor: fix unused variable warning on some compilers (diff)
downloadzen-a0b10b046095d57ffbdb46c83084601a832f4562.tar.xz
zen-a0b10b046095d57ffbdb46c83084601a832f4562.zip
fixed size chunking for encrypted files (#410)
- Improvement: Use fixed-size block chunking for known encrypted/compressed file types - Improvement: Skip trying to compress chunks that are sourced from files that are known to be encrypted/compressed - Improvement: Add a global open-file cache for written files, increasing throughput during download by reducing the overhead of opening/closing files by 80%
Diffstat (limited to 'src/zenutil')
-rw-r--r--src/zenutil/bufferedwritefilecache.cpp177
-rw-r--r--src/zenutil/chunkedcontent.cpp4
-rw-r--r--src/zenutil/chunkingcontroller.cpp289
-rw-r--r--src/zenutil/filebuildstorage.cpp19
-rw-r--r--src/zenutil/include/zenutil/bufferedwritefilecache.h106
-rw-r--r--src/zenutil/include/zenutil/chunkedcontent.h1
-rw-r--r--src/zenutil/include/zenutil/chunkingcontroller.h45
7 files changed, 501 insertions, 140 deletions
diff --git a/src/zenutil/bufferedwritefilecache.cpp b/src/zenutil/bufferedwritefilecache.cpp
new file mode 100644
index 000000000..a52850314
--- /dev/null
+++ b/src/zenutil/bufferedwritefilecache.cpp
@@ -0,0 +1,177 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#include <zenutil/bufferedwritefilecache.h>
+
+#include <zencore/logging.h>
+#include <zencore/trace.h>
+
+ZEN_THIRD_PARTY_INCLUDES_START
+#include <gsl/gsl-lite.hpp>
+ZEN_THIRD_PARTY_INCLUDES_END
+
+namespace zen {
+
+// Starts with an empty cache and all four statistics counters at zero.
+BufferedWriteFileCache::BufferedWriteFileCache()
+: m_CacheHitCount(0)
+, m_CacheMissCount(0)
+, m_OpenHandleCount(0)
+, m_DroppedHandleCount(0)
+{
+}
+
+// Destroys every BasicFile that is still parked in the cache and empties both lookup
+// containers. Exceptions are logged rather than allowed to leave the destructor.
+BufferedWriteFileCache::~BufferedWriteFileCache()
+{
+    ZEN_TRACE_CPU("~BufferedWriteFileCache()");
+
+    try
+    {
+        for (TOpenHandles& Slot : m_OpenFiles)
+        {
+            for (BasicFile* Raw = Slot.Pop(); Raw != nullptr; Raw = Slot.Pop())
+            {
+                // Re-adopt the raw pointer so the file is deleted when this iteration ends.
+                std::unique_ptr<BasicFile> Owned(Raw);
+                m_OpenHandleCount--;
+            }
+        }
+        m_OpenFiles.clear();
+        m_ChunkWriters.clear();
+    }
+    catch (const std::exception& Ex)
+    {
+        ZEN_ERROR("~BufferedWriteFileCache() threw exeption: {}", Ex.what());
+    }
+}
+
+// Takes one cached handle for FileIndex out of the cache and transfers ownership to the
+// caller. Returns nullptr when nothing is cached for that index. The hit or miss counter
+// is updated accordingly.
+std::unique_ptr<BasicFile>
+BufferedWriteFileCache::Get(uint32_t FileIndex)
+{
+    ZEN_TRACE_CPU("BufferedWriteFileCache::Get");
+
+    std::unique_ptr<BasicFile> Result;
+
+    RwLock::ExclusiveLockScope Lock(m_WriterLock);
+    const auto SlotIt = m_ChunkWriters.find(FileIndex);
+    if (SlotIt != m_ChunkWriters.end())
+    {
+        // Pop() yields nullptr when the slot exists but currently holds no handle.
+        Result.reset(m_OpenFiles[SlotIt->second].Pop());
+    }
+
+    if (Result)
+    {
+        m_OpenHandleCount--;
+        m_CacheHitCount++;
+    }
+    else
+    {
+        m_CacheMissCount++;
+    }
+    return Result;
+}
+
+// Parks an open file handle in the cache so that a later Get(FileIndex) can reuse it.
+//
+// Ownership of Writer is taken only when the handle is actually stored. If the handle is
+// rejected (null, global limit reached, or the per-file slot is full) Writer is left
+// untouched and the caller's unique_ptr keeps owning the file.
+void
+BufferedWriteFileCache::Put(uint32_t FileIndex, std::unique_ptr<BasicFile>&& Writer)
+{
+    ZEN_TRACE_CPU("BufferedWriteFileCache::Put");
+
+    if (!Writer)
+    {
+        // TOpenHandles::Pop() signals "empty" with nullptr, so a stored null entry would make
+        // Get(), Close() and the destructor stop early and strand the handles beneath it.
+        return;
+    }
+
+    // Checked before taking the lock, so concurrent callers may overshoot the limit slightly.
+    if (m_OpenHandleCount.load() >= MaxBufferedCount)
+    {
+        m_DroppedHandleCount++;
+        return;
+    }
+
+    RwLock::ExclusiveLockScope _(m_WriterLock);
+    if (auto It = m_ChunkWriters.find(FileIndex); It != m_ChunkWriters.end())
+    {
+        const uint32_t HandleIndex = It->second;
+        TOpenHandles& OpenHandles = m_OpenFiles[HandleIndex];
+        if (OpenHandles.Push(Writer.get()))
+        {
+            // Give up ownership only once the raw pointer has been stored.
+            Writer.release();
+            m_OpenHandleCount++;
+        }
+        else
+        {
+            m_DroppedHandleCount++;
+        }
+    }
+    else
+    {
+        // No slot is mapped for this file index yet: append one and record its position.
+        const uint32_t HandleIndex = gsl::narrow<uint32_t>(m_OpenFiles.size());
+        m_OpenFiles.push_back(TOpenHandles{});
+        m_OpenFiles.back().Push(Writer.release());
+        m_ChunkWriters.insert_or_assign(FileIndex, HandleIndex);
+        m_OpenHandleCount++;
+    }
+}
+
+// Removes every cached handle for the given file indexes and drops their slot mapping.
+// The handles are collected while the lock is held and destroyed after it is released.
+void
+BufferedWriteFileCache::Close(std::span<uint32_t> FileIndexes)
+{
+    ZEN_TRACE_CPU("BufferedWriteFileCache::Close");
+
+    std::vector<std::unique_ptr<BasicFile>> Evicted;
+    Evicted.reserve(FileIndexes.size());
+    {
+        RwLock::ExclusiveLockScope Lock(m_WriterLock);
+        for (const uint32_t FileIndex : FileIndexes)
+        {
+            const auto SlotIt = m_ChunkWriters.find(FileIndex);
+            if (SlotIt == m_ChunkWriters.end())
+            {
+                continue;
+            }
+
+            TOpenHandles& Slot = m_OpenFiles[SlotIt->second];
+            for (BasicFile* Raw = Slot.Pop(); Raw != nullptr; Raw = Slot.Pop())
+            {
+                Evicted.emplace_back(std::unique_ptr<BasicFile>(Raw));
+                m_OpenHandleCount--;
+            }
+            m_ChunkWriters.erase(SlotIt);
+        }
+    }
+    // The lock scope has ended; the collected files are destroyed here.
+    Evicted.clear();
+}
+
+// Remembers the shared cache that this Local reads handles from and returns them to.
+BufferedWriteFileCache::Local::Local(BufferedWriteFileCache& Cache)
+: m_Cache(Cache)
+{
+}
+
+// Returns the writer this Local already tracks for FileIndex. Otherwise it tries to adopt
+// a handle from the shared cache and wraps it in a new Writer. Returns nullptr when
+// neither source has a handle; the returned pointer is non-owning.
+BufferedWriteFileCache::Local::Writer*
+BufferedWriteFileCache::Local::GetWriter(uint32_t FileIndex)
+{
+    const auto Known = m_FileIndexToWriterIndex.find(FileIndex);
+    if (Known != m_FileIndexToWriterIndex.end())
+    {
+        return m_ChunkWriters[Known->second].get();
+    }
+
+    std::unique_ptr<BasicFile> Cached = m_Cache.Get(FileIndex);
+    if (!Cached)
+    {
+        return nullptr;
+    }
+
+    const uint32_t NewIndex = gsl::narrow<uint32_t>(m_ChunkWriters.size());
+    m_FileIndexToWriterIndex.insert_or_assign(FileIndex, NewIndex);
+    m_ChunkWriters.emplace_back(std::make_unique<Writer>(Writer{.File = std::move(Cached)}));
+    return m_ChunkWriters.back().get();
+}
+
+// Registers a caller-created writer for FileIndex and returns a non-owning pointer to it.
+// FileIndex must not already be tracked by this Local (asserted).
+BufferedWriteFileCache::Local::Writer*
+BufferedWriteFileCache::Local::PutWriter(uint32_t FileIndex, std::unique_ptr<Writer> Writer)
+{
+    ZEN_ASSERT(!m_FileIndexToWriterIndex.contains(FileIndex));
+
+    // The new writer goes at the end of m_ChunkWriters, so its index is the current size.
+    const uint32_t NewIndex = gsl::narrow<uint32_t>(m_ChunkWriters.size());
+    m_FileIndexToWriterIndex.insert_or_assign(FileIndex, NewIndex);
+    return m_ChunkWriters.emplace_back(std::move(Writer)).get();
+}
+
+// Offers every file tracked by this Local back to the shared cache. Files the cache does
+// not accept are destroyed here. Exceptions are logged rather than propagated.
+BufferedWriteFileCache::Local::~Local()
+{
+    ZEN_TRACE_CPU("BufferedWriteFileCache::~Local()");
+    try
+    {
+        for (const auto& Entry : m_FileIndexToWriterIndex)
+        {
+            Writer& Tracked = *m_ChunkWriters[Entry.second];
+
+            // Destroy the BasicFileWriter before the file it writes to changes owner.
+            Tracked.Writer.reset();
+
+            // Put() only takes ownership if it stores the handle; otherwise Detached still
+            // owns the file and destroys it at the end of this iteration.
+            std::unique_ptr<BasicFile> Detached = std::move(Tracked.File);
+            m_Cache.Put(Entry.first, std::move(Detached));
+        }
+    }
+    catch (const std::exception& Ex)
+    {
+        ZEN_ERROR("BufferedWriteFileCache::~Local() threw exeption: {}", Ex.what());
+    }
+}
+
+} // namespace zen
diff --git a/src/zenutil/chunkedcontent.cpp b/src/zenutil/chunkedcontent.cpp
index ae129324e..4bec4901a 100644
--- a/src/zenutil/chunkedcontent.cpp
+++ b/src/zenutil/chunkedcontent.cpp
@@ -891,8 +891,12 @@ BuildChunkedContentLookup(const ChunkedFolderContent& Content)
}
Result.SequenceIndexFirstPathIndex.resize(Content.ChunkedContent.SequenceRawHashes.size(), (uint32_t)-1);
+ Result.PathExtensionHash.resize(Content.Paths.size());
for (uint32_t PathIndex = 0; PathIndex < Content.Paths.size(); PathIndex++)
{
+ std::string LowercaseExtension = Content.Paths[PathIndex].extension().string();
+ std::transform(LowercaseExtension.begin(), LowercaseExtension.end(), LowercaseExtension.begin(), ::tolower);
+ Result.PathExtensionHash[PathIndex] = HashStringDjb2(LowercaseExtension);
if (Content.RawSizes[PathIndex] > 0)
{
const IoHash& RawHash = Content.RawHashes[PathIndex];
diff --git a/src/zenutil/chunkingcontroller.cpp b/src/zenutil/chunkingcontroller.cpp
index a5ebce193..6fb4182c0 100644
--- a/src/zenutil/chunkingcontroller.cpp
+++ b/src/zenutil/chunkingcontroller.cpp
@@ -4,6 +4,7 @@
#include <zencore/basicfile.h>
#include <zencore/compactbinarybuilder.h>
+#include <zencore/filesystem.h>
#include <zencore/trace.h>
ZEN_THIRD_PARTY_INCLUDES_START
@@ -35,32 +36,54 @@ namespace {
return ChunkedParams{.UseThreshold = UseThreshold, .MinSize = MinSize, .MaxSize = MaxSize, .AvgSize = AvgSize};
}
-} // namespace
+ void WriteChunkParams(CbObjectWriter& Writer, const ChunkedParams& Params)
+ {
+ Writer.BeginObject("ChunkingParams"sv);
+ {
+ Writer.AddBool("UseThreshold"sv, Params.UseThreshold);
-class BasicChunkingController : public ChunkingController
-{
-public:
- BasicChunkingController(std::span<const std::string_view> ExcludeExtensions,
- bool ExcludeElfFiles,
- bool ExcludeMachOFiles,
- uint64_t ChunkFileSizeLimit,
- const ChunkedParams& ChunkingParams)
- : m_ChunkExcludeExtensions(ExcludeExtensions.begin(), ExcludeExtensions.end())
- , m_ExcludeElfFiles(ExcludeElfFiles)
- , m_ExcludeMachOFiles(ExcludeMachOFiles)
- , m_ChunkFileSizeLimit(ChunkFileSizeLimit)
- , m_ChunkingParams(ChunkingParams)
+ Writer.AddInteger("MinSize"sv, (uint64_t)Params.MinSize);
+ Writer.AddInteger("MaxSize"sv, (uint64_t)Params.MaxSize);
+ Writer.AddInteger("AvgSize"sv, (uint64_t)Params.AvgSize);
+ }
+ Writer.EndObject(); // ChunkingParams
+ }
+
+    // Returns true when the file is larger than four bytes and its first four bytes, read as
+    // a native uint32_t, equal 0x464c457f (the bytes 0x7f 'E' 'L' 'F' on a little-endian host).
+    bool IsElfFile(BasicFile& Buffer)
     {
+        if (Buffer.FileSize() <= 4)
+        {
+            return false;
+        }
+        uint32_t Magic = 0;
+        Buffer.Read(&Magic, 4, 0);
+        return Magic == 0x464c457f;
     }
- BasicChunkingController(CbObjectView Parameters)
- : m_ChunkExcludeExtensions(ReadStringArray(Parameters["ChunkExcludeExtensions"sv].AsArrayView()))
- , m_ExcludeElfFiles(Parameters["ExcludeElfFiles"sv].AsBool(DefaultChunkingExcludeElfFiles))
- , m_ExcludeMachOFiles(Parameters["ExcludeMachOFiles"sv].AsBool(DefaultChunkingExcludeMachOFiles))
- , m_ChunkFileSizeLimit(Parameters["ChunkFileSizeLimit"sv].AsUInt64(DefaultChunkingFileSizeLimit))
- , m_ChunkingParams(ReadChunkParams(Parameters["ChunkingParams"sv].AsObjectView()))
+    // Returns true when the file is larger than four bytes and its first four bytes, read as
+    // a native uint32_t, are a Mach-O header magic. Both byte orders are accepted, for the
+    // 32-bit header (MH_MAGIC / MH_CIGAM) and the 64-bit header (MH_MAGIC_64 / MH_CIGAM_64).
+    // NOTE(review): recognising the 64-bit magics changes which files ExcludeMachOFiles skips
+    // for parameter sets that already exist - confirm that is acceptable.
+    bool IsMachOFile(BasicFile& Buffer)
     {
+        if (Buffer.FileSize() > 4)
+        {
+            uint32_t MachOCheck = 0;
+            Buffer.Read(&MachOCheck, 4, 0);
+            const bool Is32Bit = (MachOCheck == 0xfeedface) || (MachOCheck == 0xcefaedfe);
+            const bool Is64Bit = (MachOCheck == 0xfeedfacf) || (MachOCheck == 0xcffaedfe);
+            if (Is32Bit || Is64Bit)
+            {
+                return true;
+            }
+        }
+        return false;
     }
+} // namespace
+
+class BasicChunkingController : public ChunkingController
+{
+public:
+ BasicChunkingController(const BasicChunkingControllerSettings& Settings) : m_Settings(Settings) {}
+
+ BasicChunkingController(CbObjectView Parameters) : m_Settings(ReadSettings(Parameters)) {}
virtual bool ProcessFile(const std::filesystem::path& InputPath,
uint64_t RawSize,
@@ -70,35 +93,25 @@ public:
{
ZEN_TRACE_CPU("BasicChunkingController::ProcessFile");
const bool ExcludeFromChunking =
- std::find(m_ChunkExcludeExtensions.begin(), m_ChunkExcludeExtensions.end(), InputPath.extension()) !=
- m_ChunkExcludeExtensions.end();
+ std::find(m_Settings.ExcludeExtensions.begin(), m_Settings.ExcludeExtensions.end(), InputPath.extension()) !=
+ m_Settings.ExcludeExtensions.end();
- if (ExcludeFromChunking || (RawSize < m_ChunkFileSizeLimit))
+ if (ExcludeFromChunking || (RawSize < m_Settings.ChunkFileSizeLimit))
{
return false;
}
BasicFile Buffer(InputPath, BasicFile::Mode::kRead);
- if (m_ExcludeElfFiles && Buffer.FileSize() > 4)
+ if (m_Settings.ExcludeElfFiles && IsElfFile(Buffer))
{
- uint32_t ElfCheck = 0;
- Buffer.Read(&ElfCheck, 4, 0);
- if (ElfCheck == 0x464c457f)
- {
- return false;
- }
+ return false;
}
- if (m_ExcludeMachOFiles && Buffer.FileSize() > 4)
+ if (m_Settings.ExcludeMachOFiles && IsMachOFile(Buffer))
{
- uint32_t MachOCheck = 0;
- Buffer.Read(&MachOCheck, 4, 0);
- if ((MachOCheck == 0xfeedface) || (MachOCheck == 0xcefaedfe))
- {
- return false;
- }
+ return false;
}
- OutChunked = ChunkData(Buffer, 0, RawSize, m_ChunkingParams, &BytesProcessed, &AbortFlag);
+ OutChunked = ChunkData(Buffer, 0, RawSize, m_Settings.ChunkingParams, &BytesProcessed, &AbortFlag);
return true;
}
@@ -109,59 +122,43 @@ public:
CbObjectWriter Writer;
Writer.BeginArray("ChunkExcludeExtensions"sv);
{
- for (const std::string& Extension : m_ChunkExcludeExtensions)
+ for (const std::string& Extension : m_Settings.ExcludeExtensions)
{
Writer.AddString(Extension);
}
}
Writer.EndArray(); // ChunkExcludeExtensions
- Writer.AddBool("ExcludeElfFiles"sv, m_ExcludeElfFiles);
- Writer.AddBool("ExcludeMachOFiles"sv, m_ExcludeMachOFiles);
+ Writer.AddBool("ExcludeElfFiles"sv, m_Settings.ExcludeElfFiles);
+ Writer.AddBool("ExcludeMachOFiles"sv, m_Settings.ExcludeMachOFiles);
+ Writer.AddInteger("ChunkFileSizeLimit"sv, m_Settings.ChunkFileSizeLimit);
- Writer.AddInteger("ChunkFileSizeLimit"sv, m_ChunkFileSizeLimit);
- Writer.BeginObject("ChunkingParams"sv);
- {
- Writer.AddBool("UseThreshold"sv, m_ChunkingParams.UseThreshold);
+ WriteChunkParams(Writer, m_Settings.ChunkingParams);
- Writer.AddInteger("MinSize"sv, (uint64_t)m_ChunkingParams.MinSize);
- Writer.AddInteger("MaxSize"sv, (uint64_t)m_ChunkingParams.MaxSize);
- Writer.AddInteger("AvgSize"sv, (uint64_t)m_ChunkingParams.AvgSize);
- }
- Writer.EndObject(); // ChunkingParams
return Writer.Save();
}
static constexpr std::string_view Name = "BasicChunkingController"sv;
-protected:
- const std::vector<std::string> m_ChunkExcludeExtensions;
- const bool m_ExcludeElfFiles = false;
- const bool m_ExcludeMachOFiles = false;
- const uint64_t m_ChunkFileSizeLimit;
- const ChunkedParams m_ChunkingParams;
+private:
+    // Rebuilds the controller settings from a serialized parameter object. The Default*
+    // constants are passed to the As*() accessors as their fallback arguments.
+    static BasicChunkingControllerSettings ReadSettings(CbObjectView Parameters)
+    {
+        auto           ExcludeExtensions = ReadStringArray(Parameters["ChunkExcludeExtensions"sv].AsArrayView());
+        const bool     SkipElf           = Parameters["ExcludeElfFiles"sv].AsBool(DefaultChunkingExcludeElfFiles);
+        const bool     SkipMachO         = Parameters["ExcludeMachOFiles"sv].AsBool(DefaultChunkingExcludeMachOFiles);
+        const uint64_t SizeLimit         = Parameters["ChunkFileSizeLimit"sv].AsUInt64(DefaultChunkingFileSizeLimit);
+        const auto     Params            = ReadChunkParams(Parameters["ChunkingParams"sv].AsObjectView());
+
+        return BasicChunkingControllerSettings{.ExcludeExtensions  = std::move(ExcludeExtensions),
+                                               .ExcludeElfFiles    = SkipElf,
+                                               .ExcludeMachOFiles  = SkipMachO,
+                                               .ChunkFileSizeLimit = SizeLimit,
+                                               .ChunkingParams     = Params};
+    }
+
+ const BasicChunkingControllerSettings m_Settings;
};
class ChunkingControllerWithFixedChunking : public ChunkingController
{
public:
- ChunkingControllerWithFixedChunking(std::span<const std::string_view> FixedChunkingExtensions,
- uint64_t ChunkFileSizeLimit,
- const ChunkedParams& ChunkingParams,
- uint32_t FixedChunkingChunkSize)
- : m_FixedChunkingExtensions(FixedChunkingExtensions.begin(), FixedChunkingExtensions.end())
- , m_ChunkFileSizeLimit(ChunkFileSizeLimit)
- , m_ChunkingParams(ChunkingParams)
- , m_FixedChunkingChunkSize(FixedChunkingChunkSize)
- {
- }
+ ChunkingControllerWithFixedChunking(const ChunkingControllerWithFixedChunkingSettings& Settings) : m_Settings(Settings) {}
- ChunkingControllerWithFixedChunking(CbObjectView Parameters)
- : m_FixedChunkingExtensions(ReadStringArray(Parameters["FixedChunkingExtensions"sv].AsArrayView()))
- , m_ChunkFileSizeLimit(Parameters["ChunkFileSizeLimit"sv].AsUInt64(DefaultChunkingFileSizeLimit))
- , m_ChunkingParams(ReadChunkParams(Parameters["ChunkingParams"sv].AsObjectView()))
- , m_FixedChunkingChunkSize(Parameters["FixedChunkingChunkSize"sv].AsUInt32(16u * 1024u * 1024u))
- {
- }
+ ChunkingControllerWithFixedChunking(CbObjectView Parameters) : m_Settings(ReadSettings(Parameters)) {}
virtual bool ProcessFile(const std::filesystem::path& InputPath,
uint64_t RawSize,
@@ -170,33 +167,71 @@ public:
std::atomic<bool>& AbortFlag) const override
{
ZEN_TRACE_CPU("ChunkingControllerWithFixedChunking::ProcessFile");
- if (RawSize < m_ChunkFileSizeLimit)
+ const bool ExcludeFromChunking =
+ std::find(m_Settings.ExcludeExtensions.begin(), m_Settings.ExcludeExtensions.end(), InputPath.extension()) !=
+ m_Settings.ExcludeExtensions.end();
+
+ if (ExcludeFromChunking || (RawSize < m_Settings.ChunkFileSizeLimit))
{
return false;
}
- const bool FixedChunking = std::find(m_FixedChunkingExtensions.begin(), m_FixedChunkingExtensions.end(), InputPath.extension()) !=
- m_FixedChunkingExtensions.end();
- if (FixedChunking)
+ const bool FixedChunkingExtension =
+ std::find(m_Settings.FixedChunkingExtensions.begin(), m_Settings.FixedChunkingExtensions.end(), InputPath.extension()) !=
+ m_Settings.FixedChunkingExtensions.end();
+
+ if (FixedChunkingExtension)
{
+ if (RawSize < m_Settings.MinSizeForFixedChunking)
+ {
+ return false;
+ }
ZEN_TRACE_CPU("FixedChunking");
- IoHashStream FullHash;
- IoBuffer Source = IoBufferBuilder::MakeFromFile(InputPath);
+ IoHashStream FullHasher;
+ BasicFile Source(InputPath, BasicFile::Mode::kRead);
uint64_t Offset = 0;
tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> ChunkHashToChunkIndex;
- ChunkHashToChunkIndex.reserve(1 + (RawSize / m_FixedChunkingChunkSize));
+ const uint64_t ExpectedChunkCount = 1 + (RawSize / m_Settings.FixedChunkingChunkSize);
+ ChunkHashToChunkIndex.reserve(ExpectedChunkCount);
+ OutChunked.Info.ChunkHashes.reserve(ExpectedChunkCount);
+ OutChunked.Info.ChunkSequence.reserve(ExpectedChunkCount);
+ OutChunked.ChunkSources.reserve(ExpectedChunkCount);
+
+ static const uint64_t BufferingSize = 256u * 1024u;
+
+ IoHashStream ChunkHasher;
+
while (Offset < RawSize)
{
if (AbortFlag)
{
return false;
}
- uint64_t ChunkSize = std::min<uint64_t>(RawSize - Offset, m_FixedChunkingChunkSize);
- IoBuffer Chunk(Source, Offset, ChunkSize);
- MemoryView ChunkData = Chunk.GetView();
- FullHash.Append(ChunkData);
- IoHash ChunkHash = IoHash::HashBuffer(ChunkData);
+ ChunkHasher.Reset();
+
+ uint64_t ChunkSize = std::min<uint64_t>(RawSize - Offset, m_Settings.FixedChunkingChunkSize);
+ if (ChunkSize >= (BufferingSize + BufferingSize / 2))
+ {
+ ScanFile(Source.Handle(),
+ Offset,
+ ChunkSize,
+ BufferingSize,
+ [&FullHasher, &ChunkHasher, &BytesProcessed](const void* Data, size_t Size) {
+ FullHasher.Append(Data, Size);
+ ChunkHasher.Append(Data, Size);
+ BytesProcessed.fetch_add(Size);
+ });
+ }
+ else
+ {
+ IoBuffer ChunkData = Source.ReadRange(Offset, ChunkSize);
+ FullHasher.Append(ChunkData);
+ ChunkHasher.Append(ChunkData);
+ BytesProcessed.fetch_add(ChunkSize);
+ }
+
+ const IoHash ChunkHash = ChunkHasher.GetHash();
if (auto It = ChunkHashToChunkIndex.find(ChunkHash); It != ChunkHashToChunkIndex.end())
{
OutChunked.Info.ChunkSequence.push_back(It->second);
@@ -209,16 +244,24 @@ public:
OutChunked.ChunkSources.push_back({.Offset = Offset, .Size = gsl::narrow<uint32_t>(ChunkSize)});
}
Offset += ChunkSize;
- BytesProcessed.fetch_add(ChunkSize);
}
OutChunked.Info.RawSize = RawSize;
- OutChunked.Info.RawHash = FullHash.GetHash();
+ OutChunked.Info.RawHash = FullHasher.GetHash();
return true;
}
else
{
BasicFile Buffer(InputPath, BasicFile::Mode::kRead);
- OutChunked = ChunkData(Buffer, 0, RawSize, m_ChunkingParams, &BytesProcessed);
+ if (m_Settings.ExcludeElfFiles && IsElfFile(Buffer))
+ {
+ return false;
+ }
+ if (m_Settings.ExcludeMachOFiles && IsMachOFile(Buffer))
+ {
+ return false;
+ }
+
+ OutChunked = ChunkData(Buffer, 0, RawSize, m_Settings.ChunkingParams, &BytesProcessed, &AbortFlag);
return true;
}
}
@@ -230,47 +273,57 @@ public:
CbObjectWriter Writer;
Writer.BeginArray("FixedChunkingExtensions");
{
- for (const std::string& Extension : m_FixedChunkingExtensions)
+ for (const std::string& Extension : m_Settings.FixedChunkingExtensions)
{
Writer.AddString(Extension);
}
}
Writer.EndArray(); // ChunkExcludeExtensions
- Writer.AddInteger("ChunkFileSizeLimit"sv, m_ChunkFileSizeLimit);
- Writer.BeginObject("ChunkingParams"sv);
- {
- Writer.AddBool("UseThreshold"sv, m_ChunkingParams.UseThreshold);
- Writer.AddInteger("MinSize"sv, (uint64_t)m_ChunkingParams.MinSize);
- Writer.AddInteger("MaxSize"sv, (uint64_t)m_ChunkingParams.MaxSize);
- Writer.AddInteger("AvgSize"sv, (uint64_t)m_ChunkingParams.AvgSize);
+ Writer.BeginArray("ChunkExcludeExtensions"sv);
+ {
+ for (const std::string& Extension : m_Settings.ExcludeExtensions)
+ {
+ Writer.AddString(Extension);
+ }
}
- Writer.EndObject(); // ChunkingParams
- Writer.AddInteger("FixedChunkingChunkSize"sv, m_FixedChunkingChunkSize);
+ Writer.EndArray(); // ChunkExcludeExtensions
+
+ Writer.AddBool("ExcludeElfFiles"sv, m_Settings.ExcludeElfFiles);
+ Writer.AddBool("ExcludeMachOFiles"sv, m_Settings.ExcludeMachOFiles);
+
+ Writer.AddInteger("ChunkFileSizeLimit"sv, m_Settings.ChunkFileSizeLimit);
+
+ WriteChunkParams(Writer, m_Settings.ChunkingParams);
+
+ Writer.AddInteger("FixedChunkingChunkSize"sv, m_Settings.FixedChunkingChunkSize);
+ Writer.AddInteger("MinSizeForFixedChunking"sv, m_Settings.MinSizeForFixedChunking);
return Writer.Save();
}
static constexpr std::string_view Name = "ChunkingControllerWithFixedChunking"sv;
-protected:
- const std::vector<std::string> m_FixedChunkingExtensions;
- const uint64_t m_ChunkFileSizeLimit;
- const ChunkedParams m_ChunkingParams;
- const uint32_t m_FixedChunkingChunkSize;
+private:
+    // Rebuilds the controller settings from a serialized parameter object. The Default*
+    // constants are passed to the As*() accessors as their fallback arguments.
+    static ChunkingControllerWithFixedChunkingSettings ReadSettings(CbObjectView Parameters)
+    {
+        return ChunkingControllerWithFixedChunkingSettings{
+            .FixedChunkingExtensions = ReadStringArray(Parameters["FixedChunkingExtensions"sv].AsArrayView()),
+            .ExcludeExtensions = ReadStringArray(Parameters["ChunkExcludeExtensions"sv].AsArrayView()),
+            .ExcludeElfFiles = Parameters["ExcludeElfFiles"sv].AsBool(DefaultChunkingExcludeElfFiles),
+            .ExcludeMachOFiles = Parameters["ExcludeMachOFiles"sv].AsBool(DefaultChunkingExcludeMachOFiles),
+            .ChunkFileSizeLimit = Parameters["ChunkFileSizeLimit"sv].AsUInt64(DefaultChunkingFileSizeLimit),
+            .ChunkingParams = ReadChunkParams(Parameters["ChunkingParams"sv].AsObjectView()),
+            .FixedChunkingChunkSize = Parameters["FixedChunkingChunkSize"sv].AsUInt64(DefaultFixedChunkingChunkSize),
+            // Fall back to the constant the settings struct itself uses for this field, not
+            // the chunk-size default.
+            // NOTE(review): confirm this is the intended fallback for parameter objects that
+            // lack "MinSizeForFixedChunking".
+            .MinSizeForFixedChunking = Parameters["MinSizeForFixedChunking"sv].AsUInt64(DefaultMinSizeForFixedChunking)};
+    }
+
+ const ChunkingControllerWithFixedChunkingSettings m_Settings;
};
std::unique_ptr<ChunkingController>
-CreateBasicChunkingController(std::span<const std::string_view> ExcludeExtensions,
- bool ExcludeElfFiles,
- bool ExcludeMachOFiles,
- uint64_t ChunkFileSizeLimit,
- const ChunkedParams& ChunkingParams)
+CreateBasicChunkingController(const BasicChunkingControllerSettings& Settings)
{
- return std::make_unique<BasicChunkingController>(ExcludeExtensions,
- ExcludeElfFiles,
- ExcludeMachOFiles,
- ChunkFileSizeLimit,
- ChunkingParams);
+ return std::make_unique<BasicChunkingController>(Settings);
}
std::unique_ptr<ChunkingController>
CreateBasicChunkingController(CbObjectView Parameters)
@@ -279,15 +332,9 @@ CreateBasicChunkingController(CbObjectView Parameters)
}
std::unique_ptr<ChunkingController>
-CreateChunkingControllerWithFixedChunking(std::span<const std::string_view> FixedChunkingExtensions,
- uint64_t ChunkFileSizeLimit,
- const ChunkedParams& ChunkingParams,
- uint32_t FixedChunkingChunkSize)
+CreateChunkingControllerWithFixedChunking(const ChunkingControllerWithFixedChunkingSettings& Setting)
{
- return std::make_unique<ChunkingControllerWithFixedChunking>(FixedChunkingExtensions,
- ChunkFileSizeLimit,
- ChunkingParams,
- FixedChunkingChunkSize);
+ return std::make_unique<ChunkingControllerWithFixedChunking>(Setting);
}
std::unique_ptr<ChunkingController>
CreateChunkingControllerWithFixedChunking(CbObjectView Parameters)
diff --git a/src/zenutil/filebuildstorage.cpp b/src/zenutil/filebuildstorage.cpp
index badfb4840..c389d16c5 100644
--- a/src/zenutil/filebuildstorage.cpp
+++ b/src/zenutil/filebuildstorage.cpp
@@ -678,13 +678,24 @@ protected:
{
return false;
}
- CompositeBuffer Decompressed = ValidateBuffer.DecompressToComposite();
- if (!Decompressed)
+
+ IoHashStream Hash;
+ bool CouldDecompress = ValidateBuffer.DecompressToStream(
+ 0,
+ (uint64_t)-1,
+ [&](uint64_t SourceOffset, uint64_t SourceSize, uint64_t Offset, const CompositeBuffer& RangeBuffer) {
+ ZEN_UNUSED(SourceOffset, SourceSize, Offset);
+ for (const SharedBuffer& Segment : RangeBuffer.GetSegments())
+ {
+ Hash.Append(Segment.GetView());
+ }
+ return true;
+ });
+ if (!CouldDecompress)
{
return false;
}
- IoHash Hash = IoHash::HashBuffer(Decompressed);
- if (Hash != RawHash)
+ if (Hash.GetHash() != VerifyHash)
{
return false;
}
diff --git a/src/zenutil/include/zenutil/bufferedwritefilecache.h b/src/zenutil/include/zenutil/bufferedwritefilecache.h
new file mode 100644
index 000000000..68d6c375e
--- /dev/null
+++ b/src/zenutil/include/zenutil/bufferedwritefilecache.h
@@ -0,0 +1,106 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#pragma once
+
+#include <zencore/basicfile.h>
+
+ZEN_THIRD_PARTY_INCLUDES_START
+#include <tsl/robin_map.h>
+ZEN_THIRD_PARTY_INCLUDES_END
+
+namespace zen {
+
+class CompositeBuffer;
+
+// Cache of open BasicFile handles keyed by a caller-defined file index, guarded by a lock.
+// Handles are stored as raw pointers inside the cache; ownership moves in through Put()
+// and back out through Get(), Close() or the destructor.
+class BufferedWriteFileCache
+{
+public:
+    BufferedWriteFileCache();
+    ~BufferedWriteFileCache();
+
+    BufferedWriteFileCache(const BufferedWriteFileCache&) = delete;
+    BufferedWriteFileCache& operator=(const BufferedWriteFileCache&) = delete;
+
+    // Hands out one cached handle for FileIndex, or nullptr when none is cached.
+    std::unique_ptr<BasicFile> Get(uint32_t FileIndex);
+
+    // Offers a handle to the cache; ownership is taken only if the handle is stored.
+    void Put(uint32_t FileIndex, std::unique_ptr<BasicFile>&& Writer);
+
+    // Destroys all cached handles for the given file indexes.
+    void Close(std::span<uint32_t> FileIndexes);
+
+    // Per-user view on the cache: tracks the writers it has handed out and offers their
+    // files back to the shared cache when it is destroyed.
+    class Local
+    {
+    public:
+        struct Writer
+        {
+            std::unique_ptr<BasicFile> File;
+            std::unique_ptr<BasicFileWriter> Writer;
+
+            // Writes through the BasicFileWriter when one is attached, otherwise directly
+            // to File.
+            inline void Write(const CompositeBuffer& Chunk, uint64_t FileOffset)
+            {
+                if (!Writer)
+                {
+                    File->Write(Chunk, FileOffset);
+                    return;
+                }
+                Writer->Write(Chunk, FileOffset);
+            }
+        };
+
+        explicit Local(BufferedWriteFileCache& Cache);
+        ~Local();
+
+        Local(const Local&) = delete;
+        Local& operator=(const Local&) = delete;
+
+        // Returns the tracked writer for FileIndex, adopting a cached handle if needed;
+        // nullptr when no handle is available.
+        Writer* GetWriter(uint32_t FileIndex);
+
+        // Registers a new writer for FileIndex, which must not be tracked yet.
+        Writer* PutWriter(uint32_t FileIndex, std::unique_ptr<Writer> Writer);
+
+    private:
+        tsl::robin_map<uint32_t, uint32_t> m_FileIndexToWriterIndex;  // file index -> index into m_ChunkWriters
+        std::vector<std::unique_ptr<Writer>> m_ChunkWriters;
+        BufferedWriteFileCache& m_Cache;
+    };
+
+private:
+    static constexpr size_t MaxHandlesPerPath = 7;
+    static constexpr size_t MaxBufferedCount = 1024;
+
+    // Fixed-capacity LIFO stack of non-owning-typed (but logically owned) file pointers.
+    struct TOpenHandles
+    {
+        BasicFile* Files[MaxHandlesPerPath];
+        uint64_t Size = 0;
+
+        // Removes and returns the most recently pushed handle; nullptr when empty.
+        inline BasicFile* Pop() { return (Size > 0) ? Files[--Size] : nullptr; }
+
+        // Stores File on top of the stack; returns false when the stack is already full.
+        inline bool Push(BasicFile* File)
+        {
+            if (Size >= MaxHandlesPerPath)
+            {
+                return false;
+            }
+            Files[Size++] = File;
+            return true;
+        }
+    };
+    // Seven pointers plus the 8-byte Size make 64 bytes when pointers are 8 bytes wide.
+    static_assert(sizeof(TOpenHandles) == 64);
+
+    RwLock m_WriterLock;
+    tsl::robin_map<uint32_t, uint32_t> m_ChunkWriters;  // file index -> index into m_OpenFiles
+    std::vector<TOpenHandles> m_OpenFiles;
+    std::atomic<uint32_t> m_CacheHitCount;
+    std::atomic<uint32_t> m_CacheMissCount;
+    std::atomic<uint32_t> m_OpenHandleCount;
+    std::atomic<uint32_t> m_DroppedHandleCount;
+};
+
+} // namespace zen
diff --git a/src/zenutil/include/zenutil/chunkedcontent.h b/src/zenutil/include/zenutil/chunkedcontent.h
index d33869be2..03f52e5f6 100644
--- a/src/zenutil/include/zenutil/chunkedcontent.h
+++ b/src/zenutil/include/zenutil/chunkedcontent.h
@@ -135,6 +135,7 @@ struct ChunkedContentLookup
ChunkSequenceLocationOffset; // ChunkSequenceLocations[ChunkLocationOffset[ChunkIndex]] -> start of sources for ChunkIndex
std::vector<uint32_t> ChunkSequenceLocationCounts; // ChunkSequenceLocationCounts[ChunkIndex] count of chunk locations for ChunkIndex
std::vector<uint32_t> SequenceIndexFirstPathIndex; // SequenceIndexFirstPathIndex[SequenceIndex] -> first path index with that RawHash
+ std::vector<uint32_t> PathExtensionHash;
};
ChunkedContentLookup BuildChunkedContentLookup(const ChunkedFolderContent& Content);
diff --git a/src/zenutil/include/zenutil/chunkingcontroller.h b/src/zenutil/include/zenutil/chunkingcontroller.h
index 970917fb0..315502265 100644
--- a/src/zenutil/include/zenutil/chunkingcontroller.h
+++ b/src/zenutil/include/zenutil/chunkingcontroller.h
@@ -11,9 +11,11 @@
namespace zen {
-const std::vector<std::string_view> DefaultChunkingExcludeExtensions = {".exe", ".dll", ".pdb", ".self", ".mp4"};
-const bool DefaultChunkingExcludeElfFiles = true;
-const bool DefaultChunkingExcludeMachOFiles = true;
+const std::vector<std::string> DefaultChunkingExcludeExtensions =
+ {".exe", ".dll", ".pdb", ".self", ".mp4", ".zip", ".7z", ".bzip", ".rar", ".gzip"};
+const std::vector<std::string> DefaultFixedChunkingExtensions = {".apk", ".nsp", ".xvc", ".pkg", ".dmg", ".ipa"};
+const bool DefaultChunkingExcludeElfFiles = true;
+const bool DefaultChunkingExcludeMachOFiles = true;
const ChunkedParams DefaultChunkedParams = {.MinSize = ((8u * 1u) * 1024u) - 128u,
.MaxSize = 128u * 1024u,
@@ -21,7 +23,8 @@ const ChunkedParams DefaultChunkedParams = {.MinSize = ((8u * 1u) * 1024u) - 128
const size_t DefaultChunkingFileSizeLimit = DefaultChunkedParams.MaxSize;
-const uint32_t DefaultFixedChunkingChunkSize = 16u * 1024u * 1024u;
+const uint64_t DefaultFixedChunkingChunkSize = 32u * 1024u * 1024u;
+const uint64_t DefaultMinSizeForFixedChunking = DefaultFixedChunkingChunkSize * 8u;
struct ChunkedInfoWithSource;
@@ -40,19 +43,31 @@ public:
virtual CbObject GetParameters() const = 0;
};
-std::unique_ptr<ChunkingController> CreateBasicChunkingController(
- std::span<const std::string_view> ExcludeExtensions = DefaultChunkingExcludeExtensions,
- bool ExcludeElfFiles = DefaultChunkingExcludeElfFiles,
- bool ExcludeMachOFiles = DefaultChunkingExcludeMachOFiles,
- uint64_t ChunkFileSizeLimit = DefaultChunkingFileSizeLimit,
- const ChunkedParams& ChunkingParams = DefaultChunkedParams);
+struct BasicChunkingControllerSettings // Options consumed by BasicChunkingController
+{
+ std::vector<std::string> ExcludeExtensions = DefaultChunkingExcludeExtensions; // files whose extension is listed are not chunked
+ bool ExcludeElfFiles = DefaultChunkingExcludeElfFiles; // when set, files detected by IsElfFile() are not chunked
+ bool ExcludeMachOFiles = DefaultChunkingExcludeMachOFiles; // when set, files detected by IsMachOFile() are not chunked
+ uint64_t ChunkFileSizeLimit = DefaultChunkingFileSizeLimit; // files with a raw size below this are not chunked
+ ChunkedParams ChunkingParams = DefaultChunkedParams; // forwarded to ChunkData() for files that are chunked
+};
+
+std::unique_ptr<ChunkingController> CreateBasicChunkingController(const BasicChunkingControllerSettings& Settings);
std::unique_ptr<ChunkingController> CreateBasicChunkingController(CbObjectView Parameters);
-std::unique_ptr<ChunkingController> CreateChunkingControllerWithFixedChunking(
- std::span<const std::string_view> ExcludeExtensions = DefaultChunkingExcludeExtensions,
- uint64_t ChunkFileSizeLimit = DefaultChunkingFileSizeLimit,
- const ChunkedParams& ChunkingParams = DefaultChunkedParams,
- uint32_t FixedChunkingChunkSize = DefaultFixedChunkingChunkSize);
+struct ChunkingControllerWithFixedChunkingSettings // Options consumed by ChunkingControllerWithFixedChunking
+{
+ std::vector<std::string> FixedChunkingExtensions = DefaultFixedChunkingExtensions; // files whose extension is listed are split into fixed-size chunks
+ std::vector<std::string> ExcludeExtensions = DefaultChunkingExcludeExtensions; // files whose extension is listed are not chunked
+ bool ExcludeElfFiles = DefaultChunkingExcludeElfFiles; // when set, files detected by IsElfFile() are not chunked (non-fixed path)
+ bool ExcludeMachOFiles = DefaultChunkingExcludeMachOFiles; // when set, files detected by IsMachOFile() are not chunked (non-fixed path)
+ uint64_t ChunkFileSizeLimit = DefaultChunkingFileSizeLimit; // files with a raw size below this are not chunked
+ ChunkedParams ChunkingParams = DefaultChunkedParams; // forwarded to ChunkData() on the non-fixed path
+ uint64_t FixedChunkingChunkSize = DefaultFixedChunkingChunkSize; // size of each fixed chunk; the final chunk may be smaller
+ uint64_t MinSizeForFixedChunking = DefaultMinSizeForFixedChunking; // fixed-chunking files with a raw size below this are not chunked
+};
+
+std::unique_ptr<ChunkingController> CreateChunkingControllerWithFixedChunking(const ChunkingControllerWithFixedChunkingSettings& Setting);
std::unique_ptr<ChunkingController> CreateChunkingControllerWithFixedChunking(CbObjectView Parameters);
std::unique_ptr<ChunkingController> CreateChunkingController(std::string_view Name, CbObjectView Parameters);