aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/zen/cmds/builds_cmd.cpp326
-rw-r--r--src/zen/cmds/builds_cmd.h7
-rw-r--r--src/zenremotestore/builds/buildstorageoperations.cpp184
-rw-r--r--src/zenremotestore/chunking/chunkedcontent.cpp97
-rw-r--r--src/zenremotestore/chunking/chunkingcache.cpp627
-rw-r--r--src/zenremotestore/include/zenremotestore/builds/buildstorageoperations.h5
-rw-r--r--src/zenremotestore/include/zenremotestore/chunking/chunkedcontent.h14
-rw-r--r--src/zenremotestore/include/zenremotestore/chunking/chunkedfile.h3
-rw-r--r--src/zenremotestore/include/zenremotestore/chunking/chunkingcache.h44
-rw-r--r--src/zenremotestore/zenremotestore.cpp2
10 files changed, 1110 insertions, 199 deletions
diff --git a/src/zen/cmds/builds_cmd.cpp b/src/zen/cmds/builds_cmd.cpp
index f859a618b..508d2ba60 100644
--- a/src/zen/cmds/builds_cmd.cpp
+++ b/src/zen/cmds/builds_cmd.cpp
@@ -33,6 +33,7 @@
#include <zenremotestore/chunking/chunkblock.h>
#include <zenremotestore/chunking/chunkedcontent.h>
#include <zenremotestore/chunking/chunkedfile.h>
+#include <zenremotestore/chunking/chunkingcache.h>
#include <zenremotestore/chunking/chunkingcontroller.h>
#include <zenremotestore/filesystemutils.h>
#include <zenremotestore/jupiter/jupiterhost.h>
@@ -420,53 +421,63 @@ namespace {
NiceTimeSpanMs(ValidateOp.m_ValidateStats.ElapsedWallTimeUS / 1000));
}
- std::vector<std::pair<Oid, std::string>> UploadFolder(OperationLogOutput& Output,
- TransferThreadWorkers& Workers,
- StorageInstance& Storage,
- const Oid& BuildId,
- const Oid& BuildPartId,
- const std::string_view BuildPartName,
- const std::filesystem::path& Path,
- const std::filesystem::path& TempDir,
- const std::filesystem::path& ManifestPath,
- const uint64_t FindBlockMaxCount,
- const uint8_t BlockReuseMinPercentLimit,
- bool AllowMultiparts,
- const CbObject& MetaData,
- bool CreateBuild,
- bool IgnoreExistingBlocks,
- bool UploadToZenCache,
- const std::vector<std::string>& ExcludeFolders,
- const std::vector<std::string>& ExcludeExtensions)
+ struct UploadFolderOptions
+ {
+ std::filesystem::path TempDir;
+ uint64_t FindBlockMaxCount;
+ uint8_t BlockReuseMinPercentLimit;
+ bool AllowMultiparts;
+ bool CreateBuild;
+ bool IgnoreExistingBlocks;
+ bool UploadToZenCache;
+ const std::vector<std::string>& ExcludeFolders = DefaultExcludeFolders;
+ const std::vector<std::string>& ExcludeExtensions = DefaultExcludeExtensions;
+ };
+
+ std::vector<std::pair<Oid, std::string>> UploadFolder(OperationLogOutput& Output,
+ TransferThreadWorkers& Workers,
+ StorageInstance& Storage,
+ const Oid& BuildId,
+ const Oid& BuildPartId,
+ const std::string_view BuildPartName,
+ const std::filesystem::path& Path,
+ const std::filesystem::path& ManifestPath,
+ const CbObject& MetaData,
+ ChunkingController& ChunkController,
+ ChunkingCache& ChunkCache,
+ const UploadFolderOptions& Options)
{
ProgressBar::SetLogOperationName(ProgressMode, "Upload Folder");
Stopwatch UploadTimer;
- BuildsOperationUploadFolder UploadOp(Output,
- Storage,
- AbortFlag,
- PauseFlag,
- Workers.GetIOWorkerPool(),
- Workers.GetNetworkPool(),
- BuildId,
- Path,
- CreateBuild,
- std::move(MetaData),
- BuildsOperationUploadFolder::Options{.IsQuiet = IsQuiet,
- .IsVerbose = IsVerbose,
- .DoExtraContentValidation = DoExtraContentVerify,
- .FindBlockMaxCount = FindBlockMaxCount,
- .BlockReuseMinPercentLimit = BlockReuseMinPercentLimit,
- .AllowMultiparts = AllowMultiparts,
- .IgnoreExistingBlocks = IgnoreExistingBlocks,
- .TempDir = TempDir,
- .ExcludeFolders = ExcludeFolders,
- .ExcludeExtensions = ExcludeExtensions,
- .ZenExcludeManifestName = ZenExcludeManifestName,
- .NonCompressableExtensions = DefaultSplitOnlyExtensions,
- .PopulateCache = UploadToZenCache});
- std::vector<std::pair<Oid, std::string>> UploadedParts = UploadOp.Execute(BuildPartId, BuildPartName, ManifestPath);
+ BuildsOperationUploadFolder UploadOp(
+ Output,
+ Storage,
+ AbortFlag,
+ PauseFlag,
+ Workers.GetIOWorkerPool(),
+ Workers.GetNetworkPool(),
+ BuildId,
+ Path,
+ Options.CreateBuild,
+ std::move(MetaData),
+ BuildsOperationUploadFolder::Options{.IsQuiet = IsQuiet,
+ .IsVerbose = IsVerbose,
+ .DoExtraContentValidation = DoExtraContentVerify,
+ .FindBlockMaxCount = Options.FindBlockMaxCount,
+ .BlockReuseMinPercentLimit = Options.BlockReuseMinPercentLimit,
+ .AllowMultiparts = Options.AllowMultiparts,
+ .IgnoreExistingBlocks = Options.IgnoreExistingBlocks,
+ .TempDir = Options.TempDir,
+ .ExcludeFolders = Options.ExcludeFolders,
+ .ExcludeExtensions = Options.ExcludeExtensions,
+ .ZenExcludeManifestName = ZenExcludeManifestName,
+ .NonCompressableExtensions = DefaultSplitOnlyExtensions,
+ .PopulateCache = Options.UploadToZenCache});
+
+ std::vector<std::pair<Oid, std::string>> UploadedParts =
+ UploadOp.Execute(BuildPartId, BuildPartName, ManifestPath, ChunkController, ChunkCache);
if (AbortFlag)
{
return {};
@@ -493,6 +504,10 @@ namespace {
"\n UniqueChunksFound: {}"
"\n UniqueSequencesFound: {}"
"\n UniqueBytesFound: {}"
+ "\n FilesFoundInCache: {}"
+ "\n ChunksFoundInCache: {}"
+ "\n FilesStoredInCache: {}"
+ "\n ChunksStoredInCache: {}"
"\n ElapsedWallTimeUS: {}",
UploadOp.m_ChunkingStats.FilesProcessed.load(),
UploadOp.m_ChunkingStats.FilesChunked.load(),
@@ -500,6 +515,12 @@ namespace {
UploadOp.m_ChunkingStats.UniqueChunksFound.load(),
UploadOp.m_ChunkingStats.UniqueSequencesFound.load(),
NiceBytes(UploadOp.m_ChunkingStats.UniqueBytesFound.load()),
+ UploadOp.m_ChunkingStats.FilesFoundInCache.load(),
+ UploadOp.m_ChunkingStats.ChunksFoundInCache.load(),
+ NiceBytes(UploadOp.m_ChunkingStats.BytesFoundInCache.load()),
+ UploadOp.m_ChunkingStats.FilesStoredInCache.load(),
+ UploadOp.m_ChunkingStats.ChunksStoredInCache.load(),
+ NiceBytes(UploadOp.m_ChunkingStats.BytesStoredInCache.load()),
NiceLatencyNs(UploadOp.m_ChunkingStats.ElapsedWallTimeUS * 1000));
ZEN_CONSOLE_VERBOSE(
@@ -608,7 +629,7 @@ namespace {
: 0.0;
const std::string MultipartAttachmentStats =
- AllowMultiparts ? fmt::format(" ({} as multipart)", UploadOp.m_UploadStats.MultipartAttachmentCount.load()) : "";
+ Options.AllowMultiparts ? fmt::format(" ({} as multipart)", UploadOp.m_UploadStats.MultipartAttachmentCount.load()) : "";
if (!IsQuiet)
{
@@ -643,10 +664,10 @@ namespace {
NiceTimeSpanMs(UploadOp.m_GenerateBlocksStats.GenerateBlocksElapsedWallTimeUS / 1000),
UploadOp.m_LooseChunksStats.CompressedChunkCount.load(),
- NiceBytes(UploadOp.m_LooseChunksStats.ChunkByteCount),
+ NiceBytes(UploadOp.m_LooseChunksStats.CompressedChunkRawBytes),
NiceBytes(UploadOp.m_LooseChunksStats.CompressedChunkBytes.load()),
NiceNum(GetBytesPerSecond(UploadOp.m_LooseChunksStats.CompressChunksElapsedWallTimeUS,
- UploadOp.m_LooseChunksStats.ChunkByteCount)),
+ UploadOp.m_LooseChunksStats.CompressedChunkRawBytes)),
NiceTimeSpanMs(UploadOp.m_LooseChunksStats.CompressChunksElapsedWallTimeUS / 1000),
UploadOp.m_UploadStats.BlockCount.load() + UploadOp.m_UploadStats.ChunkCount.load(),
@@ -909,6 +930,7 @@ namespace {
ChunkingStatistics& ChunkingStats,
const std::filesystem::path& Path,
ChunkingController& ChunkController,
+ ChunkingCache& ChunkCache,
std::span<const std::filesystem::path> PathsToCheck)
{
FolderContent FolderState;
@@ -953,6 +975,7 @@ namespace {
Path,
FolderState,
ChunkController,
+ ChunkCache,
GetUpdateDelayMS(ProgressMode),
[&](bool IsAborted, bool IsPaused, std::ptrdiff_t) {
FilteredBytesHashed.Update(LocalChunkingStats.BytesHashed.load());
@@ -988,7 +1011,8 @@ namespace {
ChunkingStatistics& ChunkingStats,
const std::filesystem::path& Path,
const std::filesystem::path& StateFilePath,
- ChunkingController& ChunkController)
+ ChunkingController& ChunkController,
+ ChunkingCache& ChunkCache)
{
Stopwatch ReadStateTimer;
bool FileExists = IsFile(StateFilePath);
@@ -1075,6 +1099,7 @@ namespace {
Path,
UpdatedContent,
ChunkController,
+ ChunkCache,
GetUpdateDelayMS(ProgressMode),
[&](bool IsAborted, bool IsPaused, std::ptrdiff_t) {
FilteredBytesHashed.Update(LocalChunkingStats.BytesHashed.load());
@@ -1143,7 +1168,8 @@ namespace {
const std::filesystem::path& Path,
std::function<bool(const std::string_view& RelativePath)>&& IsAcceptedFolder,
std::function<bool(std::string_view RelativePath, uint64_t Size, uint32_t Attributes)>&& IsAcceptedFile,
- ChunkingController& ChunkController)
+ ChunkingController& ChunkController,
+ ChunkingCache& ChunkCache)
{
Stopwatch Timer;
@@ -1163,14 +1189,19 @@ namespace {
return {};
}
- BuildState LocalContent =
- GetLocalContent(Workers, GetFolderContentStats, ChunkingStats, Path, ZenStateFilePath(Path / ZenFolderName), ChunkController)
- .State;
+ BuildState LocalContent = GetLocalContent(Workers,
+ GetFolderContentStats,
+ ChunkingStats,
+ Path,
+ ZenStateFilePath(Path / ZenFolderName),
+ ChunkController,
+ ChunkCache)
+ .State;
std::vector<std::filesystem::path> UntrackedPaths = GetNewPaths(LocalContent.ChunkedContent.Paths, Content.Paths);
BuildState UntrackedLocalContent =
- GetLocalStateFromPaths(Workers, GetFolderContentStats, ChunkingStats, Path, ChunkController, UntrackedPaths).State;
+ GetLocalStateFromPaths(Workers, GetFolderContentStats, ChunkingStats, Path, ChunkController, ChunkCache, UntrackedPaths).State;
ChunkedFolderContent Result = MergeChunkedFolderContents(LocalContent.ChunkedContent,
std::vector<ChunkedFolderContent>{UntrackedLocalContent.ChunkedContent});
@@ -1210,7 +1241,7 @@ namespace {
uint64_t MaximumInMemoryPayloadSize = 512u * 1024u;
bool PopulateCache = true;
bool AppendNewContent = false;
- std::vector<std::string> ExcludeFolders;
+ std::vector<std::string> ExcludeFolders = DefaultExcludeFolders;
};
void DownloadFolder(OperationLogOutput& Output,
@@ -1297,18 +1328,25 @@ namespace {
ZEN_CONSOLE_INFO("Unspecified chunking algorithm, using default");
ChunkController = CreateStandardChunkingController(StandardChunkingControllerSettings{});
}
+ std::unique_ptr<ChunkingCache> ChunkCache(CreateNullChunkingCache());
LocalState = GetLocalContent(Workers,
LocalFolderScanStats,
ChunkingStats,
Path,
ZenStateFilePath(Path / ZenFolderName),
- *ChunkController);
+ *ChunkController,
+ *ChunkCache);
std::vector<std::filesystem::path> UntrackedPaths = GetNewPaths(LocalState.State.ChunkedContent.Paths, RemoteContent.Paths);
- BuildSaveState UntrackedLocalContent =
- GetLocalStateFromPaths(Workers, LocalFolderScanStats, ChunkingStats, Path, *ChunkController, UntrackedPaths);
+ BuildSaveState UntrackedLocalContent = GetLocalStateFromPaths(Workers,
+ LocalFolderScanStats,
+ ChunkingStats,
+ Path,
+ *ChunkController,
+ *ChunkCache,
+ UntrackedPaths);
if (!UntrackedLocalContent.State.ChunkedContent.Paths.empty())
{
@@ -1773,9 +1811,10 @@ namespace {
void DiffFolders(TransferThreadWorkers& Workers,
const std::filesystem::path& BasePath,
const std::filesystem::path& ComparePath,
- bool OnlyChunked,
- const std::vector<std::string>& InExcludeFolders,
- const std::vector<std::string>& InExcludeExtensions)
+ ChunkingController& ChunkController,
+ ChunkingCache& ChunkCache,
+ const std::vector<std::string>& ExcludeFolders,
+ const std::vector<std::string>& ExcludeExtensions)
{
ZEN_TRACE_CPU("DiffFolders");
@@ -1797,20 +1836,7 @@ namespace {
ChunkedFolderContent CompareFolderContent;
{
- StandardChunkingControllerSettings ChunkingSettings;
- std::unique_ptr<ChunkingController> ChunkController = CreateStandardChunkingController(ChunkingSettings);
- std::vector<std::string> ExcludeExtensions = InExcludeExtensions;
- if (OnlyChunked)
- {
- ExcludeExtensions.insert(ExcludeExtensions.end(),
- ChunkingSettings.SplitOnlyExtensions.begin(),
- ChunkingSettings.SplitOnlyExtensions.end());
- ExcludeExtensions.insert(ExcludeExtensions.end(),
- ChunkingSettings.SplitAndCompressExtensions.begin(),
- ChunkingSettings.SplitAndCompressExtensions.end());
- }
-
- auto IsAcceptedFolder = [ExcludeFolders = InExcludeFolders](const std::string_view& RelativePath) -> bool {
+ auto IsAcceptedFolder = [ExcludeFolders](const std::string_view& RelativePath) -> bool {
for (const std::string& ExcludeFolder : ExcludeFolders)
{
if (RelativePath.starts_with(ExcludeFolder))
@@ -1849,7 +1875,8 @@ namespace {
BasePath,
IsAcceptedFolder,
IsAcceptedFile,
- *ChunkController);
+ ChunkController,
+ ChunkCache);
if (AbortFlag)
{
return;
@@ -1865,7 +1892,8 @@ namespace {
ComparePath,
IsAcceptedFolder,
IsAcceptedFile,
- *ChunkController);
+ ChunkController,
+ ChunkCache);
if (AbortFlag)
{
@@ -2082,6 +2110,15 @@ BuildsCommand::BuildsCommand()
"<boostworkers>");
};
+ auto AddChunkingCacheOptions = [this](cxxopts::Options& Ops) {
+ Ops.add_option("",
+ "",
+ "chunking-cache-path",
+ "Path to cache for chunking information of scanned files. Default is empty resulting in no caching",
+ cxxopts::value(m_ChunkingCachePath),
+ "<chunkingcachepath>");
+ };
+
auto AddWildcardOptions = [this](cxxopts::Options& Ops) {
Ops.add_option("",
"",
@@ -2238,6 +2275,7 @@ BuildsCommand::BuildsCommand()
AddZenFolderOptions(m_UploadOptions);
AddExcludeFolderOption(m_UploadOptions);
AddExcludeExtensionsOption(m_UploadOptions);
+ AddChunkingCacheOptions(m_UploadOptions);
m_UploadOptions.add_options()("h,help", "Print help");
m_UploadOptions.add_option("", "l", "local-path", "Root file system folder for build", cxxopts::value(m_Path), "<local-path>");
m_UploadOptions.add_option("",
@@ -2431,6 +2469,7 @@ BuildsCommand::BuildsCommand()
AddWorkerOptions(m_DiffOptions);
AddExcludeFolderOption(m_DiffOptions);
AddExcludeExtensionsOption(m_DiffOptions);
+ AddChunkingCacheOptions(m_DiffOptions);
m_DiffOptions.add_options()("h,help", "Print help");
m_DiffOptions.add_option("", "l", "local-path", "Root file system folder used as base", cxxopts::value(m_Path), "<local-path>");
m_DiffOptions.add_option("", "c", "compare-path", "Root file system folder used as diff", cxxopts::value(m_DiffPath), "<diff-path>");
@@ -2456,6 +2495,7 @@ BuildsCommand::BuildsCommand()
AddPartialBlockRequestOptions(m_TestOptions);
AddWildcardOptions(m_TestOptions);
AddAppendNewContentOptions(m_TestOptions);
+ AddChunkingCacheOptions(m_TestOptions);
m_TestOptions.add_option("",
"",
@@ -3321,6 +3361,7 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv)
m_ZenFolderPath = std::filesystem::current_path() / ZenFolderName;
}
MakeSafeAbsolutePathÍnPlace(m_ZenFolderPath);
+ MakeSafeAbsolutePathÍnPlace(m_ChunkingCachePath);
CreateDirectories(m_ZenFolderPath);
auto _ = MakeGuard([this, &Workers]() { CleanAndRemoveDirectory(Workers.GetIOWorkerPool(), m_ZenFolderPath); });
@@ -3357,24 +3398,32 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv)
std::vector<std::string> ExcludeExtensions = DefaultExcludeExtensions;
ParseExcludeFolderAndExtension(ExcludeFolders, ExcludeExtensions);
- std::vector<std::pair<Oid, std::string>> UploadedParts = UploadFolder(*Output,
- Workers,
- Storage,
- BuildId,
- BuildPartId,
- m_BuildPartName,
- m_Path,
- TempDir,
- m_ManifestPath,
- m_FindBlockMaxCount,
- m_BlockReuseMinPercentLimit,
- m_AllowMultiparts,
- MetaData,
- m_CreateBuild,
- m_Clean,
- m_UploadToZenCache,
- ExcludeFolders,
- ExcludeExtensions);
+ std::unique_ptr<ChunkingController> ChunkController = CreateStandardChunkingController(StandardChunkingControllerSettings{});
+ std::unique_ptr<ChunkingCache> ChunkCache = m_ChunkingCachePath.empty()
+ ? CreateNullChunkingCache()
+ : CreateDiskChunkingCache(m_ChunkingCachePath, *ChunkController, 256u * 1024u);
+
+ std::vector<std::pair<Oid, std::string>> UploadedParts =
+ UploadFolder(*Output,
+ Workers,
+ Storage,
+ BuildId,
+ BuildPartId,
+ m_BuildPartName,
+ m_Path,
+ m_ManifestPath,
+ MetaData,
+ *ChunkController,
+ *ChunkCache,
+ UploadFolderOptions{.TempDir = TempDir,
+ .FindBlockMaxCount = m_FindBlockMaxCount,
+ .BlockReuseMinPercentLimit = m_BlockReuseMinPercentLimit,
+ .AllowMultiparts = m_AllowMultiparts,
+ .CreateBuild = m_CreateBuild,
+ .IgnoreExistingBlocks = m_Clean,
+ .UploadToZenCache = m_UploadToZenCache,
+ .ExcludeFolders = ExcludeFolders,
+ .ExcludeExtensions = ExcludeExtensions});
if (!AbortFlag)
{
@@ -3619,11 +3668,29 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv)
ParsePath();
ParseDiffPath();
+ MakeSafeAbsolutePathÍnPlace(m_ChunkingCachePath);
+
std::vector<std::string> ExcludeFolders = DefaultExcludeFolders;
std::vector<std::string> ExcludeExtensions = DefaultExcludeExtensions;
ParseExcludeFolderAndExtension(ExcludeFolders, ExcludeExtensions);
- DiffFolders(Workers, m_Path, m_DiffPath, m_OnlyChunked, ExcludeFolders, ExcludeExtensions);
+ StandardChunkingControllerSettings ChunkingSettings;
+ std::unique_ptr<ChunkingController> ChunkController = CreateStandardChunkingController(ChunkingSettings);
+ std::unique_ptr<ChunkingCache> ChunkCache = m_ChunkingCachePath.empty()
+ ? CreateNullChunkingCache()
+ : CreateDiskChunkingCache(m_ChunkingCachePath, *ChunkController, 256u * 1024u);
+
+ if (m_OnlyChunked)
+ {
+ ExcludeExtensions.insert(ExcludeExtensions.end(),
+ ChunkingSettings.SplitOnlyExtensions.begin(),
+ ChunkingSettings.SplitOnlyExtensions.end());
+ ExcludeExtensions.insert(ExcludeExtensions.end(),
+ ChunkingSettings.SplitAndCompressExtensions.begin(),
+ ChunkingSettings.SplitAndCompressExtensions.end());
+ }
+
+ DiffFolders(Workers, m_Path, m_DiffPath, *ChunkController, *ChunkCache, ExcludeFolders, ExcludeExtensions);
if (AbortFlag)
{
throw std::runtime_error("Diff folders aborted");
@@ -3988,6 +4055,7 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv)
m_ZenFolderPath = m_Path / ZenFolderName;
}
MakeSafeAbsolutePathÍnPlace(m_ZenFolderPath);
+ MakeSafeAbsolutePathÍnPlace(m_ChunkingCachePath);
StorageInstance Storage = CreateBuildStorage(StorageStats,
StorageCacheStats,
@@ -4026,7 +4094,11 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv)
}
const std::filesystem::path UploadTempDir = UploadTempDirectory(m_Path);
- // std::filesystem::path UploadTempDir = m_ZenFolderPath / "upload_tmp";
+
+ std::unique_ptr<ChunkingController> ChunkController = CreateStandardChunkingController(StandardChunkingControllerSettings{});
+ std::unique_ptr<ChunkingCache> ChunkCache = m_ChunkingCachePath.empty()
+ ? CreateNullChunkingCache()
+ : CreateDiskChunkingCache(m_ChunkingCachePath, *ChunkController, 256u * 1024u);
UploadFolder(*Output,
Workers,
@@ -4035,22 +4107,51 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv)
BuildPartId,
m_BuildPartName,
m_Path,
- UploadTempDir,
{},
- m_FindBlockMaxCount,
- m_BlockReuseMinPercentLimit,
- m_AllowMultiparts,
MetaData,
- true,
- false,
- m_UploadToZenCache,
- DefaultExcludeFolders,
- DefaultExcludeExtensions);
+ *ChunkController,
+ *ChunkCache,
+ UploadFolderOptions{.TempDir = UploadTempDir,
+ .FindBlockMaxCount = m_FindBlockMaxCount,
+ .BlockReuseMinPercentLimit = m_BlockReuseMinPercentLimit,
+ .AllowMultiparts = m_AllowMultiparts,
+ .CreateBuild = true,
+ .IgnoreExistingBlocks = false,
+ .UploadToZenCache = m_UploadToZenCache});
+
if (AbortFlag)
{
throw std::runtime_error("Test aborted. (Upload build)");
}
+ {
+ ZEN_CONSOLE("Upload Build {}, Part {} ({}) from '{}' with chunking cache", m_BuildId, BuildPartId, m_BuildPartName, m_Path);
+
+ UploadFolder(*Output,
+ Workers,
+ Storage,
+ Oid::NewOid(),
+ Oid::NewOid(),
+ m_BuildPartName,
+ m_Path,
+ {},
+ MetaData,
+ *ChunkController,
+ *ChunkCache,
+ UploadFolderOptions{.TempDir = UploadTempDir,
+ .FindBlockMaxCount = m_FindBlockMaxCount,
+ .BlockReuseMinPercentLimit = m_BlockReuseMinPercentLimit,
+ .AllowMultiparts = m_AllowMultiparts,
+ .CreateBuild = true,
+ .IgnoreExistingBlocks = false,
+ .UploadToZenCache = m_UploadToZenCache});
+
+ if (AbortFlag)
+ {
+ throw std::runtime_error("Test aborted. (Upload again, chunking is cached)");
+ }
+ }
+
ValidateBuildPart(*Output, Workers, *Storage.BuildStorage, BuildId, BuildPartId, m_BuildPartName);
if (!m_IncludeWildcard.empty() || !m_ExcludeWildcard.empty())
@@ -4334,17 +4435,18 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv)
BuildPartId2,
m_BuildPartName,
DownloadPath,
- UploadTempDir,
{},
- m_FindBlockMaxCount,
- m_BlockReuseMinPercentLimit,
- m_AllowMultiparts,
MetaData2,
- true,
- false,
- m_UploadToZenCache,
- DefaultExcludeFolders,
- DefaultExcludeExtensions);
+ *ChunkController,
+ *ChunkCache,
+ UploadFolderOptions{.TempDir = UploadTempDir,
+ .FindBlockMaxCount = m_FindBlockMaxCount,
+ .BlockReuseMinPercentLimit = m_BlockReuseMinPercentLimit,
+ .AllowMultiparts = m_AllowMultiparts,
+ .CreateBuild = true,
+ .IgnoreExistingBlocks = false,
+ .UploadToZenCache = m_UploadToZenCache});
+
if (AbortFlag)
{
throw std::runtime_error("Test aborted. (Upload scrambled)");
diff --git a/src/zen/cmds/builds_cmd.h b/src/zen/cmds/builds_cmd.h
index 80c64c48d..f87cd01ae 100644
--- a/src/zen/cmds/builds_cmd.h
+++ b/src/zen/cmds/builds_cmd.h
@@ -96,9 +96,10 @@ private:
std::string m_ExcludeFolders;
std::string m_ExcludeExtensions;
- cxxopts::Options m_UploadOptions{"upload", "Upload a folder"};
- uint64_t m_FindBlockMaxCount = 10000;
- bool m_PostUploadVerify = false;
+ cxxopts::Options m_UploadOptions{"upload", "Upload a folder"};
+ uint64_t m_FindBlockMaxCount = 10000;
+ bool m_PostUploadVerify = false;
+ std::filesystem::path m_ChunkingCachePath;
cxxopts::Options m_DownloadOptions{"download", "Download a folder"};
std::vector<std::string> m_BuildPartNames;
diff --git a/src/zenremotestore/builds/buildstorageoperations.cpp b/src/zenremotestore/builds/buildstorageoperations.cpp
index a8169538b..485973e2b 100644
--- a/src/zenremotestore/builds/buildstorageoperations.cpp
+++ b/src/zenremotestore/builds/buildstorageoperations.cpp
@@ -8,6 +8,7 @@
#include <zenremotestore/builds/buildstoragecache.h>
#include <zenremotestore/builds/buildstorageutil.h>
#include <zenremotestore/chunking/chunkblock.h>
+#include <zenremotestore/chunking/chunkingcache.h>
#include <zenremotestore/chunking/chunkingcontroller.h>
#include <zenremotestore/filesystemutils.h>
#include <zenremotestore/operationlogoutput.h>
@@ -4703,45 +4704,42 @@ BuildsOperationUploadFolder::ReadManifestParts(const std::filesystem::path& Mani
UploadParts.resize(Manifest.Parts.size());
for (size_t PartIndex = 0; PartIndex < Manifest.Parts.size(); PartIndex++)
{
- const PartManifest::Part& PartManifest = Manifest.Parts[PartIndex];
- UploadPart& Part = UploadParts[PartIndex];
- FolderContent& Content = Part.Content;
+ PartManifest::Part& PartManifest = Manifest.Parts[PartIndex];
+ if (ManifestPath.is_relative())
+ {
+ PartManifest.Files.push_back(ManifestPath);
+ }
+
+ UploadPart& Part = UploadParts[PartIndex];
+ FolderContent& Content = Part.Content;
GetFolderContentStatistics& LocalFolderScanStats = Part.LocalFolderScanStats;
const std::vector<std::filesystem::path>& AssetPaths = PartManifest.Files;
- for (const std::filesystem::path& AssetPath : AssetPaths)
- {
- Content.Paths.push_back(AssetPath);
- const std::filesystem::path AssetFilePath = (m_Path / AssetPath).make_preferred();
- Content.RawSizes.push_back(FileSizeFromPath(AssetFilePath));
-#if ZEN_PLATFORM_WINDOWS
- Content.Attributes.push_back(GetFileAttributesFromPath(AssetFilePath));
-#endif // ZEN_PLATFORM_WINDOWS
-#if ZEN_PLATFORM_MAC || ZEN_PLATFORM_LINUX
- Content.Attributes.push_back(GetFileMode(AssetFilePath));
-#endif // ZEN_PLATFORM_MAC || ZEN_PLATFORM_LINUX
- LocalFolderScanStats.AcceptedFileByteCount += Content.RawSizes.back();
- LocalFolderScanStats.AcceptedFileCount++;
- }
- if (ManifestPath.is_relative())
- {
- Content.Paths.push_back(ManifestPath);
- const std::filesystem::path ManifestFilePath = (m_Path / ManifestPath).make_preferred();
- Content.RawSizes.push_back(FileSizeFromPath(ManifestFilePath));
-#if ZEN_PLATFORM_WINDOWS
- Content.Attributes.push_back(GetFileAttributesFromPath(ManifestFilePath));
-#endif // ZEN_PLATFORM_WINDOWS
-#if ZEN_PLATFORM_MAC || ZEN_PLATFORM_LINUX
- Content.Attributes.push_back(GetFileMode(ManifestFilePath));
-#endif // ZEN_PLATFORM_MAC || ZEN_PLATFORM_LINUX
-
- LocalFolderScanStats.AcceptedFileByteCount += Content.RawSizes.back();
- LocalFolderScanStats.AcceptedFileCount++;
+ Content = GetValidFolderContent(
+ m_IOWorkerPool,
+ LocalFolderScanStats,
+ m_Path,
+ AssetPaths,
+ [](uint64_t PathCount, uint64_t CompletedPathCount) { ZEN_UNUSED(PathCount, CompletedPathCount); },
+ 1000,
+ m_AbortFlag,
+ m_PauseFlag);
+
+ if (Content.Paths.size() != AssetPaths.size())
+ {
+ const tsl::robin_set<std::filesystem::path> FoundPaths(Content.Paths.begin(), Content.Paths.end());
+ ExtendableStringBuilder<1024> SB;
+ for (const std::filesystem::path& AssetPath : AssetPaths)
+ {
+ if (!FoundPaths.contains(AssetPath))
+ {
+ SB << "\n " << AssetPath.generic_string();
+ }
+ }
+ throw std::runtime_error(
+ fmt::format("Manifest file at '{}' references files that does not exist{}", ManifestPath, SB.ToView()));
}
- LocalFolderScanStats.FoundFileByteCount.store(LocalFolderScanStats.AcceptedFileByteCount);
- LocalFolderScanStats.FoundFileCount.store(LocalFolderScanStats.AcceptedFileCount);
- LocalFolderScanStats.ElapsedWallTimeUS = ManifestParseTimer.GetElapsedTimeUs();
Part.PartId = PartManifest.PartId;
Part.PartName = PartManifest.PartName;
@@ -4754,7 +4752,9 @@ BuildsOperationUploadFolder::ReadManifestParts(const std::filesystem::path& Mani
std::vector<std::pair<Oid, std::string>>
BuildsOperationUploadFolder::Execute(const Oid& BuildPartId,
const std::string_view BuildPartName,
- const std::filesystem::path& ManifestPath)
+ const std::filesystem::path& ManifestPath,
+ ChunkingController& ChunkController,
+ ChunkingCache& ChunkCache)
{
ZEN_TRACE_CPU("BuildsOperationUploadFolder::Execute");
try
@@ -4823,19 +4823,15 @@ BuildsOperationUploadFolder::Execute(const Oid& BuildPartId,
m_PrepBuildResultFuture = m_NetworkPool.EnqueueTask(std::packaged_task<PrepareBuildResult()>{[this] { return PrepareBuild(); }},
WorkerThreadPool::EMode::EnableBacklog);
+ for (uint32_t PartIndex = 0; PartIndex < UploadParts.size(); PartIndex++)
{
- std::unique_ptr<ChunkingController> ChunkController = CreateStandardChunkingController(StandardChunkingControllerSettings{});
+ const uint32_t PartStepOffset = UploadPartsStep + (PartIndex * uint32_t(PartTaskSteps::StepCount));
- for (uint32_t PartIndex = 0; PartIndex < UploadParts.size(); PartIndex++)
+ const UploadPart& Part = UploadParts[PartIndex];
+ UploadBuildPart(ChunkController, ChunkCache, PartIndex, Part, PartStepOffset, StepCount);
+ if (m_AbortFlag)
{
- const uint32_t PartStepOffset = UploadPartsStep + (PartIndex * uint32_t(PartTaskSteps::StepCount));
-
- const UploadPart& Part = UploadParts[PartIndex];
- UploadBuildPart(*ChunkController, PartIndex, Part, PartStepOffset, StepCount);
- if (m_AbortFlag)
- {
- return {};
- }
+ return {};
}
}
@@ -5501,6 +5497,7 @@ BuildsOperationUploadFolder::RebuildBlock(const ChunkedFolderContent& Content,
void
BuildsOperationUploadFolder::UploadBuildPart(ChunkingController& ChunkController,
+ ChunkingCache& ChunkCache,
uint32_t PartIndex,
const UploadPart& Part,
uint32_t PartStepOffset,
@@ -5532,6 +5529,7 @@ BuildsOperationUploadFolder::UploadBuildPart(ChunkingController& ChunkController
m_Path,
Part.Content,
ChunkController,
+ ChunkCache,
m_LogOutput.GetProgressUpdateDelayMS(),
[&](bool IsAborted, bool IsPaused, std::ptrdiff_t) {
FilteredBytesHashed.Update(ChunkingStats.BytesHashed.load());
@@ -7747,6 +7745,8 @@ namespace buildstorageoperations_testutils {
TestState(const std::filesystem::path& InRootPath)
: RootPath(InRootPath)
, LogOutput(CreateStandardLogOutput(Log))
+ , ChunkController(CreateStandardChunkingController(StandardChunkingControllerSettings{}))
+ , ChunkCache(CreateMemoryChunkingCache())
, WorkerPool(2)
, NetworkPool(2)
{
@@ -7797,7 +7797,7 @@ namespace buildstorageoperations_testutils {
true,
MetaData,
BuildsOperationUploadFolder::Options{.TempDir = TempPath});
- return Upload.Execute(BuildPartId, BuildPartName, ManifestPath);
+ return Upload.Execute(BuildPartId, BuildPartName, ManifestPath, *ChunkController, *ChunkCache);
}
void ValidateUpload(const Oid& BuildId, const std::vector<std::pair<Oid, std::string>>& Parts)
@@ -7845,8 +7845,6 @@ namespace buildstorageoperations_testutils {
std::vector<ChunkedFolderContent> PartContents;
- std::unique_ptr<ChunkingController> ChunkController;
-
std::vector<ChunkBlockDescription> BlockDescriptions;
std::vector<IoHash> LooseChunkHashes;
@@ -7916,6 +7914,7 @@ namespace buildstorageoperations_testutils {
TargetPath,
CurrentLocalFolderState,
*ChunkController,
+ *ChunkCache,
1000,
[&](bool IsAborted, bool IsPaused, std::ptrdiff_t) { ZEN_UNUSED(IsAborted, IsPaused); },
AbortFlag,
@@ -7995,6 +7994,9 @@ namespace buildstorageoperations_testutils {
LoggerRef Log = ConsoleLog();
std::unique_ptr<OperationLogOutput> LogOutput;
+ std::unique_ptr<ChunkingController> ChunkController;
+ std::unique_ptr<ChunkingCache> ChunkCache;
+
StorageInstance Storage;
BuildStorageBase::Statistics StorageStats;
@@ -8106,6 +8108,92 @@ TEST_CASE("buildstorageoperations.upload.manifest")
State.ValidateDownload(ManifestFiles, ManifestSizes, "source", "download", DownloadContent);
}
+TEST_CASE("buildstorageoperations.memorychunkingcache")
+{
+ using namespace buildstorageoperations_testutils;
+
+ FastRandom BaseRandom;
+
+ const size_t FileCount = 11;
+
+ const std::string Paths[FileCount] = {{"file_1"},
+ {"file_2.exe"},
+ {"file_3.txt"},
+ {"dir_1/dir1_file_1.exe"},
+ {"dir_1/dir1_file_2.pdb"},
+ {"dir_1/dir1_file_3.txt"},
+ {"dir_2/dir2_dir1/dir2_dir1_file_1.exe"},
+ {"dir_2/dir2_dir1/dir2_dir1_file_2.pdb"},
+ {"dir_2/dir2_dir1/dir2_dir1_file_3.dll"},
+ {"dir_2/dir2_dir2/dir2_dir2_file_1.txt"},
+ {"dir_2/dir2_dir2/dir2_dir2_file_2.json"}};
+ const uint64_t Sizes[FileCount] =
+ {6u * 1024u, 0, 798, 19u * 1024u, 7u * 1024u, 93, 31u * 1024u, 17u * 1024u, 13u * 1024u, 2u * 1024u, 3u * 1024u};
+
+ ScopedTemporaryDirectory SourceFolder;
+ TestState State(SourceFolder.Path());
+ State.Initialize();
+ State.CreateSourceData("source", Paths, Sizes);
+
+ const Oid BuildId = Oid::NewOid();
+ const Oid BuildPartId = Oid::NewOid();
+ const std::string BuildPartName = "default";
+
+ {
+ const std::filesystem::path SourcePath = SourceFolder.Path() / "source";
+ CbObject MetaData;
+ BuildsOperationUploadFolder Upload(*State.LogOutput,
+ State.Storage,
+ State.AbortFlag,
+ State.PauseFlag,
+ State.WorkerPool,
+ State.NetworkPool,
+ BuildId,
+ SourcePath,
+ true,
+ MetaData,
+ BuildsOperationUploadFolder::Options{.TempDir = State.TempPath});
+ auto Result = Upload.Execute(BuildPartId, BuildPartName, {}, *State.ChunkController, *State.ChunkCache);
+
+ CHECK_EQ(Upload.m_ChunkingStats.FilesStoredInCache.load(), FileCount - 1); // Zero size files are not stored in cache
+ CHECK_EQ(Upload.m_ChunkingStats.BytesStoredInCache.load(), std::accumulate(&Sizes[0], &Sizes[FileCount], uint64_t(0)));
+ CHECK(Upload.m_ChunkingStats.ChunksStoredInCache.load() >= FileCount - 1); // Zero size files are not stored in cache
+
+ CHECK_EQ(Result.size(), 1u);
+ CHECK_EQ(Result[0].first, BuildPartId);
+ CHECK_EQ(Result[0].second, BuildPartName);
+ }
+
+ auto Result = State.Upload(BuildId, BuildPartId, BuildPartName, "source", {});
+
+ const Oid BuildId2 = Oid::NewOid();
+ const Oid BuildPartId2 = Oid::NewOid();
+
+ {
+ const std::filesystem::path SourcePath = SourceFolder.Path() / "source";
+ CbObject MetaData;
+ BuildsOperationUploadFolder Upload(*State.LogOutput,
+ State.Storage,
+ State.AbortFlag,
+ State.PauseFlag,
+ State.WorkerPool,
+ State.NetworkPool,
+ BuildId2,
+ SourcePath,
+ true,
+ MetaData,
+ BuildsOperationUploadFolder::Options{.TempDir = State.TempPath});
+ Upload.Execute(BuildPartId2, BuildPartName, {}, *State.ChunkController, *State.ChunkCache);
+
+ CHECK_EQ(Upload.m_ChunkingStats.FilesFoundInCache.load(), FileCount - 1); // Zero size files are not stored in cache
+ CHECK_EQ(Upload.m_ChunkingStats.BytesFoundInCache.load(), std::accumulate(&Sizes[0], &Sizes[FileCount], uint64_t(0)));
+ CHECK(Upload.m_ChunkingStats.ChunksFoundInCache.load() >= FileCount - 1); // Zero size files are not stored in cache
+ }
+
+ FolderContent DownloadContent = State.Download(BuildId2, BuildPartId2, {}, "download", /* Append */ false);
+ State.ValidateDownload(Paths, Sizes, "source", "download", DownloadContent);
+}
+
TEST_CASE("buildstorageoperations.upload.multipart")
{
using namespace buildstorageoperations_testutils;
diff --git a/src/zenremotestore/chunking/chunkedcontent.cpp b/src/zenremotestore/chunking/chunkedcontent.cpp
index fda01aa56..26d179f14 100644
--- a/src/zenremotestore/chunking/chunkedcontent.cpp
+++ b/src/zenremotestore/chunking/chunkedcontent.cpp
@@ -13,6 +13,7 @@
#include <zencore/trace.h>
#include <zenremotestore/chunking/chunkblock.h>
#include <zenremotestore/chunking/chunkedfile.h>
+#include <zenremotestore/chunking/chunkingcache.h>
#include <zenremotestore/chunking/chunkingcontroller.h>
#include <zenutil/wildcard.h>
@@ -100,6 +101,8 @@ namespace {
IoHash HashOneFile(ChunkingStatistics& Stats,
const ChunkingController& InChunkingController,
+ ChunkingCache& InChunkingCache,
+ std::span<const uint64_t> ModificationTicks,
ChunkedFolderContent& OutChunkedContent,
tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& ChunkHashToChunkIndex,
tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& RawHashToSequenceRawHashIndex,
@@ -110,8 +113,9 @@ namespace {
{
ZEN_TRACE_CPU("HashOneFile");
- const uint64_t RawSize = OutChunkedContent.RawSizes[PathIndex];
- const std::filesystem::path& Path = OutChunkedContent.Paths[PathIndex];
+ const std::filesystem::path& Path = OutChunkedContent.Paths[PathIndex];
+ const uint64_t RawSize = OutChunkedContent.RawSizes[PathIndex];
+ const uint64_t ModificationTick = ModificationTicks[PathIndex];
if (RawSize == 0)
{
@@ -119,16 +123,53 @@ namespace {
}
else
{
+ std::filesystem::path FullPath = FolderPath / Path;
+ FullPath.make_preferred();
+
ChunkedInfoWithSource Chunked;
- const bool DidChunking =
- InChunkingController.ProcessFile((FolderPath / Path).make_preferred(), RawSize, Chunked, Stats.BytesHashed, AbortFlag);
- if (DidChunking)
+
+ if (!InChunkingCache.GetCachedFile(FullPath, RawSize, ModificationTick, Chunked))
{
- Lock.WithExclusiveLock([&]() {
- if (!RawHashToSequenceRawHashIndex.contains(Chunked.Info.RawHash))
+ const bool DidChunking = InChunkingController.ProcessFile(FullPath, RawSize, Chunked, Stats.BytesHashed, AbortFlag);
+ if (!DidChunking)
+ {
+ ZEN_TRACE_CPU("HashOnly");
+
+ IoBuffer Buffer = IoBufferBuilder::MakeFromFile(FullPath);
+ if (Buffer.GetSize() != RawSize)
+ {
+ throw std::runtime_error(fmt::format("Failed opening file '{}' for hashing", FolderPath / Path));
+ }
+
+ Chunked.Info.RawSize = RawSize;
+ Chunked.Info.RawHash = IoHash::HashBuffer(Buffer, &Stats.BytesHashed);
+ }
+ if (InChunkingCache.PutCachedFile(FullPath, ModificationTick, Chunked))
+ {
+ Stats.FilesStoredInCache++;
+ Stats.ChunksStoredInCache += Chunked.Info.ChunkSequence.empty() ? 1 : Chunked.Info.ChunkHashes.size();
+ Stats.BytesStoredInCache += RawSize;
+ }
+ }
+ else
+ {
+ Stats.FilesFoundInCache++;
+ Stats.ChunksFoundInCache += Chunked.Info.ChunkSequence.empty() ? 1 : Chunked.Info.ChunkHashes.size();
+ Stats.BytesFoundInCache += RawSize;
+ }
+ Lock.WithExclusiveLock([&]() {
+ if (!RawHashToSequenceRawHashIndex.contains(Chunked.Info.RawHash))
+ {
+ RawHashToSequenceRawHashIndex.insert(
+ {Chunked.Info.RawHash, gsl::narrow<uint32_t>(OutChunkedContent.ChunkedContent.SequenceRawHashes.size())});
+
+ if (Chunked.Info.ChunkSequence.empty())
+ {
+ AddChunkSequence(Stats, OutChunkedContent.ChunkedContent, ChunkHashToChunkIndex, Chunked.Info.RawHash, RawSize);
+ Stats.UniqueSequencesFound++;
+ }
+ else
{
- RawHashToSequenceRawHashIndex.insert(
- {Chunked.Info.RawHash, gsl::narrow<uint32_t>(OutChunkedContent.ChunkedContent.SequenceRawHashes.size())});
std::vector<uint64_t> ChunkSizes;
ChunkSizes.reserve(Chunked.ChunkSources.size());
for (const ChunkSource& Source : Chunked.ChunkSources)
@@ -144,34 +185,12 @@ namespace {
Chunked.Info.ChunkSequence,
Chunked.Info.ChunkHashes,
ChunkSizes);
- Stats.UniqueSequencesFound++;
}
- });
- Stats.FilesChunked++;
- return Chunked.Info.RawHash;
- }
- else
- {
- ZEN_TRACE_CPU("HashOnly");
-
- IoBuffer Buffer = IoBufferBuilder::MakeFromFile((FolderPath / Path).make_preferred());
- if (Buffer.GetSize() != RawSize)
- {
- throw std::runtime_error(fmt::format("Failed opening file '{}' for hashing", FolderPath / Path));
+ Stats.UniqueSequencesFound++;
}
- const IoHash Hash = IoHash::HashBuffer(Buffer, &Stats.BytesHashed);
-
- Lock.WithExclusiveLock([&]() {
- if (!RawHashToSequenceRawHashIndex.contains(Hash))
- {
- RawHashToSequenceRawHashIndex.insert(
- {Hash, gsl::narrow<uint32_t>(OutChunkedContent.ChunkedContent.SequenceRawHashes.size())});
- AddChunkSequence(Stats, OutChunkedContent.ChunkedContent, ChunkHashToChunkIndex, Hash, RawSize);
- Stats.UniqueSequencesFound++;
- }
- });
- return Hash;
- }
+ });
+ Stats.FilesChunked++;
+ return Chunked.Info.RawHash;
}
}
@@ -1113,6 +1132,7 @@ ChunkFolderContent(ChunkingStatistics& Stats,
const std::filesystem::path& RootPath,
const FolderContent& Content,
const ChunkingController& InChunkingController,
+ ChunkingCache& InChunkingCache,
int32_t UpdateIntervalMS,
std::function<void(bool IsAborted, bool IsPaused, std::ptrdiff_t PendingWork)>&& UpdateCallback,
std::atomic<bool>& AbortFlag,
@@ -1123,6 +1143,10 @@ ChunkFolderContent(ChunkingStatistics& Stats,
Stopwatch Timer;
auto _ = MakeGuard([&Stats, &Timer]() { Stats.ElapsedWallTimeUS = Timer.GetElapsedTimeUs(); });
+ ZEN_ASSERT(Content.ModificationTicks.size() == Content.Paths.size());
+ ZEN_ASSERT(Content.RawSizes.size() == Content.Paths.size());
+ ZEN_ASSERT(Content.Attributes.size() == Content.Paths.size());
+
ChunkedFolderContent Result = {.Platform = Content.Platform,
.Paths = Content.Paths,
.RawSizes = Content.RawSizes,
@@ -1163,12 +1187,15 @@ ChunkFolderContent(ChunkingStatistics& Stats,
{
break;
}
+
Work.ScheduleWork(WorkerPool, // GetSyncWorkerPool()
[&, PathIndex](std::atomic<bool>& AbortFlag) {
if (!AbortFlag)
{
IoHash RawHash = HashOneFile(Stats,
InChunkingController,
+ InChunkingCache,
+ Content.ModificationTicks,
Result,
ChunkHashToChunkIndex,
RawHashToSequenceRawHashIndex,
diff --git a/src/zenremotestore/chunking/chunkingcache.cpp b/src/zenremotestore/chunking/chunkingcache.cpp
new file mode 100644
index 000000000..7f0a26330
--- /dev/null
+++ b/src/zenremotestore/chunking/chunkingcache.cpp
@@ -0,0 +1,627 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#include <zenremotestore/chunking/chunkingcache.h>
+
+#include <zenbase/zenbase.h>
+#include <zencore/basicfile.h>
+#include <zencore/compactbinarybuilder.h>
+#include <zencore/compactbinaryutil.h>
+#include <zencore/filesystem.h>
+#include <zencore/fmtutils.h>
+#include <zencore/logging.h>
+#include <zenremotestore/chunking/chunkedfile.h>
+#include <zenremotestore/chunking/chunkingcontroller.h>
+
+ZEN_THIRD_PARTY_INCLUDES_START
+#include <tsl/robin_map.h>
+#include <xxhash.h>
+#include <gsl/gsl-lite.hpp>
+ZEN_THIRD_PARTY_INCLUDES_END
+
+#if ZEN_WITH_TESTS
+# include <zencore/testing.h>
+# include <zencore/testutils.h>
+# include <algorithm>
+#endif // ZEN_WITH_TESTS
+
+namespace zen {
+
+class NullChunkingCache : public ChunkingCache
+{
+public:
+ NullChunkingCache() {}
+
+ virtual bool GetCachedFile(const std::filesystem::path& InputPath,
+ uint64_t RawSize,
+ uint64_t ModificationTick,
+ ChunkedInfoWithSource& OutChunked) override
+ {
+ ZEN_UNUSED(InputPath, RawSize, OutChunked, ModificationTick);
+ return false;
+ }
+
+ virtual bool PutCachedFile(const std::filesystem::path& InputPath,
+ uint64_t ModificationTick,
+ const ChunkedInfoWithSource& Chunked) override
+ {
+ ZEN_UNUSED(InputPath, Chunked, ModificationTick);
+ return false;
+ }
+};
+
+class MemoryChunkingCache : public ChunkingCache
+{
+public:
+ MemoryChunkingCache() {}
+
+ virtual bool GetCachedFile(const std::filesystem::path& InputPath,
+ uint64_t RawSize,
+ uint64_t ModificationTick,
+ ChunkedInfoWithSource& OutChunked) override
+ {
+ const std::u8string PathString = InputPath.generic_u8string();
+ const IoHash PathHash = IoHash::HashBuffer(PathString.data(), PathString.length());
+
+ RwLock::SharedLockScope Lock(m_Lock);
+ if (auto It = m_PathHashToEntry.find(PathHash); It != m_PathHashToEntry.end())
+ {
+ const CachedEntry& Entry = m_Entries[It->second];
+ if (ModificationTick == Entry.ModificationTick && RawSize == Entry.Chunked.Info.RawSize)
+ {
+ OutChunked = Entry.Chunked;
+ return true;
+ }
+ else
+ {
+ Lock.ReleaseNow();
+ RwLock::ExclusiveLockScope EditLock(m_Lock);
+ if (auto RemoveIt = m_PathHashToEntry.find(PathHash); It != m_PathHashToEntry.end())
+ {
+ CachedEntry& DeleteEntry = m_Entries[It->second];
+ DeleteEntry.Chunked = {};
+ DeleteEntry.ModificationTick = 0;
+ m_FreeEntryIndexes.push_back(It->second);
+ m_PathHashToEntry.erase(It);
+ }
+ }
+ }
+ return false;
+ }
+
+ virtual bool PutCachedFile(const std::filesystem::path& InputPath,
+ uint64_t ModificationTick,
+ const ChunkedInfoWithSource& Chunked) override
+ {
+ const std::u8string PathString = InputPath.generic_u8string();
+ const IoHash PathHash = IoHash::HashBuffer(PathString.data(), PathString.length());
+
+ RwLock::ExclusiveLockScope _(m_Lock);
+ if (auto It = m_PathHashToEntry.find(PathHash); It != m_PathHashToEntry.end())
+ {
+ CachedEntry& Entry = m_Entries[It->second];
+ if (ModificationTick != Entry.ModificationTick || Chunked.Info.RawSize != Entry.Chunked.Info.RawSize)
+ {
+ Entry.Chunked = Chunked;
+ Entry.ModificationTick = ModificationTick;
+ }
+ }
+ else
+ {
+ uint32_t EntryIndex = gsl::narrow<uint32_t>(m_Entries.size());
+ if (!m_FreeEntryIndexes.empty())
+ {
+ EntryIndex = m_FreeEntryIndexes.back();
+ m_FreeEntryIndexes.pop_back();
+ m_Entries[EntryIndex] = CachedEntry{.Chunked = Chunked, .ModificationTick = ModificationTick};
+ }
+ else
+ {
+ m_Entries.emplace_back(CachedEntry{.Chunked = Chunked, .ModificationTick = ModificationTick});
+ }
+ m_PathHashToEntry.insert_or_assign(PathHash, EntryIndex);
+ }
+ return true;
+ }
+
+ RwLock m_Lock;
+
+ tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> m_PathHashToEntry;
+ std::vector<uint32_t> m_FreeEntryIndexes;
+
+ struct CachedEntry
+ {
+ ChunkedInfoWithSource Chunked;
+ uint64_t ModificationTick = 0;
+ };
+
+ std::vector<CachedEntry> m_Entries;
+};
+
+class DiskChunkingCache : public ChunkingCache
+{
+public:
+ DiskChunkingCache(const std::filesystem::path& RootPath, ChunkingController& ChunkController, uint64_t MinimumRawSizeForCaching)
+ : m_RootPath(RootPath)
+ , m_ChunkerId(GetChunkerIdentity(ChunkController))
+ , m_MinimumRawSizeForCaching(MinimumRawSizeForCaching)
+ {
+ }
+
+ virtual bool GetCachedFile(const std::filesystem::path& InputPath,
+ uint64_t RawSize,
+ uint64_t ModificationTick,
+ ChunkedInfoWithSource& OutChunked) override
+ {
+ if (RawSize < m_MinimumRawSizeForCaching)
+ {
+ return false;
+ }
+
+ const std::filesystem::path CachePath = GetCachePath(InputPath);
+
+ return ReadChunkedInfo(CachePath, RawSize, ModificationTick, OutChunked);
+ }
+
+ virtual bool PutCachedFile(const std::filesystem::path& InputPath,
+ uint64_t ModificationTick,
+ const ChunkedInfoWithSource& Chunked) override
+ {
+ if (Chunked.Info.RawSize < m_MinimumRawSizeForCaching)
+ {
+ return false;
+ }
+
+ const std::filesystem::path CachePath = GetCachePath(InputPath);
+
+ return WriteChunkedInfo(CachePath, ModificationTick, Chunked);
+ }
+
+private:
+ static constexpr uint32_t ImplementationRevision = 1;
+
+#pragma pack(push)
+#pragma pack(1)
+ struct ChunkedInfoHeader
+ {
+ static constexpr uint32_t ExpectedMagic = 0x75636368; // 'ucch';
+ static constexpr uint32_t CurrentVersion = 1;
+
+ uint32_t Magic = ExpectedMagic;
+ uint32_t Version = CurrentVersion;
+ uint64_t SequenceCount = 0;
+ uint64_t ChunkCount = 0;
+ uint64_t RawSize = 0;
+ IoHash RawHash = IoHash::Zero;
+ uint64_t ModificationTick = 0;
+ uint32_t Checksum = 0;
+
+ static uint32_t ComputeChecksum(const ChunkedInfoHeader& Header)
+ {
+ return XXH32(&Header.Magic, sizeof(Header) - sizeof(uint32_t), 0xC0C0'BABA);
+ }
+ };
+#pragma pack(pop)
+ static_assert(sizeof(ChunkedInfoHeader) == 64);
+ static_assert(sizeof(ChunkSource) == 12);
+
+ std::filesystem::path GetCachePath(const std::filesystem::path& InputPath)
+ {
+ const std::string IdentityString = fmt::format("{}_{}_{}", ImplementationRevision, m_ChunkerId, InputPath.generic_string());
+ const IoHash IdentityHash = IoHash::HashBuffer(IdentityString.data(), IdentityString.length());
+ std::filesystem::path CachePath = m_RootPath / fmt::format("{}.chunked_content", IdentityHash);
+ return CachePath;
+ }
+
+ bool WriteChunkedInfo(const std::filesystem::path& CachePath, uint64_t ModificationTick, const ChunkedInfoWithSource& Chunked)
+ {
+ CreateDirectories(CachePath.parent_path());
+
+ TemporaryFile OutputFile;
+ std::error_code Ec;
+ OutputFile.CreateTemporary(CachePath.parent_path(), Ec);
+ if (Ec)
+ {
+ ZEN_DEBUG("Failed to create temp file for cached chunked data at '{}'", CachePath);
+ return false;
+ }
+ ChunkedInfoHeader Header = {.SequenceCount = Chunked.Info.ChunkSequence.size(),
+ .ChunkCount = Chunked.Info.ChunkHashes.size(),
+ .RawSize = Chunked.Info.RawSize,
+ .RawHash = Chunked.Info.RawHash,
+ .ModificationTick = ModificationTick};
+
+ Header.Checksum = ChunkedInfoHeader::ComputeChecksum(Header);
+
+ try
+ {
+ uint64_t Offset = 0;
+
+ OutputFile.Write(&Header, sizeof(ChunkedInfoHeader), Offset);
+ Offset += sizeof(ChunkedInfoHeader);
+
+ if (Header.SequenceCount > 0)
+ {
+ OutputFile.Write(Chunked.Info.ChunkSequence.data(), Header.SequenceCount * sizeof(uint32_t), Offset);
+ Offset += Header.SequenceCount * sizeof(uint32_t);
+ }
+
+ if (Header.ChunkCount > 0)
+ {
+ OutputFile.Write(Chunked.Info.ChunkHashes.data(), Header.ChunkCount * sizeof(IoHash), Offset);
+ Offset += Header.ChunkCount * sizeof(IoHash);
+
+ OutputFile.Write(Chunked.ChunkSources.data(), Header.ChunkCount * sizeof(ChunkSource), Offset);
+ Offset += Header.ChunkCount * sizeof(ChunkSource);
+ }
+
+ OutputFile.Flush();
+ }
+ catch (const std::exception& Ex)
+ {
+ ZEN_DEBUG("Failed to write cached file {}. Reason: {}", CachePath, Ex.what());
+ return false;
+ }
+ OutputFile.MoveTemporaryIntoPlace(CachePath, Ec);
+ if (Ec)
+ {
+ ZEN_DEBUG("Failed to move temporary file {} to {}. Reason: {}", OutputFile.GetPath(), CachePath, Ec.message());
+ return false;
+ }
+
+ return true;
+ }
+
+ bool ReadChunkedInfo(const std::filesystem::path& CachePath,
+ uint64_t RawSize,
+ uint64_t ModificationTick,
+ ChunkedInfoWithSource& OutChunked)
+ {
+ BasicFile InputFile;
+ std::error_code Ec;
+ InputFile.Open(CachePath, BasicFile::Mode::kRead, Ec);
+ if (Ec)
+ {
+ return false;
+ }
+ try
+ {
+ uint64_t Size = InputFile.FileSize();
+ if (Size < sizeof(ChunkedInfoHeader))
+ {
+ throw std::runtime_error(fmt::format("Expected size >= {}, file has size {}", sizeof(ChunkedInfoHeader), Size));
+ }
+
+ uint64_t Offset = 0;
+ ChunkedInfoHeader Header;
+ InputFile.Read(&Header, sizeof(ChunkedInfoHeader), Offset);
+ Offset += sizeof(Header);
+
+ if (Header.Magic != ChunkedInfoHeader::ExpectedMagic)
+ {
+ throw std::runtime_error(
+ fmt::format("Expected magic 0x{:04x}, file has magic 0x{:04x}", ChunkedInfoHeader::ExpectedMagic, Header.Magic));
+ }
+ if (Header.Version != ChunkedInfoHeader::CurrentVersion)
+ {
+ throw std::runtime_error(
+ fmt::format("Expected version {}, file has version {}", ChunkedInfoHeader::CurrentVersion, Header.Version));
+ }
+ if (Header.Checksum != ChunkedInfoHeader::ComputeChecksum(Header))
+ {
+ throw std::runtime_error(fmt::format("Expected checksum 0x{:04x}, file has checksum 0x{:04x}",
+ Header.Checksum,
+ ChunkedInfoHeader::ComputeChecksum(Header)));
+ }
+
+ uint64_t ExpectedSize = sizeof(ChunkedInfoHeader) + Header.SequenceCount * sizeof(uint32_t) +
+ Header.ChunkCount * sizeof(IoHash) + Header.ChunkCount * sizeof(ChunkSource);
+
+ if (ExpectedSize != Size)
+ {
+ throw std::runtime_error(fmt::format("Expected size {}, file has size {}", ExpectedSize, Size));
+ }
+
+ if (Header.RawSize != RawSize)
+ {
+ InputFile.Close();
+ RemoveFile(CachePath, Ec);
+ return false;
+ }
+
+ if (Header.ModificationTick != ModificationTick)
+ {
+ InputFile.Close();
+ RemoveFile(CachePath, Ec);
+ return false;
+ }
+
+ OutChunked.Info.RawSize = Header.RawSize;
+ OutChunked.Info.RawHash = Header.RawHash;
+
+ if (Header.SequenceCount > 0)
+ {
+ OutChunked.Info.ChunkSequence.resize(Header.SequenceCount);
+ InputFile.Read(OutChunked.Info.ChunkSequence.data(), Header.SequenceCount * sizeof(uint32_t), Offset);
+ Offset += Header.SequenceCount * sizeof(uint32_t);
+ }
+
+ if (Header.ChunkCount > 0)
+ {
+ OutChunked.Info.ChunkHashes.resize(Header.ChunkCount);
+ OutChunked.ChunkSources.resize(Header.ChunkCount);
+
+ InputFile.Read(OutChunked.Info.ChunkHashes.data(), Header.ChunkCount * sizeof(IoHash), Offset);
+ Offset += Header.ChunkCount * sizeof(IoHash);
+
+ InputFile.Read(OutChunked.ChunkSources.data(), Header.ChunkCount * sizeof(ChunkSource), Offset);
+ Offset += Header.ChunkCount * sizeof(ChunkSource);
+ }
+ }
+ catch (const std::exception& Ex)
+ {
+ ZEN_DEBUG("Failed to read cached file {}. Reason: {}", CachePath, Ex.what());
+ InputFile.Close();
+ RemoveFile(CachePath, Ec);
+ return false;
+ }
+
+ return true;
+ }
+
+ const std::filesystem::path m_RootPath;
+ const IoHash m_ChunkerId;
+ const uint64_t m_MinimumRawSizeForCaching;
+
+ static IoHash GetChunkerIdentity(ChunkingController& ChunkController)
+ {
+ IoHashStream ChunkerIdStream;
+ std::string_view ChunkerName = ChunkController.GetName();
+ ChunkerIdStream.Append(ChunkerName.data(), ChunkerName.length());
+ const CbObject ChunkerParameters = ChunkController.GetParameters();
+ ChunkerParameters.GetHash(ChunkerIdStream);
+ return ChunkerIdStream.GetHash();
+ }
+};
+
+std::unique_ptr<ChunkingCache>
+CreateNullChunkingCache()
+{
+ return std::make_unique<NullChunkingCache>();
+}
+
+std::unique_ptr<ChunkingCache>
+CreateMemoryChunkingCache()
+{
+ return std::make_unique<MemoryChunkingCache>();
+}
+
+std::unique_ptr<ChunkingCache>
+CreateDiskChunkingCache(const std::filesystem::path& RootPath, ChunkingController& ChunkController, uint64_t MinimumRawSizeForCaching)
+{
+ return std::make_unique<DiskChunkingCache>(RootPath, ChunkController, MinimumRawSizeForCaching);
+}
+
+#if ZEN_WITH_TESTS
+
+namespace chunkingcache_testutils {
+ ChunkedInfoWithSource CreateChunked(const std::string_view Data, uint32_t SplitSize)
+ {
+ std::vector<uint32_t> ChunkSequence;
+ std::vector<IoHash> ChunkHashes;
+ std::vector<ChunkSource> ChunkSources;
+
+ if (SplitSize > 0)
+ {
+ std::string_view::size_type SplitOffset = 0;
+ while (SplitOffset < Data.length())
+ {
+ std::string_view DataPart(Data.substr(SplitOffset, SplitSize));
+
+ ChunkSequence.push_back(gsl::narrow<uint32_t>(ChunkSequence.size()));
+ ChunkHashes.push_back(IoHash::HashBuffer(DataPart.data(), DataPart.length()));
+ ChunkSources.push_back({.Offset = SplitOffset, .Size = gsl::narrow<uint32_t>(DataPart.length())});
+ SplitOffset += DataPart.length();
+ }
+ }
+
+ return ChunkedInfoWithSource{.Info = {.RawSize = Data.length(),
+ .RawHash = IoHash::HashBuffer(Data.data(), Data.length()),
+ .ChunkSequence = std::move(ChunkSequence),
+ .ChunkHashes = std::move(ChunkHashes)},
+ .ChunkSources = std::move(ChunkSources)};
+ }
+
+ bool Equals(const ChunkedInfoWithSource& Lhs, const ChunkedInfoWithSource& Rhs)
+ {
+ if (Lhs.ChunkSources.size() != Rhs.ChunkSources.size())
+ {
+ return false;
+ }
+ if (std::mismatch(Lhs.ChunkSources.begin(),
+ Lhs.ChunkSources.end(),
+ Rhs.ChunkSources.begin(),
+ [](const ChunkSource& Lhs, const ChunkSource& Rhs) { return Lhs.Offset == Rhs.Offset && Lhs.Size == Rhs.Size; })
+ .first != Lhs.ChunkSources.end())
+ {
+ return false;
+ }
+ if (Lhs.Info.RawSize != Rhs.Info.RawSize)
+ {
+ return false;
+ }
+ if (Lhs.Info.ChunkSequence != Rhs.Info.ChunkSequence)
+ {
+ return false;
+ }
+ if (Lhs.Info.ChunkHashes != Rhs.Info.ChunkHashes)
+ {
+ return false;
+ }
+ return true;
+ }
+} // namespace chunkingcache_testutils
+
+TEST_CASE("chunkingcache.nullchunkingcache")
+{
+ using namespace chunkingcache_testutils;
+
+ std::unique_ptr<ChunkingCache> Cache = CreateNullChunkingCache();
+ ChunkedInfoWithSource Result;
+ CHECK(!Cache->GetCachedFile("dummy-path", 495, 91283, Result));
+ CHECK(Result.Info.ChunkHashes.empty());
+
+ ChunkedInfoWithSource Chunked = CreateChunked("my data string", 4);
+ CHECK(!Cache->PutCachedFile("dummy-path", 91283, Chunked));
+
+ CHECK(!Cache->GetCachedFile("dummy-path", 495, 91283, Result));
+ CHECK(Result.Info.ChunkHashes.empty());
+}
+
+TEST_CASE("chunkingcache.memorychunkingcache")
+{
+ using namespace chunkingcache_testutils;
+
+ std::unique_ptr<ChunkingCache> Cache = CreateMemoryChunkingCache();
+ ChunkedInfoWithSource Result;
+ CHECK(!Cache->GetCachedFile("file/A/Path", 495, 91283, Result));
+ CHECK(Result.Info.ChunkHashes.empty());
+ CHECK(!Cache->GetCachedFile("file/B/Path", 395, 671283, Result));
+ CHECK(Result.Info.ChunkHashes.empty());
+
+ ChunkedInfoWithSource ChunkedAV1 = CreateChunked("File A data string", 4);
+ ChunkedInfoWithSource ChunkedAV2 = CreateChunked("File A updated data string", 4);
+ ChunkedInfoWithSource ChunkedBV1 = CreateChunked("File B data string", 4);
+
+ CHECK(Cache->PutCachedFile("file/A/Path", 91283, ChunkedAV1));
+ CHECK(Cache->PutCachedFile("file/B/Path", 51283, ChunkedBV1));
+
+ CHECK(Cache->GetCachedFile("file/A/Path", ChunkedAV1.Info.RawSize, 91283, Result));
+ CHECK(Equals(Result, ChunkedAV1));
+ Result = ChunkedInfoWithSource{};
+
+ CHECK(Cache->GetCachedFile("file/B/Path", ChunkedBV1.Info.RawSize, 51283, Result));
+ CHECK(Equals(Result, ChunkedBV1));
+ Result = ChunkedInfoWithSource{};
+
+ CHECK(!Cache->GetCachedFile("file/A/Path-wrong", 495, 91283, Result));
+ CHECK(Result.Info.ChunkHashes.empty());
+
+ CHECK(!Cache->GetCachedFile("file/A/Path", 493, 91283, Result));
+ CHECK(Result.Info.ChunkHashes.empty());
+
+ // Asking a path that exists but without a match will remove that path
+ CHECK(!Cache->GetCachedFile("file/A/Path", 495, 91283, Result));
+ CHECK(Result.Info.ChunkHashes.empty());
+
+ CHECK(!Cache->GetCachedFile("file/A/Path", 495, 9283, Result));
+ CHECK(Result.Info.ChunkHashes.empty());
+
+ CHECK(Cache->PutCachedFile("file/A/Path", 91283, ChunkedAV1));
+ CHECK(Cache->GetCachedFile("file/A/Path", ChunkedAV1.Info.RawSize, 91283, Result));
+ CHECK(Equals(Result, ChunkedAV1));
+ Result = ChunkedInfoWithSource{};
+
+ CHECK(Cache->PutCachedFile("file/A/Path", 91483, ChunkedAV2));
+ CHECK(Cache->GetCachedFile("file/A/Path", ChunkedAV2.Info.RawSize, 91483, Result));
+ CHECK(Equals(Result, ChunkedAV2));
+ Result = ChunkedInfoWithSource{};
+
+ CHECK(!Cache->GetCachedFile("file/A/Path", ChunkedAV1.Info.RawSize, 91283, Result));
+ CHECK(Result.Info.ChunkHashes.empty());
+
+ CHECK(Cache->GetCachedFile("file/B/Path", ChunkedBV1.Info.RawSize, 51283, Result));
+ CHECK(Equals(Result, ChunkedBV1));
+ Result = ChunkedInfoWithSource{};
+
+ CHECK(!Cache->GetCachedFile("file/B/Path", ChunkedBV1.Info.RawSize + 1, 51283, Result));
+ CHECK(Result.Info.ChunkHashes.empty());
+}
+
+TEST_CASE("chunkingcache.diskchunkingcache")
+{
+ using namespace chunkingcache_testutils;
+
+ ScopedTemporaryDirectory TmpDir;
+
+ std::unique_ptr<ChunkingController> ChunkController = CreateStandardChunkingController(StandardChunkingControllerSettings{});
+
+ ChunkedInfoWithSource ChunkedAV1 = CreateChunked("File A data string", 4);
+ ChunkedInfoWithSource ChunkedAV2 = CreateChunked("File A updated data string", 4);
+ ChunkedInfoWithSource ChunkedBV1 = CreateChunked("File B data string", 4);
+
+ {
+ std::unique_ptr<ChunkingCache> Cache = CreateDiskChunkingCache(TmpDir.Path(), *ChunkController, 0);
+ ChunkedInfoWithSource Result;
+ CHECK(!Cache->GetCachedFile("file/A/Path", 495, 91283, Result));
+ CHECK(Result.Info.ChunkHashes.empty());
+ CHECK(!Cache->GetCachedFile("file/B/Path", 395, 671283, Result));
+ CHECK(Result.Info.ChunkHashes.empty());
+
+ CHECK(Cache->PutCachedFile("file/A/Path", 91283, ChunkedAV1));
+ CHECK(Cache->PutCachedFile("file/B/Path", 51283, ChunkedBV1));
+
+ CHECK(Cache->GetCachedFile("file/A/Path", ChunkedAV1.Info.RawSize, 91283, Result));
+ CHECK(Equals(Result, ChunkedAV1));
+ Result = ChunkedInfoWithSource{};
+
+ CHECK(Cache->GetCachedFile("file/B/Path", ChunkedBV1.Info.RawSize, 51283, Result));
+ CHECK(Equals(Result, ChunkedBV1));
+ Result = ChunkedInfoWithSource{};
+
+ CHECK(!Cache->GetCachedFile("file/A/Path-wrong", 495, 91283, Result));
+ CHECK(Result.Info.ChunkHashes.empty());
+
+ CHECK(!Cache->GetCachedFile("file/A/Path", 493, 91283, Result));
+ CHECK(Result.Info.ChunkHashes.empty());
+
+ // Asking a path that exists but without a match will remove that path
+ CHECK(!Cache->GetCachedFile("file/A/Path", 495, 91283, Result));
+ CHECK(Result.Info.ChunkHashes.empty());
+
+ CHECK(!Cache->GetCachedFile("file/A/Path", 495, 9283, Result));
+ CHECK(Result.Info.ChunkHashes.empty());
+
+ CHECK(Cache->PutCachedFile("file/A/Path", 91283, ChunkedAV1));
+ CHECK(Cache->GetCachedFile("file/A/Path", ChunkedAV1.Info.RawSize, 91283, Result));
+ CHECK(Equals(Result, ChunkedAV1));
+ Result = ChunkedInfoWithSource{};
+
+ CHECK(Cache->PutCachedFile("file/A/Path", 91483, ChunkedAV2));
+ CHECK(Cache->GetCachedFile("file/A/Path", ChunkedAV2.Info.RawSize, 91483, Result));
+ CHECK(Equals(Result, ChunkedAV2));
+ Result = ChunkedInfoWithSource{};
+ }
+ {
+ std::unique_ptr<ChunkingCache> Cache = CreateDiskChunkingCache(TmpDir.Path(), *ChunkController, 0);
+ ChunkedInfoWithSource Result;
+
+ CHECK(Cache->GetCachedFile("file/A/Path", ChunkedAV2.Info.RawSize, 91483, Result));
+ CHECK(Equals(Result, ChunkedAV2));
+ Result = ChunkedInfoWithSource{};
+
+ CHECK(!Cache->GetCachedFile("file/A/Path", ChunkedAV2.Info.RawSize, 91283, Result));
+ CHECK(Result.Info.ChunkHashes.empty());
+
+ CHECK(!Cache->GetCachedFile("file/A/Path", ChunkedAV2.Info.RawSize, 91483, Result));
+ CHECK(Result.Info.ChunkHashes.empty());
+
+ CHECK(Cache->GetCachedFile("file/B/Path", ChunkedBV1.Info.RawSize, 51283, Result));
+ CHECK(Equals(Result, ChunkedBV1));
+ Result = ChunkedInfoWithSource{};
+
+ CHECK(!Cache->GetCachedFile("file/B/Path", ChunkedBV1.Info.RawSize + 1, 51283, Result));
+ CHECK(Result.Info.ChunkHashes.empty());
+
+ CHECK(!Cache->GetCachedFile("file/B/Path", ChunkedBV1.Info.RawSize, 51283, Result));
+ CHECK(Result.Info.ChunkHashes.empty());
+ }
+}
+
+void
+chunkingcache_forcelink()
+{
+}
+
+#endif // ZEN_WITH_TESTS
+
+} // namespace zen
diff --git a/src/zenremotestore/include/zenremotestore/builds/buildstorageoperations.h b/src/zenremotestore/include/zenremotestore/builds/buildstorageoperations.h
index 057ad1a10..9f7d93398 100644
--- a/src/zenremotestore/include/zenremotestore/builds/buildstorageoperations.h
+++ b/src/zenremotestore/include/zenremotestore/builds/buildstorageoperations.h
@@ -551,7 +551,9 @@ public:
std::vector<std::pair<Oid, std::string>> Execute(const Oid& BuildPartId,
const std::string_view BuildPartName,
- const std::filesystem::path& ManifestPath);
+ const std::filesystem::path& ManifestPath,
+ ChunkingController& ChunkController,
+ ChunkingCache& ChunkCache);
DiskStatistics m_DiskStats;
GetFolderContentStatistics m_LocalFolderScanStats;
@@ -659,6 +661,7 @@ private:
};
void UploadBuildPart(ChunkingController& ChunkController,
+ ChunkingCache& ChunkCache,
uint32_t PartIndex,
const UploadPart& Part,
uint32_t PartStepOffset,
diff --git a/src/zenremotestore/include/zenremotestore/chunking/chunkedcontent.h b/src/zenremotestore/include/zenremotestore/chunking/chunkedcontent.h
index 1e8e878df..d402bd3f0 100644
--- a/src/zenremotestore/include/zenremotestore/chunking/chunkedcontent.h
+++ b/src/zenremotestore/include/zenremotestore/chunking/chunkedcontent.h
@@ -17,6 +17,7 @@ namespace zen {
class CbWriter;
class ChunkingController;
+class ChunkingCache;
class WorkerThreadPool;
enum class SourcePlatform
@@ -165,6 +166,12 @@ struct ChunkingStatistics
std::atomic<uint64_t> UniqueChunksFound = 0;
std::atomic<uint64_t> UniqueSequencesFound = 0;
std::atomic<uint64_t> UniqueBytesFound = 0;
+ std::atomic<uint64_t> FilesFoundInCache = 0;
+ std::atomic<uint64_t> ChunksFoundInCache = 0;
+ std::atomic<uint64_t> BytesFoundInCache = 0;
+ std::atomic<uint64_t> FilesStoredInCache = 0;
+ std::atomic<uint64_t> ChunksStoredInCache = 0;
+ std::atomic<uint64_t> BytesStoredInCache = 0;
uint64_t ElapsedWallTimeUS = 0;
inline ChunkingStatistics& operator+=(const ChunkingStatistics& Rhs)
@@ -176,6 +183,12 @@ struct ChunkingStatistics
UniqueSequencesFound += Rhs.UniqueSequencesFound;
UniqueBytesFound += Rhs.UniqueBytesFound;
ElapsedWallTimeUS += Rhs.ElapsedWallTimeUS;
+ FilesFoundInCache += Rhs.FilesFoundInCache;
+ ChunksFoundInCache += Rhs.ChunksFoundInCache;
+ BytesFoundInCache += Rhs.BytesFoundInCache;
+ FilesStoredInCache += Rhs.FilesStoredInCache;
+ ChunksStoredInCache += Rhs.ChunksStoredInCache;
+ BytesStoredInCache += Rhs.BytesStoredInCache;
return *this;
}
};
@@ -185,6 +198,7 @@ ChunkedFolderContent ChunkFolderContent(ChunkingStatistics& Stats,
const std::filesystem::path& RootPath,
const FolderContent& Content,
const ChunkingController& InChunkingController,
+ ChunkingCache& InChunkingCache,
int32_t UpdateIntervalMS,
std::function<void(bool IsAborted, bool IsPaused, std::ptrdiff_t PendingWork)>&& UpdateCallback,
std::atomic<bool>& AbortFlag,
diff --git a/src/zenremotestore/include/zenremotestore/chunking/chunkedfile.h b/src/zenremotestore/include/zenremotestore/chunking/chunkedfile.h
index 4cec80fdb..64e2c9c29 100644
--- a/src/zenremotestore/include/zenremotestore/chunking/chunkedfile.h
+++ b/src/zenremotestore/include/zenremotestore/chunking/chunkedfile.h
@@ -21,11 +21,14 @@ struct ChunkedInfo
std::vector<IoHash> ChunkHashes;
};
+#pragma pack(push)
+#pragma pack(4)
struct ChunkSource
{
uint64_t Offset; // 8
uint32_t Size; // 4
};
+#pragma pack(pop)
struct ChunkedInfoWithSource
{
diff --git a/src/zenremotestore/include/zenremotestore/chunking/chunkingcache.h b/src/zenremotestore/include/zenremotestore/chunking/chunkingcache.h
new file mode 100644
index 000000000..e213bc41b
--- /dev/null
+++ b/src/zenremotestore/include/zenremotestore/chunking/chunkingcache.h
@@ -0,0 +1,44 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#pragma once
+
+#include <filesystem>
+
+namespace zen {
+
+struct ChunkedInfoWithSource;
+class ChunkingController;
+
+class ChunkingCache
+{
+public:
+ virtual ~ChunkingCache() {}
+
+ /*
+ * Attempting to fetch a cached file with mismatching RawSize of ModificationTick will delete any existing cached data for that
+ * InputPath
+ *
+ * If GetCachedFile returns false, OutChunked is untouched
+ */
+ virtual bool GetCachedFile(const std::filesystem::path& InputPath,
+ uint64_t RawSize,
+ uint64_t ModificationTick,
+ ChunkedInfoWithSource& OutChunked) = 0;
+
+ /*
+ * Putting a cached entry with an existing InputPath will overwrite it with the new ModificationTick and Chunked data
+ */
+ virtual bool PutCachedFile(const std::filesystem::path& InputPath, uint64_t ModificationTick, const ChunkedInfoWithSource& Chunked) = 0;
+};
+
+std::unique_ptr<ChunkingCache> CreateNullChunkingCache();
+std::unique_ptr<ChunkingCache> CreateMemoryChunkingCache();
+std::unique_ptr<ChunkingCache> CreateDiskChunkingCache(const std::filesystem::path& RootPath,
+ ChunkingController& ChunkController,
+ uint64_t MinimumRawSizeForCaching);
+
+#if ZEN_WITH_TESTS
+void chunkingcache_forcelink();
+#endif // ZEN_WITH_TESTS
+
+} // namespace zen
diff --git a/src/zenremotestore/zenremotestore.cpp b/src/zenremotestore/zenremotestore.cpp
index ecf85794d..0d008ec40 100644
--- a/src/zenremotestore/zenremotestore.cpp
+++ b/src/zenremotestore/zenremotestore.cpp
@@ -6,6 +6,7 @@
#include <zenremotestore/builds/buildstorageoperations.h>
#include <zenremotestore/chunking/chunkedcontent.h>
#include <zenremotestore/chunking/chunkedfile.h>
+#include <zenremotestore/chunking/chunkingcache.h>
#include <zenremotestore/filesystemutils.h>
#include <zenremotestore/projectstore/remoteprojectstore.h>
@@ -21,6 +22,7 @@ zenremotestore_forcelinktests()
chunkblock_forcelink();
chunkedcontent_forcelink();
chunkedfile_forcelink();
+ chunkingcache_forcelink();
filesystemutils_forcelink();
remoteprojectstore_forcelink();
}