diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/zen/cmds/builds_cmd.cpp | 326 | ||||
| -rw-r--r-- | src/zen/cmds/builds_cmd.h | 7 | ||||
| -rw-r--r-- | src/zenremotestore/builds/buildstorageoperations.cpp | 184 | ||||
| -rw-r--r-- | src/zenremotestore/chunking/chunkedcontent.cpp | 97 | ||||
| -rw-r--r-- | src/zenremotestore/chunking/chunkingcache.cpp | 627 | ||||
| -rw-r--r-- | src/zenremotestore/include/zenremotestore/builds/buildstorageoperations.h | 5 | ||||
| -rw-r--r-- | src/zenremotestore/include/zenremotestore/chunking/chunkedcontent.h | 14 | ||||
| -rw-r--r-- | src/zenremotestore/include/zenremotestore/chunking/chunkedfile.h | 3 | ||||
| -rw-r--r-- | src/zenremotestore/include/zenremotestore/chunking/chunkingcache.h | 44 | ||||
| -rw-r--r-- | src/zenremotestore/zenremotestore.cpp | 2 |
10 files changed, 1110 insertions, 199 deletions
diff --git a/src/zen/cmds/builds_cmd.cpp b/src/zen/cmds/builds_cmd.cpp index f859a618b..508d2ba60 100644 --- a/src/zen/cmds/builds_cmd.cpp +++ b/src/zen/cmds/builds_cmd.cpp @@ -33,6 +33,7 @@ #include <zenremotestore/chunking/chunkblock.h> #include <zenremotestore/chunking/chunkedcontent.h> #include <zenremotestore/chunking/chunkedfile.h> +#include <zenremotestore/chunking/chunkingcache.h> #include <zenremotestore/chunking/chunkingcontroller.h> #include <zenremotestore/filesystemutils.h> #include <zenremotestore/jupiter/jupiterhost.h> @@ -420,53 +421,63 @@ namespace { NiceTimeSpanMs(ValidateOp.m_ValidateStats.ElapsedWallTimeUS / 1000)); } - std::vector<std::pair<Oid, std::string>> UploadFolder(OperationLogOutput& Output, - TransferThreadWorkers& Workers, - StorageInstance& Storage, - const Oid& BuildId, - const Oid& BuildPartId, - const std::string_view BuildPartName, - const std::filesystem::path& Path, - const std::filesystem::path& TempDir, - const std::filesystem::path& ManifestPath, - const uint64_t FindBlockMaxCount, - const uint8_t BlockReuseMinPercentLimit, - bool AllowMultiparts, - const CbObject& MetaData, - bool CreateBuild, - bool IgnoreExistingBlocks, - bool UploadToZenCache, - const std::vector<std::string>& ExcludeFolders, - const std::vector<std::string>& ExcludeExtensions) + struct UploadFolderOptions + { + std::filesystem::path TempDir; + uint64_t FindBlockMaxCount; + uint8_t BlockReuseMinPercentLimit; + bool AllowMultiparts; + bool CreateBuild; + bool IgnoreExistingBlocks; + bool UploadToZenCache; + const std::vector<std::string>& ExcludeFolders = DefaultExcludeFolders; + const std::vector<std::string>& ExcludeExtensions = DefaultExcludeExtensions; + }; + + std::vector<std::pair<Oid, std::string>> UploadFolder(OperationLogOutput& Output, + TransferThreadWorkers& Workers, + StorageInstance& Storage, + const Oid& BuildId, + const Oid& BuildPartId, + const std::string_view BuildPartName, + const std::filesystem::path& Path, + const std::filesystem::path& ManifestPath, + const CbObject& MetaData, + ChunkingController& ChunkController, + ChunkingCache& ChunkCache, + const UploadFolderOptions& Options) { ProgressBar::SetLogOperationName(ProgressMode, "Upload Folder"); Stopwatch UploadTimer; - BuildsOperationUploadFolder UploadOp(Output, - Storage, - AbortFlag, - PauseFlag, - Workers.GetIOWorkerPool(), - Workers.GetNetworkPool(), - BuildId, - Path, - CreateBuild, - std::move(MetaData), - BuildsOperationUploadFolder::Options{.IsQuiet = IsQuiet, - .IsVerbose = IsVerbose, - .DoExtraContentValidation = DoExtraContentVerify, - .FindBlockMaxCount = FindBlockMaxCount, - .BlockReuseMinPercentLimit = BlockReuseMinPercentLimit, - .AllowMultiparts = AllowMultiparts, - .IgnoreExistingBlocks = IgnoreExistingBlocks, - .TempDir = TempDir, - .ExcludeFolders = ExcludeFolders, - .ExcludeExtensions = ExcludeExtensions, - .ZenExcludeManifestName = ZenExcludeManifestName, - .NonCompressableExtensions = DefaultSplitOnlyExtensions, - .PopulateCache = UploadToZenCache}); - std::vector<std::pair<Oid, std::string>> UploadedParts = UploadOp.Execute(BuildPartId, BuildPartName, ManifestPath); + BuildsOperationUploadFolder UploadOp( + Output, + Storage, + AbortFlag, + PauseFlag, + Workers.GetIOWorkerPool(), + Workers.GetNetworkPool(), + BuildId, + Path, + Options.CreateBuild, + std::move(MetaData), + BuildsOperationUploadFolder::Options{.IsQuiet = IsQuiet, + .IsVerbose = IsVerbose, + .DoExtraContentValidation = DoExtraContentVerify, + .FindBlockMaxCount = Options.FindBlockMaxCount, + .BlockReuseMinPercentLimit = Options.BlockReuseMinPercentLimit, + .AllowMultiparts = Options.AllowMultiparts, + .IgnoreExistingBlocks = Options.IgnoreExistingBlocks, + .TempDir = Options.TempDir, + .ExcludeFolders = Options.ExcludeFolders, + .ExcludeExtensions = Options.ExcludeExtensions, + .ZenExcludeManifestName = ZenExcludeManifestName, + .NonCompressableExtensions = DefaultSplitOnlyExtensions, + .PopulateCache = Options.UploadToZenCache}); + + std::vector<std::pair<Oid, std::string>> UploadedParts = + UploadOp.Execute(BuildPartId, BuildPartName, ManifestPath, ChunkController, ChunkCache); if (AbortFlag) { return {}; @@ -493,6 +504,10 @@ namespace { "\n UniqueChunksFound: {}" "\n UniqueSequencesFound: {}" "\n UniqueBytesFound: {}" + "\n FilesFoundInCache: {}" + "\n ChunksFoundInCache: {}" + "\n FilesStoredInCache: {}" + "\n ChunksStoredInCache: {}" "\n ElapsedWallTimeUS: {}", UploadOp.m_ChunkingStats.FilesProcessed.load(), UploadOp.m_ChunkingStats.FilesChunked.load(), @@ -500,6 +515,12 @@ namespace { UploadOp.m_ChunkingStats.UniqueChunksFound.load(), UploadOp.m_ChunkingStats.UniqueSequencesFound.load(), NiceBytes(UploadOp.m_ChunkingStats.UniqueBytesFound.load()), + UploadOp.m_ChunkingStats.FilesFoundInCache.load(), + UploadOp.m_ChunkingStats.ChunksFoundInCache.load(), + NiceBytes(UploadOp.m_ChunkingStats.BytesFoundInCache.load()), + UploadOp.m_ChunkingStats.FilesStoredInCache.load(), + UploadOp.m_ChunkingStats.ChunksStoredInCache.load(), + NiceBytes(UploadOp.m_ChunkingStats.BytesStoredInCache.load()), NiceLatencyNs(UploadOp.m_ChunkingStats.ElapsedWallTimeUS * 1000)); ZEN_CONSOLE_VERBOSE( @@ -608,7 +629,7 @@ namespace { : 0.0; const std::string MultipartAttachmentStats = - AllowMultiparts ? fmt::format(" ({} as multipart)", UploadOp.m_UploadStats.MultipartAttachmentCount.load()) : ""; + Options.AllowMultiparts ? fmt::format(" ({} as multipart)", UploadOp.m_UploadStats.MultipartAttachmentCount.load()) : ""; if (!IsQuiet) { @@ -643,10 +664,10 @@ namespace { NiceTimeSpanMs(UploadOp.m_GenerateBlocksStats.GenerateBlocksElapsedWallTimeUS / 1000), UploadOp.m_LooseChunksStats.CompressedChunkCount.load(), - NiceBytes(UploadOp.m_LooseChunksStats.ChunkByteCount), + NiceBytes(UploadOp.m_LooseChunksStats.CompressedChunkRawBytes), NiceBytes(UploadOp.m_LooseChunksStats.CompressedChunkBytes.load()), NiceNum(GetBytesPerSecond(UploadOp.m_LooseChunksStats.CompressChunksElapsedWallTimeUS, - UploadOp.m_LooseChunksStats.ChunkByteCount)), + UploadOp.m_LooseChunksStats.CompressedChunkRawBytes)), NiceTimeSpanMs(UploadOp.m_LooseChunksStats.CompressChunksElapsedWallTimeUS / 1000), UploadOp.m_UploadStats.BlockCount.load() + UploadOp.m_UploadStats.ChunkCount.load(), @@ -909,6 +930,7 @@ namespace { ChunkingStatistics& ChunkingStats, const std::filesystem::path& Path, ChunkingController& ChunkController, + ChunkingCache& ChunkCache, std::span<const std::filesystem::path> PathsToCheck) { FolderContent FolderState; @@ -953,6 +975,7 @@ namespace { Path, FolderState, ChunkController, + ChunkCache, GetUpdateDelayMS(ProgressMode), [&](bool IsAborted, bool IsPaused, std::ptrdiff_t) { FilteredBytesHashed.Update(LocalChunkingStats.BytesHashed.load()); @@ -988,7 +1011,8 @@ namespace { ChunkingStatistics& ChunkingStats, const std::filesystem::path& Path, const std::filesystem::path& StateFilePath, - ChunkingController& ChunkController) + ChunkingController& ChunkController, + ChunkingCache& ChunkCache) { Stopwatch ReadStateTimer; bool FileExists = IsFile(StateFilePath); @@ -1075,6 +1099,7 @@ namespace { Path, UpdatedContent, ChunkController, + ChunkCache, GetUpdateDelayMS(ProgressMode), [&](bool IsAborted, bool IsPaused, std::ptrdiff_t) { FilteredBytesHashed.Update(LocalChunkingStats.BytesHashed.load()); @@ -1143,7 +1168,8 @@ namespace { const std::filesystem::path& Path, std::function<bool(const std::string_view& RelativePath)>&& IsAcceptedFolder, std::function<bool(std::string_view RelativePath, uint64_t Size, uint32_t Attributes)>&& IsAcceptedFile, - ChunkingController& ChunkController) + ChunkingController& ChunkController, + ChunkingCache& ChunkCache) { Stopwatch Timer; @@ -1163,14 +1189,19 @@ namespace { return {}; } - BuildState LocalContent = - GetLocalContent(Workers, GetFolderContentStats, ChunkingStats, Path, ZenStateFilePath(Path / ZenFolderName), ChunkController) - .State; + BuildState LocalContent = GetLocalContent(Workers, + GetFolderContentStats, + ChunkingStats, + Path, + ZenStateFilePath(Path / ZenFolderName), + ChunkController, + ChunkCache) + .State; std::vector<std::filesystem::path> UntrackedPaths = GetNewPaths(LocalContent.ChunkedContent.Paths, Content.Paths); BuildState UntrackedLocalContent = - GetLocalStateFromPaths(Workers, GetFolderContentStats, ChunkingStats, Path, ChunkController, UntrackedPaths).State; + GetLocalStateFromPaths(Workers, GetFolderContentStats, ChunkingStats, Path, ChunkController, ChunkCache, UntrackedPaths).State; ChunkedFolderContent Result = MergeChunkedFolderContents(LocalContent.ChunkedContent, std::vector<ChunkedFolderContent>{UntrackedLocalContent.ChunkedContent}); @@ -1210,7 +1241,7 @@ namespace { uint64_t MaximumInMemoryPayloadSize = 512u * 1024u; bool PopulateCache = true; bool AppendNewContent = false; - std::vector<std::string> ExcludeFolders; + std::vector<std::string> ExcludeFolders = DefaultExcludeFolders; }; void DownloadFolder(OperationLogOutput& Output, @@ -1297,18 +1328,25 @@ namespace { ZEN_CONSOLE_INFO("Unspecified chunking algorithm, using default"); ChunkController = CreateStandardChunkingController(StandardChunkingControllerSettings{}); } + std::unique_ptr<ChunkingCache> ChunkCache(CreateNullChunkingCache()); LocalState = GetLocalContent(Workers, LocalFolderScanStats, ChunkingStats, Path, ZenStateFilePath(Path / ZenFolderName), - *ChunkController); + *ChunkController, + *ChunkCache); std::vector<std::filesystem::path> UntrackedPaths = GetNewPaths(LocalState.State.ChunkedContent.Paths, RemoteContent.Paths); - BuildSaveState UntrackedLocalContent = - GetLocalStateFromPaths(Workers, LocalFolderScanStats, ChunkingStats, Path, *ChunkController, UntrackedPaths); + BuildSaveState UntrackedLocalContent = GetLocalStateFromPaths(Workers, + LocalFolderScanStats, + ChunkingStats, + Path, + *ChunkController, + *ChunkCache, + UntrackedPaths); if (!UntrackedLocalContent.State.ChunkedContent.Paths.empty()) { @@ -1773,9 +1811,10 @@ namespace { void DiffFolders(TransferThreadWorkers& Workers, const std::filesystem::path& BasePath, const std::filesystem::path& ComparePath, - bool OnlyChunked, - const std::vector<std::string>& InExcludeFolders, - const std::vector<std::string>& InExcludeExtensions) + ChunkingController& ChunkController, + ChunkingCache& ChunkCache, + const std::vector<std::string>& ExcludeFolders, + const std::vector<std::string>& ExcludeExtensions) { ZEN_TRACE_CPU("DiffFolders"); @@ -1797,20 +1836,7 @@ namespace { ChunkedFolderContent CompareFolderContent; { - StandardChunkingControllerSettings ChunkingSettings; - std::unique_ptr<ChunkingController> ChunkController = CreateStandardChunkingController(ChunkingSettings); - std::vector<std::string> ExcludeExtensions = InExcludeExtensions; - if (OnlyChunked) - { - ExcludeExtensions.insert(ExcludeExtensions.end(), - ChunkingSettings.SplitOnlyExtensions.begin(), - ChunkingSettings.SplitOnlyExtensions.end()); - ExcludeExtensions.insert(ExcludeExtensions.end(), - ChunkingSettings.SplitAndCompressExtensions.begin(), - ChunkingSettings.SplitAndCompressExtensions.end()); - } - - auto IsAcceptedFolder = [ExcludeFolders = InExcludeFolders](const std::string_view& RelativePath) -> bool { + auto IsAcceptedFolder = [ExcludeFolders](const std::string_view& RelativePath) -> bool { for (const std::string& ExcludeFolder : ExcludeFolders) { if (RelativePath.starts_with(ExcludeFolder)) @@ -1849,7 +1875,8 @@ namespace { BasePath, IsAcceptedFolder, IsAcceptedFile, - *ChunkController); + ChunkController, + ChunkCache); if (AbortFlag) { return; @@ -1865,7 +1892,8 @@ namespace { ComparePath, IsAcceptedFolder, IsAcceptedFile, - *ChunkController); + ChunkController, + ChunkCache); if (AbortFlag) { @@ -2082,6 +2110,15 @@ BuildsCommand::BuildsCommand() "<boostworkers>"); }; + auto AddChunkingCacheOptions = [this](cxxopts::Options& Ops) { + Ops.add_option("", + "", + "chunking-cache-path", + "Path to cache for chunking information of scanned files. Default is empty resulting in no caching", + cxxopts::value(m_ChunkingCachePath), + "<chunkingcachepath>"); + }; + auto AddWildcardOptions = [this](cxxopts::Options& Ops) { Ops.add_option("", "", @@ -2238,6 +2275,7 @@ BuildsCommand::BuildsCommand() AddZenFolderOptions(m_UploadOptions); AddExcludeFolderOption(m_UploadOptions); AddExcludeExtensionsOption(m_UploadOptions); + AddChunkingCacheOptions(m_UploadOptions); m_UploadOptions.add_options()("h,help", "Print help"); m_UploadOptions.add_option("", "l", "local-path", "Root file system folder for build", cxxopts::value(m_Path), "<local-path>"); m_UploadOptions.add_option("", @@ -2431,6 +2469,7 @@ BuildsCommand::BuildsCommand() AddWorkerOptions(m_DiffOptions); AddExcludeFolderOption(m_DiffOptions); AddExcludeExtensionsOption(m_DiffOptions); + AddChunkingCacheOptions(m_DiffOptions); m_DiffOptions.add_options()("h,help", "Print help"); m_DiffOptions.add_option("", "l", "local-path", "Root file system folder used as base", cxxopts::value(m_Path), "<local-path>"); m_DiffOptions.add_option("", "c", "compare-path", "Root file system folder used as diff", cxxopts::value(m_DiffPath), "<diff-path>"); @@ -2456,6 +2495,7 @@ BuildsCommand::BuildsCommand() AddPartialBlockRequestOptions(m_TestOptions); AddWildcardOptions(m_TestOptions); AddAppendNewContentOptions(m_TestOptions); + AddChunkingCacheOptions(m_TestOptions); m_TestOptions.add_option("", "", @@ -3321,6 +3361,7 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) m_ZenFolderPath = std::filesystem::current_path() / ZenFolderName; } MakeSafeAbsolutePathÍnPlace(m_ZenFolderPath); + MakeSafeAbsolutePathÍnPlace(m_ChunkingCachePath); CreateDirectories(m_ZenFolderPath); auto _ = MakeGuard([this, &Workers]() { CleanAndRemoveDirectory(Workers.GetIOWorkerPool(), m_ZenFolderPath); }); @@ -3357,24 +3398,32 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) std::vector<std::string> ExcludeExtensions = DefaultExcludeExtensions; ParseExcludeFolderAndExtension(ExcludeFolders, ExcludeExtensions); - std::vector<std::pair<Oid, std::string>> UploadedParts = UploadFolder(*Output, - Workers, - Storage, - BuildId, - BuildPartId, - m_BuildPartName, - m_Path, - TempDir, - m_ManifestPath, - m_FindBlockMaxCount, - m_BlockReuseMinPercentLimit, - m_AllowMultiparts, - MetaData, - m_CreateBuild, - m_Clean, - m_UploadToZenCache, - ExcludeFolders, - ExcludeExtensions); + std::unique_ptr<ChunkingController> ChunkController = CreateStandardChunkingController(StandardChunkingControllerSettings{}); + std::unique_ptr<ChunkingCache> ChunkCache = m_ChunkingCachePath.empty() + ? CreateNullChunkingCache() + : CreateDiskChunkingCache(m_ChunkingCachePath, *ChunkController, 256u * 1024u); + + std::vector<std::pair<Oid, std::string>> UploadedParts = + UploadFolder(*Output, + Workers, + Storage, + BuildId, + BuildPartId, + m_BuildPartName, + m_Path, + m_ManifestPath, + MetaData, + *ChunkController, + *ChunkCache, + UploadFolderOptions{.TempDir = TempDir, + .FindBlockMaxCount = m_FindBlockMaxCount, + .BlockReuseMinPercentLimit = m_BlockReuseMinPercentLimit, + .AllowMultiparts = m_AllowMultiparts, + .CreateBuild = m_CreateBuild, + .IgnoreExistingBlocks = m_Clean, + .UploadToZenCache = m_UploadToZenCache, + .ExcludeFolders = ExcludeFolders, + .ExcludeExtensions = ExcludeExtensions}); if (!AbortFlag) { @@ -3619,11 +3668,29 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) ParsePath(); ParseDiffPath(); + MakeSafeAbsolutePathÍnPlace(m_ChunkingCachePath); + std::vector<std::string> ExcludeFolders = DefaultExcludeFolders; std::vector<std::string> ExcludeExtensions = DefaultExcludeExtensions; ParseExcludeFolderAndExtension(ExcludeFolders, ExcludeExtensions); - DiffFolders(Workers, m_Path, m_DiffPath, m_OnlyChunked, ExcludeFolders, ExcludeExtensions); + StandardChunkingControllerSettings ChunkingSettings; + std::unique_ptr<ChunkingController> ChunkController = CreateStandardChunkingController(ChunkingSettings); + std::unique_ptr<ChunkingCache> ChunkCache = m_ChunkingCachePath.empty() + ? CreateNullChunkingCache() + : CreateDiskChunkingCache(m_ChunkingCachePath, *ChunkController, 256u * 1024u); + + if (m_OnlyChunked) + { + ExcludeExtensions.insert(ExcludeExtensions.end(), + ChunkingSettings.SplitOnlyExtensions.begin(), + ChunkingSettings.SplitOnlyExtensions.end()); + ExcludeExtensions.insert(ExcludeExtensions.end(), + ChunkingSettings.SplitAndCompressExtensions.begin(), + ChunkingSettings.SplitAndCompressExtensions.end()); + } + + DiffFolders(Workers, m_Path, m_DiffPath, *ChunkController, *ChunkCache, ExcludeFolders, ExcludeExtensions); if (AbortFlag) { throw std::runtime_error("Diff folders aborted"); @@ -3988,6 +4055,7 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) m_ZenFolderPath = m_Path / ZenFolderName; } MakeSafeAbsolutePathÍnPlace(m_ZenFolderPath); + MakeSafeAbsolutePathÍnPlace(m_ChunkingCachePath); StorageInstance Storage = CreateBuildStorage(StorageStats, StorageCacheStats, @@ -4026,7 +4094,11 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) } const std::filesystem::path UploadTempDir = UploadTempDirectory(m_Path); - // std::filesystem::path UploadTempDir = m_ZenFolderPath / "upload_tmp"; + + std::unique_ptr<ChunkingController> ChunkController = CreateStandardChunkingController(StandardChunkingControllerSettings{}); + std::unique_ptr<ChunkingCache> ChunkCache = m_ChunkingCachePath.empty() + ? CreateNullChunkingCache() + : CreateDiskChunkingCache(m_ChunkingCachePath, *ChunkController, 256u * 1024u); UploadFolder(*Output, Workers, @@ -4035,22 +4107,51 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) BuildPartId, m_BuildPartName, m_Path, - UploadTempDir, {}, - m_FindBlockMaxCount, - m_BlockReuseMinPercentLimit, - m_AllowMultiparts, MetaData, - true, - false, - m_UploadToZenCache, - DefaultExcludeFolders, - DefaultExcludeExtensions); + *ChunkController, + *ChunkCache, + UploadFolderOptions{.TempDir = UploadTempDir, + .FindBlockMaxCount = m_FindBlockMaxCount, + .BlockReuseMinPercentLimit = m_BlockReuseMinPercentLimit, + .AllowMultiparts = m_AllowMultiparts, + .CreateBuild = true, + .IgnoreExistingBlocks = false, + .UploadToZenCache = m_UploadToZenCache}); + if (AbortFlag) { throw std::runtime_error("Test aborted. (Upload build)"); } + { + ZEN_CONSOLE("Upload Build {}, Part {} ({}) from '{}' with chunking cache", m_BuildId, BuildPartId, m_BuildPartName, m_Path); + + UploadFolder(*Output, + Workers, + Storage, + Oid::NewOid(), + Oid::NewOid(), + m_BuildPartName, + m_Path, + {}, + MetaData, + *ChunkController, + *ChunkCache, + UploadFolderOptions{.TempDir = UploadTempDir, + .FindBlockMaxCount = m_FindBlockMaxCount, + .BlockReuseMinPercentLimit = m_BlockReuseMinPercentLimit, + .AllowMultiparts = m_AllowMultiparts, + .CreateBuild = true, + .IgnoreExistingBlocks = false, + .UploadToZenCache = m_UploadToZenCache}); + + if (AbortFlag) + { + throw std::runtime_error("Test aborted. (Upload again, chunking is cached)"); + } + } + ValidateBuildPart(*Output, Workers, *Storage.BuildStorage, BuildId, BuildPartId, m_BuildPartName); if (!m_IncludeWildcard.empty() || !m_ExcludeWildcard.empty()) @@ -4334,17 +4435,18 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) BuildPartId2, m_BuildPartName, DownloadPath, - UploadTempDir, {}, - m_FindBlockMaxCount, - m_BlockReuseMinPercentLimit, - m_AllowMultiparts, MetaData2, - true, - false, - m_UploadToZenCache, - DefaultExcludeFolders, - DefaultExcludeExtensions); + *ChunkController, + *ChunkCache, + UploadFolderOptions{.TempDir = UploadTempDir, + .FindBlockMaxCount = m_FindBlockMaxCount, + .BlockReuseMinPercentLimit = m_BlockReuseMinPercentLimit, + .AllowMultiparts = m_AllowMultiparts, + .CreateBuild = true, + .IgnoreExistingBlocks = false, + .UploadToZenCache = m_UploadToZenCache}); + if (AbortFlag) { throw std::runtime_error("Test aborted. (Upload scrambled)"); diff --git a/src/zen/cmds/builds_cmd.h b/src/zen/cmds/builds_cmd.h index 80c64c48d..f87cd01ae 100644 --- a/src/zen/cmds/builds_cmd.h +++ b/src/zen/cmds/builds_cmd.h @@ -96,9 +96,10 @@ private: std::string m_ExcludeFolders; std::string m_ExcludeExtensions; - cxxopts::Options m_UploadOptions{"upload", "Upload a folder"}; - uint64_t m_FindBlockMaxCount = 10000; - bool m_PostUploadVerify = false; + cxxopts::Options m_UploadOptions{"upload", "Upload a folder"}; + uint64_t m_FindBlockMaxCount = 10000; + bool m_PostUploadVerify = false; + std::filesystem::path m_ChunkingCachePath; cxxopts::Options m_DownloadOptions{"download", "Download a folder"}; std::vector<std::string> m_BuildPartNames; diff --git a/src/zenremotestore/builds/buildstorageoperations.cpp b/src/zenremotestore/builds/buildstorageoperations.cpp index a8169538b..485973e2b 100644 --- a/src/zenremotestore/builds/buildstorageoperations.cpp +++ b/src/zenremotestore/builds/buildstorageoperations.cpp @@ -8,6 +8,7 @@ #include <zenremotestore/builds/buildstoragecache.h> #include <zenremotestore/builds/buildstorageutil.h> #include <zenremotestore/chunking/chunkblock.h> +#include <zenremotestore/chunking/chunkingcache.h> #include <zenremotestore/chunking/chunkingcontroller.h> #include <zenremotestore/filesystemutils.h> #include <zenremotestore/operationlogoutput.h> @@ -4703,45 +4704,42 @@ BuildsOperationUploadFolder::ReadManifestParts(const std::filesystem::path& Mani UploadParts.resize(Manifest.Parts.size()); for (size_t PartIndex = 0; PartIndex < Manifest.Parts.size(); PartIndex++) { - const PartManifest::Part& PartManifest = Manifest.Parts[PartIndex]; - UploadPart& Part = UploadParts[PartIndex]; - FolderContent& Content = Part.Content; + PartManifest::Part& PartManifest = Manifest.Parts[PartIndex]; + if (ManifestPath.is_relative()) + { + PartManifest.Files.push_back(ManifestPath); + } + + UploadPart& Part = UploadParts[PartIndex]; + FolderContent& Content = Part.Content; GetFolderContentStatistics& LocalFolderScanStats = Part.LocalFolderScanStats; const std::vector<std::filesystem::path>& AssetPaths = PartManifest.Files; - for (const std::filesystem::path& AssetPath : AssetPaths) - { - Content.Paths.push_back(AssetPath); - const std::filesystem::path AssetFilePath = (m_Path / AssetPath).make_preferred(); - Content.RawSizes.push_back(FileSizeFromPath(AssetFilePath)); -#if ZEN_PLATFORM_WINDOWS - Content.Attributes.push_back(GetFileAttributesFromPath(AssetFilePath)); -#endif // ZEN_PLATFORM_WINDOWS -#if ZEN_PLATFORM_MAC || ZEN_PLATFORM_LINUX - Content.Attributes.push_back(GetFileMode(AssetFilePath)); -#endif // ZEN_PLATFORM_MAC || ZEN_PLATFORM_LINUX - LocalFolderScanStats.AcceptedFileByteCount += Content.RawSizes.back(); - LocalFolderScanStats.AcceptedFileCount++; - } - if (ManifestPath.is_relative()) - { - Content.Paths.push_back(ManifestPath); - const std::filesystem::path ManifestFilePath = (m_Path / ManifestPath).make_preferred(); - Content.RawSizes.push_back(FileSizeFromPath(ManifestFilePath)); -#if ZEN_PLATFORM_WINDOWS - Content.Attributes.push_back(GetFileAttributesFromPath(ManifestFilePath)); -#endif // ZEN_PLATFORM_WINDOWS -#if ZEN_PLATFORM_MAC || ZEN_PLATFORM_LINUX - Content.Attributes.push_back(GetFileMode(ManifestFilePath)); -#endif // ZEN_PLATFORM_MAC || ZEN_PLATFORM_LINUX - - LocalFolderScanStats.AcceptedFileByteCount += Content.RawSizes.back(); - LocalFolderScanStats.AcceptedFileCount++; + Content = GetValidFolderContent( + m_IOWorkerPool, + LocalFolderScanStats, + m_Path, + AssetPaths, + [](uint64_t PathCount, uint64_t CompletedPathCount) { ZEN_UNUSED(PathCount, CompletedPathCount); }, + 1000, + m_AbortFlag, + m_PauseFlag); + + if (Content.Paths.size() != AssetPaths.size()) + { + const tsl::robin_set<std::filesystem::path> FoundPaths(Content.Paths.begin(), Content.Paths.end()); + ExtendableStringBuilder<1024> SB; + for (const std::filesystem::path& AssetPath : AssetPaths) + { + if (!FoundPaths.contains(AssetPath)) + { + SB << "\n " << AssetPath.generic_string(); + } + } + throw std::runtime_error( + fmt::format("Manifest file at '{}' references files that does not exist{}", ManifestPath, SB.ToView())); } - LocalFolderScanStats.FoundFileByteCount.store(LocalFolderScanStats.AcceptedFileByteCount); - LocalFolderScanStats.FoundFileCount.store(LocalFolderScanStats.AcceptedFileCount); - LocalFolderScanStats.ElapsedWallTimeUS = ManifestParseTimer.GetElapsedTimeUs(); Part.PartId = PartManifest.PartId; Part.PartName = PartManifest.PartName; @@ -4754,7 +4752,9 @@ BuildsOperationUploadFolder::ReadManifestParts(const std::filesystem::path& Mani std::vector<std::pair<Oid, std::string>> BuildsOperationUploadFolder::Execute(const Oid& BuildPartId, const std::string_view BuildPartName, - const std::filesystem::path& ManifestPath) + const std::filesystem::path& ManifestPath, + ChunkingController& ChunkController, + ChunkingCache& ChunkCache) { ZEN_TRACE_CPU("BuildsOperationUploadFolder::Execute"); try @@ -4823,19 +4823,15 @@ BuildsOperationUploadFolder::Execute(const Oid& BuildPartId, m_PrepBuildResultFuture = m_NetworkPool.EnqueueTask(std::packaged_task<PrepareBuildResult()>{[this] { return PrepareBuild(); }}, WorkerThreadPool::EMode::EnableBacklog); + for (uint32_t PartIndex = 0; PartIndex < UploadParts.size(); PartIndex++) { - std::unique_ptr<ChunkingController> ChunkController = CreateStandardChunkingController(StandardChunkingControllerSettings{}); + const uint32_t PartStepOffset = UploadPartsStep + (PartIndex * uint32_t(PartTaskSteps::StepCount)); - for (uint32_t PartIndex = 0; PartIndex < UploadParts.size(); PartIndex++) + const UploadPart& Part = UploadParts[PartIndex]; + UploadBuildPart(ChunkController, ChunkCache, PartIndex, Part, PartStepOffset, StepCount); + if (m_AbortFlag) { - const uint32_t PartStepOffset = UploadPartsStep + (PartIndex * uint32_t(PartTaskSteps::StepCount)); - - const UploadPart& Part = UploadParts[PartIndex]; - UploadBuildPart(*ChunkController, PartIndex, Part, PartStepOffset, StepCount); - if (m_AbortFlag) - { - return {}; - } + return {}; } } @@ -5501,6 +5497,7 @@ BuildsOperationUploadFolder::RebuildBlock(const ChunkedFolderContent& Content, void BuildsOperationUploadFolder::UploadBuildPart(ChunkingController& ChunkController, + ChunkingCache& ChunkCache, uint32_t PartIndex, const UploadPart& Part, uint32_t PartStepOffset, @@ -5532,6 +5529,7 @@ BuildsOperationUploadFolder::UploadBuildPart(ChunkingController& ChunkController m_Path, Part.Content, ChunkController, + ChunkCache, m_LogOutput.GetProgressUpdateDelayMS(), [&](bool IsAborted, bool IsPaused, std::ptrdiff_t) { FilteredBytesHashed.Update(ChunkingStats.BytesHashed.load()); @@ -7747,6 +7745,8 @@ namespace buildstorageoperations_testutils { TestState(const std::filesystem::path& InRootPath) : RootPath(InRootPath) , LogOutput(CreateStandardLogOutput(Log)) + , ChunkController(CreateStandardChunkingController(StandardChunkingControllerSettings{})) + , ChunkCache(CreateMemoryChunkingCache()) , WorkerPool(2) , NetworkPool(2) { @@ -7797,7 +7797,7 @@ namespace buildstorageoperations_testutils { true, MetaData, BuildsOperationUploadFolder::Options{.TempDir = TempPath}); - return Upload.Execute(BuildPartId, BuildPartName, ManifestPath); + return Upload.Execute(BuildPartId, BuildPartName, ManifestPath, *ChunkController, *ChunkCache); } void ValidateUpload(const Oid& BuildId, const std::vector<std::pair<Oid, std::string>>& Parts) @@ -7845,8 +7845,6 @@ namespace buildstorageoperations_testutils { std::vector<ChunkedFolderContent> PartContents; - std::unique_ptr<ChunkingController> ChunkController; - std::vector<ChunkBlockDescription> BlockDescriptions; std::vector<IoHash> LooseChunkHashes; @@ -7916,6 +7914,7 @@ namespace buildstorageoperations_testutils { TargetPath, CurrentLocalFolderState, *ChunkController, + *ChunkCache, 1000, [&](bool IsAborted, bool IsPaused, std::ptrdiff_t) { ZEN_UNUSED(IsAborted, IsPaused); }, AbortFlag, @@ -7995,6 +7994,9 @@ namespace buildstorageoperations_testutils { LoggerRef Log = ConsoleLog(); std::unique_ptr<OperationLogOutput> LogOutput; + std::unique_ptr<ChunkingController> ChunkController; + std::unique_ptr<ChunkingCache> ChunkCache; + StorageInstance Storage; BuildStorageBase::Statistics StorageStats; @@ -8106,6 +8108,92 @@ TEST_CASE("buildstorageoperations.upload.manifest") State.ValidateDownload(ManifestFiles, ManifestSizes, "source", "download", DownloadContent); } +TEST_CASE("buildstorageoperations.memorychunkingcache") +{ + using namespace buildstorageoperations_testutils; + + FastRandom BaseRandom; + + const size_t FileCount = 11; + + const std::string Paths[FileCount] = {{"file_1"}, + {"file_2.exe"}, + {"file_3.txt"}, + {"dir_1/dir1_file_1.exe"}, + {"dir_1/dir1_file_2.pdb"}, + {"dir_1/dir1_file_3.txt"}, + {"dir_2/dir2_dir1/dir2_dir1_file_1.exe"}, + {"dir_2/dir2_dir1/dir2_dir1_file_2.pdb"}, + {"dir_2/dir2_dir1/dir2_dir1_file_3.dll"}, + {"dir_2/dir2_dir2/dir2_dir2_file_1.txt"}, + {"dir_2/dir2_dir2/dir2_dir2_file_2.json"}}; + const uint64_t Sizes[FileCount] = + {6u * 1024u, 0, 798, 19u * 1024u, 7u * 1024u, 93, 31u * 1024u, 17u * 1024u, 13u * 1024u, 2u * 1024u, 3u * 1024u}; + + ScopedTemporaryDirectory SourceFolder; + TestState State(SourceFolder.Path()); + State.Initialize(); + State.CreateSourceData("source", Paths, Sizes); + + const Oid BuildId = Oid::NewOid(); + const Oid BuildPartId = Oid::NewOid(); + const std::string BuildPartName = "default"; + + { + const std::filesystem::path SourcePath = SourceFolder.Path() / "source"; + CbObject MetaData; + BuildsOperationUploadFolder Upload(*State.LogOutput, + State.Storage, + State.AbortFlag, + State.PauseFlag, + State.WorkerPool, + State.NetworkPool, + BuildId, + SourcePath, + true, + MetaData, + BuildsOperationUploadFolder::Options{.TempDir = State.TempPath}); + auto Result = Upload.Execute(BuildPartId, BuildPartName, {}, *State.ChunkController, *State.ChunkCache); + + CHECK_EQ(Upload.m_ChunkingStats.FilesStoredInCache.load(), FileCount - 1); // Zero size files are not stored in cache + CHECK_EQ(Upload.m_ChunkingStats.BytesStoredInCache.load(), std::accumulate(&Sizes[0], &Sizes[FileCount], uint64_t(0))); + CHECK(Upload.m_ChunkingStats.ChunksStoredInCache.load() >= FileCount - 1); // Zero size files are not stored in cache + + CHECK_EQ(Result.size(), 1u); + CHECK_EQ(Result[0].first, BuildPartId); + CHECK_EQ(Result[0].second, BuildPartName); + } + + auto Result = State.Upload(BuildId, BuildPartId, BuildPartName, "source", {}); + + const Oid BuildId2 = Oid::NewOid(); + const Oid BuildPartId2 = Oid::NewOid(); + + { + const std::filesystem::path SourcePath = SourceFolder.Path() / "source"; + CbObject MetaData; + BuildsOperationUploadFolder Upload(*State.LogOutput, + State.Storage, + State.AbortFlag, + State.PauseFlag, + State.WorkerPool, + State.NetworkPool, + BuildId2, + SourcePath, + true, + MetaData, + BuildsOperationUploadFolder::Options{.TempDir = State.TempPath}); + Upload.Execute(BuildPartId2, BuildPartName, {}, *State.ChunkController, *State.ChunkCache); + + CHECK_EQ(Upload.m_ChunkingStats.FilesFoundInCache.load(), FileCount - 1); // Zero size files are not stored in cache + CHECK_EQ(Upload.m_ChunkingStats.BytesFoundInCache.load(), std::accumulate(&Sizes[0], &Sizes[FileCount], uint64_t(0))); + CHECK(Upload.m_ChunkingStats.ChunksFoundInCache.load() >= FileCount - 1); // Zero size files are not stored in cache + } + + FolderContent DownloadContent = State.Download(BuildId2, BuildPartId2, {}, "download", /* Append */ false); + State.ValidateDownload(Paths, Sizes, "source", "download", DownloadContent); +} + TEST_CASE("buildstorageoperations.upload.multipart") { using namespace buildstorageoperations_testutils; diff --git a/src/zenremotestore/chunking/chunkedcontent.cpp b/src/zenremotestore/chunking/chunkedcontent.cpp index fda01aa56..26d179f14 100644 --- a/src/zenremotestore/chunking/chunkedcontent.cpp +++ b/src/zenremotestore/chunking/chunkedcontent.cpp @@ -13,6 +13,7 @@ #include <zencore/trace.h> #include <zenremotestore/chunking/chunkblock.h> #include <zenremotestore/chunking/chunkedfile.h> +#include <zenremotestore/chunking/chunkingcache.h> #include <zenremotestore/chunking/chunkingcontroller.h> #include <zenutil/wildcard.h> @@ -100,6 +101,8 @@ namespace { IoHash HashOneFile(ChunkingStatistics& Stats, const ChunkingController& InChunkingController, + ChunkingCache& InChunkingCache, + std::span<const uint64_t> ModificationTicks, ChunkedFolderContent& OutChunkedContent, tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& ChunkHashToChunkIndex, tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& RawHashToSequenceRawHashIndex, @@ -110,8 +113,9 @@ namespace { { ZEN_TRACE_CPU("HashOneFile"); - const uint64_t RawSize = OutChunkedContent.RawSizes[PathIndex]; - const std::filesystem::path& Path = OutChunkedContent.Paths[PathIndex]; + const std::filesystem::path& Path = OutChunkedContent.Paths[PathIndex]; + const uint64_t RawSize = OutChunkedContent.RawSizes[PathIndex]; + const uint64_t ModificationTick = ModificationTicks[PathIndex]; if (RawSize == 0) { @@ -119,16 +123,53 @@ namespace { } else { + std::filesystem::path FullPath = FolderPath / Path; + FullPath.make_preferred(); + ChunkedInfoWithSource Chunked; - const bool DidChunking = - InChunkingController.ProcessFile((FolderPath / Path).make_preferred(), RawSize, Chunked, Stats.BytesHashed, AbortFlag); - if (DidChunking) + + if (!InChunkingCache.GetCachedFile(FullPath, RawSize, ModificationTick, Chunked)) { - Lock.WithExclusiveLock([&]() { - if (!RawHashToSequenceRawHashIndex.contains(Chunked.Info.RawHash)) + const bool DidChunking = InChunkingController.ProcessFile(FullPath, RawSize, Chunked, Stats.BytesHashed, AbortFlag); + if (!DidChunking) + { + ZEN_TRACE_CPU("HashOnly"); + + IoBuffer Buffer = IoBufferBuilder::MakeFromFile(FullPath); + if (Buffer.GetSize() != RawSize) + { + throw std::runtime_error(fmt::format("Failed opening file '{}' for hashing", FolderPath / Path)); + } + + Chunked.Info.RawSize = RawSize; + Chunked.Info.RawHash = IoHash::HashBuffer(Buffer, &Stats.BytesHashed); + } + if (InChunkingCache.PutCachedFile(FullPath, ModificationTick, Chunked)) + { + Stats.FilesStoredInCache++; + Stats.ChunksStoredInCache += Chunked.Info.ChunkSequence.empty() ? 1 : Chunked.Info.ChunkHashes.size(); + Stats.BytesStoredInCache += RawSize; + } + } + else + { + Stats.FilesFoundInCache++; + Stats.ChunksFoundInCache += Chunked.Info.ChunkSequence.empty() ? 1 : Chunked.Info.ChunkHashes.size(); + Stats.BytesFoundInCache += RawSize; + } + Lock.WithExclusiveLock([&]() { + if (!RawHashToSequenceRawHashIndex.contains(Chunked.Info.RawHash)) + { + RawHashToSequenceRawHashIndex.insert( + {Chunked.Info.RawHash, gsl::narrow<uint32_t>(OutChunkedContent.ChunkedContent.SequenceRawHashes.size())}); + + if (Chunked.Info.ChunkSequence.empty()) + { + AddChunkSequence(Stats, OutChunkedContent.ChunkedContent, ChunkHashToChunkIndex, Chunked.Info.RawHash, RawSize); + Stats.UniqueSequencesFound++; + } + else { - RawHashToSequenceRawHashIndex.insert( - {Chunked.Info.RawHash, gsl::narrow<uint32_t>(OutChunkedContent.ChunkedContent.SequenceRawHashes.size())}); std::vector<uint64_t> ChunkSizes; ChunkSizes.reserve(Chunked.ChunkSources.size()); for (const ChunkSource& Source : Chunked.ChunkSources) @@ -144,34 +185,12 @@ namespace { Chunked.Info.ChunkSequence, Chunked.Info.ChunkHashes, ChunkSizes); - Stats.UniqueSequencesFound++; } - }); - Stats.FilesChunked++; - return Chunked.Info.RawHash; - } - else - { - ZEN_TRACE_CPU("HashOnly"); - - IoBuffer Buffer = IoBufferBuilder::MakeFromFile((FolderPath / Path).make_preferred()); - if (Buffer.GetSize() != RawSize) - { - throw std::runtime_error(fmt::format("Failed opening file '{}' for hashing", FolderPath / Path)); + Stats.UniqueSequencesFound++; } - const IoHash Hash = IoHash::HashBuffer(Buffer, &Stats.BytesHashed); - - Lock.WithExclusiveLock([&]() { - if (!RawHashToSequenceRawHashIndex.contains(Hash)) - { - RawHashToSequenceRawHashIndex.insert( - {Hash, gsl::narrow<uint32_t>(OutChunkedContent.ChunkedContent.SequenceRawHashes.size())}); - AddChunkSequence(Stats, OutChunkedContent.ChunkedContent, ChunkHashToChunkIndex, Hash, RawSize); - Stats.UniqueSequencesFound++; - } - }); - return Hash; - } + }); + Stats.FilesChunked++; + return Chunked.Info.RawHash; } } @@ -1113,6 +1132,7 @@ ChunkFolderContent(ChunkingStatistics& Stats, const std::filesystem::path& RootPath, const FolderContent& Content, const ChunkingController& InChunkingController, + ChunkingCache& InChunkingCache, int32_t UpdateIntervalMS, std::function<void(bool IsAborted, bool IsPaused, std::ptrdiff_t PendingWork)>&& UpdateCallback, std::atomic<bool>& AbortFlag, @@ -1123,6 +1143,10 @@ ChunkFolderContent(ChunkingStatistics& Stats, Stopwatch Timer; auto _ = MakeGuard([&Stats, &Timer]() { Stats.ElapsedWallTimeUS = Timer.GetElapsedTimeUs(); }); + ZEN_ASSERT(Content.ModificationTicks.size() == Content.Paths.size()); + ZEN_ASSERT(Content.RawSizes.size() == Content.Paths.size()); + ZEN_ASSERT(Content.Attributes.size() == Content.Paths.size()); + ChunkedFolderContent Result = {.Platform = Content.Platform, .Paths = Content.Paths, .RawSizes = Content.RawSizes, @@ -1163,12 +1187,15 @@ ChunkFolderContent(ChunkingStatistics& Stats, { break; } + Work.ScheduleWork(WorkerPool, // GetSyncWorkerPool() [&, PathIndex](std::atomic<bool>& AbortFlag) { if (!AbortFlag) { IoHash RawHash = HashOneFile(Stats, InChunkingController, + InChunkingCache, + Content.ModificationTicks, Result, ChunkHashToChunkIndex, RawHashToSequenceRawHashIndex, diff --git a/src/zenremotestore/chunking/chunkingcache.cpp b/src/zenremotestore/chunking/chunkingcache.cpp new file mode 100644 index 000000000..7f0a26330 --- /dev/null +++ b/src/zenremotestore/chunking/chunkingcache.cpp @@ -0,0 +1,627 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#include <zenremotestore/chunking/chunkingcache.h> + +#include <zenbase/zenbase.h> +#include <zencore/basicfile.h> +#include <zencore/compactbinarybuilder.h> +#include <zencore/compactbinaryutil.h> +#include <zencore/filesystem.h> +#include <zencore/fmtutils.h> +#include <zencore/logging.h> +#include <zenremotestore/chunking/chunkedfile.h> +#include <zenremotestore/chunking/chunkingcontroller.h> + +ZEN_THIRD_PARTY_INCLUDES_START +#include <tsl/robin_map.h> +#include <xxhash.h> +#include <gsl/gsl-lite.hpp> +ZEN_THIRD_PARTY_INCLUDES_END + +#if ZEN_WITH_TESTS +# include <zencore/testing.h> +# include <zencore/testutils.h> +# include <algorithm> +#endif // ZEN_WITH_TESTS + +namespace zen { + +class NullChunkingCache : public ChunkingCache +{ +public: + NullChunkingCache() {} + + virtual bool GetCachedFile(const std::filesystem::path& InputPath, + uint64_t RawSize, + uint64_t ModificationTick, + ChunkedInfoWithSource& OutChunked) override + { + ZEN_UNUSED(InputPath, RawSize, OutChunked, ModificationTick); + return false; + } + + virtual bool PutCachedFile(const std::filesystem::path& InputPath, + uint64_t ModificationTick, + const ChunkedInfoWithSource& Chunked) override + { + ZEN_UNUSED(InputPath, Chunked, ModificationTick); + return false; + } +}; + +class MemoryChunkingCache : public ChunkingCache +{ +public: + MemoryChunkingCache() {} + + virtual bool GetCachedFile(const std::filesystem::path& InputPath, + uint64_t RawSize, + uint64_t ModificationTick, + ChunkedInfoWithSource& OutChunked) override + { + const std::u8string PathString = InputPath.generic_u8string(); + const IoHash PathHash = IoHash::HashBuffer(PathString.data(), PathString.length()); + + RwLock::SharedLockScope Lock(m_Lock); + if (auto It = m_PathHashToEntry.find(PathHash); It != m_PathHashToEntry.end()) + { + const CachedEntry& Entry = m_Entries[It->second]; + if (ModificationTick == Entry.ModificationTick && RawSize == Entry.Chunked.Info.RawSize) + { + OutChunked = Entry.Chunked; + return true; + } + else + { + Lock.ReleaseNow(); + RwLock::ExclusiveLockScope EditLock(m_Lock); + if (auto RemoveIt = m_PathHashToEntry.find(PathHash); It != m_PathHashToEntry.end()) + { + CachedEntry& DeleteEntry = m_Entries[It->second]; + DeleteEntry.Chunked = {}; + DeleteEntry.ModificationTick = 0; + m_FreeEntryIndexes.push_back(It->second); + m_PathHashToEntry.erase(It); + } + } + } + return false; + } + + virtual bool PutCachedFile(const std::filesystem::path& InputPath, + uint64_t ModificationTick, + const ChunkedInfoWithSource& Chunked) override + { + const std::u8string PathString = InputPath.generic_u8string(); + const IoHash PathHash = IoHash::HashBuffer(PathString.data(), PathString.length()); + + RwLock::ExclusiveLockScope _(m_Lock); + if (auto It = m_PathHashToEntry.find(PathHash); It != m_PathHashToEntry.end()) + { + CachedEntry& Entry = m_Entries[It->second]; + if (ModificationTick != Entry.ModificationTick || Chunked.Info.RawSize != Entry.Chunked.Info.RawSize) + { + Entry.Chunked = Chunked; + Entry.ModificationTick = ModificationTick; + } + } + else + { + uint32_t EntryIndex = gsl::narrow<uint32_t>(m_Entries.size()); + if (!m_FreeEntryIndexes.empty()) + { + EntryIndex = m_FreeEntryIndexes.back(); + m_FreeEntryIndexes.pop_back(); + m_Entries[EntryIndex] = CachedEntry{.Chunked = Chunked, .ModificationTick = ModificationTick}; + } + else + { + m_Entries.emplace_back(CachedEntry{.Chunked = Chunked, .ModificationTick = ModificationTick}); + } + m_PathHashToEntry.insert_or_assign(PathHash, EntryIndex); + } + return true; + } + + RwLock m_Lock; + + tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> m_PathHashToEntry; + std::vector<uint32_t> m_FreeEntryIndexes; + + struct CachedEntry + { + ChunkedInfoWithSource Chunked; + uint64_t ModificationTick = 0; + }; + + std::vector<CachedEntry> m_Entries; +}; + +class DiskChunkingCache : public ChunkingCache +{ +public: + DiskChunkingCache(const std::filesystem::path& RootPath, ChunkingController& ChunkController, uint64_t MinimumRawSizeForCaching) + : m_RootPath(RootPath) + , m_ChunkerId(GetChunkerIdentity(ChunkController)) + , m_MinimumRawSizeForCaching(MinimumRawSizeForCaching) + { + } + + virtual bool GetCachedFile(const std::filesystem::path& InputPath, + uint64_t RawSize, + uint64_t ModificationTick, + ChunkedInfoWithSource& OutChunked) override + { + if (RawSize < m_MinimumRawSizeForCaching) + { + return false; + } + + const std::filesystem::path CachePath = GetCachePath(InputPath); + + return ReadChunkedInfo(CachePath, RawSize, ModificationTick, OutChunked); + } + + virtual bool PutCachedFile(const std::filesystem::path& InputPath, + uint64_t ModificationTick, + const ChunkedInfoWithSource& Chunked) override + { + if (Chunked.Info.RawSize < m_MinimumRawSizeForCaching) + { + return false; + } + + const std::filesystem::path CachePath = GetCachePath(InputPath); + + return WriteChunkedInfo(CachePath, ModificationTick, Chunked); + } + +private: + static constexpr uint32_t ImplementationRevision = 1; + +#pragma pack(push) +#pragma pack(1) + struct ChunkedInfoHeader + { + static constexpr uint32_t ExpectedMagic = 0x75636368; // 'ucch'; + static constexpr uint32_t CurrentVersion = 1; + + uint32_t Magic = ExpectedMagic; + uint32_t Version = CurrentVersion; + uint64_t SequenceCount = 0; + uint64_t ChunkCount = 0; + uint64_t RawSize = 0; + IoHash RawHash = IoHash::Zero; + uint64_t ModificationTick = 0; + uint32_t Checksum = 0; + + static uint32_t ComputeChecksum(const ChunkedInfoHeader& Header) + { + return XXH32(&Header.Magic, sizeof(Header) - sizeof(uint32_t), 0xC0C0'BABA); + } + }; +#pragma pack(pop) + static_assert(sizeof(ChunkedInfoHeader) == 64); + static_assert(sizeof(ChunkSource) == 12); + + std::filesystem::path GetCachePath(const std::filesystem::path& InputPath) + { + const std::string IdentityString = fmt::format("{}_{}_{}", ImplementationRevision, m_ChunkerId, InputPath.generic_string()); + const IoHash IdentityHash = IoHash::HashBuffer(IdentityString.data(), IdentityString.length()); + std::filesystem::path CachePath = m_RootPath / fmt::format("{}.chunked_content", IdentityHash); + return CachePath; + } + + bool WriteChunkedInfo(const std::filesystem::path& CachePath, uint64_t ModificationTick, const ChunkedInfoWithSource& Chunked) + { + CreateDirectories(CachePath.parent_path()); + + TemporaryFile OutputFile; + std::error_code Ec; + OutputFile.CreateTemporary(CachePath.parent_path(), Ec); + if (Ec) + { + ZEN_DEBUG("Failed to create temp file for cached chunked data at '{}'", CachePath); + return false; + } + ChunkedInfoHeader Header = {.SequenceCount = Chunked.Info.ChunkSequence.size(), + .ChunkCount = Chunked.Info.ChunkHashes.size(), + .RawSize = Chunked.Info.RawSize, + .RawHash = Chunked.Info.RawHash, + .ModificationTick = ModificationTick}; + + Header.Checksum = ChunkedInfoHeader::ComputeChecksum(Header); + + try + { + uint64_t Offset = 0; + + OutputFile.Write(&Header, sizeof(ChunkedInfoHeader), Offset); + Offset += sizeof(ChunkedInfoHeader); + + if (Header.SequenceCount > 0) + { + OutputFile.Write(Chunked.Info.ChunkSequence.data(), Header.SequenceCount * sizeof(uint32_t), Offset); + Offset += Header.SequenceCount * sizeof(uint32_t); + } + + if (Header.ChunkCount > 0) + { + OutputFile.Write(Chunked.Info.ChunkHashes.data(), Header.ChunkCount * sizeof(IoHash), Offset); + Offset += Header.ChunkCount * sizeof(IoHash); + + OutputFile.Write(Chunked.ChunkSources.data(), Header.ChunkCount * sizeof(ChunkSource), Offset); + Offset += Header.ChunkCount * sizeof(ChunkSource); + } + + OutputFile.Flush(); + } + catch (const std::exception& Ex) + { + ZEN_DEBUG("Failed to write cached file {}. Reason: {}", CachePath, Ex.what()); + return false; + } + OutputFile.MoveTemporaryIntoPlace(CachePath, Ec); + if (Ec) + { + ZEN_DEBUG("Failed to move temporary file {} to {}. Reason: {}", OutputFile.GetPath(), CachePath, Ec.message()); + return false; + } + + return true; + } + + bool ReadChunkedInfo(const std::filesystem::path& CachePath, + uint64_t RawSize, + uint64_t ModificationTick, + ChunkedInfoWithSource& OutChunked) + { + BasicFile InputFile; + std::error_code Ec; + InputFile.Open(CachePath, BasicFile::Mode::kRead, Ec); + if (Ec) + { + return false; + } + try + { + uint64_t Size = InputFile.FileSize(); + if (Size < sizeof(ChunkedInfoHeader)) + { + throw std::runtime_error(fmt::format("Expected size >= {}, file has size {}", sizeof(ChunkedInfoHeader), Size)); + } + + uint64_t Offset = 0; + ChunkedInfoHeader Header; + InputFile.Read(&Header, sizeof(ChunkedInfoHeader), Offset); + Offset += sizeof(Header); + + if (Header.Magic != ChunkedInfoHeader::ExpectedMagic) + { + throw std::runtime_error( + fmt::format("Expected magic 0x{:04x}, file has magic 0x{:04x}", ChunkedInfoHeader::ExpectedMagic, Header.Magic)); + } + if (Header.Version != ChunkedInfoHeader::CurrentVersion) + { + throw std::runtime_error( + fmt::format("Expected version {}, file has version {}", ChunkedInfoHeader::CurrentVersion, Header.Version)); + } + if (Header.Checksum != ChunkedInfoHeader::ComputeChecksum(Header)) + { + throw std::runtime_error(fmt::format("Expected checksum 0x{:04x}, file has checksum 0x{:04x}", + Header.Checksum, + ChunkedInfoHeader::ComputeChecksum(Header))); + } + + uint64_t ExpectedSize = sizeof(ChunkedInfoHeader) + Header.SequenceCount * sizeof(uint32_t) + + Header.ChunkCount * sizeof(IoHash) + Header.ChunkCount * sizeof(ChunkSource); + + if (ExpectedSize != Size) + { + throw std::runtime_error(fmt::format("Expected size {}, file has size {}", ExpectedSize, Size)); + } + + if (Header.RawSize != RawSize) + { + InputFile.Close(); + RemoveFile(CachePath, Ec); + return false; + } + + if (Header.ModificationTick != ModificationTick) + { + InputFile.Close(); + RemoveFile(CachePath, Ec); + return false; + } + + OutChunked.Info.RawSize = Header.RawSize; + OutChunked.Info.RawHash = Header.RawHash; + + if (Header.SequenceCount > 0) + { + OutChunked.Info.ChunkSequence.resize(Header.SequenceCount); + InputFile.Read(OutChunked.Info.ChunkSequence.data(), Header.SequenceCount * sizeof(uint32_t), Offset); + Offset += Header.SequenceCount * sizeof(uint32_t); + } + + if (Header.ChunkCount > 0) + { + OutChunked.Info.ChunkHashes.resize(Header.ChunkCount); + OutChunked.ChunkSources.resize(Header.ChunkCount); + + InputFile.Read(OutChunked.Info.ChunkHashes.data(), Header.ChunkCount * sizeof(IoHash), Offset); + Offset += Header.ChunkCount * sizeof(IoHash); + + InputFile.Read(OutChunked.ChunkSources.data(), Header.ChunkCount * sizeof(ChunkSource), Offset); + Offset += Header.ChunkCount * sizeof(ChunkSource); + } + } + catch (const std::exception& Ex) + { + ZEN_DEBUG("Failed to read cached file {}. Reason: {}", CachePath, Ex.what()); + InputFile.Close(); + RemoveFile(CachePath, Ec); + return false; + } + + return true; + } + + const std::filesystem::path m_RootPath; + const IoHash m_ChunkerId; + const uint64_t m_MinimumRawSizeForCaching; + + static IoHash GetChunkerIdentity(ChunkingController& ChunkController) + { + IoHashStream ChunkerIdStream; + std::string_view ChunkerName = ChunkController.GetName(); + ChunkerIdStream.Append(ChunkerName.data(), ChunkerName.length()); + const CbObject ChunkerParameters = ChunkController.GetParameters(); + ChunkerParameters.GetHash(ChunkerIdStream); + return ChunkerIdStream.GetHash(); + } +}; + +std::unique_ptr<ChunkingCache> +CreateNullChunkingCache() +{ + return std::make_unique<NullChunkingCache>(); +} + +std::unique_ptr<ChunkingCache> +CreateMemoryChunkingCache() +{ + return std::make_unique<MemoryChunkingCache>(); +} + +std::unique_ptr<ChunkingCache> +CreateDiskChunkingCache(const std::filesystem::path& RootPath, ChunkingController& ChunkController, uint64_t MinimumRawSizeForCaching) +{ + return std::make_unique<DiskChunkingCache>(RootPath, ChunkController, MinimumRawSizeForCaching); +} + +#if ZEN_WITH_TESTS + +namespace chunkingcache_testutils { + ChunkedInfoWithSource CreateChunked(const std::string_view Data, uint32_t SplitSize) + { + std::vector<uint32_t> ChunkSequence; + std::vector<IoHash> ChunkHashes; + std::vector<ChunkSource> ChunkSources; + + if (SplitSize > 0) + { + std::string_view::size_type SplitOffset = 0; + while (SplitOffset < Data.length()) + { + std::string_view DataPart(Data.substr(SplitOffset, SplitSize)); + + ChunkSequence.push_back(gsl::narrow<uint32_t>(ChunkSequence.size())); + ChunkHashes.push_back(IoHash::HashBuffer(DataPart.data(), DataPart.length())); + ChunkSources.push_back({.Offset = SplitOffset, .Size = gsl::narrow<uint32_t>(DataPart.length())}); + SplitOffset += DataPart.length(); + } + } + + return ChunkedInfoWithSource{.Info = {.RawSize = Data.length(), + .RawHash = IoHash::HashBuffer(Data.data(), Data.length()), + .ChunkSequence = std::move(ChunkSequence), + .ChunkHashes = std::move(ChunkHashes)}, + .ChunkSources = std::move(ChunkSources)}; + } + + bool Equals(const ChunkedInfoWithSource& Lhs, const ChunkedInfoWithSource& Rhs) + { + if (Lhs.ChunkSources.size() != Rhs.ChunkSources.size()) + { + return false; + } + if (std::mismatch(Lhs.ChunkSources.begin(), + Lhs.ChunkSources.end(), + Rhs.ChunkSources.begin(), + [](const ChunkSource& Lhs, const ChunkSource& Rhs) { return Lhs.Offset == Rhs.Offset && Lhs.Size == Rhs.Size; }) + .first != Lhs.ChunkSources.end()) + { + return false; + } + if (Lhs.Info.RawSize != Rhs.Info.RawSize) + { + return false; + } + if (Lhs.Info.ChunkSequence != Rhs.Info.ChunkSequence) + { + return false; + } + if (Lhs.Info.ChunkHashes != Rhs.Info.ChunkHashes) + { + return false; + } + return true; + } +} // namespace chunkingcache_testutils + +TEST_CASE("chunkingcache.nullchunkingcache") +{ + using namespace chunkingcache_testutils; + + std::unique_ptr<ChunkingCache> Cache = CreateNullChunkingCache(); + ChunkedInfoWithSource Result; + CHECK(!Cache->GetCachedFile("dummy-path", 495, 91283, Result)); + CHECK(Result.Info.ChunkHashes.empty()); + + ChunkedInfoWithSource Chunked = CreateChunked("my data string", 4); + CHECK(!Cache->PutCachedFile("dummy-path", 91283, Chunked)); + + CHECK(!Cache->GetCachedFile("dummy-path", 495, 91283, Result)); + CHECK(Result.Info.ChunkHashes.empty()); +} + +TEST_CASE("chunkingcache.memorychunkingcache") +{ + using namespace chunkingcache_testutils; + + std::unique_ptr<ChunkingCache> Cache = CreateMemoryChunkingCache(); + ChunkedInfoWithSource Result; + CHECK(!Cache->GetCachedFile("file/A/Path", 495, 91283, Result)); + CHECK(Result.Info.ChunkHashes.empty()); + CHECK(!Cache->GetCachedFile("file/B/Path", 395, 671283, Result)); + CHECK(Result.Info.ChunkHashes.empty()); + + ChunkedInfoWithSource ChunkedAV1 = CreateChunked("File A data string", 4); + ChunkedInfoWithSource ChunkedAV2 = CreateChunked("File A updated data string", 4); + ChunkedInfoWithSource ChunkedBV1 = CreateChunked("File B data string", 4); + + CHECK(Cache->PutCachedFile("file/A/Path", 91283, ChunkedAV1)); + CHECK(Cache->PutCachedFile("file/B/Path", 51283, ChunkedBV1)); + + CHECK(Cache->GetCachedFile("file/A/Path", ChunkedAV1.Info.RawSize, 91283, Result)); + CHECK(Equals(Result, ChunkedAV1)); + Result = ChunkedInfoWithSource{}; + + CHECK(Cache->GetCachedFile("file/B/Path", ChunkedBV1.Info.RawSize, 51283, Result)); + CHECK(Equals(Result, ChunkedBV1)); + Result = ChunkedInfoWithSource{}; + + CHECK(!Cache->GetCachedFile("file/A/Path-wrong", 495, 91283, Result)); + CHECK(Result.Info.ChunkHashes.empty()); + + CHECK(!Cache->GetCachedFile("file/A/Path", 493, 91283, Result)); + CHECK(Result.Info.ChunkHashes.empty()); + + // Asking a path that exists but without a match will remove that path + CHECK(!Cache->GetCachedFile("file/A/Path", 495, 91283, Result)); + CHECK(Result.Info.ChunkHashes.empty()); + + CHECK(!Cache->GetCachedFile("file/A/Path", 495, 9283, Result)); + CHECK(Result.Info.ChunkHashes.empty()); + + CHECK(Cache->PutCachedFile("file/A/Path", 91283, ChunkedAV1)); + CHECK(Cache->GetCachedFile("file/A/Path", ChunkedAV1.Info.RawSize, 91283, Result)); + CHECK(Equals(Result, ChunkedAV1)); + Result = ChunkedInfoWithSource{}; + + CHECK(Cache->PutCachedFile("file/A/Path", 91483, ChunkedAV2)); + CHECK(Cache->GetCachedFile("file/A/Path", ChunkedAV2.Info.RawSize, 91483, Result)); + CHECK(Equals(Result, ChunkedAV2)); + Result = ChunkedInfoWithSource{}; + + CHECK(!Cache->GetCachedFile("file/A/Path", ChunkedAV1.Info.RawSize, 91283, Result)); + CHECK(Result.Info.ChunkHashes.empty()); + + CHECK(Cache->GetCachedFile("file/B/Path", ChunkedBV1.Info.RawSize, 51283, Result)); + CHECK(Equals(Result, ChunkedBV1)); + Result = ChunkedInfoWithSource{}; + + CHECK(!Cache->GetCachedFile("file/B/Path", ChunkedBV1.Info.RawSize + 1, 51283, Result)); + CHECK(Result.Info.ChunkHashes.empty()); +} + +TEST_CASE("chunkingcache.diskchunkingcache") +{ + using namespace chunkingcache_testutils; + + ScopedTemporaryDirectory TmpDir; + + std::unique_ptr<ChunkingController> ChunkController = CreateStandardChunkingController(StandardChunkingControllerSettings{}); + + ChunkedInfoWithSource ChunkedAV1 = CreateChunked("File A data string", 4); + ChunkedInfoWithSource ChunkedAV2 = CreateChunked("File A updated data string", 4); + ChunkedInfoWithSource ChunkedBV1 = CreateChunked("File B data string", 4); + + { + std::unique_ptr<ChunkingCache> Cache = CreateDiskChunkingCache(TmpDir.Path(), *ChunkController, 0); + ChunkedInfoWithSource Result; + CHECK(!Cache->GetCachedFile("file/A/Path", 495, 91283, Result)); + CHECK(Result.Info.ChunkHashes.empty()); + CHECK(!Cache->GetCachedFile("file/B/Path", 395, 671283, Result)); + CHECK(Result.Info.ChunkHashes.empty()); + + CHECK(Cache->PutCachedFile("file/A/Path", 91283, ChunkedAV1)); + CHECK(Cache->PutCachedFile("file/B/Path", 51283, ChunkedBV1)); + + CHECK(Cache->GetCachedFile("file/A/Path", ChunkedAV1.Info.RawSize, 91283, Result)); + CHECK(Equals(Result, ChunkedAV1)); + Result = ChunkedInfoWithSource{}; + + CHECK(Cache->GetCachedFile("file/B/Path", ChunkedBV1.Info.RawSize, 51283, Result)); + CHECK(Equals(Result, ChunkedBV1)); + Result = ChunkedInfoWithSource{}; + + CHECK(!Cache->GetCachedFile("file/A/Path-wrong", 495, 91283, Result)); + CHECK(Result.Info.ChunkHashes.empty()); + + CHECK(!Cache->GetCachedFile("file/A/Path", 493, 91283, Result)); + CHECK(Result.Info.ChunkHashes.empty()); + + // Asking a path that exists but without a match will remove that path + CHECK(!Cache->GetCachedFile("file/A/Path", 495, 91283, Result)); + CHECK(Result.Info.ChunkHashes.empty()); + + CHECK(!Cache->GetCachedFile("file/A/Path", 495, 9283, Result)); + CHECK(Result.Info.ChunkHashes.empty()); + + CHECK(Cache->PutCachedFile("file/A/Path", 91283, ChunkedAV1)); + CHECK(Cache->GetCachedFile("file/A/Path", ChunkedAV1.Info.RawSize, 91283, Result)); + CHECK(Equals(Result, ChunkedAV1)); + Result = ChunkedInfoWithSource{}; + + CHECK(Cache->PutCachedFile("file/A/Path", 91483, ChunkedAV2)); + CHECK(Cache->GetCachedFile("file/A/Path", ChunkedAV2.Info.RawSize, 91483, Result)); + CHECK(Equals(Result, ChunkedAV2)); + Result = ChunkedInfoWithSource{}; + } + { + std::unique_ptr<ChunkingCache> Cache = CreateDiskChunkingCache(TmpDir.Path(), *ChunkController, 0); + ChunkedInfoWithSource Result; + + CHECK(Cache->GetCachedFile("file/A/Path", ChunkedAV2.Info.RawSize, 91483, Result)); + CHECK(Equals(Result, ChunkedAV2)); + Result = ChunkedInfoWithSource{}; + + CHECK(!Cache->GetCachedFile("file/A/Path", ChunkedAV2.Info.RawSize, 91283, Result)); + CHECK(Result.Info.ChunkHashes.empty()); + + CHECK(!Cache->GetCachedFile("file/A/Path", ChunkedAV2.Info.RawSize, 91483, Result)); + CHECK(Result.Info.ChunkHashes.empty()); + + CHECK(Cache->GetCachedFile("file/B/Path", ChunkedBV1.Info.RawSize, 51283, Result)); + CHECK(Equals(Result, ChunkedBV1)); + Result = ChunkedInfoWithSource{}; + + CHECK(!Cache->GetCachedFile("file/B/Path", ChunkedBV1.Info.RawSize + 1, 51283, Result)); + CHECK(Result.Info.ChunkHashes.empty()); + + CHECK(!Cache->GetCachedFile("file/B/Path", ChunkedBV1.Info.RawSize, 51283, Result)); + CHECK(Result.Info.ChunkHashes.empty()); + } +} + +void +chunkingcache_forcelink() +{ +} + +#endif // ZEN_WITH_TESTS + +} // namespace zen diff --git a/src/zenremotestore/include/zenremotestore/builds/buildstorageoperations.h b/src/zenremotestore/include/zenremotestore/builds/buildstorageoperations.h index 057ad1a10..9f7d93398 100644 --- a/src/zenremotestore/include/zenremotestore/builds/buildstorageoperations.h +++ b/src/zenremotestore/include/zenremotestore/builds/buildstorageoperations.h @@ -551,7 +551,9 @@ public: std::vector<std::pair<Oid, std::string>> Execute(const Oid& BuildPartId, const std::string_view BuildPartName, - const std::filesystem::path& ManifestPath); + const std::filesystem::path& ManifestPath, + ChunkingController& ChunkController, + ChunkingCache& ChunkCache); DiskStatistics m_DiskStats; GetFolderContentStatistics m_LocalFolderScanStats; @@ -659,6 +661,7 @@ private: }; void UploadBuildPart(ChunkingController& ChunkController, + ChunkingCache& ChunkCache, uint32_t PartIndex, const UploadPart& Part, uint32_t PartStepOffset, diff --git a/src/zenremotestore/include/zenremotestore/chunking/chunkedcontent.h b/src/zenremotestore/include/zenremotestore/chunking/chunkedcontent.h index 1e8e878df..d402bd3f0 100644 --- a/src/zenremotestore/include/zenremotestore/chunking/chunkedcontent.h +++ b/src/zenremotestore/include/zenremotestore/chunking/chunkedcontent.h @@ -17,6 +17,7 @@ namespace zen { class CbWriter; class ChunkingController; +class ChunkingCache; class WorkerThreadPool; enum class SourcePlatform @@ -165,6 +166,12 @@ struct ChunkingStatistics std::atomic<uint64_t> UniqueChunksFound = 0; std::atomic<uint64_t> UniqueSequencesFound = 0; std::atomic<uint64_t> UniqueBytesFound = 0; + std::atomic<uint64_t> FilesFoundInCache = 0; + std::atomic<uint64_t> ChunksFoundInCache = 0; + std::atomic<uint64_t> BytesFoundInCache = 0; + std::atomic<uint64_t> FilesStoredInCache = 0; + std::atomic<uint64_t> ChunksStoredInCache = 0; + std::atomic<uint64_t> BytesStoredInCache = 0; uint64_t ElapsedWallTimeUS = 0; inline ChunkingStatistics& operator+=(const ChunkingStatistics& Rhs) @@ -176,6 +183,12 @@ struct ChunkingStatistics UniqueSequencesFound += Rhs.UniqueSequencesFound; UniqueBytesFound += Rhs.UniqueBytesFound; ElapsedWallTimeUS += Rhs.ElapsedWallTimeUS; + FilesFoundInCache += Rhs.FilesFoundInCache; + ChunksFoundInCache += Rhs.ChunksFoundInCache; + BytesFoundInCache += Rhs.BytesFoundInCache; + FilesStoredInCache += Rhs.FilesStoredInCache; + ChunksStoredInCache += Rhs.ChunksStoredInCache; + BytesStoredInCache += Rhs.BytesStoredInCache; return *this; } }; @@ -185,6 +198,7 @@ ChunkedFolderContent ChunkFolderContent(ChunkingStatistics& Stats, const std::filesystem::path& RootPath, const FolderContent& Content, const ChunkingController& InChunkingController, + ChunkingCache& InChunkingCache, int32_t UpdateIntervalMS, std::function<void(bool IsAborted, bool IsPaused, std::ptrdiff_t PendingWork)>&& UpdateCallback, std::atomic<bool>& AbortFlag, diff --git a/src/zenremotestore/include/zenremotestore/chunking/chunkedfile.h b/src/zenremotestore/include/zenremotestore/chunking/chunkedfile.h index 4cec80fdb..64e2c9c29 100644 --- a/src/zenremotestore/include/zenremotestore/chunking/chunkedfile.h +++ b/src/zenremotestore/include/zenremotestore/chunking/chunkedfile.h @@ -21,11 +21,14 @@ struct ChunkedInfo std::vector<IoHash> ChunkHashes; }; +#pragma pack(push) +#pragma pack(4) struct ChunkSource { uint64_t Offset; // 8 uint32_t Size; // 4 }; +#pragma pack(pop) struct ChunkedInfoWithSource { diff --git a/src/zenremotestore/include/zenremotestore/chunking/chunkingcache.h b/src/zenremotestore/include/zenremotestore/chunking/chunkingcache.h new file mode 100644 index 000000000..e213bc41b --- /dev/null +++ b/src/zenremotestore/include/zenremotestore/chunking/chunkingcache.h @@ -0,0 +1,44 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include <filesystem> + +namespace zen { + +struct ChunkedInfoWithSource; +class ChunkingController; + +class ChunkingCache +{ +public: + virtual ~ChunkingCache() {} + + /* + * Attempting to fetch a cached file with mismatching RawSize of ModificationTick will delete any existing cached data for that + * InputPath + * + * If GetCachedFile returns false, OutChunked is untouched + */ + virtual bool GetCachedFile(const std::filesystem::path& InputPath, + uint64_t RawSize, + uint64_t ModificationTick, + ChunkedInfoWithSource& OutChunked) = 0; + + /* + * Putting a cached entry with an existing InputPath will overwrite it with the new ModificationTick and Chunked data + */ + virtual bool PutCachedFile(const std::filesystem::path& InputPath, uint64_t ModificationTick, const ChunkedInfoWithSource& Chunked) = 0; +}; + +std::unique_ptr<ChunkingCache> CreateNullChunkingCache(); +std::unique_ptr<ChunkingCache> CreateMemoryChunkingCache(); +std::unique_ptr<ChunkingCache> CreateDiskChunkingCache(const std::filesystem::path& RootPath, + ChunkingController& ChunkController, + uint64_t MinimumRawSizeForCaching); + +#if ZEN_WITH_TESTS +void chunkingcache_forcelink(); +#endif // ZEN_WITH_TESTS + +} // namespace zen diff --git a/src/zenremotestore/zenremotestore.cpp b/src/zenremotestore/zenremotestore.cpp index ecf85794d..0d008ec40 100644 --- a/src/zenremotestore/zenremotestore.cpp +++ b/src/zenremotestore/zenremotestore.cpp @@ -6,6 +6,7 @@ #include <zenremotestore/builds/buildstorageoperations.h> #include <zenremotestore/chunking/chunkedcontent.h> #include <zenremotestore/chunking/chunkedfile.h> +#include <zenremotestore/chunking/chunkingcache.h> #include <zenremotestore/filesystemutils.h> #include <zenremotestore/projectstore/remoteprojectstore.h> @@ -21,6 +22,7 @@ zenremotestore_forcelinktests() chunkblock_forcelink(); chunkedcontent_forcelink(); chunkedfile_forcelink(); + chunkingcache_forcelink(); filesystemutils_forcelink(); remoteprojectstore_forcelink(); } |