diff options
| author | Dan Engelbrecht <[email protected]> | 2026-01-23 14:48:33 +0100 |
|---|---|---|
| committer | GitHub Enterprise <[email protected]> | 2026-01-23 14:48:33 +0100 |
| commit | 0b4d6e9f9f8d3b70fe91a6859625c1390d9b9202 (patch) | |
| tree | 5031cdeef1b792d9283392afef4b96cdef9ecdf8 /src/zenremotestore | |
| parent | make sure new blocks generated by a part is accessible to following parts (#732) (diff) | |
| download | zen-5.7.19-pre0.tar.xz zen-5.7.19-pre0.zip | |
builds scanning cache (#727)v5.7.19-pre0
- Feature: Added `--chunking-cache-path` option to `zen builds upload` and `zen builds diff`
- Path to cache for chunking information of scanned files. Default is empty resulting in no caching
Diffstat (limited to 'src/zenremotestore')
8 files changed, 892 insertions, 84 deletions
diff --git a/src/zenremotestore/builds/buildstorageoperations.cpp b/src/zenremotestore/builds/buildstorageoperations.cpp index a8169538b..485973e2b 100644 --- a/src/zenremotestore/builds/buildstorageoperations.cpp +++ b/src/zenremotestore/builds/buildstorageoperations.cpp @@ -8,6 +8,7 @@ #include <zenremotestore/builds/buildstoragecache.h> #include <zenremotestore/builds/buildstorageutil.h> #include <zenremotestore/chunking/chunkblock.h> +#include <zenremotestore/chunking/chunkingcache.h> #include <zenremotestore/chunking/chunkingcontroller.h> #include <zenremotestore/filesystemutils.h> #include <zenremotestore/operationlogoutput.h> @@ -4703,45 +4704,42 @@ BuildsOperationUploadFolder::ReadManifestParts(const std::filesystem::path& Mani UploadParts.resize(Manifest.Parts.size()); for (size_t PartIndex = 0; PartIndex < Manifest.Parts.size(); PartIndex++) { - const PartManifest::Part& PartManifest = Manifest.Parts[PartIndex]; - UploadPart& Part = UploadParts[PartIndex]; - FolderContent& Content = Part.Content; + PartManifest::Part& PartManifest = Manifest.Parts[PartIndex]; + if (ManifestPath.is_relative()) + { + PartManifest.Files.push_back(ManifestPath); + } + + UploadPart& Part = UploadParts[PartIndex]; + FolderContent& Content = Part.Content; GetFolderContentStatistics& LocalFolderScanStats = Part.LocalFolderScanStats; const std::vector<std::filesystem::path>& AssetPaths = PartManifest.Files; - for (const std::filesystem::path& AssetPath : AssetPaths) - { - Content.Paths.push_back(AssetPath); - const std::filesystem::path AssetFilePath = (m_Path / AssetPath).make_preferred(); - Content.RawSizes.push_back(FileSizeFromPath(AssetFilePath)); -#if ZEN_PLATFORM_WINDOWS - Content.Attributes.push_back(GetFileAttributesFromPath(AssetFilePath)); -#endif // ZEN_PLATFORM_WINDOWS -#if ZEN_PLATFORM_MAC || ZEN_PLATFORM_LINUX - Content.Attributes.push_back(GetFileMode(AssetFilePath)); -#endif // ZEN_PLATFORM_MAC || ZEN_PLATFORM_LINUX - LocalFolderScanStats.AcceptedFileByteCount += Content.RawSizes.back(); - LocalFolderScanStats.AcceptedFileCount++; - } - if (ManifestPath.is_relative()) - { - Content.Paths.push_back(ManifestPath); - const std::filesystem::path ManifestFilePath = (m_Path / ManifestPath).make_preferred(); - Content.RawSizes.push_back(FileSizeFromPath(ManifestFilePath)); -#if ZEN_PLATFORM_WINDOWS - Content.Attributes.push_back(GetFileAttributesFromPath(ManifestFilePath)); -#endif // ZEN_PLATFORM_WINDOWS -#if ZEN_PLATFORM_MAC || ZEN_PLATFORM_LINUX - Content.Attributes.push_back(GetFileMode(ManifestFilePath)); -#endif // ZEN_PLATFORM_MAC || ZEN_PLATFORM_LINUX - - LocalFolderScanStats.AcceptedFileByteCount += Content.RawSizes.back(); - LocalFolderScanStats.AcceptedFileCount++; + Content = GetValidFolderContent( + m_IOWorkerPool, + LocalFolderScanStats, + m_Path, + AssetPaths, + [](uint64_t PathCount, uint64_t CompletedPathCount) { ZEN_UNUSED(PathCount, CompletedPathCount); }, + 1000, + m_AbortFlag, + m_PauseFlag); + + if (Content.Paths.size() != AssetPaths.size()) + { + const tsl::robin_set<std::filesystem::path> FoundPaths(Content.Paths.begin(), Content.Paths.end()); + ExtendableStringBuilder<1024> SB; + for (const std::filesystem::path& AssetPath : AssetPaths) + { + if (!FoundPaths.contains(AssetPath)) + { + SB << "\n " << AssetPath.generic_string(); + } + } + throw std::runtime_error( + fmt::format("Manifest file at '{}' references files that does not exist{}", ManifestPath, SB.ToView())); } - LocalFolderScanStats.FoundFileByteCount.store(LocalFolderScanStats.AcceptedFileByteCount); - LocalFolderScanStats.FoundFileCount.store(LocalFolderScanStats.AcceptedFileCount); - LocalFolderScanStats.ElapsedWallTimeUS = ManifestParseTimer.GetElapsedTimeUs(); Part.PartId = PartManifest.PartId; Part.PartName = PartManifest.PartName; @@ -4754,7 +4752,9 @@ BuildsOperationUploadFolder::ReadManifestParts(const std::filesystem::path& Mani std::vector<std::pair<Oid, std::string>> BuildsOperationUploadFolder::Execute(const Oid& BuildPartId, const std::string_view BuildPartName, - const std::filesystem::path& ManifestPath) + const std::filesystem::path& ManifestPath, + ChunkingController& ChunkController, + ChunkingCache& ChunkCache) { ZEN_TRACE_CPU("BuildsOperationUploadFolder::Execute"); try @@ -4823,19 +4823,15 @@ BuildsOperationUploadFolder::Execute(const Oid& BuildPartId, m_PrepBuildResultFuture = m_NetworkPool.EnqueueTask(std::packaged_task<PrepareBuildResult()>{[this] { return PrepareBuild(); }}, WorkerThreadPool::EMode::EnableBacklog); + for (uint32_t PartIndex = 0; PartIndex < UploadParts.size(); PartIndex++) { - std::unique_ptr<ChunkingController> ChunkController = CreateStandardChunkingController(StandardChunkingControllerSettings{}); + const uint32_t PartStepOffset = UploadPartsStep + (PartIndex * uint32_t(PartTaskSteps::StepCount)); - for (uint32_t PartIndex = 0; PartIndex < UploadParts.size(); PartIndex++) + const UploadPart& Part = UploadParts[PartIndex]; + UploadBuildPart(ChunkController, ChunkCache, PartIndex, Part, PartStepOffset, StepCount); + if (m_AbortFlag) { - const uint32_t PartStepOffset = UploadPartsStep + (PartIndex * uint32_t(PartTaskSteps::StepCount)); - - const UploadPart& Part = UploadParts[PartIndex]; - UploadBuildPart(*ChunkController, PartIndex, Part, PartStepOffset, StepCount); - if (m_AbortFlag) - { - return {}; - } + return {}; } } @@ -5501,6 +5497,7 @@ BuildsOperationUploadFolder::RebuildBlock(const ChunkedFolderContent& Content, void BuildsOperationUploadFolder::UploadBuildPart(ChunkingController& ChunkController, + ChunkingCache& ChunkCache, uint32_t PartIndex, const UploadPart& Part, uint32_t PartStepOffset, @@ -5532,6 +5529,7 @@ BuildsOperationUploadFolder::UploadBuildPart(ChunkingController& ChunkController m_Path, Part.Content, ChunkController, + ChunkCache, m_LogOutput.GetProgressUpdateDelayMS(), [&](bool IsAborted, bool IsPaused, std::ptrdiff_t) { FilteredBytesHashed.Update(ChunkingStats.BytesHashed.load()); @@ -7747,6 +7745,8 @@ namespace buildstorageoperations_testutils { TestState(const std::filesystem::path& InRootPath) : RootPath(InRootPath) , LogOutput(CreateStandardLogOutput(Log)) + , ChunkController(CreateStandardChunkingController(StandardChunkingControllerSettings{})) + , ChunkCache(CreateMemoryChunkingCache()) , WorkerPool(2) , NetworkPool(2) { @@ -7797,7 +7797,7 @@ namespace buildstorageoperations_testutils { true, MetaData, BuildsOperationUploadFolder::Options{.TempDir = TempPath}); - return Upload.Execute(BuildPartId, BuildPartName, ManifestPath); + return Upload.Execute(BuildPartId, BuildPartName, ManifestPath, *ChunkController, *ChunkCache); } void ValidateUpload(const Oid& BuildId, const std::vector<std::pair<Oid, std::string>>& Parts) @@ -7845,8 +7845,6 @@ namespace buildstorageoperations_testutils { std::vector<ChunkedFolderContent> PartContents; - std::unique_ptr<ChunkingController> ChunkController; - std::vector<ChunkBlockDescription> BlockDescriptions; std::vector<IoHash> LooseChunkHashes; @@ -7916,6 +7914,7 @@ namespace buildstorageoperations_testutils { TargetPath, CurrentLocalFolderState, *ChunkController, + *ChunkCache, 1000, [&](bool IsAborted, bool IsPaused, std::ptrdiff_t) { ZEN_UNUSED(IsAborted, IsPaused); }, AbortFlag, @@ -7995,6 +7994,9 @@ namespace buildstorageoperations_testutils { LoggerRef Log = ConsoleLog(); std::unique_ptr<OperationLogOutput> LogOutput; + std::unique_ptr<ChunkingController> ChunkController; + std::unique_ptr<ChunkingCache> ChunkCache; + StorageInstance Storage; BuildStorageBase::Statistics StorageStats; @@ -8106,6 +8108,92 @@ TEST_CASE("buildstorageoperations.upload.manifest") State.ValidateDownload(ManifestFiles, ManifestSizes, "source", "download", DownloadContent); } +TEST_CASE("buildstorageoperations.memorychunkingcache") +{ + using namespace buildstorageoperations_testutils; + + FastRandom BaseRandom; + + const size_t FileCount = 11; + + const std::string Paths[FileCount] = {{"file_1"}, + {"file_2.exe"}, + {"file_3.txt"}, + {"dir_1/dir1_file_1.exe"}, + {"dir_1/dir1_file_2.pdb"}, + {"dir_1/dir1_file_3.txt"}, + {"dir_2/dir2_dir1/dir2_dir1_file_1.exe"}, + {"dir_2/dir2_dir1/dir2_dir1_file_2.pdb"}, + {"dir_2/dir2_dir1/dir2_dir1_file_3.dll"}, + {"dir_2/dir2_dir2/dir2_dir2_file_1.txt"}, + {"dir_2/dir2_dir2/dir2_dir2_file_2.json"}}; + const uint64_t Sizes[FileCount] = + {6u * 1024u, 0, 798, 19u * 1024u, 7u * 1024u, 93, 31u * 1024u, 17u * 1024u, 13u * 1024u, 2u * 1024u, 3u * 1024u}; + + ScopedTemporaryDirectory SourceFolder; + TestState State(SourceFolder.Path()); + State.Initialize(); + State.CreateSourceData("source", Paths, Sizes); + + const Oid BuildId = Oid::NewOid(); + const Oid BuildPartId = Oid::NewOid(); + const std::string BuildPartName = "default"; + + { + const std::filesystem::path SourcePath = SourceFolder.Path() / "source"; + CbObject MetaData; + BuildsOperationUploadFolder Upload(*State.LogOutput, + State.Storage, + State.AbortFlag, + State.PauseFlag, + State.WorkerPool, + State.NetworkPool, + BuildId, + SourcePath, + true, + MetaData, + BuildsOperationUploadFolder::Options{.TempDir = State.TempPath}); + auto Result = Upload.Execute(BuildPartId, BuildPartName, {}, *State.ChunkController, *State.ChunkCache); + + CHECK_EQ(Upload.m_ChunkingStats.FilesStoredInCache.load(), FileCount - 1); // Zero size files are not stored in cache + CHECK_EQ(Upload.m_ChunkingStats.BytesStoredInCache.load(), std::accumulate(&Sizes[0], &Sizes[FileCount], uint64_t(0))); + CHECK(Upload.m_ChunkingStats.ChunksStoredInCache.load() >= FileCount - 1); // Zero size files are not stored in cache + + CHECK_EQ(Result.size(), 1u); + CHECK_EQ(Result[0].first, BuildPartId); + CHECK_EQ(Result[0].second, BuildPartName); + } + + auto Result = State.Upload(BuildId, BuildPartId, BuildPartName, "source", {}); + + const Oid BuildId2 = Oid::NewOid(); + const Oid BuildPartId2 = Oid::NewOid(); + + { + const std::filesystem::path SourcePath = SourceFolder.Path() / "source"; + CbObject MetaData; + BuildsOperationUploadFolder Upload(*State.LogOutput, + State.Storage, + State.AbortFlag, + State.PauseFlag, + State.WorkerPool, + State.NetworkPool, + BuildId2, + SourcePath, + true, + MetaData, + BuildsOperationUploadFolder::Options{.TempDir = State.TempPath}); + Upload.Execute(BuildPartId2, BuildPartName, {}, *State.ChunkController, *State.ChunkCache); + + CHECK_EQ(Upload.m_ChunkingStats.FilesFoundInCache.load(), FileCount - 1); // Zero size files are not stored in cache + CHECK_EQ(Upload.m_ChunkingStats.BytesFoundInCache.load(), std::accumulate(&Sizes[0], &Sizes[FileCount], uint64_t(0))); + CHECK(Upload.m_ChunkingStats.ChunksFoundInCache.load() >= FileCount - 1); // Zero size files are not stored in cache + } + + FolderContent DownloadContent = State.Download(BuildId2, BuildPartId2, {}, "download", /* Append */ false); + State.ValidateDownload(Paths, Sizes, "source", "download", DownloadContent); +} + TEST_CASE("buildstorageoperations.upload.multipart") { using namespace buildstorageoperations_testutils; diff --git a/src/zenremotestore/chunking/chunkedcontent.cpp b/src/zenremotestore/chunking/chunkedcontent.cpp index fda01aa56..26d179f14 100644 --- a/src/zenremotestore/chunking/chunkedcontent.cpp +++ b/src/zenremotestore/chunking/chunkedcontent.cpp @@ -13,6 +13,7 @@ #include <zencore/trace.h> #include <zenremotestore/chunking/chunkblock.h> #include <zenremotestore/chunking/chunkedfile.h> +#include <zenremotestore/chunking/chunkingcache.h> #include <zenremotestore/chunking/chunkingcontroller.h> #include <zenutil/wildcard.h> @@ -100,6 +101,8 @@ namespace { IoHash HashOneFile(ChunkingStatistics& Stats, const ChunkingController& InChunkingController, + ChunkingCache& InChunkingCache, + std::span<const uint64_t> ModificationTicks, ChunkedFolderContent& OutChunkedContent, tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& ChunkHashToChunkIndex, tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& RawHashToSequenceRawHashIndex, @@ -110,8 +113,9 @@ namespace { { ZEN_TRACE_CPU("HashOneFile"); - const uint64_t RawSize = OutChunkedContent.RawSizes[PathIndex]; - const std::filesystem::path& Path = OutChunkedContent.Paths[PathIndex]; + const std::filesystem::path& Path = OutChunkedContent.Paths[PathIndex]; + const uint64_t RawSize = OutChunkedContent.RawSizes[PathIndex]; + const uint64_t ModificationTick = ModificationTicks[PathIndex]; if (RawSize == 0) { @@ -119,16 +123,53 @@ namespace { } else { + std::filesystem::path FullPath = FolderPath / Path; + FullPath.make_preferred(); + ChunkedInfoWithSource Chunked; - const bool DidChunking = - InChunkingController.ProcessFile((FolderPath / Path).make_preferred(), RawSize, Chunked, Stats.BytesHashed, AbortFlag); - if (DidChunking) + + if (!InChunkingCache.GetCachedFile(FullPath, RawSize, ModificationTick, Chunked)) { - Lock.WithExclusiveLock([&]() { - if (!RawHashToSequenceRawHashIndex.contains(Chunked.Info.RawHash)) + const bool DidChunking = InChunkingController.ProcessFile(FullPath, RawSize, Chunked, Stats.BytesHashed, AbortFlag); + if (!DidChunking) + { + ZEN_TRACE_CPU("HashOnly"); + + IoBuffer Buffer = IoBufferBuilder::MakeFromFile(FullPath); + if (Buffer.GetSize() != RawSize) + { + throw std::runtime_error(fmt::format("Failed opening file '{}' for hashing", FolderPath / Path)); + } + + Chunked.Info.RawSize = RawSize; + Chunked.Info.RawHash = IoHash::HashBuffer(Buffer, &Stats.BytesHashed); + } + if (InChunkingCache.PutCachedFile(FullPath, ModificationTick, Chunked)) + { + Stats.FilesStoredInCache++; + Stats.ChunksStoredInCache += Chunked.Info.ChunkSequence.empty() ? 1 : Chunked.Info.ChunkHashes.size(); + Stats.BytesStoredInCache += RawSize; + } + } + else + { + Stats.FilesFoundInCache++; + Stats.ChunksFoundInCache += Chunked.Info.ChunkSequence.empty() ? 1 : Chunked.Info.ChunkHashes.size(); + Stats.BytesFoundInCache += RawSize; + } + Lock.WithExclusiveLock([&]() { + if (!RawHashToSequenceRawHashIndex.contains(Chunked.Info.RawHash)) + { + RawHashToSequenceRawHashIndex.insert( + {Chunked.Info.RawHash, gsl::narrow<uint32_t>(OutChunkedContent.ChunkedContent.SequenceRawHashes.size())}); + + if (Chunked.Info.ChunkSequence.empty()) + { + AddChunkSequence(Stats, OutChunkedContent.ChunkedContent, ChunkHashToChunkIndex, Chunked.Info.RawHash, RawSize); + Stats.UniqueSequencesFound++; + } + else { - RawHashToSequenceRawHashIndex.insert( - {Chunked.Info.RawHash, gsl::narrow<uint32_t>(OutChunkedContent.ChunkedContent.SequenceRawHashes.size())}); std::vector<uint64_t> ChunkSizes; ChunkSizes.reserve(Chunked.ChunkSources.size()); for (const ChunkSource& Source : Chunked.ChunkSources) @@ -144,34 +185,12 @@ namespace { Chunked.Info.ChunkSequence, Chunked.Info.ChunkHashes, ChunkSizes); - Stats.UniqueSequencesFound++; } - }); - Stats.FilesChunked++; - return Chunked.Info.RawHash; - } - else - { - ZEN_TRACE_CPU("HashOnly"); - - IoBuffer Buffer = IoBufferBuilder::MakeFromFile((FolderPath / Path).make_preferred()); - if (Buffer.GetSize() != RawSize) - { - throw std::runtime_error(fmt::format("Failed opening file '{}' for hashing", FolderPath / Path)); + Stats.UniqueSequencesFound++; } - const IoHash Hash = IoHash::HashBuffer(Buffer, &Stats.BytesHashed); - - Lock.WithExclusiveLock([&]() { - if (!RawHashToSequenceRawHashIndex.contains(Hash)) - { - RawHashToSequenceRawHashIndex.insert( - {Hash, gsl::narrow<uint32_t>(OutChunkedContent.ChunkedContent.SequenceRawHashes.size())}); - AddChunkSequence(Stats, OutChunkedContent.ChunkedContent, ChunkHashToChunkIndex, Hash, RawSize); - Stats.UniqueSequencesFound++; - } - }); - return Hash; - } + }); + Stats.FilesChunked++; + return Chunked.Info.RawHash; } } @@ -1113,6 +1132,7 @@ ChunkFolderContent(ChunkingStatistics& Stats, const std::filesystem::path& RootPath, const FolderContent& Content, const ChunkingController& InChunkingController, + ChunkingCache& InChunkingCache, int32_t UpdateIntervalMS, std::function<void(bool IsAborted, bool IsPaused, std::ptrdiff_t PendingWork)>&& UpdateCallback, std::atomic<bool>& AbortFlag, @@ -1123,6 +1143,10 @@ ChunkFolderContent(ChunkingStatistics& Stats, Stopwatch Timer; auto _ = MakeGuard([&Stats, &Timer]() { Stats.ElapsedWallTimeUS = Timer.GetElapsedTimeUs(); }); + ZEN_ASSERT(Content.ModificationTicks.size() == Content.Paths.size()); + ZEN_ASSERT(Content.RawSizes.size() == Content.Paths.size()); + ZEN_ASSERT(Content.Attributes.size() == Content.Paths.size()); + ChunkedFolderContent Result = {.Platform = Content.Platform, .Paths = Content.Paths, .RawSizes = Content.RawSizes, @@ -1163,12 +1187,15 @@ ChunkFolderContent(ChunkingStatistics& Stats, { break; } + Work.ScheduleWork(WorkerPool, // GetSyncWorkerPool() [&, PathIndex](std::atomic<bool>& AbortFlag) { if (!AbortFlag) { IoHash RawHash = HashOneFile(Stats, InChunkingController, + InChunkingCache, + Content.ModificationTicks, Result, ChunkHashToChunkIndex, RawHashToSequenceRawHashIndex, diff --git a/src/zenremotestore/chunking/chunkingcache.cpp b/src/zenremotestore/chunking/chunkingcache.cpp new file mode 100644 index 000000000..7f0a26330 --- /dev/null +++ b/src/zenremotestore/chunking/chunkingcache.cpp @@ -0,0 +1,627 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#include <zenremotestore/chunking/chunkingcache.h> + +#include <zenbase/zenbase.h> +#include <zencore/basicfile.h> +#include <zencore/compactbinarybuilder.h> +#include <zencore/compactbinaryutil.h> +#include <zencore/filesystem.h> +#include <zencore/fmtutils.h> +#include <zencore/logging.h> +#include <zenremotestore/chunking/chunkedfile.h> +#include <zenremotestore/chunking/chunkingcontroller.h> + +ZEN_THIRD_PARTY_INCLUDES_START +#include <tsl/robin_map.h> +#include <xxhash.h> +#include <gsl/gsl-lite.hpp> +ZEN_THIRD_PARTY_INCLUDES_END + +#if ZEN_WITH_TESTS +# include <zencore/testing.h> +# include <zencore/testutils.h> +# include <algorithm> +#endif // ZEN_WITH_TESTS + +namespace zen { + +class NullChunkingCache : public ChunkingCache +{ +public: + NullChunkingCache() {} + + virtual bool GetCachedFile(const std::filesystem::path& InputPath, + uint64_t RawSize, + uint64_t ModificationTick, + ChunkedInfoWithSource& OutChunked) override + { + ZEN_UNUSED(InputPath, RawSize, OutChunked, ModificationTick); + return false; + } + + virtual bool PutCachedFile(const std::filesystem::path& InputPath, + uint64_t ModificationTick, + const ChunkedInfoWithSource& Chunked) override + { + ZEN_UNUSED(InputPath, Chunked, ModificationTick); + return false; + } +}; + +class MemoryChunkingCache : public ChunkingCache +{ +public: + MemoryChunkingCache() {} + + virtual bool GetCachedFile(const std::filesystem::path& InputPath, + uint64_t RawSize, + uint64_t ModificationTick, + ChunkedInfoWithSource& OutChunked) override + { + const std::u8string PathString = InputPath.generic_u8string(); + const IoHash PathHash = IoHash::HashBuffer(PathString.data(), PathString.length()); + + RwLock::SharedLockScope Lock(m_Lock); + if (auto It = m_PathHashToEntry.find(PathHash); It != m_PathHashToEntry.end()) + { + const CachedEntry& Entry = m_Entries[It->second]; + if (ModificationTick == Entry.ModificationTick && RawSize == Entry.Chunked.Info.RawSize) + { + OutChunked = Entry.Chunked; + return true; + } + else + { + Lock.ReleaseNow(); + RwLock::ExclusiveLockScope EditLock(m_Lock); + if (auto RemoveIt = m_PathHashToEntry.find(PathHash); It != m_PathHashToEntry.end()) + { + CachedEntry& DeleteEntry = m_Entries[It->second]; + DeleteEntry.Chunked = {}; + DeleteEntry.ModificationTick = 0; + m_FreeEntryIndexes.push_back(It->second); + m_PathHashToEntry.erase(It); + } + } + } + return false; + } + + virtual bool PutCachedFile(const std::filesystem::path& InputPath, + uint64_t ModificationTick, + const ChunkedInfoWithSource& Chunked) override + { + const std::u8string PathString = InputPath.generic_u8string(); + const IoHash PathHash = IoHash::HashBuffer(PathString.data(), PathString.length()); + + RwLock::ExclusiveLockScope _(m_Lock); + if (auto It = m_PathHashToEntry.find(PathHash); It != m_PathHashToEntry.end()) + { + CachedEntry& Entry = m_Entries[It->second]; + if (ModificationTick != Entry.ModificationTick || Chunked.Info.RawSize != Entry.Chunked.Info.RawSize) + { + Entry.Chunked = Chunked; + Entry.ModificationTick = ModificationTick; + } + } + else + { + uint32_t EntryIndex = gsl::narrow<uint32_t>(m_Entries.size()); + if (!m_FreeEntryIndexes.empty()) + { + EntryIndex = m_FreeEntryIndexes.back(); + m_FreeEntryIndexes.pop_back(); + m_Entries[EntryIndex] = CachedEntry{.Chunked = Chunked, .ModificationTick = ModificationTick}; + } + else + { + m_Entries.emplace_back(CachedEntry{.Chunked = Chunked, .ModificationTick = ModificationTick}); + } + m_PathHashToEntry.insert_or_assign(PathHash, EntryIndex); + } + return true; + } + + RwLock m_Lock; + + tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> m_PathHashToEntry; + std::vector<uint32_t> m_FreeEntryIndexes; + + struct CachedEntry + { + ChunkedInfoWithSource Chunked; + uint64_t ModificationTick = 0; + }; + + std::vector<CachedEntry> m_Entries; +}; + +class DiskChunkingCache : public ChunkingCache +{ +public: + DiskChunkingCache(const std::filesystem::path& RootPath, ChunkingController& ChunkController, uint64_t MinimumRawSizeForCaching) + : m_RootPath(RootPath) + , m_ChunkerId(GetChunkerIdentity(ChunkController)) + , m_MinimumRawSizeForCaching(MinimumRawSizeForCaching) + { + } + + virtual bool GetCachedFile(const std::filesystem::path& InputPath, + uint64_t RawSize, + uint64_t ModificationTick, + ChunkedInfoWithSource& OutChunked) override + { + if (RawSize < m_MinimumRawSizeForCaching) + { + return false; + } + + const std::filesystem::path CachePath = GetCachePath(InputPath); + + return ReadChunkedInfo(CachePath, RawSize, ModificationTick, OutChunked); + } + + virtual bool PutCachedFile(const std::filesystem::path& InputPath, + uint64_t ModificationTick, + const ChunkedInfoWithSource& Chunked) override + { + if (Chunked.Info.RawSize < m_MinimumRawSizeForCaching) + { + return false; + } + + const std::filesystem::path CachePath = GetCachePath(InputPath); + + return WriteChunkedInfo(CachePath, ModificationTick, Chunked); + } + +private: + static constexpr uint32_t ImplementationRevision = 1; + +#pragma pack(push) +#pragma pack(1) + struct ChunkedInfoHeader + { + static constexpr uint32_t ExpectedMagic = 0x75636368; // 'ucch'; + static constexpr uint32_t CurrentVersion = 1; + + uint32_t Magic = ExpectedMagic; + uint32_t Version = CurrentVersion; + uint64_t SequenceCount = 0; + uint64_t ChunkCount = 0; + uint64_t RawSize = 0; + IoHash RawHash = IoHash::Zero; + uint64_t ModificationTick = 0; + uint32_t Checksum = 0; + + static uint32_t ComputeChecksum(const ChunkedInfoHeader& Header) + { + return XXH32(&Header.Magic, sizeof(Header) - sizeof(uint32_t), 0xC0C0'BABA); + } + }; +#pragma pack(pop) + static_assert(sizeof(ChunkedInfoHeader) == 64); + static_assert(sizeof(ChunkSource) == 12); + + std::filesystem::path GetCachePath(const std::filesystem::path& InputPath) + { + const std::string IdentityString = fmt::format("{}_{}_{}", ImplementationRevision, m_ChunkerId, InputPath.generic_string()); + const IoHash IdentityHash = IoHash::HashBuffer(IdentityString.data(), IdentityString.length()); + std::filesystem::path CachePath = m_RootPath / fmt::format("{}.chunked_content", IdentityHash); + return CachePath; + } + + bool WriteChunkedInfo(const std::filesystem::path& CachePath, uint64_t ModificationTick, const ChunkedInfoWithSource& Chunked) + { + CreateDirectories(CachePath.parent_path()); + + TemporaryFile OutputFile; + std::error_code Ec; + OutputFile.CreateTemporary(CachePath.parent_path(), Ec); + if (Ec) + { + ZEN_DEBUG("Failed to create temp file for cached chunked data at '{}'", CachePath); + return false; + } + ChunkedInfoHeader Header = {.SequenceCount = Chunked.Info.ChunkSequence.size(), + .ChunkCount = Chunked.Info.ChunkHashes.size(), + .RawSize = Chunked.Info.RawSize, + .RawHash = Chunked.Info.RawHash, + .ModificationTick = ModificationTick}; + + Header.Checksum = ChunkedInfoHeader::ComputeChecksum(Header); + + try + { + uint64_t Offset = 0; + + OutputFile.Write(&Header, sizeof(ChunkedInfoHeader), Offset); + Offset += sizeof(ChunkedInfoHeader); + + if (Header.SequenceCount > 0) + { + OutputFile.Write(Chunked.Info.ChunkSequence.data(), Header.SequenceCount * sizeof(uint32_t), Offset); + Offset += Header.SequenceCount * sizeof(uint32_t); + } + + if (Header.ChunkCount > 0) + { + OutputFile.Write(Chunked.Info.ChunkHashes.data(), Header.ChunkCount * sizeof(IoHash), Offset); + Offset += Header.ChunkCount * sizeof(IoHash); + + OutputFile.Write(Chunked.ChunkSources.data(), Header.ChunkCount * sizeof(ChunkSource), Offset); + Offset += Header.ChunkCount * sizeof(ChunkSource); + } + + OutputFile.Flush(); + } + catch (const std::exception& Ex) + { + ZEN_DEBUG("Failed to write cached file {}. Reason: {}", CachePath, Ex.what()); + return false; + } + OutputFile.MoveTemporaryIntoPlace(CachePath, Ec); + if (Ec) + { + ZEN_DEBUG("Failed to move temporary file {} to {}. Reason: {}", OutputFile.GetPath(), CachePath, Ec.message()); + return false; + } + + return true; + } + + bool ReadChunkedInfo(const std::filesystem::path& CachePath, + uint64_t RawSize, + uint64_t ModificationTick, + ChunkedInfoWithSource& OutChunked) + { + BasicFile InputFile; + std::error_code Ec; + InputFile.Open(CachePath, BasicFile::Mode::kRead, Ec); + if (Ec) + { + return false; + } + try + { + uint64_t Size = InputFile.FileSize(); + if (Size < sizeof(ChunkedInfoHeader)) + { + throw std::runtime_error(fmt::format("Expected size >= {}, file has size {}", sizeof(ChunkedInfoHeader), Size)); + } + + uint64_t Offset = 0; + ChunkedInfoHeader Header; + InputFile.Read(&Header, sizeof(ChunkedInfoHeader), Offset); + Offset += sizeof(Header); + + if (Header.Magic != ChunkedInfoHeader::ExpectedMagic) + { + throw std::runtime_error( + fmt::format("Expected magic 0x{:04x}, file has magic 0x{:04x}", ChunkedInfoHeader::ExpectedMagic, Header.Magic)); + } + if (Header.Version != ChunkedInfoHeader::CurrentVersion) + { + throw std::runtime_error( + fmt::format("Expected version {}, file has version {}", ChunkedInfoHeader::CurrentVersion, Header.Version)); + } + if (Header.Checksum != ChunkedInfoHeader::ComputeChecksum(Header)) + { + throw std::runtime_error(fmt::format("Expected checksum 0x{:04x}, file has checksum 0x{:04x}", + Header.Checksum, + ChunkedInfoHeader::ComputeChecksum(Header))); + } + + uint64_t ExpectedSize = sizeof(ChunkedInfoHeader) + Header.SequenceCount * sizeof(uint32_t) + + Header.ChunkCount * sizeof(IoHash) + Header.ChunkCount * sizeof(ChunkSource); + + if (ExpectedSize != Size) + { + throw std::runtime_error(fmt::format("Expected size {}, file has size {}", ExpectedSize, Size)); + } + + if (Header.RawSize != RawSize) + { + InputFile.Close(); + RemoveFile(CachePath, Ec); + return false; + } + + if (Header.ModificationTick != ModificationTick) + { + InputFile.Close(); + RemoveFile(CachePath, Ec); + return false; + } + + OutChunked.Info.RawSize = Header.RawSize; + OutChunked.Info.RawHash = Header.RawHash; + + if (Header.SequenceCount > 0) + { + OutChunked.Info.ChunkSequence.resize(Header.SequenceCount); + InputFile.Read(OutChunked.Info.ChunkSequence.data(), Header.SequenceCount * sizeof(uint32_t), Offset); + Offset += Header.SequenceCount * sizeof(uint32_t); + } + + if (Header.ChunkCount > 0) + { + OutChunked.Info.ChunkHashes.resize(Header.ChunkCount); + OutChunked.ChunkSources.resize(Header.ChunkCount); + + InputFile.Read(OutChunked.Info.ChunkHashes.data(), Header.ChunkCount * sizeof(IoHash), Offset); + Offset += Header.ChunkCount * sizeof(IoHash); + + InputFile.Read(OutChunked.ChunkSources.data(), Header.ChunkCount * sizeof(ChunkSource), Offset); + Offset += Header.ChunkCount * sizeof(ChunkSource); + } + } + catch (const std::exception& Ex) + { + ZEN_DEBUG("Failed to read cached file {}. Reason: {}", CachePath, Ex.what()); + InputFile.Close(); + RemoveFile(CachePath, Ec); + return false; + } + + return true; + } + + const std::filesystem::path m_RootPath; + const IoHash m_ChunkerId; + const uint64_t m_MinimumRawSizeForCaching; + + static IoHash GetChunkerIdentity(ChunkingController& ChunkController) + { + IoHashStream ChunkerIdStream; + std::string_view ChunkerName = ChunkController.GetName(); + ChunkerIdStream.Append(ChunkerName.data(), ChunkerName.length()); + const CbObject ChunkerParameters = ChunkController.GetParameters(); + ChunkerParameters.GetHash(ChunkerIdStream); + return ChunkerIdStream.GetHash(); + } +}; + +std::unique_ptr<ChunkingCache> +CreateNullChunkingCache() +{ + return std::make_unique<NullChunkingCache>(); +} + +std::unique_ptr<ChunkingCache> +CreateMemoryChunkingCache() +{ + return std::make_unique<MemoryChunkingCache>(); +} + +std::unique_ptr<ChunkingCache> +CreateDiskChunkingCache(const std::filesystem::path& RootPath, ChunkingController& ChunkController, uint64_t MinimumRawSizeForCaching) +{ + return std::make_unique<DiskChunkingCache>(RootPath, ChunkController, MinimumRawSizeForCaching); +} + +#if ZEN_WITH_TESTS + +namespace chunkingcache_testutils { + ChunkedInfoWithSource CreateChunked(const std::string_view Data, uint32_t SplitSize) + { + std::vector<uint32_t> ChunkSequence; + std::vector<IoHash> ChunkHashes; + std::vector<ChunkSource> ChunkSources; + + if (SplitSize > 0) + { + std::string_view::size_type SplitOffset = 0; + while (SplitOffset < Data.length()) + { + std::string_view DataPart(Data.substr(SplitOffset, SplitSize)); + + ChunkSequence.push_back(gsl::narrow<uint32_t>(ChunkSequence.size())); + ChunkHashes.push_back(IoHash::HashBuffer(DataPart.data(), DataPart.length())); + ChunkSources.push_back({.Offset = SplitOffset, .Size = gsl::narrow<uint32_t>(DataPart.length())}); + SplitOffset += DataPart.length(); + } + } + + return ChunkedInfoWithSource{.Info = {.RawSize = Data.length(), + .RawHash = IoHash::HashBuffer(Data.data(), Data.length()), + .ChunkSequence = std::move(ChunkSequence), + .ChunkHashes = std::move(ChunkHashes)}, + .ChunkSources = std::move(ChunkSources)}; + } + + bool Equals(const ChunkedInfoWithSource& Lhs, const ChunkedInfoWithSource& Rhs) + { + if (Lhs.ChunkSources.size() != Rhs.ChunkSources.size()) + { + return false; + } + if (std::mismatch(Lhs.ChunkSources.begin(), + Lhs.ChunkSources.end(), + Rhs.ChunkSources.begin(), + [](const ChunkSource& Lhs, const ChunkSource& Rhs) { return Lhs.Offset == Rhs.Offset && Lhs.Size == Rhs.Size; }) + .first != Lhs.ChunkSources.end()) + { + return false; + } + if (Lhs.Info.RawSize != Rhs.Info.RawSize) + { + return false; + } + if (Lhs.Info.ChunkSequence != Rhs.Info.ChunkSequence) + { + return false; + } + if (Lhs.Info.ChunkHashes != Rhs.Info.ChunkHashes) + { + return false; + } + return true; + } +} // namespace chunkingcache_testutils + +TEST_CASE("chunkingcache.nullchunkingcache") +{ + using namespace chunkingcache_testutils; + + std::unique_ptr<ChunkingCache> Cache = CreateNullChunkingCache(); + ChunkedInfoWithSource Result; + CHECK(!Cache->GetCachedFile("dummy-path", 495, 91283, Result)); + CHECK(Result.Info.ChunkHashes.empty()); + + ChunkedInfoWithSource Chunked = CreateChunked("my data string", 4); + CHECK(!Cache->PutCachedFile("dummy-path", 91283, Chunked)); + + CHECK(!Cache->GetCachedFile("dummy-path", 495, 91283, Result)); + CHECK(Result.Info.ChunkHashes.empty()); +} + +TEST_CASE("chunkingcache.memorychunkingcache") +{ + using namespace chunkingcache_testutils; + + std::unique_ptr<ChunkingCache> Cache = CreateMemoryChunkingCache(); + ChunkedInfoWithSource Result; + CHECK(!Cache->GetCachedFile("file/A/Path", 495, 91283, Result)); + CHECK(Result.Info.ChunkHashes.empty()); + CHECK(!Cache->GetCachedFile("file/B/Path", 395, 671283, Result)); + CHECK(Result.Info.ChunkHashes.empty()); + + ChunkedInfoWithSource ChunkedAV1 = CreateChunked("File A data string", 4); + ChunkedInfoWithSource ChunkedAV2 = CreateChunked("File A updated data string", 4); + ChunkedInfoWithSource ChunkedBV1 = CreateChunked("File B data string", 4); + + CHECK(Cache->PutCachedFile("file/A/Path", 91283, ChunkedAV1)); + CHECK(Cache->PutCachedFile("file/B/Path", 51283, ChunkedBV1)); + + CHECK(Cache->GetCachedFile("file/A/Path", ChunkedAV1.Info.RawSize, 91283, Result)); + CHECK(Equals(Result, ChunkedAV1)); + Result = ChunkedInfoWithSource{}; + + CHECK(Cache->GetCachedFile("file/B/Path", ChunkedBV1.Info.RawSize, 51283, Result)); + CHECK(Equals(Result, ChunkedBV1)); + Result = ChunkedInfoWithSource{}; + + CHECK(!Cache->GetCachedFile("file/A/Path-wrong", 495, 91283, Result)); + CHECK(Result.Info.ChunkHashes.empty()); + + CHECK(!Cache->GetCachedFile("file/A/Path", 493, 91283, Result)); + CHECK(Result.Info.ChunkHashes.empty()); + + // Asking a path that exists but without a match will remove that path + CHECK(!Cache->GetCachedFile("file/A/Path", 495, 91283, Result)); + CHECK(Result.Info.ChunkHashes.empty()); + + CHECK(!Cache->GetCachedFile("file/A/Path", 495, 9283, Result)); + CHECK(Result.Info.ChunkHashes.empty()); + + CHECK(Cache->PutCachedFile("file/A/Path", 91283, ChunkedAV1)); + CHECK(Cache->GetCachedFile("file/A/Path", ChunkedAV1.Info.RawSize, 91283, Result)); + CHECK(Equals(Result, ChunkedAV1)); + Result = ChunkedInfoWithSource{}; + + CHECK(Cache->PutCachedFile("file/A/Path", 91483, ChunkedAV2)); + CHECK(Cache->GetCachedFile("file/A/Path", ChunkedAV2.Info.RawSize, 91483, Result)); + CHECK(Equals(Result, ChunkedAV2)); + Result = ChunkedInfoWithSource{}; + + CHECK(!Cache->GetCachedFile("file/A/Path", ChunkedAV1.Info.RawSize, 91283, Result)); + CHECK(Result.Info.ChunkHashes.empty()); + + CHECK(Cache->GetCachedFile("file/B/Path", ChunkedBV1.Info.RawSize, 51283, Result)); + CHECK(Equals(Result, ChunkedBV1)); + Result = ChunkedInfoWithSource{}; + + CHECK(!Cache->GetCachedFile("file/B/Path", ChunkedBV1.Info.RawSize + 1, 51283, Result)); + CHECK(Result.Info.ChunkHashes.empty()); +} + +TEST_CASE("chunkingcache.diskchunkingcache") +{ + using namespace chunkingcache_testutils; + + ScopedTemporaryDirectory TmpDir; + + std::unique_ptr<ChunkingController> ChunkController = CreateStandardChunkingController(StandardChunkingControllerSettings{}); + + ChunkedInfoWithSource ChunkedAV1 = CreateChunked("File A data string", 4); + ChunkedInfoWithSource ChunkedAV2 = CreateChunked("File A updated data string", 4); + ChunkedInfoWithSource ChunkedBV1 = CreateChunked("File B data string", 4); + + { + std::unique_ptr<ChunkingCache> Cache = CreateDiskChunkingCache(TmpDir.Path(), *ChunkController, 0); + ChunkedInfoWithSource Result; + CHECK(!Cache->GetCachedFile("file/A/Path", 495, 91283, Result)); + CHECK(Result.Info.ChunkHashes.empty()); + CHECK(!Cache->GetCachedFile("file/B/Path", 395, 671283, Result)); + CHECK(Result.Info.ChunkHashes.empty()); + + CHECK(Cache->PutCachedFile("file/A/Path", 91283, ChunkedAV1)); + CHECK(Cache->PutCachedFile("file/B/Path", 51283, ChunkedBV1)); + + CHECK(Cache->GetCachedFile("file/A/Path", ChunkedAV1.Info.RawSize, 91283, Result)); + CHECK(Equals(Result, ChunkedAV1)); + Result = ChunkedInfoWithSource{}; + + CHECK(Cache->GetCachedFile("file/B/Path", ChunkedBV1.Info.RawSize, 51283, Result)); + CHECK(Equals(Result, ChunkedBV1)); + Result = ChunkedInfoWithSource{}; + + CHECK(!Cache->GetCachedFile("file/A/Path-wrong", 495, 91283, Result)); + CHECK(Result.Info.ChunkHashes.empty()); + + CHECK(!Cache->GetCachedFile("file/A/Path", 493, 91283, Result)); + CHECK(Result.Info.ChunkHashes.empty()); + + // Asking a path that exists but without a match will remove that path + CHECK(!Cache->GetCachedFile("file/A/Path", 495, 91283, Result)); + CHECK(Result.Info.ChunkHashes.empty()); + + CHECK(!Cache->GetCachedFile("file/A/Path", 495, 9283, Result)); + CHECK(Result.Info.ChunkHashes.empty()); + + CHECK(Cache->PutCachedFile("file/A/Path", 91283, ChunkedAV1)); + CHECK(Cache->GetCachedFile("file/A/Path", ChunkedAV1.Info.RawSize, 91283, Result)); + CHECK(Equals(Result, ChunkedAV1)); + Result = ChunkedInfoWithSource{}; + + CHECK(Cache->PutCachedFile("file/A/Path", 91483, ChunkedAV2)); + CHECK(Cache->GetCachedFile("file/A/Path", ChunkedAV2.Info.RawSize, 91483, Result)); + CHECK(Equals(Result, ChunkedAV2)); + Result = ChunkedInfoWithSource{}; + } + { + std::unique_ptr<ChunkingCache> Cache = CreateDiskChunkingCache(TmpDir.Path(), *ChunkController, 0); + ChunkedInfoWithSource Result; + + CHECK(Cache->GetCachedFile("file/A/Path", ChunkedAV2.Info.RawSize, 91483, Result)); + CHECK(Equals(Result, ChunkedAV2)); + Result = ChunkedInfoWithSource{}; + + CHECK(!Cache->GetCachedFile("file/A/Path", ChunkedAV2.Info.RawSize, 91283, Result)); + CHECK(Result.Info.ChunkHashes.empty()); + + CHECK(!Cache->GetCachedFile("file/A/Path", ChunkedAV2.Info.RawSize, 91483, Result)); + CHECK(Result.Info.ChunkHashes.empty()); + + CHECK(Cache->GetCachedFile("file/B/Path", ChunkedBV1.Info.RawSize, 51283, Result)); + CHECK(Equals(Result, ChunkedBV1)); + Result = ChunkedInfoWithSource{}; + + CHECK(!Cache->GetCachedFile("file/B/Path", ChunkedBV1.Info.RawSize + 1, 51283, Result)); + CHECK(Result.Info.ChunkHashes.empty()); + + CHECK(!Cache->GetCachedFile("file/B/Path", ChunkedBV1.Info.RawSize, 51283, Result)); + CHECK(Result.Info.ChunkHashes.empty()); + } +} + +void +chunkingcache_forcelink() +{ +} + +#endif // ZEN_WITH_TESTS + +} // namespace zen diff --git a/src/zenremotestore/include/zenremotestore/builds/buildstorageoperations.h b/src/zenremotestore/include/zenremotestore/builds/buildstorageoperations.h index 057ad1a10..9f7d93398 100644 --- a/src/zenremotestore/include/zenremotestore/builds/buildstorageoperations.h +++ b/src/zenremotestore/include/zenremotestore/builds/buildstorageoperations.h @@ -551,7 +551,9 @@ public: std::vector<std::pair<Oid, std::string>> Execute(const Oid& BuildPartId, const std::string_view BuildPartName, - const std::filesystem::path& ManifestPath); + const std::filesystem::path& ManifestPath, + ChunkingController& ChunkController, + ChunkingCache& ChunkCache); DiskStatistics m_DiskStats; GetFolderContentStatistics m_LocalFolderScanStats; @@ -659,6 +661,7 @@ private: }; void UploadBuildPart(ChunkingController& ChunkController, + ChunkingCache& ChunkCache, uint32_t PartIndex, const UploadPart& Part, uint32_t PartStepOffset, diff --git a/src/zenremotestore/include/zenremotestore/chunking/chunkedcontent.h b/src/zenremotestore/include/zenremotestore/chunking/chunkedcontent.h index 1e8e878df..d402bd3f0 100644 --- a/src/zenremotestore/include/zenremotestore/chunking/chunkedcontent.h +++ b/src/zenremotestore/include/zenremotestore/chunking/chunkedcontent.h @@ -17,6 +17,7 @@ namespace zen { class CbWriter; class ChunkingController; +class ChunkingCache; class WorkerThreadPool; enum class SourcePlatform @@ -165,6 +166,12 @@ struct ChunkingStatistics std::atomic<uint64_t> UniqueChunksFound = 0; std::atomic<uint64_t> UniqueSequencesFound = 0; std::atomic<uint64_t> UniqueBytesFound = 0; + std::atomic<uint64_t> FilesFoundInCache = 0; + std::atomic<uint64_t> ChunksFoundInCache = 0; + std::atomic<uint64_t> BytesFoundInCache = 0; + std::atomic<uint64_t> FilesStoredInCache = 0; + std::atomic<uint64_t> ChunksStoredInCache = 0; + std::atomic<uint64_t> BytesStoredInCache = 0; uint64_t ElapsedWallTimeUS = 0; inline ChunkingStatistics& operator+=(const ChunkingStatistics& Rhs) @@ -176,6 +183,12 @@ struct ChunkingStatistics UniqueSequencesFound += Rhs.UniqueSequencesFound; UniqueBytesFound += Rhs.UniqueBytesFound; ElapsedWallTimeUS += Rhs.ElapsedWallTimeUS; + FilesFoundInCache += Rhs.FilesFoundInCache; + ChunksFoundInCache += Rhs.ChunksFoundInCache; + BytesFoundInCache += Rhs.BytesFoundInCache; + FilesStoredInCache += Rhs.FilesStoredInCache; + ChunksStoredInCache += Rhs.ChunksStoredInCache; + BytesStoredInCache += Rhs.BytesStoredInCache; return *this; } }; @@ -185,6 +198,7 @@ ChunkedFolderContent ChunkFolderContent(ChunkingStatistics& Stats, const std::filesystem::path& RootPath, const FolderContent& Content, const ChunkingController& InChunkingController, + ChunkingCache& InChunkingCache, int32_t UpdateIntervalMS, std::function<void(bool IsAborted, bool IsPaused, std::ptrdiff_t PendingWork)>&& UpdateCallback, std::atomic<bool>& AbortFlag, diff --git a/src/zenremotestore/include/zenremotestore/chunking/chunkedfile.h b/src/zenremotestore/include/zenremotestore/chunking/chunkedfile.h index 4cec80fdb..64e2c9c29 100644 --- a/src/zenremotestore/include/zenremotestore/chunking/chunkedfile.h +++ b/src/zenremotestore/include/zenremotestore/chunking/chunkedfile.h @@ -21,11 +21,14 @@ struct ChunkedInfo std::vector<IoHash> ChunkHashes; }; +#pragma pack(push) +#pragma pack(4) struct ChunkSource { uint64_t Offset; // 8 uint32_t Size; // 4 }; +#pragma pack(pop) struct ChunkedInfoWithSource { diff --git a/src/zenremotestore/include/zenremotestore/chunking/chunkingcache.h b/src/zenremotestore/include/zenremotestore/chunking/chunkingcache.h new file mode 100644 index 000000000..e213bc41b --- /dev/null +++ b/src/zenremotestore/include/zenremotestore/chunking/chunkingcache.h @@ -0,0 +1,44 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include <filesystem> + +namespace zen { + +struct ChunkedInfoWithSource; +class ChunkingController; + +class ChunkingCache +{ +public: + virtual ~ChunkingCache() {} + + /* + * Attempting to fetch a cached file with mismatching RawSize of ModificationTick will delete any existing cached data for that + * InputPath + * + * If GetCachedFile returns false, OutChunked is untouched + */ + virtual bool GetCachedFile(const std::filesystem::path& InputPath, + uint64_t RawSize, + uint64_t ModificationTick, + ChunkedInfoWithSource& OutChunked) = 0; + + /* + * Putting a cached entry with an existing InputPath will overwrite it with the new ModificationTick and Chunked data + */ + virtual bool PutCachedFile(const std::filesystem::path& InputPath, uint64_t ModificationTick, const ChunkedInfoWithSource& Chunked) = 0; +}; + +std::unique_ptr<ChunkingCache> CreateNullChunkingCache(); +std::unique_ptr<ChunkingCache> CreateMemoryChunkingCache(); +std::unique_ptr<ChunkingCache> CreateDiskChunkingCache(const std::filesystem::path& RootPath, + ChunkingController& ChunkController, + uint64_t MinimumRawSizeForCaching); + +#if ZEN_WITH_TESTS +void chunkingcache_forcelink(); +#endif // ZEN_WITH_TESTS + +} // namespace zen diff --git a/src/zenremotestore/zenremotestore.cpp b/src/zenremotestore/zenremotestore.cpp index ecf85794d..0d008ec40 100644 --- a/src/zenremotestore/zenremotestore.cpp +++ b/src/zenremotestore/zenremotestore.cpp @@ -6,6 +6,7 @@ #include <zenremotestore/builds/buildstorageoperations.h> #include <zenremotestore/chunking/chunkedcontent.h> #include <zenremotestore/chunking/chunkedfile.h> +#include <zenremotestore/chunking/chunkingcache.h> #include <zenremotestore/filesystemutils.h> #include <zenremotestore/projectstore/remoteprojectstore.h> @@ -21,6 +22,7 @@ zenremotestore_forcelinktests() chunkblock_forcelink(); chunkedcontent_forcelink(); chunkedfile_forcelink(); + chunkingcache_forcelink(); filesystemutils_forcelink(); remoteprojectstore_forcelink(); } |