// Copyright Epic Games, Inc. All Rights Reserved. #include #include #include #include #include #include #include #include #include #include ZEN_THIRD_PARTY_INCLUDES_START #include #include #include ZEN_THIRD_PARTY_INCLUDES_END #if ZEN_WITH_TESTS # include # include # include #endif // ZEN_WITH_TESTS namespace zen { class NullChunkingCache : public ChunkingCache { public: NullChunkingCache() {} virtual bool GetCachedFile(const std::filesystem::path& InputPath, uint64_t RawSize, uint64_t ModificationTick, ChunkedInfoWithSource& OutChunked) override { ZEN_UNUSED(InputPath, RawSize, OutChunked, ModificationTick); return false; } virtual bool PutCachedFile(const std::filesystem::path& InputPath, uint64_t ModificationTick, const ChunkedInfoWithSource& Chunked) override { ZEN_UNUSED(InputPath, Chunked, ModificationTick); return false; } }; class MemoryChunkingCache : public ChunkingCache { public: MemoryChunkingCache() {} virtual bool GetCachedFile(const std::filesystem::path& InputPath, uint64_t RawSize, uint64_t ModificationTick, ChunkedInfoWithSource& OutChunked) override { const std::u8string PathString = InputPath.generic_u8string(); const IoHash PathHash = IoHash::HashBuffer(PathString.data(), PathString.length()); RwLock::SharedLockScope Lock(m_Lock); if (auto It = m_PathHashToEntry.find(PathHash); It != m_PathHashToEntry.end()) { const CachedEntry& Entry = m_Entries[It->second]; if (ModificationTick == Entry.ModificationTick && RawSize == Entry.Chunked.Info.RawSize) { OutChunked = Entry.Chunked; return true; } else { Lock.ReleaseNow(); RwLock::ExclusiveLockScope EditLock(m_Lock); if (auto RemoveIt = m_PathHashToEntry.find(PathHash); It != m_PathHashToEntry.end()) { CachedEntry& DeleteEntry = m_Entries[It->second]; DeleteEntry.Chunked = {}; DeleteEntry.ModificationTick = 0; m_FreeEntryIndexes.push_back(It->second); m_PathHashToEntry.erase(It); } } } return false; } virtual bool PutCachedFile(const std::filesystem::path& InputPath, uint64_t ModificationTick, const ChunkedInfoWithSource& Chunked) override { const std::u8string PathString = InputPath.generic_u8string(); const IoHash PathHash = IoHash::HashBuffer(PathString.data(), PathString.length()); RwLock::ExclusiveLockScope _(m_Lock); if (auto It = m_PathHashToEntry.find(PathHash); It != m_PathHashToEntry.end()) { CachedEntry& Entry = m_Entries[It->second]; if (ModificationTick != Entry.ModificationTick || Chunked.Info.RawSize != Entry.Chunked.Info.RawSize) { Entry.Chunked = Chunked; Entry.ModificationTick = ModificationTick; } } else { uint32_t EntryIndex = gsl::narrow(m_Entries.size()); if (!m_FreeEntryIndexes.empty()) { EntryIndex = m_FreeEntryIndexes.back(); m_FreeEntryIndexes.pop_back(); m_Entries[EntryIndex] = CachedEntry{.Chunked = Chunked, .ModificationTick = ModificationTick}; } else { m_Entries.emplace_back(CachedEntry{.Chunked = Chunked, .ModificationTick = ModificationTick}); } m_PathHashToEntry.insert_or_assign(PathHash, EntryIndex); } return true; } RwLock m_Lock; tsl::robin_map m_PathHashToEntry; std::vector m_FreeEntryIndexes; struct CachedEntry { ChunkedInfoWithSource Chunked; uint64_t ModificationTick = 0; }; std::vector m_Entries; }; class DiskChunkingCache : public ChunkingCache { public: DiskChunkingCache(const std::filesystem::path& RootPath, ChunkingController& ChunkController, uint64_t MinimumRawSizeForCaching) : m_RootPath(RootPath) , m_ChunkerId(GetChunkerIdentity(ChunkController)) , m_MinimumRawSizeForCaching(MinimumRawSizeForCaching) { } virtual bool GetCachedFile(const std::filesystem::path& InputPath, uint64_t RawSize, uint64_t ModificationTick, ChunkedInfoWithSource& OutChunked) override { if (RawSize < m_MinimumRawSizeForCaching) { return false; } const std::filesystem::path CachePath = GetCachePath(InputPath); return ReadChunkedInfo(CachePath, RawSize, ModificationTick, OutChunked); } virtual bool PutCachedFile(const std::filesystem::path& InputPath, uint64_t ModificationTick, const ChunkedInfoWithSource& Chunked) override { if (Chunked.Info.RawSize < m_MinimumRawSizeForCaching) { return false; } const std::filesystem::path CachePath = GetCachePath(InputPath); return WriteChunkedInfo(CachePath, ModificationTick, Chunked); } private: static constexpr uint32_t ImplementationRevision = 1; #pragma pack(push) #pragma pack(1) struct ChunkedInfoHeader { static constexpr uint32_t ExpectedMagic = 0x75636368; // 'ucch'; static constexpr uint32_t CurrentVersion = 1; uint32_t Magic = ExpectedMagic; uint32_t Version = CurrentVersion; uint64_t SequenceCount = 0; uint64_t ChunkCount = 0; uint64_t RawSize = 0; IoHash RawHash = IoHash::Zero; uint64_t ModificationTick = 0; uint32_t Checksum = 0; static uint32_t ComputeChecksum(const ChunkedInfoHeader& Header) { return XXH32(&Header.Magic, sizeof(Header) - sizeof(uint32_t), 0xC0C0'BABA); } }; #pragma pack(pop) static_assert(sizeof(ChunkedInfoHeader) == 64); static_assert(sizeof(ChunkSource) == 12); std::filesystem::path GetCachePath(const std::filesystem::path& InputPath) { const std::string IdentityString = fmt::format("{}_{}_{}", ImplementationRevision, m_ChunkerId, InputPath.generic_string()); const IoHash IdentityHash = IoHash::HashBuffer(IdentityString.data(), IdentityString.length()); std::filesystem::path CachePath = m_RootPath / fmt::format("{}.chunked_content", IdentityHash); return CachePath; } bool WriteChunkedInfo(const std::filesystem::path& CachePath, uint64_t ModificationTick, const ChunkedInfoWithSource& Chunked) { CreateDirectories(CachePath.parent_path()); TemporaryFile OutputFile; std::error_code Ec; OutputFile.CreateTemporary(CachePath.parent_path(), Ec); if (Ec) { ZEN_DEBUG("Failed to create temp file for cached chunked data at '{}'", CachePath); return false; } ChunkedInfoHeader Header = {.SequenceCount = Chunked.Info.ChunkSequence.size(), .ChunkCount = Chunked.Info.ChunkHashes.size(), .RawSize = Chunked.Info.RawSize, .RawHash = Chunked.Info.RawHash, .ModificationTick = ModificationTick}; Header.Checksum = ChunkedInfoHeader::ComputeChecksum(Header); try { uint64_t Offset = 0; OutputFile.Write(&Header, sizeof(ChunkedInfoHeader), Offset); Offset += sizeof(ChunkedInfoHeader); if (Header.SequenceCount > 0) { OutputFile.Write(Chunked.Info.ChunkSequence.data(), Header.SequenceCount * sizeof(uint32_t), Offset); Offset += Header.SequenceCount * sizeof(uint32_t); } if (Header.ChunkCount > 0) { OutputFile.Write(Chunked.Info.ChunkHashes.data(), Header.ChunkCount * sizeof(IoHash), Offset); Offset += Header.ChunkCount * sizeof(IoHash); OutputFile.Write(Chunked.ChunkSources.data(), Header.ChunkCount * sizeof(ChunkSource), Offset); Offset += Header.ChunkCount * sizeof(ChunkSource); } OutputFile.Flush(); } catch (const std::exception& Ex) { ZEN_DEBUG("Failed to write cached file {}. Reason: {}", CachePath, Ex.what()); return false; } OutputFile.MoveTemporaryIntoPlace(CachePath, Ec); if (Ec) { ZEN_DEBUG("Failed to move temporary file {} to {}. Reason: {}", OutputFile.GetPath(), CachePath, Ec.message()); return false; } return true; } bool ReadChunkedInfo(const std::filesystem::path& CachePath, uint64_t RawSize, uint64_t ModificationTick, ChunkedInfoWithSource& OutChunked) { BasicFile InputFile; std::error_code Ec; InputFile.Open(CachePath, BasicFile::Mode::kRead, Ec); if (Ec) { return false; } try { uint64_t Size = InputFile.FileSize(); if (Size < sizeof(ChunkedInfoHeader)) { throw std::runtime_error(fmt::format("Expected size >= {}, file has size {}", sizeof(ChunkedInfoHeader), Size)); } uint64_t Offset = 0; ChunkedInfoHeader Header; InputFile.Read(&Header, sizeof(ChunkedInfoHeader), Offset); Offset += sizeof(Header); if (Header.Magic != ChunkedInfoHeader::ExpectedMagic) { throw std::runtime_error( fmt::format("Expected magic 0x{:04x}, file has magic 0x{:04x}", ChunkedInfoHeader::ExpectedMagic, Header.Magic)); } if (Header.Version != ChunkedInfoHeader::CurrentVersion) { throw std::runtime_error( fmt::format("Expected version {}, file has version {}", ChunkedInfoHeader::CurrentVersion, Header.Version)); } if (Header.Checksum != ChunkedInfoHeader::ComputeChecksum(Header)) { throw std::runtime_error(fmt::format("Expected checksum 0x{:04x}, file has checksum 0x{:04x}", Header.Checksum, ChunkedInfoHeader::ComputeChecksum(Header))); } uint64_t ExpectedSize = sizeof(ChunkedInfoHeader) + Header.SequenceCount * sizeof(uint32_t) + Header.ChunkCount * sizeof(IoHash) + Header.ChunkCount * sizeof(ChunkSource); if (ExpectedSize != Size) { throw std::runtime_error(fmt::format("Expected size {}, file has size {}", ExpectedSize, Size)); } if (Header.RawSize != RawSize) { InputFile.Close(); RemoveFile(CachePath, Ec); return false; } if (Header.ModificationTick != ModificationTick) { InputFile.Close(); RemoveFile(CachePath, Ec); return false; } OutChunked.Info.RawSize = Header.RawSize; OutChunked.Info.RawHash = Header.RawHash; if (Header.SequenceCount > 0) { OutChunked.Info.ChunkSequence.resize(Header.SequenceCount); InputFile.Read(OutChunked.Info.ChunkSequence.data(), Header.SequenceCount * sizeof(uint32_t), Offset); Offset += Header.SequenceCount * sizeof(uint32_t); } if (Header.ChunkCount > 0) { OutChunked.Info.ChunkHashes.resize(Header.ChunkCount); OutChunked.ChunkSources.resize(Header.ChunkCount); InputFile.Read(OutChunked.Info.ChunkHashes.data(), Header.ChunkCount * sizeof(IoHash), Offset); Offset += Header.ChunkCount * sizeof(IoHash); InputFile.Read(OutChunked.ChunkSources.data(), Header.ChunkCount * sizeof(ChunkSource), Offset); Offset += Header.ChunkCount * sizeof(ChunkSource); } } catch (const std::exception& Ex) { ZEN_DEBUG("Failed to read cached file {}. Reason: {}", CachePath, Ex.what()); InputFile.Close(); RemoveFile(CachePath, Ec); return false; } return true; } const std::filesystem::path m_RootPath; const IoHash m_ChunkerId; const uint64_t m_MinimumRawSizeForCaching; static IoHash GetChunkerIdentity(ChunkingController& ChunkController) { IoHashStream ChunkerIdStream; std::string_view ChunkerName = ChunkController.GetName(); ChunkerIdStream.Append(ChunkerName.data(), ChunkerName.length()); const CbObject ChunkerParameters = ChunkController.GetParameters(); ChunkerParameters.GetHash(ChunkerIdStream); return ChunkerIdStream.GetHash(); } }; std::unique_ptr CreateNullChunkingCache() { return std::make_unique(); } std::unique_ptr CreateMemoryChunkingCache() { return std::make_unique(); } std::unique_ptr CreateDiskChunkingCache(const std::filesystem::path& RootPath, ChunkingController& ChunkController, uint64_t MinimumRawSizeForCaching) { return std::make_unique(RootPath, ChunkController, MinimumRawSizeForCaching); } #if ZEN_WITH_TESTS namespace chunkingcache_testutils { ChunkedInfoWithSource CreateChunked(const std::string_view Data, uint32_t SplitSize) { std::vector ChunkSequence; std::vector ChunkHashes; std::vector ChunkSources; if (SplitSize > 0) { std::string_view::size_type SplitOffset = 0; while (SplitOffset < Data.length()) { std::string_view DataPart(Data.substr(SplitOffset, SplitSize)); ChunkSequence.push_back(gsl::narrow(ChunkSequence.size())); ChunkHashes.push_back(IoHash::HashBuffer(DataPart.data(), DataPart.length())); ChunkSources.push_back({.Offset = SplitOffset, .Size = gsl::narrow(DataPart.length())}); SplitOffset += DataPart.length(); } } return ChunkedInfoWithSource{.Info = {.RawSize = Data.length(), .RawHash = IoHash::HashBuffer(Data.data(), Data.length()), .ChunkSequence = std::move(ChunkSequence), .ChunkHashes = std::move(ChunkHashes)}, .ChunkSources = std::move(ChunkSources)}; } bool Equals(const ChunkedInfoWithSource& Lhs, const ChunkedInfoWithSource& Rhs) { if (Lhs.ChunkSources.size() != Rhs.ChunkSources.size()) { return false; } if (std::mismatch(Lhs.ChunkSources.begin(), Lhs.ChunkSources.end(), Rhs.ChunkSources.begin(), [](const ChunkSource& Lhs, const ChunkSource& Rhs) { return Lhs.Offset == Rhs.Offset && Lhs.Size == Rhs.Size; }) .first != Lhs.ChunkSources.end()) { return false; } if (Lhs.Info.RawSize != Rhs.Info.RawSize) { return false; } if (Lhs.Info.ChunkSequence != Rhs.Info.ChunkSequence) { return false; } if (Lhs.Info.ChunkHashes != Rhs.Info.ChunkHashes) { return false; } return true; } } // namespace chunkingcache_testutils TEST_CASE("chunkingcache.nullchunkingcache") { using namespace chunkingcache_testutils; std::unique_ptr Cache = CreateNullChunkingCache(); ChunkedInfoWithSource Result; CHECK(!Cache->GetCachedFile("dummy-path", 495, 91283, Result)); CHECK(Result.Info.ChunkHashes.empty()); ChunkedInfoWithSource Chunked = CreateChunked("my data string", 4); CHECK(!Cache->PutCachedFile("dummy-path", 91283, Chunked)); CHECK(!Cache->GetCachedFile("dummy-path", 495, 91283, Result)); CHECK(Result.Info.ChunkHashes.empty()); } TEST_CASE("chunkingcache.memorychunkingcache") { using namespace chunkingcache_testutils; std::unique_ptr Cache = CreateMemoryChunkingCache(); ChunkedInfoWithSource Result; CHECK(!Cache->GetCachedFile("file/A/Path", 495, 91283, Result)); CHECK(Result.Info.ChunkHashes.empty()); CHECK(!Cache->GetCachedFile("file/B/Path", 395, 671283, Result)); CHECK(Result.Info.ChunkHashes.empty()); ChunkedInfoWithSource ChunkedAV1 = CreateChunked("File A data string", 4); ChunkedInfoWithSource ChunkedAV2 = CreateChunked("File A updated data string", 4); ChunkedInfoWithSource ChunkedBV1 = CreateChunked("File B data string", 4); CHECK(Cache->PutCachedFile("file/A/Path", 91283, ChunkedAV1)); CHECK(Cache->PutCachedFile("file/B/Path", 51283, ChunkedBV1)); CHECK(Cache->GetCachedFile("file/A/Path", ChunkedAV1.Info.RawSize, 91283, Result)); CHECK(Equals(Result, ChunkedAV1)); Result = ChunkedInfoWithSource{}; CHECK(Cache->GetCachedFile("file/B/Path", ChunkedBV1.Info.RawSize, 51283, Result)); CHECK(Equals(Result, ChunkedBV1)); Result = ChunkedInfoWithSource{}; CHECK(!Cache->GetCachedFile("file/A/Path-wrong", 495, 91283, Result)); CHECK(Result.Info.ChunkHashes.empty()); CHECK(!Cache->GetCachedFile("file/A/Path", 493, 91283, Result)); CHECK(Result.Info.ChunkHashes.empty()); // Asking a path that exists but without a match will remove that path CHECK(!Cache->GetCachedFile("file/A/Path", 495, 91283, Result)); CHECK(Result.Info.ChunkHashes.empty()); CHECK(!Cache->GetCachedFile("file/A/Path", 495, 9283, Result)); CHECK(Result.Info.ChunkHashes.empty()); CHECK(Cache->PutCachedFile("file/A/Path", 91283, ChunkedAV1)); CHECK(Cache->GetCachedFile("file/A/Path", ChunkedAV1.Info.RawSize, 91283, Result)); CHECK(Equals(Result, ChunkedAV1)); Result = ChunkedInfoWithSource{}; CHECK(Cache->PutCachedFile("file/A/Path", 91483, ChunkedAV2)); CHECK(Cache->GetCachedFile("file/A/Path", ChunkedAV2.Info.RawSize, 91483, Result)); CHECK(Equals(Result, ChunkedAV2)); Result = ChunkedInfoWithSource{}; CHECK(!Cache->GetCachedFile("file/A/Path", ChunkedAV1.Info.RawSize, 91283, Result)); CHECK(Result.Info.ChunkHashes.empty()); CHECK(Cache->GetCachedFile("file/B/Path", ChunkedBV1.Info.RawSize, 51283, Result)); CHECK(Equals(Result, ChunkedBV1)); Result = ChunkedInfoWithSource{}; CHECK(!Cache->GetCachedFile("file/B/Path", ChunkedBV1.Info.RawSize + 1, 51283, Result)); CHECK(Result.Info.ChunkHashes.empty()); } TEST_CASE("chunkingcache.diskchunkingcache") { using namespace chunkingcache_testutils; ScopedTemporaryDirectory TmpDir; std::unique_ptr ChunkController = CreateStandardChunkingController(StandardChunkingControllerSettings{}); ChunkedInfoWithSource ChunkedAV1 = CreateChunked("File A data string", 4); ChunkedInfoWithSource ChunkedAV2 = CreateChunked("File A updated data string", 4); ChunkedInfoWithSource ChunkedBV1 = CreateChunked("File B data string", 4); { std::unique_ptr Cache = CreateDiskChunkingCache(TmpDir.Path(), *ChunkController, 0); ChunkedInfoWithSource Result; CHECK(!Cache->GetCachedFile("file/A/Path", 495, 91283, Result)); CHECK(Result.Info.ChunkHashes.empty()); CHECK(!Cache->GetCachedFile("file/B/Path", 395, 671283, Result)); CHECK(Result.Info.ChunkHashes.empty()); CHECK(Cache->PutCachedFile("file/A/Path", 91283, ChunkedAV1)); CHECK(Cache->PutCachedFile("file/B/Path", 51283, ChunkedBV1)); CHECK(Cache->GetCachedFile("file/A/Path", ChunkedAV1.Info.RawSize, 91283, Result)); CHECK(Equals(Result, ChunkedAV1)); Result = ChunkedInfoWithSource{}; CHECK(Cache->GetCachedFile("file/B/Path", ChunkedBV1.Info.RawSize, 51283, Result)); CHECK(Equals(Result, ChunkedBV1)); Result = ChunkedInfoWithSource{}; CHECK(!Cache->GetCachedFile("file/A/Path-wrong", 495, 91283, Result)); CHECK(Result.Info.ChunkHashes.empty()); CHECK(!Cache->GetCachedFile("file/A/Path", 493, 91283, Result)); CHECK(Result.Info.ChunkHashes.empty()); // Asking a path that exists but without a match will remove that path CHECK(!Cache->GetCachedFile("file/A/Path", 495, 91283, Result)); CHECK(Result.Info.ChunkHashes.empty()); CHECK(!Cache->GetCachedFile("file/A/Path", 495, 9283, Result)); CHECK(Result.Info.ChunkHashes.empty()); CHECK(Cache->PutCachedFile("file/A/Path", 91283, ChunkedAV1)); CHECK(Cache->GetCachedFile("file/A/Path", ChunkedAV1.Info.RawSize, 91283, Result)); CHECK(Equals(Result, ChunkedAV1)); Result = ChunkedInfoWithSource{}; CHECK(Cache->PutCachedFile("file/A/Path", 91483, ChunkedAV2)); CHECK(Cache->GetCachedFile("file/A/Path", ChunkedAV2.Info.RawSize, 91483, Result)); CHECK(Equals(Result, ChunkedAV2)); Result = ChunkedInfoWithSource{}; } { std::unique_ptr Cache = CreateDiskChunkingCache(TmpDir.Path(), *ChunkController, 0); ChunkedInfoWithSource Result; CHECK(Cache->GetCachedFile("file/A/Path", ChunkedAV2.Info.RawSize, 91483, Result)); CHECK(Equals(Result, ChunkedAV2)); Result = ChunkedInfoWithSource{}; CHECK(!Cache->GetCachedFile("file/A/Path", ChunkedAV2.Info.RawSize, 91283, Result)); CHECK(Result.Info.ChunkHashes.empty()); CHECK(!Cache->GetCachedFile("file/A/Path", ChunkedAV2.Info.RawSize, 91483, Result)); CHECK(Result.Info.ChunkHashes.empty()); CHECK(Cache->GetCachedFile("file/B/Path", ChunkedBV1.Info.RawSize, 51283, Result)); CHECK(Equals(Result, ChunkedBV1)); Result = ChunkedInfoWithSource{}; CHECK(!Cache->GetCachedFile("file/B/Path", ChunkedBV1.Info.RawSize + 1, 51283, Result)); CHECK(Result.Info.ChunkHashes.empty()); CHECK(!Cache->GetCachedFile("file/B/Path", ChunkedBV1.Info.RawSize, 51283, Result)); CHECK(Result.Info.ChunkHashes.empty()); } } void chunkingcache_forcelink() { } #endif // ZEN_WITH_TESTS } // namespace zen