diff options
| author | Per Larsson <[email protected]> | 2021-09-20 08:54:34 +0200 |
|---|---|---|
| committer | Per Larsson <[email protected]> | 2021-09-20 08:54:34 +0200 |
| commit | e25b4b20d8a5696aa7055c9c167fa47b3739bc7e (patch) | |
| tree | 049654b87096a22e1bf696a385db608a75f229fa /zenstore/compactcas.cpp | |
| parent | Probe upstream Zen server when initializing upstream cache. (diff) | |
| parent | Fixed unused variable warnings exposed by xmake build (unclear why I do not r... (diff) | |
| download | zen-e25b4b20d8a5696aa7055c9c167fa47b3739bc7e.tar.xz zen-e25b4b20d8a5696aa7055c9c167fa47b3739bc7e.zip | |
Merge branch 'main' of https://github.com/EpicGames/zen
Diffstat (limited to 'zenstore/compactcas.cpp')
| -rw-r--r-- | zenstore/compactcas.cpp | 142 |
1 files changed, 131 insertions, 11 deletions
diff --git a/zenstore/compactcas.cpp b/zenstore/compactcas.cpp index 4407d8b08..070ca1503 100644 --- a/zenstore/compactcas.cpp +++ b/zenstore/compactcas.cpp @@ -10,18 +10,22 @@ #include <zencore/thread.h> #include <zencore/uid.h> -#include <gsl/gsl-lite.hpp> - -#include <functional> - -struct IUnknown; // Workaround for "combaseapi.h(229): error C2187: syntax error: 'identifier' was unexpected here" when using /permissive- -#include <atlfile.h> #include <filesystem> +#include <functional> +#include <gsl/gsl-lite.hpp> ////////////////////////////////////////////////////////////////////////// namespace zen { +CasContainerStrategy::CasContainerStrategy(const CasStoreConfiguration& Config) : m_Config(Config) +{ +} + +CasContainerStrategy::~CasContainerStrategy() +{ +} + void CasContainerStrategy::Initialize(const std::string_view ContainerBaseName, uint64_t Alignment, bool IsNewStore) { @@ -43,7 +47,9 @@ CasContainerStrategy::Initialize(const std::string_view ContainerBaseName, uint6 uint64_t MaxFileOffset = 0; { - // This is not technically necessary but may help future static analysis + // This is not technically necessary (nobody should be accessing us from + // another thread at this stage) but may help static analysis + zen::RwLock::ExclusiveLockScope _(m_LocationMapLock); m_CasLog.Replay([&](const CasDiskIndexEntry& Record) { @@ -103,9 +109,8 @@ IoBuffer CasContainerStrategy::FindChunk(const IoHash& ChunkHash) { RwLock::SharedLockScope _(m_LocationMapLock); - auto KeyIt = m_LocationMap.find(ChunkHash); - if (KeyIt != m_LocationMap.end()) + if (auto KeyIt = m_LocationMap.find(ChunkHash); KeyIt != m_LocationMap.end()) { const CasDiskLocation& Location = KeyIt->second; return zen::IoBufferBuilder::MakeFromFileHandle(m_SmallObjectFile.Handle(), Location.Offset, Location.Size); @@ -120,9 +125,8 @@ bool CasContainerStrategy::HaveChunk(const IoHash& ChunkHash) { RwLock::SharedLockScope _(m_LocationMapLock); - auto KeyIt = m_LocationMap.find(ChunkHash); - if (KeyIt != m_LocationMap.end()) + if (auto KeyIt = m_LocationMap.find(ChunkHash); KeyIt != m_LocationMap.end()) { return true; } @@ -133,6 +137,13 @@ CasContainerStrategy::HaveChunk(const IoHash& ChunkHash) void CasContainerStrategy::FilterChunks(CasChunkSet& InOutChunks) { + // This implementation is good enough for relatively small + // chunk sets (in terms of chunk identifiers), but would + // benefit from a better implementation which removes + // items incrementally for large sets, especially when + // we're likely to already have a large proportion of the + // chunks in the set + std::unordered_set<IoHash> HaveSet; for (const IoHash& Hash : InOutChunks.GetChunkSet()) @@ -157,4 +168,113 @@ CasContainerStrategy::Flush() m_SmallObjectFile.Flush(); } +void +CasContainerStrategy::Scrub(ScrubContext& Ctx) +{ + const uint64_t WindowSize = 4 * 1024 * 1024; + uint64_t WindowStart = 0; + uint64_t WindowEnd = WindowSize; + const uint64_t FileSize = m_SmallObjectFile.FileSize(); + + std::vector<CasDiskIndexEntry> BigChunks; + std::vector<CasDiskIndexEntry> BadChunks; + + // We do a read sweep through the payloads file and validate + // any entries that are contained within each segment, with + // the assumption that most entries will be checked in this + // pass. An alternative strategy would be to use memory mapping. + + { + IoBuffer ReadBuffer{WindowSize}; + void* BufferBase = ReadBuffer.MutableData(); + + RwLock::SharedLockScope _(m_LocationMapLock); + + do + { + const uint64_t ChunkSize = zen::Min(WindowSize, FileSize - WindowStart); + m_SmallObjectFile.Read(BufferBase, ChunkSize, WindowStart); + + for (auto& Entry : m_LocationMap) + { + const uint64_t EntryOffset = Entry.second.Offset; + + if ((EntryOffset >= WindowStart) && (EntryOffset < WindowEnd)) + { + const uint64_t EntryEnd = EntryOffset + Entry.second.Size; + + if (EntryEnd >= WindowEnd) + { + BigChunks.push_back({.Key = Entry.first, .Location = Entry.second}); + + continue; + } + + const IoHash ComputedHash = IoHash::HashBuffer(BufferBase, Entry.second.Size); + + if (Entry.first != ComputedHash) + { + // Hash mismatch + + BadChunks.push_back({.Key = Entry.first, .Location = Entry.second}); + } + } + } + + WindowStart += WindowSize; + WindowEnd += WindowSize; + } while (WindowStart < FileSize); + } + + // Deal with large chunks + + for (const CasDiskIndexEntry& Entry : BigChunks) + { + IoHashStream Hasher; + m_SmallObjectFile.StreamByteRange(Entry.Location.Offset, Entry.Location.Size, [&](const void* Data, uint64_t Size) { + Hasher.Append(Data, Size); + }); + IoHash ComputedHash = Hasher.GetHash(); + + if (Entry.Key != ComputedHash) + { + BadChunks.push_back(Entry); + } + } + + // Deal with bad chunks by removing them from our lookup map + + std::vector<IoHash> BadChunkHashes; + + for (const CasDiskIndexEntry& Entry : BadChunks) + { + BadChunkHashes.push_back(Entry.Key); + m_LocationMap.erase(Entry.Key); + } + + // Let whomever it concerns know about the bad chunks. This could + // be used to invalidate higher level data structures more efficiently + // than a full validation pass might be able to do + + Ctx.ReportBadChunks(BadChunkHashes); +} + +void +CasContainerStrategy::MakeSnapshot() +{ + RwLock::SharedLockScope _(m_LocationMapLock); + + std::vector<CasDiskIndexEntry> Entries{m_LocationMap.size()}; + + uint64_t EntryIndex = 0; + for (auto& Entry : m_LocationMap) + { + CasDiskIndexEntry& IndexEntry = Entries[EntryIndex++]; + IndexEntry.Key = Entry.first; + IndexEntry.Location = Entry.second; + } + + m_SmallObjectIndex.Write(Entries.data(), Entries.size() * sizeof(CasDiskIndexEntry), 0); +} + } // namespace zen |