diff options
| author | Stefan Boberg <[email protected]> | 2021-09-19 23:19:06 +0200 |
|---|---|---|
| committer | Stefan Boberg <[email protected]> | 2021-09-19 23:19:06 +0200 |
| commit | 3cf9dedfd08fe4d7a049e51b14a937f7a34afce3 (patch) | |
| tree | d68b5203818665f6f54aceea586f550dc6aadca4 /zenstore/compactcas.cpp | |
| parent | Added BasicFile::StreamFile helper function to support reading large files in... (diff) | |
| download | zen-3cf9dedfd08fe4d7a049e51b14a937f7a34afce3.tar.xz zen-3cf9dedfd08fe4d7a049e51b14a937f7a34afce3.zip | |
Implemented basic scrubbing / detection of disk corruption. Still needs more code to propagate errors and make adjustments to account for them in higher-level data structures.
Diffstat (limited to 'zenstore/compactcas.cpp')
| -rw-r--r-- | zenstore/compactcas.cpp | 93 |
1 files changed, 87 insertions, 6 deletions
diff --git a/zenstore/compactcas.cpp b/zenstore/compactcas.cpp index fd223e284..0f9349ab0 100644 --- a/zenstore/compactcas.cpp +++ b/zenstore/compactcas.cpp @@ -18,6 +18,14 @@ namespace zen { +CasContainerStrategy::CasContainerStrategy(const CasStoreConfiguration& Config) : m_Config(Config) +{ +} + +CasContainerStrategy::~CasContainerStrategy() +{ +} + void CasContainerStrategy::Initialize(const std::string_view ContainerBaseName, uint64_t Alignment, bool IsNewStore) { @@ -101,9 +109,8 @@ IoBuffer CasContainerStrategy::FindChunk(const IoHash& ChunkHash) { RwLock::SharedLockScope _(m_LocationMapLock); - auto KeyIt = m_LocationMap.find(ChunkHash); - if (KeyIt != m_LocationMap.end()) + if (auto KeyIt = m_LocationMap.find(ChunkHash); KeyIt != m_LocationMap.end()) { const CasDiskLocation& Location = KeyIt->second; return zen::IoBufferBuilder::MakeFromFileHandle(m_SmallObjectFile.Handle(), Location.Offset, Location.Size); @@ -118,9 +125,8 @@ bool CasContainerStrategy::HaveChunk(const IoHash& ChunkHash) { RwLock::SharedLockScope _(m_LocationMapLock); - auto KeyIt = m_LocationMap.find(ChunkHash); - if (KeyIt != m_LocationMap.end()) + if (auto KeyIt = m_LocationMap.find(ChunkHash); KeyIt != m_LocationMap.end()) { return true; } @@ -163,9 +169,84 @@ CasContainerStrategy::Flush() } void -CasContainerStrategy::Scrub() +CasContainerStrategy::Scrub(ScrubContext& Ctx) { - RwLock::SharedLockScope _(m_LocationMapLock); + const uint64_t WindowSize = 4 * 1024 * 1024; + uint64_t WindowStart = 0; + uint64_t WindowEnd = WindowSize; + const uint64_t FileSize = m_SmallObjectFile.FileSize(); + + std::vector<CasDiskIndexEntry> BigChunks; + std::vector<CasDiskIndexEntry> BadChunks; + + // We do a read sweep through the payloads file and validate + // any entries that are contained within each segment, with + // the assumption that most entries will be checked in this + // pass. An alternative strategy would be to use memory mapping. 
+ + { + IoBuffer ReadBuffer{WindowSize}; + void* BufferBase = ReadBuffer.MutableData(); + + RwLock::SharedLockScope _(m_LocationMapLock); + + do + { + const uint64_t ChunkSize = zen::Min(WindowSize, FileSize - WindowStart); + m_SmallObjectFile.Read(BufferBase, ChunkSize, WindowStart); + + for (auto& Entry : m_LocationMap) + { + const uint64_t EntryOffset = Entry.second.Offset; + + if ((EntryOffset >= WindowStart) && (EntryOffset < WindowEnd)) + { + const uint64_t EntryEnd = EntryOffset + Entry.second.Size; + + if (EntryEnd >= WindowEnd) + { + BigChunks.push_back({.Key = Entry.first, .Location = Entry.second}); + + continue; + } + + const IoHash ComputedHash = IoHash::HashBuffer(BufferBase, Entry.second.Size); + + if (Entry.first != ComputedHash) + { + // Hash mismatch + + BadChunks.push_back({.Key = Entry.first, .Location = Entry.second}); + } + } + } + + WindowStart += WindowSize; + WindowEnd += WindowSize; + } while (WindowStart < FileSize); + } + + // Deal with large chunks + + for (const CasDiskIndexEntry& Entry : BigChunks) + { + } + + // Deal with bad chunks by removing them from our lookup map + + std::vector<IoHash> BadChunkHashes; + + for (const CasDiskIndexEntry& Entry : BadChunks) + { + BadChunkHashes.push_back(Entry.Key); + m_LocationMap.erase(Entry.Key); + } + + // Let whomever it concerns know about the bad chunks. This could + // be used to invalidate higher level data structures more efficiently + // than a full validation pass might be able to do + + Ctx.ReportBadChunks(BadChunkHashes); } void |