diff options
| author | Dan Engelbrecht <[email protected]> | 2022-05-01 23:31:35 +0200 |
|---|---|---|
| committer | Dan Engelbrecht <[email protected]> | 2022-05-01 23:31:35 +0200 |
| commit | 08a0dc388f98e6d3eb8387b983a9a7fb959fe603 (patch) | |
| tree | 5f4bef8e81e55e22f65690250c84d943098c738b /zenstore/compactcas.cpp | |
| parent | remove m_TotalSize for blockstore (diff) | |
| download | zen-08a0dc388f98e6d3eb8387b983a9a7fb959fe603.tar.xz zen-08a0dc388f98e6d3eb8387b983a9a7fb959fe603.zip | |
reimplement CasContainerStrategy::Scrub
Diffstat (limited to 'zenstore/compactcas.cpp')
| -rw-r--r-- | zenstore/compactcas.cpp | 132 |
1 file changed, 54 insertions, 78 deletions
diff --git a/zenstore/compactcas.cpp b/zenstore/compactcas.cpp index 30747f554..a6e617474 100644 --- a/zenstore/compactcas.cpp +++ b/zenstore/compactcas.cpp @@ -338,84 +338,56 @@ void CasContainerStrategy::Scrub(ScrubContext& Ctx) { ZEN_UNUSED(Ctx); -#if 0 - std::vector<CasDiskIndexEntry> BadChunks; - // We do a read sweep through the payloads file and validate - // any entries that are contained within each segment, with - // the assumption that most entries will be checked in this - // pass. An alternative strategy would be to use memory mapping. + RwLock::SharedLockScope _(m_LocationMapLock); + uint64_t TotalChunkCount = m_LocationMap.size(); + std::vector<BlockStoreLocation> ChunkLocations; + std::vector<IoHash> ChunkIndexToChunkHash; + ChunkLocations.reserve(TotalChunkCount); + ChunkIndexToChunkHash.reserve(TotalChunkCount); { - std::vector<CasDiskIndexEntry> BigChunks; - const uint64_t WindowSize = 4 * 1024 * 1024; - IoBuffer ReadBuffer{WindowSize}; - void* BufferBase = ReadBuffer.MutableData(); - - RwLock::SharedLockScope _(m_InsertLock); // TODO: Refactor so we don't have to keep m_InsertLock all the time? 
- RwLock::SharedLockScope __(m_LocationMapLock); - - for (const auto& Block : m_ChunkBlocks) + for (const auto& Entry : m_LocationMap) { - uint64_t WindowStart = 0; - uint64_t WindowEnd = WindowSize; - const Ref<BlockStoreFile>& BlockFile = Block.second; - BlockFile->Open(); - const uint64_t FileSize = BlockFile->FileSize(); + const IoHash& ChunkHash = Entry.first; + const BlockStoreDiskLocation& DiskLocation = Entry.second; + BlockStoreLocation Location = DiskLocation.Get(m_PayloadAlignment); + size_t ChunkIndex = ChunkLocations.size(); - do - { - const uint64_t ChunkSize = Min(WindowSize, FileSize - WindowStart); - BlockFile->Read(BufferBase, ChunkSize, WindowStart); - - for (auto& Entry : m_LocationMap) - { - const BlockStoreLocation Location = Entry.second.Get(m_PayloadAlignment); - const uint64_t EntryOffset = Location.Offset; - - if ((EntryOffset >= WindowStart) && (EntryOffset < WindowEnd)) - { - const uint64_t EntryEnd = EntryOffset + Location.Size; - - if (EntryEnd >= WindowEnd) - { - BigChunks.push_back({.Key = Entry.first, .Location = Entry.second}); - - continue; - } - - const IoHash ComputedHash = - IoHash::HashBuffer(reinterpret_cast<uint8_t*>(BufferBase) + Location.Offset - WindowStart, Location.Size); - - if (Entry.first != ComputedHash) - { - // Hash mismatch - BadChunks.push_back({.Key = Entry.first, .Location = Entry.second, .Flags = CasDiskIndexEntry::kTombstone}); - } - } - } - - WindowStart += WindowSize; - WindowEnd += WindowSize; - } while (WindowStart < FileSize); + ChunkLocations.push_back(Location); + ChunkIndexToChunkHash[ChunkIndex] = ChunkHash; } + } - // Deal with large chunks + std::vector<IoHash> BadChunks; - for (const CasDiskIndexEntry& Entry : BigChunks) - { - IoHashStream Hasher; - const BlockStoreLocation Location = Entry.Location.Get(m_PayloadAlignment); - const Ref<BlockStoreFile>& BlockFile = m_ChunkBlocks[Location.BlockIndex]; - BlockFile->StreamByteRange(Location.Offset, Location.Size, [&](const void* Data, uint64_t Size) 
{ Hasher.Append(Data, Size); }); - IoHash ComputedHash = Hasher.GetHash(); + // We do a read sweep through the payloads file and validate + // any entries that are contained within each segment, with + // the assumption that most entries will be checked in this + // pass. An alternative strategy would be to use memory mapping. - if (Entry.Key != ComputedHash) + m_BlockStore.IterateChunks( + ChunkLocations, + [&ChunkIndexToChunkHash, &BadChunks](size_t ChunkIndex, const void* Data, uint64_t Size) { + const IoHash ComputedHash = IoHash::HashBuffer(Data, Size); + const IoHash& ExpectedHash = ChunkIndexToChunkHash[ChunkIndex]; + if (ComputedHash != ExpectedHash) { - BadChunks.push_back({.Key = Entry.Key, .Location = Entry.Location, .Flags = CasDiskIndexEntry::kTombstone}); + // Hash mismatch + BadChunks.push_back(ExpectedHash); } - } - } + }, + [&ChunkIndexToChunkHash, &BadChunks](size_t ChunkIndex, BasicFile& BlockFile, uint64_t Offset, uint64_t Size) { + IoHashStream Hasher; + BlockFile.StreamByteRange(Offset, Size, [&](const void* Data, uint64_t Size) { Hasher.Append(Data, Size); }); + IoHash ComputedHash = Hasher.GetHash(); + const IoHash& ExpectedHash = ChunkIndexToChunkHash[ChunkIndex]; + if (ComputedHash != ExpectedHash) + { + // Hash mismatch + BadChunks.push_back(ExpectedHash); + } + }); if (BadChunks.empty()) { @@ -424,26 +396,31 @@ CasContainerStrategy::Scrub(ScrubContext& Ctx) ZEN_ERROR("Scrubbing found {} bad chunks in '{}'", BadChunks.size(), m_Config.RootDirectory / m_ContainerBaseName); + _.ReleaseNow(); // Deal with bad chunks by removing them from our lookup map - std::vector<IoHash> BadChunkHashes; - BadChunkHashes.reserve(BadChunks.size()); - - m_CasLog.Append(BadChunks); + std::vector<CasDiskIndexEntry> LogEntries; + LogEntries.reserve(BadChunks.size()); { - RwLock::ExclusiveLockScope _(m_LocationMapLock); - for (const CasDiskIndexEntry& Entry : BadChunks) + RwLock::ExclusiveLockScope __(m_LocationMapLock); + for (const IoHash& ChunkHash : 
BadChunks) { - BadChunkHashes.push_back(Entry.Key); - m_LocationMap.erase(Entry.Key); + const auto KeyIt = m_LocationMap.find(ChunkHash); + if (KeyIt == m_LocationMap.end()) + { + // Might have been GC'd + continue; + } + LogEntries.push_back({.Key = KeyIt->first, .Location = KeyIt->second, .Flags = CasDiskIndexEntry::kTombstone}); + m_LocationMap.erase(KeyIt); } } + m_CasLog.Append(LogEntries); // Let whomever it concerns know about the bad chunks. This could // be used to invalidate higher level data structures more efficiently // than a full validation pass might be able to do - Ctx.ReportBadCasChunks(BadChunkHashes); -#endif // 0 + Ctx.ReportBadCasChunks(BadChunks); } void @@ -481,7 +458,6 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx) std::vector<size_t> KeepChunkIndexes; std::vector<IoHash> ChunkIndexToChunkHash; ChunkLocations.reserve(TotalChunkCount); - ChunkLocations.reserve(TotalChunkCount); ChunkIndexToChunkHash.reserve(TotalChunkCount); GcCtx.FilterCas(TotalChunkHashes, [&](const IoHash& ChunkHash, bool Keep) { |