aboutsummaryrefslogtreecommitdiff
path: root/zenstore/compactcas.cpp
diff options
context:
space:
mode:
author: Dan Engelbrecht <[email protected]> 2022-05-01 23:31:35 +0200
committer: Dan Engelbrecht <[email protected]> 2022-05-01 23:31:35 +0200
commit: 08a0dc388f98e6d3eb8387b983a9a7fb959fe603 (patch)
tree: 5f4bef8e81e55e22f65690250c84d943098c738b /zenstore/compactcas.cpp
parent: remove m_TotalSize for blockstore (diff)
download: zen-08a0dc388f98e6d3eb8387b983a9a7fb959fe603.tar.xz
zen-08a0dc388f98e6d3eb8387b983a9a7fb959fe603.zip
reimplement CasContainerStrategy::Scrub
Diffstat (limited to 'zenstore/compactcas.cpp')
-rw-r--r-- zenstore/compactcas.cpp 132
1 files changed, 54 insertions, 78 deletions
diff --git a/zenstore/compactcas.cpp b/zenstore/compactcas.cpp
index 30747f554..a6e617474 100644
--- a/zenstore/compactcas.cpp
+++ b/zenstore/compactcas.cpp
@@ -338,84 +338,56 @@ void
CasContainerStrategy::Scrub(ScrubContext& Ctx)
{
ZEN_UNUSED(Ctx);
-#if 0
- std::vector<CasDiskIndexEntry> BadChunks;
- // We do a read sweep through the payloads file and validate
- // any entries that are contained within each segment, with
- // the assumption that most entries will be checked in this
- // pass. An alternative strategy would be to use memory mapping.
+ RwLock::SharedLockScope _(m_LocationMapLock);
+ uint64_t TotalChunkCount = m_LocationMap.size();
+ std::vector<BlockStoreLocation> ChunkLocations;
+ std::vector<IoHash> ChunkIndexToChunkHash;
+ ChunkLocations.reserve(TotalChunkCount);
+ ChunkIndexToChunkHash.resize(TotalChunkCount); // resize, not reserve: slots are assigned by index in the loop below and read back by index in the IterateChunks callbacks
{
- std::vector<CasDiskIndexEntry> BigChunks;
- const uint64_t WindowSize = 4 * 1024 * 1024;
- IoBuffer ReadBuffer{WindowSize};
- void* BufferBase = ReadBuffer.MutableData();
-
- RwLock::SharedLockScope _(m_InsertLock); // TODO: Refactor so we don't have to keep m_InsertLock all the time?
- RwLock::SharedLockScope __(m_LocationMapLock);
-
- for (const auto& Block : m_ChunkBlocks)
+ for (const auto& Entry : m_LocationMap)
{
- uint64_t WindowStart = 0;
- uint64_t WindowEnd = WindowSize;
- const Ref<BlockStoreFile>& BlockFile = Block.second;
- BlockFile->Open();
- const uint64_t FileSize = BlockFile->FileSize();
+ const IoHash& ChunkHash = Entry.first;
+ const BlockStoreDiskLocation& DiskLocation = Entry.second;
+ BlockStoreLocation Location = DiskLocation.Get(m_PayloadAlignment);
+ size_t ChunkIndex = ChunkLocations.size();
- do
- {
- const uint64_t ChunkSize = Min(WindowSize, FileSize - WindowStart);
- BlockFile->Read(BufferBase, ChunkSize, WindowStart);
-
- for (auto& Entry : m_LocationMap)
- {
- const BlockStoreLocation Location = Entry.second.Get(m_PayloadAlignment);
- const uint64_t EntryOffset = Location.Offset;
-
- if ((EntryOffset >= WindowStart) && (EntryOffset < WindowEnd))
- {
- const uint64_t EntryEnd = EntryOffset + Location.Size;
-
- if (EntryEnd >= WindowEnd)
- {
- BigChunks.push_back({.Key = Entry.first, .Location = Entry.second});
-
- continue;
- }
-
- const IoHash ComputedHash =
- IoHash::HashBuffer(reinterpret_cast<uint8_t*>(BufferBase) + Location.Offset - WindowStart, Location.Size);
-
- if (Entry.first != ComputedHash)
- {
- // Hash mismatch
- BadChunks.push_back({.Key = Entry.first, .Location = Entry.second, .Flags = CasDiskIndexEntry::kTombstone});
- }
- }
- }
-
- WindowStart += WindowSize;
- WindowEnd += WindowSize;
- } while (WindowStart < FileSize);
+ ChunkLocations.push_back(Location);
+ ChunkIndexToChunkHash[ChunkIndex] = ChunkHash;
}
+ }
- // Deal with large chunks
+ std::vector<IoHash> BadChunks;
- for (const CasDiskIndexEntry& Entry : BigChunks)
- {
- IoHashStream Hasher;
- const BlockStoreLocation Location = Entry.Location.Get(m_PayloadAlignment);
- const Ref<BlockStoreFile>& BlockFile = m_ChunkBlocks[Location.BlockIndex];
- BlockFile->StreamByteRange(Location.Offset, Location.Size, [&](const void* Data, uint64_t Size) { Hasher.Append(Data, Size); });
- IoHash ComputedHash = Hasher.GetHash();
+ // We do a read sweep through the payloads file and validate
+ // any entries that are contained within each segment, with
+ // the assumption that most entries will be checked in this
+ // pass. An alternative strategy would be to use memory mapping.
- if (Entry.Key != ComputedHash)
+ m_BlockStore.IterateChunks(
+ ChunkLocations,
+ [&ChunkIndexToChunkHash, &BadChunks](size_t ChunkIndex, const void* Data, uint64_t Size) {
+ const IoHash ComputedHash = IoHash::HashBuffer(Data, Size);
+ const IoHash& ExpectedHash = ChunkIndexToChunkHash[ChunkIndex];
+ if (ComputedHash != ExpectedHash)
{
- BadChunks.push_back({.Key = Entry.Key, .Location = Entry.Location, .Flags = CasDiskIndexEntry::kTombstone});
+ // Hash mismatch
+ BadChunks.push_back(ExpectedHash);
}
- }
- }
+ },
+ [&ChunkIndexToChunkHash, &BadChunks](size_t ChunkIndex, BasicFile& BlockFile, uint64_t Offset, uint64_t Size) {
+ IoHashStream Hasher;
+ BlockFile.StreamByteRange(Offset, Size, [&](const void* Data, uint64_t Size) { Hasher.Append(Data, Size); });
+ IoHash ComputedHash = Hasher.GetHash();
+ const IoHash& ExpectedHash = ChunkIndexToChunkHash[ChunkIndex];
+ if (ComputedHash != ExpectedHash)
+ {
+ // Hash mismatch
+ BadChunks.push_back(ExpectedHash);
+ }
+ });
if (BadChunks.empty())
{
@@ -424,26 +396,31 @@ CasContainerStrategy::Scrub(ScrubContext& Ctx)
ZEN_ERROR("Scrubbing found {} bad chunks in '{}'", BadChunks.size(), m_Config.RootDirectory / m_ContainerBaseName);
+ _.ReleaseNow();
// Deal with bad chunks by removing them from our lookup map
- std::vector<IoHash> BadChunkHashes;
- BadChunkHashes.reserve(BadChunks.size());
-
- m_CasLog.Append(BadChunks);
+ std::vector<CasDiskIndexEntry> LogEntries;
+ LogEntries.reserve(BadChunks.size());
{
- RwLock::ExclusiveLockScope _(m_LocationMapLock);
- for (const CasDiskIndexEntry& Entry : BadChunks)
+ RwLock::ExclusiveLockScope __(m_LocationMapLock);
+ for (const IoHash& ChunkHash : BadChunks)
{
- BadChunkHashes.push_back(Entry.Key);
- m_LocationMap.erase(Entry.Key);
+ const auto KeyIt = m_LocationMap.find(ChunkHash);
+ if (KeyIt == m_LocationMap.end())
+ {
+ // Might have been GC'd
+ continue;
+ }
+ LogEntries.push_back({.Key = KeyIt->first, .Location = KeyIt->second, .Flags = CasDiskIndexEntry::kTombstone});
+ m_LocationMap.erase(KeyIt);
}
}
+ m_CasLog.Append(LogEntries);
// Let whomever it concerns know about the bad chunks. This could
// be used to invalidate higher level data structures more efficiently
// than a full validation pass might be able to do
- Ctx.ReportBadCasChunks(BadChunkHashes);
-#endif // 0
+ Ctx.ReportBadCasChunks(BadChunks);
}
void
@@ -481,7 +458,6 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx)
std::vector<size_t> KeepChunkIndexes;
std::vector<IoHash> ChunkIndexToChunkHash;
ChunkLocations.reserve(TotalChunkCount);
- ChunkLocations.reserve(TotalChunkCount);
ChunkIndexToChunkHash.reserve(TotalChunkCount);
GcCtx.FilterCas(TotalChunkHashes, [&](const IoHash& ChunkHash, bool Keep) {