aboutsummaryrefslogtreecommitdiff
path: root/zenstore/compactcas.cpp
diff options
context:
space:
mode:
authorStefan Boberg <[email protected]>2021-09-19 23:19:06 +0200
committerStefan Boberg <[email protected]>2021-09-19 23:19:06 +0200
commit3cf9dedfd08fe4d7a049e51b14a937f7a34afce3 (patch)
treed68b5203818665f6f54aceea586f550dc6aadca4 /zenstore/compactcas.cpp
parentAdded BasicFile::StreamFile helper function to support reading large files in... (diff)
downloadzen-3cf9dedfd08fe4d7a049e51b14a937f7a34afce3.tar.xz
zen-3cf9dedfd08fe4d7a049e51b14a937f7a34afce3.zip
Implemented basic scrubbing / detection of disk corruption. Still needs more code to propagate errors and make adjustments to account for them in higher-level data structures.
Diffstat (limited to 'zenstore/compactcas.cpp')
-rw-r--r--zenstore/compactcas.cpp93
1 files changed, 87 insertions, 6 deletions
diff --git a/zenstore/compactcas.cpp b/zenstore/compactcas.cpp
index fd223e284..0f9349ab0 100644
--- a/zenstore/compactcas.cpp
+++ b/zenstore/compactcas.cpp
@@ -18,6 +18,14 @@
namespace zen {
+// Binds the strategy to a store configuration. The only work done here is
+// initializing m_Config from the supplied Config.
+CasContainerStrategy::CasContainerStrategy(const CasStoreConfiguration& Config) : m_Config(Config)
+{
+}
+
+// No explicit cleanup is performed here; members are torn down by their
+// own destructors.
+CasContainerStrategy::~CasContainerStrategy() = default;
+
void
CasContainerStrategy::Initialize(const std::string_view ContainerBaseName, uint64_t Alignment, bool IsNewStore)
{
@@ -101,9 +109,8 @@ IoBuffer
CasContainerStrategy::FindChunk(const IoHash& ChunkHash)
{
RwLock::SharedLockScope _(m_LocationMapLock);
- auto KeyIt = m_LocationMap.find(ChunkHash);
- if (KeyIt != m_LocationMap.end())
+ if (auto KeyIt = m_LocationMap.find(ChunkHash); KeyIt != m_LocationMap.end())
{
const CasDiskLocation& Location = KeyIt->second;
return zen::IoBufferBuilder::MakeFromFileHandle(m_SmallObjectFile.Handle(), Location.Offset, Location.Size);
@@ -118,9 +125,8 @@ bool
CasContainerStrategy::HaveChunk(const IoHash& ChunkHash)
{
RwLock::SharedLockScope _(m_LocationMapLock);
- auto KeyIt = m_LocationMap.find(ChunkHash);
- if (KeyIt != m_LocationMap.end())
+ if (auto KeyIt = m_LocationMap.find(ChunkHash); KeyIt != m_LocationMap.end())
{
return true;
}
@@ -163,9 +169,84 @@ CasContainerStrategy::Flush()
}
void
-CasContainerStrategy::Scrub()
+// Verifies the payload file against the location map and drops corrupt entries.
+//
+// Every entry in m_LocationMap is re-hashed from the bytes stored in
+// m_SmallObjectFile and compared with its key. Entries whose hash does not
+// match, or whose byte range extends past the end of the file, are erased
+// from m_LocationMap and their keys are passed to Ctx.ReportBadChunks().
+// ReportBadChunks() is always called, with an empty list when nothing is wrong.
+//
+// Ctx - receives the hashes of all chunks found to be bad
+CasContainerStrategy::Scrub(ScrubContext& Ctx)
{
- RwLock::SharedLockScope _(m_LocationMapLock);
+    const uint64_t WindowSize = 4 * 1024 * 1024;
+    const uint64_t FileSize   = m_SmallObjectFile.FileSize();
+
+    std::vector<CasDiskIndexEntry> BigChunks;
+    std::vector<CasDiskIndexEntry> BadChunks;
+
+    // We do a read sweep through the payloads file and validate
+    // any entries that are contained within each segment, with
+    // the assumption that most entries will be checked in this
+    // pass. An alternative strategy would be to use memory mapping.
+
+    {
+        IoBuffer ReadBuffer{WindowSize};
+        void*    BufferBase = ReadBuffer.MutableData();
+
+        RwLock::SharedLockScope _(m_LocationMapLock);
+
+        for (uint64_t WindowStart = 0; WindowStart < FileSize; WindowStart += WindowSize)
+        {
+            // The last window may be shorter than WindowSize; WindowEnd marks
+            // the end of the bytes actually read so we never hash stale data
+            const uint64_t ChunkSize = zen::Min(WindowSize, FileSize - WindowStart);
+            const uint64_t WindowEnd = WindowStart + ChunkSize;
+
+            m_SmallObjectFile.Read(BufferBase, ChunkSize, WindowStart);
+
+            for (auto& Entry : m_LocationMap)
+            {
+                const uint64_t EntryOffset = Entry.second.Offset;
+
+                if ((EntryOffset < WindowStart) || (EntryOffset >= WindowEnd))
+                {
+                    continue;
+                }
+
+                const uint64_t EntryEnd = EntryOffset + Entry.second.Size;
+
+                if (EntryEnd > WindowEnd)
+                {
+                    // Not fully contained in the bytes we have in memory,
+                    // defer to the second pass below
+
+                    BigChunks.push_back({.Key = Entry.first, .Location = Entry.second});
+
+                    continue;
+                }
+
+                // Hash the entry's own bytes, which start at its offset
+                // relative to the beginning of the current window
+
+                const uint8_t* EntryData    = static_cast<const uint8_t*>(BufferBase) + (EntryOffset - WindowStart);
+                const IoHash   ComputedHash = IoHash::HashBuffer(EntryData, Entry.second.Size);
+
+                if (Entry.first != ComputedHash)
+                {
+                    // Hash mismatch
+
+                    BadChunks.push_back({.Key = Entry.first, .Location = Entry.second});
+                }
+            }
+        }
+    }
+
+    // Deal with large chunks, i.e. chunks which straddle a window boundary
+    // or reach beyond the end of the file. Each one is read and hashed
+    // individually
+
+    for (const CasDiskIndexEntry& Entry : BigChunks)
+    {
+        const uint64_t EntrySize = Entry.Location.Size;
+        const uint64_t EntryEnd  = Entry.Location.Offset + EntrySize;
+
+        if (EntryEnd > FileSize)
+        {
+            // The index refers to bytes which are not present in the file
+
+            BadChunks.push_back(Entry);
+
+            continue;
+        }
+
+        IoBuffer ChunkBuffer{EntrySize};
+        void*    ChunkData = ChunkBuffer.MutableData();
+
+        m_SmallObjectFile.Read(ChunkData, EntrySize, Entry.Location.Offset);
+
+        const IoHash ComputedHash = IoHash::HashBuffer(ChunkData, EntrySize);
+
+        if (Entry.Key != ComputedHash)
+        {
+            // Hash mismatch
+
+            BadChunks.push_back(Entry);
+        }
+    }
+
+    // Deal with bad chunks by removing them from our lookup map
+
+    std::vector<IoHash> BadChunkHashes;
+    BadChunkHashes.reserve(BadChunks.size());
+
+    {
+        // The map is mutated here, so shared access is not sufficient.
+        // NOTE(review): assumes RwLock provides ExclusiveLockScope as the
+        // writer-side counterpart of SharedLockScope - confirm
+
+        RwLock::ExclusiveLockScope _(m_LocationMapLock);
+
+        for (const CasDiskIndexEntry& Entry : BadChunks)
+        {
+            BadChunkHashes.push_back(Entry.Key);
+            m_LocationMap.erase(Entry.Key);
+        }
+    }
+
+    // Let whomever it concerns know about the bad chunks. This could
+    // be used to invalidate higher level data structures more efficiently
+    // than a full validation pass might be able to do
+
+    Ctx.ReportBadChunks(BadChunkHashes);
}
void