diff options
| author | Dan Engelbrecht <[email protected]> | 2023-05-09 14:50:41 +0200 |
|---|---|---|
| committer | GitHub <[email protected]> | 2023-05-09 14:50:41 +0200 |
| commit | 12058b79d7ba17902b90c3e777b10a8c4403fe5e (patch) | |
| tree | f776799cb817d8723e799dc1189bc12e5cca5ab4 /src | |
| parent | implemented thread-local activity tracking (diff) | |
| download | zen-12058b79d7ba17902b90c3e777b10a8c4403fe5e.tar.xz zen-12058b79d7ba17902b90c3e777b10a8c4403fe5e.zip | |
Validate that entries point inside valid blocks at startup (#280)
* Separate initialization of block store from pruning of unknown blocks
* Validate that entries point inside valid blocks
Diffstat (limited to 'src')
| -rw-r--r-- | src/zenserver/cache/structuredcachestore.cpp | 48 | ||||
| -rw-r--r-- | src/zenstore/blockstore.cpp | 79 | ||||
| -rw-r--r-- | src/zenstore/compactcas.cpp | 47 | ||||
| -rw-r--r-- | src/zenstore/include/zenstore/blockstore.h | 9 |
4 files changed, 137 insertions, 46 deletions
diff --git a/src/zenserver/cache/structuredcachestore.cpp b/src/zenserver/cache/structuredcachestore.cpp index 26e970073..99ca23407 100644 --- a/src/zenserver/cache/structuredcachestore.cpp +++ b/src/zenserver/cache/structuredcachestore.cpp @@ -1002,6 +1002,11 @@ ZenCacheDiskLayer::CacheBucket::OpenLog(const bool IsNew) fs::remove_all(m_BlocksBasePath); } + CreateDirectories(m_BucketDir); + + std::unordered_map<uint32_t, uint64_t> BlockSizes = + m_BlockStore.Initialize(m_BlocksBasePath, MaxBlockSize, BlockStoreDiskLocation::MaxBlockIndex + 1); + uint64_t LogEntryCount = 0; { uint32_t IndexVersion = 0; @@ -1023,12 +1028,11 @@ ZenCacheDiskLayer::CacheBucket::OpenLog(const bool IsNew) } } - CreateDirectories(m_BucketDir); - m_SlogFile.Open(LogPath, CasLogFile::Mode::kWrite); std::vector<BlockStoreLocation> KnownLocations; KnownLocations.reserve(m_Index.size()); + std::vector<DiskIndexEntry> BadEntries; for (const auto& Entry : m_Index) { size_t EntryIndex = Entry.second; @@ -1041,10 +1045,46 @@ ZenCacheDiskLayer::CacheBucket::OpenLog(const bool IsNew) continue; } const BlockStoreLocation& BlockLocation = Location.GetBlockLocation(m_PayloadAlignment); - KnownLocations.push_back(BlockLocation); + + auto BlockIt = BlockSizes.find(BlockLocation.BlockIndex); + if (BlockIt == BlockSizes.end()) + { + ZEN_WARN("Unknown block {} for entry {}", BlockLocation.BlockIndex, Entry.first.ToHexString()); + } + else + { + uint64_t BlockSize = BlockIt->second; + if (BlockLocation.Offset + BlockLocation.Size > BlockSize) + { + ZEN_WARN("Range is outside of block {} for entry {}", BlockLocation.BlockIndex, Entry.first.ToHexString()); + } + else + { + KnownLocations.push_back(BlockLocation); + continue; + } + } + + DiskLocation NewLocation = Payload.Location; + NewLocation.Flags |= DiskLocation::kTombStone; + BadEntries.push_back(DiskIndexEntry{.Key = Entry.first, .Location = NewLocation}); } - m_BlockStore.Initialize(m_BlocksBasePath, MaxBlockSize, BlockStoreDiskLocation::MaxBlockIndex + 
1, KnownLocations); + if (!BadEntries.empty()) + { + m_SlogFile.Append(BadEntries); + m_SlogFile.Flush(); + + LogEntryCount += BadEntries.size(); + + for (const DiskIndexEntry& BadEntry : BadEntries) + { + m_Index.erase(BadEntry.Key); + } + } + + m_BlockStore.Prune(KnownLocations); + if (IsNew || LogEntryCount > 0) { MakeIndexSnapshot(); diff --git a/src/zenstore/blockstore.cpp b/src/zenstore/blockstore.cpp index e19712c40..05bc69fcb 100644 --- a/src/zenstore/blockstore.cpp +++ b/src/zenstore/blockstore.cpp @@ -118,28 +118,19 @@ BlockStoreFile::StreamByteRange(uint64_t FileOffset, uint64_t Size, std::functio constexpr uint64_t ScrubSmallChunkWindowSize = 4 * 1024 * 1024; -void -BlockStore::Initialize(const std::filesystem::path& BlocksBasePath, - uint64_t MaxBlockSize, - uint64_t MaxBlockCount, - const std::vector<BlockStoreLocation>& KnownLocations) +std::unordered_map<uint32_t, uint64_t> +BlockStore::Initialize(const std::filesystem::path& BlocksBasePath, uint64_t MaxBlockSize, uint64_t MaxBlockCount) { ZEN_ASSERT(MaxBlockSize > 0); ZEN_ASSERT(MaxBlockCount > 0); ZEN_ASSERT(IsPow2(MaxBlockCount)); + std::unordered_map<uint32_t, uint64_t> FoundBlocks; + m_TotalSize = 0; m_BlocksBasePath = BlocksBasePath; m_MaxBlockSize = MaxBlockSize; - m_ChunkBlocks.clear(); - - std::unordered_set<uint32_t> KnownBlocks; - for (const auto& Entry : KnownLocations) - { - KnownBlocks.insert(Entry.BlockIndex); - } - if (std::filesystem::is_directory(m_BlocksBasePath)) { std::vector<std::filesystem::path> FoldersToScan; @@ -168,23 +159,11 @@ BlockStore::Initialize(const std::filesystem::path& BlocksBasePath, { continue; } - if (!KnownBlocks.contains(BlockIndex)) - { - // Log removing unreferenced block - // Clear out unused blocks - ZEN_DEBUG("removing unused block at '{}'", Path); - std::error_code Ec; - std::filesystem::remove(Path, Ec); - if (Ec) - { - ZEN_WARN("Failed to delete file '{}' reason: '{}'", Path, Ec.message()); - } - continue; - } Ref<BlockStoreFile> BlockFile{new 
BlockStoreFile(Path)}; BlockFile->Open(); m_TotalSize.fetch_add(BlockFile->FileSize(), std::memory_order::relaxed); m_ChunkBlocks[BlockIndex] = BlockFile; + FoundBlocks[BlockIndex] = BlockFile->FileSize(); } } ++FolderOffset; @@ -194,6 +173,39 @@ BlockStore::Initialize(const std::filesystem::path& BlocksBasePath, { CreateDirectories(m_BlocksBasePath); } + return FoundBlocks; +} + +void +BlockStore::Prune(const std::vector<BlockStoreLocation>& KnownLocations) +{ + RwLock::ExclusiveLockScope InsertLock(m_InsertLock); + + std::unordered_set<uint32_t> KnownBlocks; + for (const auto& Entry : KnownLocations) + { + KnownBlocks.insert(Entry.BlockIndex); + } + std::vector<uint32_t> BlocksToDelete; + for (auto It = m_ChunkBlocks.begin(); It != m_ChunkBlocks.end(); ++It) + { + uint32_t BlockIndex = It->first; + if (!KnownBlocks.contains(BlockIndex)) + { + Ref<BlockStoreFile> BlockFile = m_ChunkBlocks[BlockIndex]; + m_TotalSize.fetch_add(BlockFile->FileSize(), std::memory_order::relaxed); + BlocksToDelete.push_back(BlockIndex); + } + } + + for (uint32_t BlockIndex : BlocksToDelete) + { + // Clear out unused blocks + Ref<BlockStoreFile> BlockFile = m_ChunkBlocks[BlockIndex]; + m_ChunkBlocks.erase(BlockIndex); + ZEN_DEBUG("marking block store file '{}' for delete, block #{}", BlockFile->GetPath(), BlockIndex); + BlockFile->MarkAsDeleteOnClose(); + } } void @@ -939,7 +951,7 @@ TEST_CASE("blockstore.chunks") auto RootDirectory = TempDir.Path(); BlockStore Store; - Store.Initialize(RootDirectory, 128, 1024, {}); + Store.Initialize(RootDirectory, 128, 1024); IoBuffer BadChunk = Store.TryGetChunk({.BlockIndex = 0, .Offset = 0, .Size = 512}); CHECK(!BadChunk); @@ -969,7 +981,7 @@ TEST_CASE("blockstore.clean.stray.blocks") auto RootDirectory = TempDir.Path(); BlockStore Store; - Store.Initialize(RootDirectory / "store", 128, 1024, {}); + Store.Initialize(RootDirectory / "store", 128, 1024); std::string FirstChunkData = "This is the data of the first chunk that we will write"; 
BlockStoreLocation FirstChunkLocation = WriteStringAsChunk(Store, FirstChunkData, 4); @@ -982,7 +994,8 @@ TEST_CASE("blockstore.clean.stray.blocks") Store.Close(); // Not referencing the second block means that we should be deleted - Store.Initialize(RootDirectory / "store", 128, 1024, {FirstChunkLocation, SecondChunkLocation}); + Store.Initialize(RootDirectory / "store", 128, 1024); + Store.Prune({FirstChunkLocation, SecondChunkLocation}); CHECK(GetDirectoryContent(RootDirectory / "store", true, false).size() == 1); } @@ -995,7 +1008,7 @@ TEST_CASE("blockstore.flush.forces.new.block") auto RootDirectory = TempDir.Path(); BlockStore Store; - Store.Initialize(RootDirectory / "store", 128, 1024, {}); + Store.Initialize(RootDirectory / "store", 128, 1024); std::string FirstChunkData = "This is the data of the first chunk that we will write"; WriteStringAsChunk(Store, FirstChunkData, 4); @@ -1018,7 +1031,7 @@ TEST_CASE("blockstore.iterate.chunks") auto RootDirectory = TempDir.Path(); BlockStore Store; - Store.Initialize(RootDirectory / "store", ScrubSmallChunkWindowSize * 2, 1024, {}); + Store.Initialize(RootDirectory / "store", ScrubSmallChunkWindowSize * 2, 1024); IoBuffer BadChunk = Store.TryGetChunk({.BlockIndex = 0, .Offset = 0, .Size = 512}); CHECK(!BadChunk); @@ -1114,7 +1127,7 @@ TEST_CASE("blockstore.reclaim.space") auto RootDirectory = TempDir.Path(); BlockStore Store; - Store.Initialize(RootDirectory / "store", 512, 1024, {}); + Store.Initialize(RootDirectory / "store", 512, 1024); constexpr size_t ChunkCount = 200; constexpr size_t Alignment = 8; @@ -1231,7 +1244,7 @@ TEST_CASE("blockstore.thread.read.write") auto RootDirectory = TempDir.Path(); BlockStore Store; - Store.Initialize(RootDirectory / "store", 1088, 1024, {}); + Store.Initialize(RootDirectory / "store", 1088, 1024); constexpr size_t ChunkCount = 1000; constexpr size_t Alignment = 8; diff --git a/src/zenstore/compactcas.cpp b/src/zenstore/compactcas.cpp index 7b2c21b0f..2974570e5 100644 --- 
a/src/zenstore/compactcas.cpp +++ b/src/zenstore/compactcas.cpp @@ -701,23 +701,60 @@ CasContainerStrategy::OpenContainer(bool IsNewStore) std::filesystem::remove_all(BasePath); } + CreateDirectories(BasePath); + + std::unordered_map<uint32_t, uint64_t> BlockSizes = + m_BlockStore.Initialize(m_BlocksBasePath, m_MaxBlockSize, BlockStoreDiskLocation::MaxBlockIndex + 1); + m_LogFlushPosition = ReadIndexFile(); uint64_t LogEntryCount = ReadLog(m_LogFlushPosition); - CreateDirectories(BasePath); - std::filesystem::path LogPath = GetLogPath(m_RootDirectory, m_ContainerBaseName); m_CasLog.Open(LogPath, CasLogFile::Mode::kWrite); std::vector<BlockStoreLocation> KnownLocations; KnownLocations.reserve(m_LocationMap.size()); + std::vector<CasDiskIndexEntry> BadEntries; for (const auto& Entry : m_LocationMap) { - const BlockStoreDiskLocation& Location = Entry.second; - KnownLocations.push_back(Location.Get(m_PayloadAlignment)); + const BlockStoreDiskLocation& DiskLocation = Entry.second; + auto BlockIt = BlockSizes.find(DiskLocation.GetBlockIndex()); + if (BlockIt == BlockSizes.end()) + { + ZEN_WARN("Unknown block {} for entry {}", DiskLocation.GetBlockIndex(), Entry.first.ToHexString()); + } + else + { + BlockStoreLocation BlockLocation = DiskLocation.Get(m_PayloadAlignment); + + uint64_t BlockSize = BlockIt->second; + if (BlockLocation.Offset + BlockLocation.Size > BlockSize) + { + ZEN_WARN("Range is outside of block {} for entry {}", BlockLocation.BlockIndex, Entry.first.ToHexString()); + } + else + { + KnownLocations.emplace_back(std::move(BlockLocation)); + continue; + } + BadEntries.push_back({.Key = Entry.first, .Location = DiskLocation, .Flags = CasDiskIndexEntry::kTombstone}); + } + } + + if (!BadEntries.empty()) + { + m_CasLog.Append(BadEntries); + m_CasLog.Flush(); + + LogEntryCount += BadEntries.size(); + + for (const CasDiskIndexEntry& BadEntry : BadEntries) + { + m_LocationMap.erase(BadEntry.Key); + } } - m_BlockStore.Initialize(m_BlocksBasePath, m_MaxBlockSize, 
BlockStoreDiskLocation::MaxBlockIndex + 1, KnownLocations); + m_BlockStore.Prune(KnownLocations); if (IsNewStore || (LogEntryCount > 0)) { diff --git a/src/zenstore/include/zenstore/blockstore.h b/src/zenstore/include/zenstore/blockstore.h index 857ccae38..738510cac 100644 --- a/src/zenstore/include/zenstore/blockstore.h +++ b/src/zenstore/include/zenstore/blockstore.h @@ -124,10 +124,11 @@ public: typedef std::function<void(size_t ChunkIndex, BlockStoreFile& File, uint64_t Offset, uint64_t Size)> IterateChunksLargeSizeCallback; typedef std::function<void(const BlockStoreLocation& Location)> WriteChunkCallback; - void Initialize(const std::filesystem::path& BlocksBasePath, - uint64_t MaxBlockSize, - uint64_t MaxBlockCount, - const std::vector<BlockStoreLocation>& KnownLocations); + std::unordered_map<uint32_t, uint64_t> Initialize(const std::filesystem::path& BlocksBasePath, + uint64_t MaxBlockSize, + uint64_t MaxBlockCount); + + void Prune(const std::vector<BlockStoreLocation>& KnownLocations); void Close(); void WriteChunk(const void* Data, uint64_t Size, uint64_t Alignment, const WriteChunkCallback& Callback); |