diff options
| author | Stefan Boberg <[email protected]> | 2022-06-16 15:42:17 +0200 |
|---|---|---|
| committer | Stefan Boberg <[email protected]> | 2022-06-16 15:42:17 +0200 |
| commit | b8797a647406d31ebfd137a9ae07819ccf332a10 (patch) | |
| tree | b57dcb1443c817577e1c9f8e10a35837e1d85389 /zenstore/blockstore.cpp | |
| parent | asio: added some context to error reporting (diff) | |
| download | zen-b8797a647406d31ebfd137a9ae07819ccf332a10.tar.xz zen-b8797a647406d31ebfd137a9ae07819ccf332a10.zip | |
Note: this commit was merged from the main branch.
Diffstat (limited to 'zenstore/blockstore.cpp')
| -rw-r--r-- | zenstore/blockstore.cpp | 225 |
1 file changed, 151 insertions(+), 74 deletions(-)
diff --git a/zenstore/blockstore.cpp b/zenstore/blockstore.cpp index 4e61c23cf..88592d785 100644 --- a/zenstore/blockstore.cpp +++ b/zenstore/blockstore.cpp @@ -7,12 +7,13 @@ #include <zencore/scopeguard.h> #include <zencore/timer.h> +#include <algorithm> + #if ZEN_WITH_TESTS # include <zencore/compactbinarybuilder.h> # include <zencore/testing.h> # include <zencore/testutils.h> # include <zencore/workthreadpool.h> -# include <algorithm> # include <random> #endif @@ -208,7 +209,7 @@ BlockStore::Close() } void -BlockStore::WriteChunk(const void* Data, uint64_t Size, uint64_t Alignment, WriteChunkCallback Callback) +BlockStore::WriteChunk(const void* Data, uint64_t Size, uint64_t Alignment, const WriteChunkCallback& Callback) { ZEN_ASSERT(Data != nullptr); ZEN_ASSERT(Size > 0u); @@ -612,74 +613,108 @@ BlockStore::ReclaimSpace(const ReclaimSnapshotState& Snapshot, void BlockStore::IterateChunks(const std::vector<BlockStoreLocation>& ChunkLocations, - IterateChunksSmallSizeCallback SmallSizeCallback, - IterateChunksLargeSizeCallback LargeSizeCallback) + const IterateChunksSmallSizeCallback& SmallSizeCallback, + const IterateChunksLargeSizeCallback& LargeSizeCallback) { - // We do a read sweep through the payloads file and validate - // any entries that are contained within each segment, with - // the assumption that most entries will be checked in this - // pass. An alternative strategy would be to use memory mapping. 
- + std::vector<size_t> LocationIndexes; + LocationIndexes.reserve(ChunkLocations.size()); + for (size_t ChunkIndex = 0; ChunkIndex < ChunkLocations.size(); ++ChunkIndex) { - ChunkIndexArray BigChunks; - IoBuffer ReadBuffer{ScrubSmallChunkWindowSize}; - void* BufferBase = ReadBuffer.MutableData(); - - RwLock::SharedLockScope _(m_InsertLock); - - for (const auto& Block : m_ChunkBlocks) + LocationIndexes.push_back(ChunkIndex); + } + std::sort(LocationIndexes.begin(), LocationIndexes.end(), [&](size_t IndexA, size_t IndexB) -> bool { + const BlockStoreLocation& LocationA = ChunkLocations[IndexA]; + const BlockStoreLocation& LocationB = ChunkLocations[IndexB]; + if (LocationA.BlockIndex < LocationB.BlockIndex) { - uint64_t WindowStart = 0; - uint64_t WindowEnd = ScrubSmallChunkWindowSize; - uint32_t BlockIndex = Block.first; - const Ref<BlockStoreFile>& BlockFile = Block.second; - const uint64_t FileSize = BlockFile->FileSize(); - - do - { - const uint64_t ChunkSize = Min(ScrubSmallChunkWindowSize, FileSize - WindowStart); - BlockFile->Read(BufferBase, ChunkSize, WindowStart); + return true; + } + else if (LocationA.BlockIndex > LocationB.BlockIndex) + { + return false; + } + return LocationA.Offset < LocationB.Offset; + }); - // TODO: We could be smarter here if the ChunkLocations were sorted on block index - we could - // then only scan a subset of ChunkLocations instead of scanning through them all... 
- for (size_t ChunkIndex = 0; ChunkIndex < ChunkLocations.size(); ++ChunkIndex) - { - const BlockStoreLocation Location = ChunkLocations[ChunkIndex]; - if (BlockIndex != Location.BlockIndex) - { - continue; - } + IoBuffer ReadBuffer{ScrubSmallChunkWindowSize}; + void* BufferBase = ReadBuffer.MutableData(); - const uint64_t EntryOffset = Location.Offset; - if ((EntryOffset >= WindowStart) && (EntryOffset < WindowEnd)) - { - const uint64_t EntryEnd = EntryOffset + Location.Size; + RwLock::SharedLockScope _(m_InsertLock); - if (EntryEnd >= WindowEnd) - { - BigChunks.push_back(ChunkIndex); + auto GetNextRange = [&](size_t StartIndexOffset) { + size_t ChunkCount = 0; + size_t StartIndex = LocationIndexes[StartIndexOffset]; + const BlockStoreLocation& StartLocation = ChunkLocations[StartIndex]; + uint64_t StartOffset = StartLocation.Offset; + while (StartIndexOffset + ChunkCount < LocationIndexes.size()) + { + size_t NextIndex = LocationIndexes[StartIndexOffset + ChunkCount]; + const BlockStoreLocation& Location = ChunkLocations[NextIndex]; + if (Location.BlockIndex != StartLocation.BlockIndex) + { + break; + } + if ((Location.Offset + Location.Size) - StartOffset > ScrubSmallChunkWindowSize) + { + break; + } + ++ChunkCount; + } + return ChunkCount; + }; - continue; - } + size_t LocationIndexOffset = 0; + while (LocationIndexOffset < LocationIndexes.size()) + { + size_t ChunkIndex = LocationIndexes[LocationIndexOffset]; + const BlockStoreLocation& FirstLocation = ChunkLocations[ChunkIndex]; - SmallSizeCallback(ChunkIndex, - reinterpret_cast<uint8_t*>(BufferBase) + Location.Offset - WindowStart, - Location.Size); - } + const Ref<BlockStoreFile>& BlockFile = m_ChunkBlocks[FirstLocation.BlockIndex]; + if (!BlockFile) + { + while (ChunkLocations[ChunkIndex].BlockIndex == FirstLocation.BlockIndex) + { + SmallSizeCallback(ChunkIndex, nullptr, 0); + LocationIndexOffset++; + if (LocationIndexOffset == LocationIndexes.size()) + { + break; } - - WindowStart += 
ScrubSmallChunkWindowSize; - WindowEnd += ScrubSmallChunkWindowSize; - } while (WindowStart < FileSize); + ChunkIndex = LocationIndexes[LocationIndexOffset]; + } + continue; } - - // Deal with large chunks and chunks that extend over a ScrubSmallChunkWindowSize border - for (size_t ChunkIndex : BigChunks) + size_t BlockSize = BlockFile->FileSize(); + size_t RangeCount = GetNextRange(LocationIndexOffset); + if (RangeCount > 0) { - const BlockStoreLocation Location = ChunkLocations[ChunkIndex]; - const Ref<BlockStoreFile>& BlockFile = m_ChunkBlocks[Location.BlockIndex]; - LargeSizeCallback(ChunkIndex, BlockFile, Location.Offset, Location.Size); + size_t LastChunkIndex = LocationIndexes[LocationIndexOffset + RangeCount - 1]; + const BlockStoreLocation& LastLocation = ChunkLocations[LastChunkIndex]; + uint64_t Size = LastLocation.Offset + LastLocation.Size - FirstLocation.Offset; + BlockFile->Read(BufferBase, Size, FirstLocation.Offset); + for (size_t RangeIndex = 0; RangeIndex < RangeCount; ++RangeIndex) + { + size_t NextChunkIndex = LocationIndexes[LocationIndexOffset + RangeIndex]; + const BlockStoreLocation& ChunkLocation = ChunkLocations[NextChunkIndex]; + if (ChunkLocation.Size == 0 || (ChunkLocation.Offset + ChunkLocation.Size > BlockSize)) + { + SmallSizeCallback(NextChunkIndex, nullptr, 0); + continue; + } + void* BufferPtr = &((char*)BufferBase)[ChunkLocation.Offset - FirstLocation.Offset]; + SmallSizeCallback(NextChunkIndex, BufferPtr, ChunkLocation.Size); + } + LocationIndexOffset += RangeCount; + continue; + } + if (FirstLocation.Size == 0 || (FirstLocation.Offset + FirstLocation.Size > BlockSize)) + { + SmallSizeCallback(ChunkIndex, nullptr, 0); + LocationIndexOffset++; + continue; } + LargeSizeCallback(ChunkIndex, *BlockFile.Get(), FirstLocation.Offset, FirstLocation.Size); + LocationIndexOffset++; } } @@ -1176,35 +1211,77 @@ TEST_CASE("blockstore.iterate.chunks") std::string VeryLargeChunk(ScrubSmallChunkWindowSize * 2, 'L'); BlockStoreLocation 
VeryLargeChunkLocation = WriteStringAsChunk(Store, VeryLargeChunk, 4); + BlockStoreLocation BadLocationZeroSize = {.BlockIndex = 0, .Offset = 0, .Size = 0}; + BlockStoreLocation BadLocationOutOfRange = {.BlockIndex = 0, + .Offset = ScrubSmallChunkWindowSize, + .Size = ScrubSmallChunkWindowSize * 2}; + BlockStoreLocation BadBlockIndex = {.BlockIndex = 0xfffff, .Offset = 1024, .Size = 1024}; + Store.IterateChunks( - {FirstChunkLocation, SecondChunkLocation, VeryLargeChunkLocation}, + {FirstChunkLocation, SecondChunkLocation, VeryLargeChunkLocation, BadLocationZeroSize, BadLocationOutOfRange, BadBlockIndex}, [&](size_t ChunkIndex, const void* Data, uint64_t Size) { - CHECK(Data); - CHECK(Size > 0); - std::string AsString((const char*)Data, Size); switch (ChunkIndex) { case 0: - CHECK(AsString == FirstChunkData); + CHECK(Data); + CHECK(Size == FirstChunkData.size()); + CHECK(std::string((const char*)Data, Size) == FirstChunkData); break; case 1: - CHECK(AsString == SecondChunkData); + CHECK(Data); + CHECK(Size == SecondChunkData.size()); + CHECK(std::string((const char*)Data, Size) == SecondChunkData); + break; + case 2: + CHECK(false); + break; + case 3: + CHECK(!Data); + break; + case 4: + CHECK(!Data); + break; + case 5: + CHECK(!Data); break; default: CHECK(false); break; } }, - [&](size_t ChunkIndex, Ref<BlockStoreFile> BlockFile, uint64_t Offset, uint64_t Size) { - CHECK(BlockFile); - CHECK(ChunkIndex == 2); - CHECK(Offset == VeryLargeChunkLocation.Offset); - CHECK(Size == VeryLargeChunkLocation.Size); - size_t StreamOffset = 0; - BlockFile->StreamByteRange(Offset, Size, [&](const void* Data, size_t Size) { - const char* VeryLargeChunkSection = &(VeryLargeChunk.data()[StreamOffset]); - CHECK(memcmp(VeryLargeChunkSection, Data, Size) == 0); - }); + [&](size_t ChunkIndex, BlockStoreFile& File, uint64_t Offset, uint64_t Size) { + switch (ChunkIndex) + { + case 0: + case 1: + CHECK(false); + break; + case 2: + { + CHECK(Size == VeryLargeChunk.size()); + char* Buffer 
= new char[Size]; + size_t HashOffset = 0; + File.StreamByteRange(Offset, Size, [&](const void* Data, uint64_t Size) { + memcpy(&Buffer[HashOffset], Data, Size); + HashOffset += Size; + }); + CHECK(memcmp(Buffer, VeryLargeChunk.data(), Size) == 0); + delete[] Buffer; + } + break; + case 3: + CHECK(false); + break; + case 4: + CHECK(false); + break; + case 5: + CHECK(false); + break; + default: + CHECK(false); + break; + } }); } |