about | summary | refs | log | tree | commit | diff
path: root/zenstore/blockstore.cpp
diff options
context:
space:
mode:
author: Stefan Boberg <[email protected]> 2022-06-16 15:42:17 +0200
committer: Stefan Boberg <[email protected]> 2022-06-16 15:42:17 +0200
commit: b8797a647406d31ebfd137a9ae07819ccf332a10 (patch)
tree: b57dcb1443c817577e1c9f8e10a35837e1d85389 /zenstore/blockstore.cpp
parent: asio: added some context to error reporting (diff)
download: zen-b8797a647406d31ebfd137a9ae07819ccf332a10.tar.xz
          zen-b8797a647406d31ebfd137a9ae07819ccf332a10.zip
merged from main
Diffstat (limited to 'zenstore/blockstore.cpp')
-rw-r--r-- zenstore/blockstore.cpp 225
1 files changed, 151 insertions, 74 deletions
diff --git a/zenstore/blockstore.cpp b/zenstore/blockstore.cpp
index 4e61c23cf..88592d785 100644
--- a/zenstore/blockstore.cpp
+++ b/zenstore/blockstore.cpp
@@ -7,12 +7,13 @@
#include <zencore/scopeguard.h>
#include <zencore/timer.h>
+#include <algorithm>
+
#if ZEN_WITH_TESTS
# include <zencore/compactbinarybuilder.h>
# include <zencore/testing.h>
# include <zencore/testutils.h>
# include <zencore/workthreadpool.h>
-# include <algorithm>
# include <random>
#endif
@@ -208,7 +209,7 @@ BlockStore::Close()
}
void
-BlockStore::WriteChunk(const void* Data, uint64_t Size, uint64_t Alignment, WriteChunkCallback Callback)
+BlockStore::WriteChunk(const void* Data, uint64_t Size, uint64_t Alignment, const WriteChunkCallback& Callback)
{
ZEN_ASSERT(Data != nullptr);
ZEN_ASSERT(Size > 0u);
@@ -612,74 +613,108 @@ BlockStore::ReclaimSpace(const ReclaimSnapshotState& Snapshot,
void
BlockStore::IterateChunks(const std::vector<BlockStoreLocation>& ChunkLocations,
- IterateChunksSmallSizeCallback SmallSizeCallback,
- IterateChunksLargeSizeCallback LargeSizeCallback)
+ const IterateChunksSmallSizeCallback& SmallSizeCallback,
+ const IterateChunksLargeSizeCallback& LargeSizeCallback)
{
- // We do a read sweep through the payloads file and validate
- // any entries that are contained within each segment, with
- // the assumption that most entries will be checked in this
- // pass. An alternative strategy would be to use memory mapping.
-
+ std::vector<size_t> LocationIndexes;
+ LocationIndexes.reserve(ChunkLocations.size());
+ for (size_t ChunkIndex = 0; ChunkIndex < ChunkLocations.size(); ++ChunkIndex)
{
- ChunkIndexArray BigChunks;
- IoBuffer ReadBuffer{ScrubSmallChunkWindowSize};
- void* BufferBase = ReadBuffer.MutableData();
-
- RwLock::SharedLockScope _(m_InsertLock);
-
- for (const auto& Block : m_ChunkBlocks)
+ LocationIndexes.push_back(ChunkIndex);
+ }
+ std::sort(LocationIndexes.begin(), LocationIndexes.end(), [&](size_t IndexA, size_t IndexB) -> bool {
+ const BlockStoreLocation& LocationA = ChunkLocations[IndexA];
+ const BlockStoreLocation& LocationB = ChunkLocations[IndexB];
+ if (LocationA.BlockIndex < LocationB.BlockIndex)
{
- uint64_t WindowStart = 0;
- uint64_t WindowEnd = ScrubSmallChunkWindowSize;
- uint32_t BlockIndex = Block.first;
- const Ref<BlockStoreFile>& BlockFile = Block.second;
- const uint64_t FileSize = BlockFile->FileSize();
-
- do
- {
- const uint64_t ChunkSize = Min(ScrubSmallChunkWindowSize, FileSize - WindowStart);
- BlockFile->Read(BufferBase, ChunkSize, WindowStart);
+ return true;
+ }
+ else if (LocationA.BlockIndex > LocationB.BlockIndex)
+ {
+ return false;
+ }
+ return LocationA.Offset < LocationB.Offset;
+ });
- // TODO: We could be smarter here if the ChunkLocations were sorted on block index - we could
- // then only scan a subset of ChunkLocations instead of scanning through them all...
- for (size_t ChunkIndex = 0; ChunkIndex < ChunkLocations.size(); ++ChunkIndex)
- {
- const BlockStoreLocation Location = ChunkLocations[ChunkIndex];
- if (BlockIndex != Location.BlockIndex)
- {
- continue;
- }
+ IoBuffer ReadBuffer{ScrubSmallChunkWindowSize};
+ void* BufferBase = ReadBuffer.MutableData();
- const uint64_t EntryOffset = Location.Offset;
- if ((EntryOffset >= WindowStart) && (EntryOffset < WindowEnd))
- {
- const uint64_t EntryEnd = EntryOffset + Location.Size;
+ RwLock::SharedLockScope _(m_InsertLock);
- if (EntryEnd >= WindowEnd)
- {
- BigChunks.push_back(ChunkIndex);
+ auto GetNextRange = [&](size_t StartIndexOffset) {
+ size_t ChunkCount = 0;
+ size_t StartIndex = LocationIndexes[StartIndexOffset];
+ const BlockStoreLocation& StartLocation = ChunkLocations[StartIndex];
+ uint64_t StartOffset = StartLocation.Offset;
+ while (StartIndexOffset + ChunkCount < LocationIndexes.size())
+ {
+ size_t NextIndex = LocationIndexes[StartIndexOffset + ChunkCount];
+ const BlockStoreLocation& Location = ChunkLocations[NextIndex];
+ if (Location.BlockIndex != StartLocation.BlockIndex)
+ {
+ break;
+ }
+ if ((Location.Offset + Location.Size) - StartOffset > ScrubSmallChunkWindowSize)
+ {
+ break;
+ }
+ ++ChunkCount;
+ }
+ return ChunkCount;
+ };
- continue;
- }
+ size_t LocationIndexOffset = 0;
+ while (LocationIndexOffset < LocationIndexes.size())
+ {
+ size_t ChunkIndex = LocationIndexes[LocationIndexOffset];
+ const BlockStoreLocation& FirstLocation = ChunkLocations[ChunkIndex];
- SmallSizeCallback(ChunkIndex,
- reinterpret_cast<uint8_t*>(BufferBase) + Location.Offset - WindowStart,
- Location.Size);
- }
+ const Ref<BlockStoreFile>& BlockFile = m_ChunkBlocks[FirstLocation.BlockIndex];
+ if (!BlockFile)
+ {
+ while (ChunkLocations[ChunkIndex].BlockIndex == FirstLocation.BlockIndex)
+ {
+ SmallSizeCallback(ChunkIndex, nullptr, 0);
+ LocationIndexOffset++;
+ if (LocationIndexOffset == LocationIndexes.size())
+ {
+ break;
}
-
- WindowStart += ScrubSmallChunkWindowSize;
- WindowEnd += ScrubSmallChunkWindowSize;
- } while (WindowStart < FileSize);
+ ChunkIndex = LocationIndexes[LocationIndexOffset];
+ }
+ continue;
}
-
- // Deal with large chunks and chunks that extend over a ScrubSmallChunkWindowSize border
- for (size_t ChunkIndex : BigChunks)
+ size_t BlockSize = BlockFile->FileSize();
+ size_t RangeCount = GetNextRange(LocationIndexOffset);
+ if (RangeCount > 0)
{
- const BlockStoreLocation Location = ChunkLocations[ChunkIndex];
- const Ref<BlockStoreFile>& BlockFile = m_ChunkBlocks[Location.BlockIndex];
- LargeSizeCallback(ChunkIndex, BlockFile, Location.Offset, Location.Size);
+ size_t LastChunkIndex = LocationIndexes[LocationIndexOffset + RangeCount - 1];
+ const BlockStoreLocation& LastLocation = ChunkLocations[LastChunkIndex];
+ uint64_t Size = LastLocation.Offset + LastLocation.Size - FirstLocation.Offset;
+ BlockFile->Read(BufferBase, Size, FirstLocation.Offset);
+ for (size_t RangeIndex = 0; RangeIndex < RangeCount; ++RangeIndex)
+ {
+ size_t NextChunkIndex = LocationIndexes[LocationIndexOffset + RangeIndex];
+ const BlockStoreLocation& ChunkLocation = ChunkLocations[NextChunkIndex];
+ if (ChunkLocation.Size == 0 || (ChunkLocation.Offset + ChunkLocation.Size > BlockSize))
+ {
+ SmallSizeCallback(NextChunkIndex, nullptr, 0);
+ continue;
+ }
+ void* BufferPtr = &((char*)BufferBase)[ChunkLocation.Offset - FirstLocation.Offset];
+ SmallSizeCallback(NextChunkIndex, BufferPtr, ChunkLocation.Size);
+ }
+ LocationIndexOffset += RangeCount;
+ continue;
+ }
+ if (FirstLocation.Size == 0 || (FirstLocation.Offset + FirstLocation.Size > BlockSize))
+ {
+ SmallSizeCallback(ChunkIndex, nullptr, 0);
+ LocationIndexOffset++;
+ continue;
}
+ LargeSizeCallback(ChunkIndex, *BlockFile.Get(), FirstLocation.Offset, FirstLocation.Size);
+ LocationIndexOffset++;
}
}
@@ -1176,35 +1211,77 @@ TEST_CASE("blockstore.iterate.chunks")
std::string VeryLargeChunk(ScrubSmallChunkWindowSize * 2, 'L');
BlockStoreLocation VeryLargeChunkLocation = WriteStringAsChunk(Store, VeryLargeChunk, 4);
+ BlockStoreLocation BadLocationZeroSize = {.BlockIndex = 0, .Offset = 0, .Size = 0};
+ BlockStoreLocation BadLocationOutOfRange = {.BlockIndex = 0,
+ .Offset = ScrubSmallChunkWindowSize,
+ .Size = ScrubSmallChunkWindowSize * 2};
+ BlockStoreLocation BadBlockIndex = {.BlockIndex = 0xfffff, .Offset = 1024, .Size = 1024};
+
Store.IterateChunks(
- {FirstChunkLocation, SecondChunkLocation, VeryLargeChunkLocation},
+ {FirstChunkLocation, SecondChunkLocation, VeryLargeChunkLocation, BadLocationZeroSize, BadLocationOutOfRange, BadBlockIndex},
[&](size_t ChunkIndex, const void* Data, uint64_t Size) {
- CHECK(Data);
- CHECK(Size > 0);
- std::string AsString((const char*)Data, Size);
switch (ChunkIndex)
{
case 0:
- CHECK(AsString == FirstChunkData);
+ CHECK(Data);
+ CHECK(Size == FirstChunkData.size());
+ CHECK(std::string((const char*)Data, Size) == FirstChunkData);
break;
case 1:
- CHECK(AsString == SecondChunkData);
+ CHECK(Data);
+ CHECK(Size == SecondChunkData.size());
+ CHECK(std::string((const char*)Data, Size) == SecondChunkData);
+ break;
+ case 2:
+ CHECK(false);
+ break;
+ case 3:
+ CHECK(!Data);
+ break;
+ case 4:
+ CHECK(!Data);
+ break;
+ case 5:
+ CHECK(!Data);
break;
default:
CHECK(false);
break;
}
},
- [&](size_t ChunkIndex, Ref<BlockStoreFile> BlockFile, uint64_t Offset, uint64_t Size) {
- CHECK(BlockFile);
- CHECK(ChunkIndex == 2);
- CHECK(Offset == VeryLargeChunkLocation.Offset);
- CHECK(Size == VeryLargeChunkLocation.Size);
- size_t StreamOffset = 0;
- BlockFile->StreamByteRange(Offset, Size, [&](const void* Data, size_t Size) {
- const char* VeryLargeChunkSection = &(VeryLargeChunk.data()[StreamOffset]);
- CHECK(memcmp(VeryLargeChunkSection, Data, Size) == 0);
- });
+ [&](size_t ChunkIndex, BlockStoreFile& File, uint64_t Offset, uint64_t Size) {
+ switch (ChunkIndex)
+ {
+ case 0:
+ case 1:
+ CHECK(false);
+ break;
+ case 2:
+ {
+ CHECK(Size == VeryLargeChunk.size());
+ char* Buffer = new char[Size];
+ size_t HashOffset = 0;
+ File.StreamByteRange(Offset, Size, [&](const void* Data, uint64_t Size) {
+ memcpy(&Buffer[HashOffset], Data, Size);
+ HashOffset += Size;
+ });
+ CHECK(memcmp(Buffer, VeryLargeChunk.data(), Size) == 0);
+ delete[] Buffer;
+ }
+ break;
+ case 3:
+ CHECK(false);
+ break;
+ case 4:
+ CHECK(false);
+ break;
+ case 5:
+ CHECK(false);
+ break;
+ default:
+ CHECK(false);
+ break;
+ }
});
}