aboutsummaryrefslogtreecommitdiff
path: root/zenstore/blockstore.cpp
diff options
context:
space:
mode:
authorDan Engelbrecht <[email protected]>2022-05-03 11:48:35 +0200
committerDan Engelbrecht <[email protected]>2022-05-03 11:48:35 +0200
commit1b3b8b0e6f70129222085eec40b5a58cf9b29b01 (patch)
tree9cb7c2b801f11a90174c4ea11eb9ae0c219bb459 /zenstore/blockstore.cpp
parentcleanup (diff)
downloadzen-1b3b8b0e6f70129222085eec40b5a58cf9b29b01.tar.xz
zen-1b3b8b0e6f70129222085eec40b5a58cf9b29b01.zip
more tests for block store
Diffstat (limited to 'zenstore/blockstore.cpp')
-rw-r--r--zenstore/blockstore.cpp342
1 files changed, 329 insertions, 13 deletions
diff --git a/zenstore/blockstore.cpp b/zenstore/blockstore.cpp
index f0a798e36..f469e3746 100644
--- a/zenstore/blockstore.cpp
+++ b/zenstore/blockstore.cpp
@@ -117,6 +117,8 @@ BlockStoreFile::StreamByteRange(uint64_t FileOffset, uint64_t Size, std::functio
m_File.StreamByteRange(FileOffset, Size, std::move(ChunkFun));
}
+// Size in bytes (4 MiB) of the read window BlockStore::IterateChunks uses when scanning a block file:
+// it sizes the read buffer and is the step by which the window advances through the file.
+constexpr uint64_t ScrubSmallChunkWindowSize = 4 * 1024 * 1024;
+
void
BlockStore::Initialize(const std::filesystem::path& BlocksBasePath,
uint64_t MaxBlockSize,
@@ -260,7 +262,11 @@ Ref<BlockStoreFile>
BlockStore::GetChunkBlock(const BlockStoreLocation& Location)
{
RwLock::SharedLockScope InsertLock(m_InsertLock);
- return m_ChunkBlocks[Location.BlockIndex];
+ if (auto BlockIt = m_ChunkBlocks.find(Location.BlockIndex); BlockIt != m_ChunkBlocks.end())
+ {
+ return BlockIt->second;
+ }
+ return {};
}
void
@@ -607,8 +613,7 @@ BlockStore::IterateChunks(const std::vector<BlockStoreLocation>& ChunkLocations,
{
ChunkIndexArray BigChunks;
- const uint64_t WindowSize = 4 * 1024 * 1024;
- IoBuffer ReadBuffer{WindowSize};
+ IoBuffer ReadBuffer{ScrubSmallChunkWindowSize};
void* BufferBase = ReadBuffer.MutableData();
RwLock::SharedLockScope _(m_InsertLock);
@@ -616,15 +621,14 @@ BlockStore::IterateChunks(const std::vector<BlockStoreLocation>& ChunkLocations,
for (const auto& Block : m_ChunkBlocks)
{
uint64_t WindowStart = 0;
- uint64_t WindowEnd = WindowSize;
+ uint64_t WindowEnd = ScrubSmallChunkWindowSize;
uint32_t BlockIndex = Block.first;
const Ref<BlockStoreFile>& BlockFile = Block.second;
- BlockFile->Open();
- const uint64_t FileSize = BlockFile->FileSize();
+ const uint64_t FileSize = BlockFile->FileSize();
do
{
- const uint64_t ChunkSize = Min(WindowSize, FileSize - WindowStart);
+ const uint64_t ChunkSize = Min(ScrubSmallChunkWindowSize, FileSize - WindowStart);
BlockFile->Read(BufferBase, ChunkSize, WindowStart);
// TODO: We could be smarter here if the ChunkLocations were sorted on block index - we could
@@ -655,17 +659,16 @@ BlockStore::IterateChunks(const std::vector<BlockStoreLocation>& ChunkLocations,
}
}
- WindowStart += WindowSize;
- WindowEnd += WindowSize;
+ WindowStart += ScrubSmallChunkWindowSize;
+ WindowEnd += ScrubSmallChunkWindowSize;
} while (WindowStart < FileSize);
}
- // Deal with large chunks
-
+ // Deal with large chunks and chunks that extend over a ScrubSmallChunkWindowSize border
for (size_t ChunkIndex : BigChunks)
{
- const BlockStoreLocation Location = ChunkLocations[ChunkIndex];
- BasicFile& BlockFile = m_ChunkBlocks[Location.BlockIndex]->GetBasicFile();
+ const BlockStoreLocation Location = ChunkLocations[ChunkIndex];
+ const Ref<BlockStoreFile>& BlockFile = m_ChunkBlocks[Location.BlockIndex];
LargeSizeCallback(ChunkIndex, BlockFile, Location.Offset, Location.Size);
}
}
@@ -1029,6 +1032,319 @@ TEST_CASE("blockstore.blockfile")
CHECK(!std::filesystem::exists(RootDirectory / "1"));
}
+namespace {
+	// Writes String to Store as a single chunk using PayloadAlignment, checks that the size recorded in
+	// the returned location equals the string length, and returns that location.
+	BlockStoreLocation WriteStringAsChunk(BlockStore& Store, std::string_view String, size_t PayloadAlignment)
+	{
+		BlockStoreLocation Location = Store.WriteChunk(String.data(), String.length(), PayloadAlignment);
+		CHECK(Location.Size == String.length());
+		return Location;
+	}
+
+	// Reads the chunk at Location back from Store as a string.
+	// Returns an empty string when the store has no block for Location or the block yields no data.
+	std::string ReadChunkAsString(BlockStore& Store, const BlockStoreLocation& Location)
+	{
+		Ref<BlockStoreFile> ChunkBlock(Store.GetChunkBlock(Location));
+		if (!ChunkBlock)
+		{
+			return "";
+		}
+		IoBuffer ChunkData = ChunkBlock->GetChunk(Location.Offset, Location.Size);
+		if (!ChunkData)
+		{
+			return "";
+		}
+		// Named cast instead of a C-style cast; the buffer bytes are only reinterpreted as characters.
+		return std::string(reinterpret_cast<const char*>(ChunkData.Data()), ChunkData.Size());
+	}
+
+	// Traverses RootDir and returns the visited paths: files when Files is true, directories when
+	// Directories is true. Every visited directory is answered with `true`.
+	std::vector<std::filesystem::path> GetDirectoryContent(std::filesystem::path RootDir, bool Files, bool Directories)
+	{
+		FileSystemTraversal Traversal;
+		struct Visitor : public FileSystemTraversal::TreeVisitor
+		{
+			// The filter flags are fixed at construction so they can never be read uninitialised.
+			Visitor(bool CollectFiles, bool CollectDirectories) : Files(CollectFiles), Directories(CollectDirectories) {}
+
+			virtual void VisitFile(const std::filesystem::path& Parent, const path_view& File, uint64_t) override
+			{
+				if (Files)
+				{
+					Items.push_back(Parent / File);
+				}
+			}
+
+			virtual bool VisitDirectory(const std::filesystem::path& Parent, const path_view& Dir) override
+			{
+				if (Directories)
+				{
+					Items.push_back(Parent / Dir);
+				}
+				return true;
+			}
+
+			const bool Files;
+			const bool Directories;
+			std::vector<std::filesystem::path> Items;
+		};
+		Visitor Visit(Files, Directories);
+
+		Traversal.TraverseFileSystem(RootDir, Visit);
+		// Items is a member of a local object, so it has to be moved explicitly to avoid a copy.
+		return std::move(Visit.Items);
+	}
+
+	// Builds a buffer of Size bytes holding the values 0, 1, 2, ... (each truncated to 8 bits) in random
+	// order. The shuffle is seeded from std::random_device, so the content differs between runs.
+	IoBuffer CreateChunk(uint64_t Size)
+	{
+		static std::random_device rd;
+		static std::mt19937 g(rd());
+
+		std::vector<uint8_t> Values;
+		Values.resize(Size);
+		for (size_t Idx = 0; Idx < Size; ++Idx)
+		{
+			Values[Idx] = static_cast<uint8_t>(Idx);
+		}
+		std::shuffle(Values.begin(), Values.end(), g);
+
+		return IoBufferBuilder::MakeCloneFromMemory(Values.data(), Values.size());
+	}
+} // namespace
+
+// Writes three strings as chunks and verifies that each one reads back unchanged, and that the third
+// one ends up in a different block than the first.
+TEST_CASE("blockstore.chunks")
+{
+	ScopedTemporaryDirectory TempDir;
+	auto RootDirectory = TempDir.Path();
+
+	BlockStore Store;
+	// Second argument is MaxBlockSize (128 bytes); the meaning of 1024 is not visible here - TODO confirm.
+	Store.Initialize(RootDirectory, 128, 1024, {});
+	// Nothing has been written yet, so looking up block 0 is expected to yield an empty Ref.
+	Ref<BlockStoreFile> BadChunk = Store.GetChunkBlock({.BlockIndex = 0, .Offset = 0, .Size = 512});
+	CHECK(!BadChunk);
+
+	std::string FirstChunkData = "This is the data of the first chunk that we will write";
+	BlockStoreLocation FirstChunkLocation = WriteStringAsChunk(Store, FirstChunkData, 4);
+	std::string SecondChunkData = "This is the data for the second chunk that we will write";
+	BlockStoreLocation SecondChunkLocation = WriteStringAsChunk(Store, SecondChunkData, 4);
+
+	CHECK(ReadChunkAsString(Store, FirstChunkLocation) == FirstChunkData);
+	CHECK(ReadChunkAsString(Store, SecondChunkLocation) == SecondChunkData);
+
+	std::string ThirdChunkData =
+		"This is a much longer string that will not fit in the first block so it should be placed in the second block";
+	BlockStoreLocation ThirdChunkLocation = WriteStringAsChunk(Store, ThirdChunkData, 4);
+	CHECK(ThirdChunkLocation.BlockIndex != FirstChunkLocation.BlockIndex);
+
+	// The earlier chunks must still be readable after the third write went to another block.
+	CHECK(ReadChunkAsString(Store, FirstChunkLocation) == FirstChunkData);
+	CHECK(ReadChunkAsString(Store, SecondChunkLocation) == SecondChunkData);
+	CHECK(ReadChunkAsString(Store, ThirdChunkLocation) == ThirdChunkData);
+}
+
+// Writes three chunks, closes the store and re-initializes it with only the first two chunk locations,
+// then checks that exactly one file is left in the store directory.
+TEST_CASE("blockstore.clean.stray.blocks")
+{
+	ScopedTemporaryDirectory TempDir;
+	auto RootDirectory = TempDir.Path();
+
+	BlockStore Store;
+	Store.Initialize(RootDirectory / "store", 128, 1024, {});
+
+	std::string FirstChunkData = "This is the data of the first chunk that we will write";
+	BlockStoreLocation FirstChunkLocation = WriteStringAsChunk(Store, FirstChunkData, 4);
+	std::string SecondChunkData = "This is the data for the second chunk that we will write";
+	BlockStoreLocation SecondChunkLocation = WriteStringAsChunk(Store, SecondChunkData, 4);
+	std::string ThirdChunkData =
+		"This is a much longer string that will not fit in the first block so it should be placed in the second block";
+	// The location of the third chunk is deliberately dropped so nothing references its block.
+	WriteStringAsChunk(Store, ThirdChunkData, 4);
+
+	Store.Close();
+
+	// Not referencing any chunk in the second block means that the block should be deleted
+	Store.Initialize(RootDirectory / "store", 128, 1024, {FirstChunkLocation, SecondChunkLocation});
+
+	// Only files are collected (Files = true, Directories = false).
+	CHECK(GetDirectoryContent(RootDirectory / "store", true, false).size() == 1);
+}
+
+// Writes three chunks with a Flush() after each of the first two and checks that the store directory
+// then contains three files.
+TEST_CASE("blockstore.flush.forces.new.block")
+{
+	ScopedTemporaryDirectory TempDir;
+	auto RootDirectory = TempDir.Path();
+
+	BlockStore Store;
+	Store.Initialize(RootDirectory / "store", 128, 1024, {});
+
+	std::string FirstChunkData = "This is the data of the first chunk that we will write";
+	BlockStoreLocation FirstChunkLocation = WriteStringAsChunk(Store, FirstChunkData, 4);
+	Store.Flush();
+	std::string SecondChunkData = "This is the data for the second chunk that we will write";
+	BlockStoreLocation SecondChunkLocation = WriteStringAsChunk(Store, SecondChunkData, 4);
+	Store.Flush();
+	std::string ThirdChunkData =
+		"This is a much longer string that will not fit in the first block so it should be placed in the second block";
+	WriteStringAsChunk(Store, ThirdChunkData, 4);
+
+	// Only files are collected (Files = true, Directories = false); one file is expected per write here.
+	CHECK(GetDirectoryContent(RootDirectory / "store", true, false).size() == 3);
+}
+
+// Writes two small chunks and one chunk twice the size of the scrub window, then verifies that
+// IterateChunks hands the small chunks to the in-memory callback and the large chunk to the
+// block-file callback, where its content is streamed and compared section by section.
+TEST_CASE("blockstore.iterate.chunks")
+{
+	ScopedTemporaryDirectory TempDir;
+	auto RootDirectory = TempDir.Path();
+
+	BlockStore Store;
+	Store.Initialize(RootDirectory / "store", 128, 1024, {});
+	// Nothing has been written yet, so looking up block 0 is expected to yield an empty Ref.
+	Ref<BlockStoreFile> BadChunk = Store.GetChunkBlock({.BlockIndex = 0, .Offset = 0, .Size = 512});
+	CHECK(!BadChunk);
+
+	std::string FirstChunkData = "This is the data of the first chunk that we will write";
+	BlockStoreLocation FirstChunkLocation = WriteStringAsChunk(Store, FirstChunkData, 4);
+
+	std::string SecondChunkData = "This is the data for the second chunk that we will write";
+	BlockStoreLocation SecondChunkLocation = WriteStringAsChunk(Store, SecondChunkData, 4);
+
+	std::string VeryLargeChunk(ScrubSmallChunkWindowSize * 2, 'L');
+	BlockStoreLocation VeryLargeChunkLocation = WriteStringAsChunk(Store, VeryLargeChunk, 4);
+
+	Store.IterateChunks(
+		{FirstChunkLocation, SecondChunkLocation, VeryLargeChunkLocation},
+		[&](size_t ChunkIndex, const void* Data, uint64_t Size) {
+			CHECK(Data);
+			CHECK(Size > 0);
+			std::string AsString((const char*)Data, Size);
+			switch (ChunkIndex)
+			{
+				case 0:
+					CHECK(AsString == FirstChunkData);
+					break;
+				case 1:
+					CHECK(AsString == SecondChunkData);
+					break;
+				default:
+					// Only the two small chunks may arrive through the in-memory callback.
+					CHECK(false);
+					break;
+			}
+		},
+		[&](size_t ChunkIndex, Ref<BlockStoreFile> BlockFile, uint64_t Offset, uint64_t Size) {
+			CHECK(BlockFile);
+			CHECK(ChunkIndex == 2);
+			CHECK(Offset == VeryLargeChunkLocation.Offset);
+			CHECK(Size == VeryLargeChunkLocation.Size);
+			size_t StreamOffset = 0;
+			BlockFile->StreamByteRange(Offset, Size, [&](const void* Data, size_t SectionSize) {
+				// Never compare past the end of the expected data, even if the stream delivers too much.
+				const bool SectionInRange = StreamOffset + SectionSize <= VeryLargeChunk.size();
+				CHECK(SectionInRange);
+				if (SectionInRange)
+				{
+					CHECK(memcmp(VeryLargeChunk.data() + StreamOffset, Data, SectionSize) == 0);
+				}
+				// Advance so the next section is compared against the matching part of the chunk.
+				StreamOffset += SectionSize;
+			});
+			// The streamed sections together must cover the whole chunk.
+			CHECK(StreamOffset == Size);
+		});
+}
+
+// Writes 200 randomly filled chunks and exercises ReclaimSpace in three scenarios: keeping every chunk,
+// dropping the first 38 chunks, and finally dropping everything that is left.
+TEST_CASE("blockstore.reclaim.space")
+{
+	ScopedTemporaryDirectory TempDir;
+	auto RootDirectory = TempDir.Path();
+
+	BlockStore Store;
+	Store.Initialize(RootDirectory / "store", 512, 1024, {});
+
+	constexpr size_t ChunkCount = 200;
+	constexpr size_t Alignment = 8;
+	std::vector<BlockStoreLocation> ChunkLocations;
+	std::vector<IoHash> ChunkHashes;
+	ChunkLocations.reserve(ChunkCount);
+	ChunkHashes.reserve(ChunkCount);
+	// Chunk sizes grow from 57 to 256 bytes; the hash of each chunk is kept to verify its content later.
+	for (size_t ChunkIndex = 0; ChunkIndex < ChunkCount; ++ChunkIndex)
+	{
+		IoBuffer Chunk = CreateChunk(57 + ChunkIndex);
+		ChunkLocations.push_back(Store.WriteChunk(Chunk.Data(), Chunk.Size(), Alignment));
+		ChunkHashes.push_back(IoHash::HashBuffer(Chunk.Data(), Chunk.Size()));
+	}
+
+	// Start out by keeping every chunk.
+	std::vector<size_t> ChunksToKeep;
+	ChunksToKeep.reserve(ChunkLocations.size());
+	for (size_t ChunkIndex = 0; ChunkIndex < ChunkCount; ++ChunkIndex)
+	{
+		ChunksToKeep.push_back(ChunkIndex);
+	}
+
+	Store.Flush();
+	BlockStore::ReclaimSnapshotState State1 = Store.GetReclaimSnapshotState();
+	// NOTE(review): the fifth argument (true here, false below) presumably selects a dry run - confirm
+	// against the ReclaimSpace declaration.
+	Store.ReclaimSpace(State1, ChunkLocations, ChunksToKeep, Alignment, true);
+
+	// If we keep all the chunks we should not get any callbacks on moved/deleted stuff
+	Store.ReclaimSpace(
+		State1,
+		ChunkLocations,
+		ChunksToKeep,
+		Alignment,
+		false,
+		[](const BlockStore::MovedChunksArray&, const BlockStore::ChunkIndexArray&) { CHECK(false); },
+		[]() {
+			CHECK(false);
+			return 0;
+		});
+
+	// Second scenario: drop the first DeleteChunkCount chunks and keep the rest.
+	size_t DeleteChunkCount = 38;
+	ChunksToKeep.clear();
+	for (size_t ChunkIndex = DeleteChunkCount; ChunkIndex < ChunkCount; ++ChunkIndex)
+	{
+		ChunksToKeep.push_back(ChunkIndex);
+	}
+
+	std::vector<BlockStoreLocation> NewChunkLocations = ChunkLocations;
+	size_t MovedChunkCount = 0;
+	size_t DeletedChunkCount = 0;
+	Store.ReclaimSpace(
+		State1,
+		ChunkLocations,
+		ChunksToKeep,
+		Alignment,
+		false,
+		[&](const BlockStore::MovedChunksArray& MovedChunks, const BlockStore::ChunkIndexArray& DeletedChunks) {
+			// Only kept chunks may be reported as moved; record where each one ended up.
+			for (const auto& MovedChunk : MovedChunks)
+			{
+				CHECK(MovedChunk.first >= DeleteChunkCount);
+				NewChunkLocations[MovedChunk.first] = MovedChunk.second;
+			}
+			MovedChunkCount += MovedChunks.size();
+			// Only dropped chunks may be reported as deleted.
+			for (size_t DeletedIndex : DeletedChunks)
+			{
+				CHECK(DeletedIndex < DeleteChunkCount);
+			}
+			DeletedChunkCount += DeletedChunks.size();
+		},
+		[]() {
+			// The second callback is not expected to run in this test; its role is not visible here.
+			CHECK(false);
+			return 0;
+		});
+	CHECK(MovedChunkCount <= DeleteChunkCount);
+	CHECK(DeletedChunkCount == DeleteChunkCount);
+	// From here on ChunkLocations holds only the kept chunks, at their possibly relocated positions.
+	ChunkLocations = std::vector<BlockStoreLocation>(NewChunkLocations.begin() + DeleteChunkCount, NewChunkLocations.end());
+
+	// Every kept chunk must still be readable and hash to the value recorded when it was written.
+	// Dropped chunks are looked up as well, but nothing is asserted about them.
+	for (size_t ChunkIndex = 0; ChunkIndex < ChunkCount; ++ChunkIndex)
+	{
+		Ref<BlockStoreFile> ChunkBlock = Store.GetChunkBlock(NewChunkLocations[ChunkIndex]);
+		if (ChunkIndex >= DeleteChunkCount)
+		{
+			CHECK(ChunkBlock);
+			IoBuffer VerifyChunk = ChunkBlock->GetChunk(NewChunkLocations[ChunkIndex].Offset, NewChunkLocations[ChunkIndex].Size);
+			CHECK(VerifyChunk);
+			IoHash VerifyHash = IoHash::HashBuffer(VerifyChunk.Data(), VerifyChunk.Size());
+			CHECK(VerifyHash == ChunkHashes[ChunkIndex]);
+		}
+	}
+
+	// Third scenario: keep nothing. NewChunkLocations and MovedChunkCount are reset here but are not
+	// read again in this test.
+	// NOTE(review): State1 was captured before the previous reclaim - confirm reusing it is intended.
+	NewChunkLocations = ChunkLocations;
+	MovedChunkCount = 0;
+	DeletedChunkCount = 0;
+	Store.ReclaimSpace(
+		State1,
+		ChunkLocations,
+		{},
+		Alignment,
+		false,
+		[&](const BlockStore::MovedChunksArray& MovedChunks, const BlockStore::ChunkIndexArray& DeletedChunks) {
+			CHECK(MovedChunks.empty());
+			DeletedChunkCount += DeletedChunks.size();
+		},
+		[]() {
+			CHECK(false);
+			return 0;
+		});
+	CHECK(DeletedChunkCount == ChunkCount - DeleteChunkCount);
+}
+
#endif
void