about | summary | refs | log | tree | commit | diff
path: root/zenstore/compactcas.cpp
diff options
context:
space:
mode:
author Dan Engelbrecht <[email protected]> 2022-03-17 23:16:57 +0100
committer Dan Engelbrecht <[email protected]> 2022-03-31 11:28:32 +0200
commit db802259323abf66bc49186408461db1df3447dd (patch)
tree 2d36faf094215a6347b6853f130b7fea7f49adaa /zenstore/compactcas.cpp
parent WIP - bit assignement for CasDiskLocation (diff)
download zen-db802259323abf66bc49186408461db1df3447dd.tar.xz
zen-db802259323abf66bc49186408461db1df3447dd.zip
Use bitpacking for Cas block location
Diffstat (limited to 'zenstore/compactcas.cpp')
-rw-r--r-- zenstore/compactcas.cpp 129
1 files changed, 82 insertions, 47 deletions
diff --git a/zenstore/compactcas.cpp b/zenstore/compactcas.cpp
index 27f0d0e29..aaefa9c6c 100644
--- a/zenstore/compactcas.cpp
+++ b/zenstore/compactcas.cpp
@@ -42,7 +42,7 @@ namespace {
std::filesystem::path BuildUcasPath(const std::filesystem::path& RootDirectory,
const std::string_view ContainerBaseName,
- const uint16_t BlockIndex)
+ const uint32_t BlockIndex)
{
return RootDirectory / (std::string(ContainerBaseName) + "." + (std::to_string(BlockIndex) + ".ucas"));
}
@@ -74,7 +74,7 @@ namespace {
struct CasContainerStrategy::ChunkBlock
{
- ChunkBlock(const std::filesystem::path& RootDirectory, const std::string_view ContainerBaseName, uint16_t BlockIndex);
+ ChunkBlock(const std::filesystem::path& RootDirectory, const std::string_view ContainerBaseName, uint32_t BlockIndex);
~ChunkBlock();
const std::filesystem::path GetPath() const;
void Open();
@@ -95,7 +95,7 @@ private:
CasContainerStrategy::ChunkBlock::ChunkBlock(const std::filesystem::path& RootDirectory,
const std::string_view ContainerBaseName,
- uint16_t BlockIndex)
+ uint32_t BlockIndex)
: m_Path(BuildUcasPath(RootDirectory, ContainerBaseName, BlockIndex))
{
}
@@ -237,7 +237,7 @@ CasContainerStrategy::InsertChunk(const void* ChunkData, size_t ChunkSize, const
if ((m_CurrentInsertOffset + ChunkSize) > m_MaxBlockSize)
{
RwLock::ExclusiveLockScope __(m_LocationMapLock);
- uint16_t NewBlockIndex = m_CurrentBlockIndex + 1;
+ uint32_t NewBlockIndex = m_CurrentBlockIndex + 1;
while (m_OpenBlocks.contains(NewBlockIndex))
{
NewBlockIndex++;
@@ -253,15 +253,15 @@ CasContainerStrategy::InsertChunk(const void* ChunkData, size_t ChunkSize, const
m_CurrentBlock = SmallObjectFile;
m_CurrentInsertOffset = 0;
}
- const uint32_t InsertOffset = m_CurrentInsertOffset;
+ const uint64_t InsertOffset = m_CurrentInsertOffset;
m_CurrentBlock.lock()->Write(ChunkData, ChunkSize, InsertOffset);
- m_CurrentInsertOffset = static_cast<uint32_t>(AlignPositon(InsertOffset + ChunkSize, m_PayloadAlignment));
+ m_CurrentInsertOffset = AlignPositon(InsertOffset + ChunkSize, m_PayloadAlignment);
- const CasDiskLocation Location{m_CurrentBlockIndex, InsertOffset, static_cast<uint32_t>(ChunkSize)};
- CasDiskIndexEntry IndexEntry{.Key = ChunkHash, .Location = Location};
+ const CasLocation Location(m_CurrentBlockIndex, InsertOffset, ChunkSize);
+ CasDiskIndexEntry IndexEntry{.Key = ChunkHash, .Location = CasDiskLocation(Location)};
RwLock::ExclusiveLockScope __(m_LocationMapLock);
- m_LocationMap[ChunkHash] = Location;
+ m_LocationMap[ChunkHash] = CasDiskLocation(Location);
m_TotalSize.fetch_add(static_cast<uint64_t>(ChunkSize));
m_CasLog.Append(IndexEntry);
@@ -281,7 +281,7 @@ CasContainerStrategy::FindChunk(const IoHash& ChunkHash)
if (auto KeyIt = m_LocationMap.find(ChunkHash); KeyIt != m_LocationMap.end())
{
- const CasDiskLocation& Location = KeyIt->second;
+ CasLocation Location = *KeyIt->second;
if (auto BlockIt = m_OpenBlocks.find(Location.BlockIndex); BlockIt != m_OpenBlocks.end())
{
@@ -356,11 +356,12 @@ CasContainerStrategy::Scrub(ScrubContext& Ctx)
for (auto& Entry : m_LocationMap)
{
- const uint64_t EntryOffset = Entry.second.Offset;
+ const CasLocation Location = *Entry.second;
+ const uint64_t EntryOffset = Location.Offset;
if ((EntryOffset >= WindowStart) && (EntryOffset < WindowEnd))
{
- const uint64_t EntryEnd = EntryOffset + Entry.second.Size;
+ const uint64_t EntryEnd = EntryOffset + Location.Size;
if (EntryEnd >= WindowEnd)
{
@@ -370,8 +371,7 @@ CasContainerStrategy::Scrub(ScrubContext& Ctx)
}
const IoHash ComputedHash =
- IoHash::HashBuffer(reinterpret_cast<uint8_t*>(BufferBase) + Entry.second.Offset - WindowStart,
- Entry.second.Size);
+ IoHash::HashBuffer(reinterpret_cast<uint8_t*>(BufferBase) + Location.Offset - WindowStart, Location.Size);
if (Entry.first != ComputedHash)
{
@@ -391,9 +391,10 @@ CasContainerStrategy::Scrub(ScrubContext& Ctx)
for (const CasDiskIndexEntry& Entry : BigChunks)
{
- IoHashStream Hasher;
- auto& SmallObjectFile = *m_OpenBlocks[Entry.Location.BlockIndex];
- SmallObjectFile.StreamByteRange(Entry.Location.Offset, Entry.Location.Size, [&](const void* Data, uint64_t Size) {
+ IoHashStream Hasher;
+ const CasLocation Location = *Entry.Location;
+ auto& SmallObjectFile = *m_OpenBlocks[Location.BlockIndex];
+ SmallObjectFile.StreamByteRange(Location.Offset, Location.Size, [&](const void* Data, uint64_t Size) {
Hasher.Append(Data, Size);
});
IoHash ComputedHash = Hasher.GetHash();
@@ -469,7 +470,7 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx)
std::unordered_map<uint64_t, size_t> BlockIndexToKeepChunksMap;
std::vector<std::unordered_map<IoHash, CasDiskLocation, IoHash::Hasher>> KeepChunks;
std::vector<IoHash> DeletedChunks;
- std::unordered_set<uint16_t> BlocksToReWrite;
+ std::unordered_set<uint32_t> BlocksToReWrite;
{
RwLock::ExclusiveLockScope _i(m_InsertLock);
RwLock::ExclusiveLockScope _l(m_LocationMapLock);
@@ -493,11 +494,12 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx)
for (const auto& Entry : m_LocationMap)
{
TotalChunkHashes.push_back(Entry.first);
- if (BlockIndexToKeepChunksMap.contains(Entry.second.BlockIndex))
+ const CasLocation Location = *Entry.second;
+ if (BlockIndexToKeepChunksMap.contains(Location.BlockIndex))
{
continue;
}
- BlockIndexToKeepChunksMap[Entry.second.BlockIndex] = KeepChunks.size();
+ BlockIndexToKeepChunksMap[Location.BlockIndex] = KeepChunks.size();
KeepChunks.resize(KeepChunks.size() + 1);
}
@@ -507,10 +509,10 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx)
GcCtx.FilterCas(TotalChunkHashes, [&](const IoHash& ChunkHash, bool Keep) {
if (Keep)
{
- auto KeyIt = m_LocationMap.find(ChunkHash);
- const auto& ChunkLocation = KeyIt->second;
- auto& ChunkMap = KeepChunks[BlockIndexToKeepChunksMap[ChunkLocation.BlockIndex]];
- ChunkMap[ChunkHash] = ChunkLocation;
+ auto KeyIt = m_LocationMap.find(ChunkHash);
+ const CasLocation ChunkLocation = *KeyIt->second;
+ auto& ChunkMap = KeepChunks[BlockIndexToKeepChunksMap[ChunkLocation.BlockIndex]];
+ ChunkMap[ChunkHash] = KeyIt->second;
NewTotalSize += ChunkLocation.Size;
}
else
@@ -532,10 +534,10 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx)
for (const auto& ChunkHash : DeletedChunks)
{
- auto KeyIt = m_LocationMap.find(ChunkHash);
- const auto& ChunkLocation = KeyIt->second;
+ auto KeyIt = m_LocationMap.find(ChunkHash);
+ const CasLocation& ChunkLocation = *KeyIt->second;
BlocksToReWrite.insert(ChunkLocation.BlockIndex);
- m_CasLog.Append({.Key = ChunkHash, .Location = ChunkLocation, .Flags = CasDiskIndexEntry::kTombstone});
+ m_CasLog.Append({.Key = ChunkHash, .Location = KeyIt->second, .Flags = CasDiskIndexEntry::kTombstone});
m_LocationMap.erase(ChunkHash);
m_TotalSize.fetch_sub(static_cast<uint64_t>(ChunkLocation.Size));
}
@@ -544,7 +546,7 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx)
// a small amount of bytes to gain.
if (BlocksToReWrite.contains(m_CurrentBlockIndex))
{
- uint16_t NewBlockIndex = m_CurrentBlockIndex + 1;
+ uint32_t NewBlockIndex = m_CurrentBlockIndex + 1;
while (m_OpenBlocks.contains(NewBlockIndex))
{
NewBlockIndex++;
@@ -552,7 +554,7 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx)
{
ZEN_ERROR("unable to allocate a new block in {}, count limit {} exeeded",
m_ContainerBaseName,
- static_cast<uint32_t>(std::numeric_limits<uint16_t>::max()) + 1);
+ static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()) + 1);
return;
}
}
@@ -569,7 +571,7 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx)
std::shared_ptr<ChunkBlock> NewBlockFile;
uint64_t WriteOffset = {};
- uint16_t NewBlockIndex = {};
+ uint32_t NewBlockIndex = {};
std::unordered_map<IoHash, CasDiskLocation> MovedBlocks;
for (auto BlockIndex : BlocksToReWrite)
@@ -604,7 +606,7 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx)
std::vector<uint8_t> Chunk;
for (auto& Entry : ChunkMap)
{
- const CasDiskLocation& ChunkLocation = Entry.second;
+ const CasLocation ChunkLocation = *Entry.second;
Chunk.resize(ChunkLocation.Size);
BlockFile->Read(Chunk.data(), Chunk.size(), ChunkLocation.Offset);
@@ -630,7 +632,7 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx)
{
ZEN_ERROR("unable to allocate a new block in {}, count limit {} exeeded",
m_ContainerBaseName,
- static_cast<uint32_t>(std::numeric_limits<uint16_t>::max()) + 1);
+ static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()) + 1);
return;
}
}
@@ -665,10 +667,8 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx)
}
NewBlockFile->Write(Chunk.data(), Chunk.size(), WriteOffset);
- CasDiskLocation NewChunkLocation(NewBlockIndex, gsl::narrow<uint32_t>(WriteOffset), gsl::narrow<uint32_t>(Chunk.size()));
- Entry.second = {.BlockIndex = NewBlockIndex,
- .Offset = gsl::narrow<uint32_t>(WriteOffset),
- .Size = gsl::narrow<uint32_t>(Chunk.size())};
+ CasLocation NewChunkLocation(NewBlockIndex, WriteOffset, Chunk.size());
+ Entry.second = CasDiskLocation(NewChunkLocation);
MovedBlocks[Entry.first] = Entry.second;
WriteOffset = AlignPositon(WriteOffset + Chunk.size(), m_PayloadAlignment);
}
@@ -914,7 +914,7 @@ CasContainerStrategy::OpenContainer(bool IsNewStore)
SmallObjectFile.Open(LegacySobsPath, false);
uint64_t MaxRequiredChunkCount = SmallObjectFile.FileSize() / m_MaxBlockSize;
- uint64_t MaxPossibleChunkCount = static_cast<uint64_t>(std::numeric_limits<std::uint16_t>::max()) + 1;
+ uint64_t MaxPossibleChunkCount = static_cast<uint64_t>(std::numeric_limits<std::uint32_t>::max()) + 1;
if (MaxRequiredChunkCount > MaxPossibleChunkCount)
{
ZEN_ERROR("legacy store migration from '{}' FAILED, required block count {}, possible {}",
@@ -958,7 +958,7 @@ CasContainerStrategy::OpenContainer(bool IsNewStore)
std::unique_ptr<ChunkBlock> NewBlockFile;
uint64_t WriteOffset = {};
- uint16_t NewBlockIndex = {};
+ uint32_t NewBlockIndex = {};
std::vector<uint8_t> Chunk;
for (const auto& ChunkHash : ChunkHashes)
@@ -982,8 +982,8 @@ CasContainerStrategy::OpenContainer(bool IsNewStore)
WriteOffset = 0;
}
NewBlockFile->Write(Chunk.data(), Chunk.size(), WriteOffset);
- CasDiskLocation NewChunkLocation(NewBlockIndex, gsl::narrow<uint32_t>(WriteOffset), gsl::narrow<uint32_t>(Chunk.size()));
- m_CasLog.Append({.Key = ChunkHash, .Location = NewChunkLocation});
+ CasLocation NewChunkLocation(NewBlockIndex, WriteOffset, Chunk.size());
+ m_CasLog.Append({.Key = ChunkHash, .Location = CasDiskLocation(NewChunkLocation)});
WriteOffset = AlignPositon(WriteOffset + Chunk.size(), m_PayloadAlignment);
}
m_CasLog.Close();
@@ -1033,10 +1033,10 @@ CasContainerStrategy::OpenContainer(bool IsNewStore)
});
}
- std::unordered_map<uint16_t, uint64_t> BlockUsage;
+ std::unordered_map<uint32_t, uint64_t> BlockUsage;
for (const auto& Entry : m_LocationMap)
{
- const auto& Location = Entry.second;
+ const CasLocation Location = *Entry.second;
m_TotalSize.fetch_add(Location.Size);
uint64_t NextBlockStart = Location.Offset + Location.Size;
auto It = BlockUsage.find(Location.BlockIndex);
@@ -1072,7 +1072,7 @@ CasContainerStrategy::OpenContainer(bool IsNewStore)
}
try
{
- uint16_t BlockIndex = static_cast<uint16_t>(std::stoi(FileName.substr(m_ContainerBaseName.length() + 1)));
+ uint32_t BlockIndex = static_cast<uint32_t>(std::stoi(FileName.substr(m_ContainerBaseName.length() + 1)));
if (!BlockUsage.contains(BlockIndex))
{
// Clear out unused blocks
@@ -1090,8 +1090,8 @@ CasContainerStrategy::OpenContainer(bool IsNewStore)
}
}
- uint32_t LargestSizeToUse = gsl::narrow<uint32_t>(m_MaxBlockSize - m_PayloadAlignment);
- uint32_t SmallestBlockSize = LargestSizeToUse;
+ uint64_t LargestSizeToUse = m_MaxBlockSize - m_PayloadAlignment;
+ uint64_t SmallestBlockSize = LargestSizeToUse;
bool CreateNewBlock = m_OpenBlocks.empty();
if (!CreateNewBlock)
{
@@ -1099,7 +1099,7 @@ CasContainerStrategy::OpenContainer(bool IsNewStore)
{
if (Entry.second < SmallestBlockSize)
{
- SmallestBlockSize = gsl::narrow<uint32_t>(Entry.second);
+ SmallestBlockSize = Entry.second;
m_CurrentBlockIndex = Entry.first;
CreateNewBlock = false;
}
@@ -1117,7 +1117,7 @@ CasContainerStrategy::OpenContainer(bool IsNewStore)
else
{
m_CurrentBlock = m_OpenBlocks[m_CurrentBlockIndex];
- m_CurrentInsertOffset = static_cast<uint32_t>(AlignPositon(SmallestBlockSize, m_PayloadAlignment));
+ m_CurrentInsertOffset = AlignPositon(SmallestBlockSize, m_PayloadAlignment);
}
// TODO: should validate integrity of container files here
@@ -1145,6 +1145,41 @@ namespace {
}
} // namespace
+bool
+operator==(const CasLocation& Lhs, const CasLocation& Rhs)
+{
+ return Lhs.BlockIndex == Rhs.BlockIndex && Lhs.Offset == Rhs.Offset && Lhs.Size == Rhs.Size;
+}
+
+TEST_CASE("cas.casdisklocation")
+{
+ CasLocation Zero = CasLocation{.BlockIndex = 0, .Offset = 0, .Size = 0};
+ CHECK(Zero == *CasDiskLocation(Zero));
+
+ CasLocation MaxBlockIndex = CasLocation{.BlockIndex = CasDiskLocation::MaxBlockIndex, .Offset = 0, .Size = 0};
+ CHECK(MaxBlockIndex == *CasDiskLocation(MaxBlockIndex));
+
+ CasLocation MaxOffset = CasLocation{.BlockIndex = 0, .Offset = CasDiskLocation::MaxOffset, .Size = 0};
+ CHECK(MaxOffset == *CasDiskLocation(MaxOffset));
+
+ CasLocation MaxSize = CasLocation{.BlockIndex = 0, .Offset = 0, .Size = std::numeric_limits<uint32_t>::max()};
+ CHECK(MaxSize == *CasDiskLocation(MaxSize));
+
+ CasLocation MaxBlockIndexAndOffset =
+ CasLocation{.BlockIndex = CasDiskLocation::MaxBlockIndex, .Offset = CasDiskLocation::MaxOffset, .Size = 0};
+ CHECK(MaxBlockIndexAndOffset == *CasDiskLocation(MaxBlockIndexAndOffset));
+
+ CasLocation MaxAll = CasLocation{.BlockIndex = CasDiskLocation::MaxBlockIndex,
+ .Offset = CasDiskLocation::MaxOffset,
+ .Size = std::numeric_limits<uint32_t>::max()};
+ CHECK(MaxAll == *CasDiskLocation(MaxAll));
+
+ CasLocation Middle = CasLocation{.BlockIndex = (CasDiskLocation::MaxBlockIndex) / 2,
+ .Offset = (CasDiskLocation::MaxOffset) / 2,
+ .Size = std::numeric_limits<uint32_t>::max() / 2};
+ CHECK(Middle == *CasDiskLocation(Middle));
+}
+
TEST_CASE("cas.compact.gc")
{
ScopedTemporaryDirectory TempDir;