aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDan Engelbrecht <[email protected]>2022-03-17 23:16:57 +0100
committerDan Engelbrecht <[email protected]>2022-03-31 11:28:32 +0200
commitdb802259323abf66bc49186408461db1df3447dd (patch)
tree2d36faf094215a6347b6853f130b7fea7f49adaa
parentWIP - bit assignement for CasDiskLocation (diff)
downloadzen-db802259323abf66bc49186408461db1df3447dd.tar.xz
zen-db802259323abf66bc49186408461db1df3447dd.zip
Use bitpacking for Cas block location
-rw-r--r--zenstore/compactcas.cpp129
-rw-r--r--zenstore/compactcas.h63
2 files changed, 108 insertions, 84 deletions
diff --git a/zenstore/compactcas.cpp b/zenstore/compactcas.cpp
index 27f0d0e29..aaefa9c6c 100644
--- a/zenstore/compactcas.cpp
+++ b/zenstore/compactcas.cpp
@@ -42,7 +42,7 @@ namespace {
std::filesystem::path BuildUcasPath(const std::filesystem::path& RootDirectory,
const std::string_view ContainerBaseName,
- const uint16_t BlockIndex)
+ const uint32_t BlockIndex)  // Builds "<RootDirectory>/<ContainerBaseName>.<BlockIndex>.ucas"; BlockIndex is written in decimal via std::to_string
{
return RootDirectory / (std::string(ContainerBaseName) + "." + (std::to_string(BlockIndex) + ".ucas"));
}
@@ -74,7 +74,7 @@ namespace {
struct CasContainerStrategy::ChunkBlock
{
- ChunkBlock(const std::filesystem::path& RootDirectory, const std::string_view ContainerBaseName, uint16_t BlockIndex);
+ ChunkBlock(const std::filesystem::path& RootDirectory, const std::string_view ContainerBaseName, uint32_t BlockIndex);
~ChunkBlock();
const std::filesystem::path GetPath() const;
void Open();
@@ -95,7 +95,7 @@ private:
CasContainerStrategy::ChunkBlock::ChunkBlock(const std::filesystem::path& RootDirectory,
const std::string_view ContainerBaseName,
- uint16_t BlockIndex)
+ uint32_t BlockIndex)  // Constructor only derives m_Path from its three arguments through BuildUcasPath; the body is empty
: m_Path(BuildUcasPath(RootDirectory, ContainerBaseName, BlockIndex))
{
}
@@ -237,7 +237,7 @@ CasContainerStrategy::InsertChunk(const void* ChunkData, size_t ChunkSize, const
if ((m_CurrentInsertOffset + ChunkSize) > m_MaxBlockSize)
{
RwLock::ExclusiveLockScope __(m_LocationMapLock);
- uint16_t NewBlockIndex = m_CurrentBlockIndex + 1;
+ uint32_t NewBlockIndex = m_CurrentBlockIndex + 1;
while (m_OpenBlocks.contains(NewBlockIndex))
{
NewBlockIndex++;
@@ -253,15 +253,15 @@ CasContainerStrategy::InsertChunk(const void* ChunkData, size_t ChunkSize, const
m_CurrentBlock = SmallObjectFile;
m_CurrentInsertOffset = 0;
}
- const uint32_t InsertOffset = m_CurrentInsertOffset;
+ const uint64_t InsertOffset = m_CurrentInsertOffset;
m_CurrentBlock.lock()->Write(ChunkData, ChunkSize, InsertOffset);
- m_CurrentInsertOffset = static_cast<uint32_t>(AlignPositon(InsertOffset + ChunkSize, m_PayloadAlignment));
+ m_CurrentInsertOffset = AlignPositon(InsertOffset + ChunkSize, m_PayloadAlignment);
- const CasDiskLocation Location{m_CurrentBlockIndex, InsertOffset, static_cast<uint32_t>(ChunkSize)};
- CasDiskIndexEntry IndexEntry{.Key = ChunkHash, .Location = Location};
+ const CasLocation Location(m_CurrentBlockIndex, InsertOffset, ChunkSize);
+ CasDiskIndexEntry IndexEntry{.Key = ChunkHash, .Location = CasDiskLocation(Location)};
RwLock::ExclusiveLockScope __(m_LocationMapLock);
- m_LocationMap[ChunkHash] = Location;
+ m_LocationMap[ChunkHash] = CasDiskLocation(Location);
m_TotalSize.fetch_add(static_cast<uint64_t>(ChunkSize));
m_CasLog.Append(IndexEntry);
@@ -281,7 +281,7 @@ CasContainerStrategy::FindChunk(const IoHash& ChunkHash)
if (auto KeyIt = m_LocationMap.find(ChunkHash); KeyIt != m_LocationMap.end())
{
- const CasDiskLocation& Location = KeyIt->second;
+ CasLocation Location = *KeyIt->second;
if (auto BlockIt = m_OpenBlocks.find(Location.BlockIndex); BlockIt != m_OpenBlocks.end())
{
@@ -356,11 +356,12 @@ CasContainerStrategy::Scrub(ScrubContext& Ctx)
for (auto& Entry : m_LocationMap)
{
- const uint64_t EntryOffset = Entry.second.Offset;
+ const CasLocation Location = *Entry.second;
+ const uint64_t EntryOffset = Location.Offset;
if ((EntryOffset >= WindowStart) && (EntryOffset < WindowEnd))
{
- const uint64_t EntryEnd = EntryOffset + Entry.second.Size;
+ const uint64_t EntryEnd = EntryOffset + Location.Size;
if (EntryEnd >= WindowEnd)
{
@@ -370,8 +371,7 @@ CasContainerStrategy::Scrub(ScrubContext& Ctx)
}
const IoHash ComputedHash =
- IoHash::HashBuffer(reinterpret_cast<uint8_t*>(BufferBase) + Entry.second.Offset - WindowStart,
- Entry.second.Size);
+ IoHash::HashBuffer(reinterpret_cast<uint8_t*>(BufferBase) + Location.Offset - WindowStart, Location.Size);
if (Entry.first != ComputedHash)
{
@@ -391,9 +391,10 @@ CasContainerStrategy::Scrub(ScrubContext& Ctx)
for (const CasDiskIndexEntry& Entry : BigChunks)
{
- IoHashStream Hasher;
- auto& SmallObjectFile = *m_OpenBlocks[Entry.Location.BlockIndex];
- SmallObjectFile.StreamByteRange(Entry.Location.Offset, Entry.Location.Size, [&](const void* Data, uint64_t Size) {
+ IoHashStream Hasher;
+ const CasLocation Location = *Entry.Location;
+ auto& SmallObjectFile = *m_OpenBlocks[Location.BlockIndex];
+ SmallObjectFile.StreamByteRange(Location.Offset, Location.Size, [&](const void* Data, uint64_t Size) {
Hasher.Append(Data, Size);
});
IoHash ComputedHash = Hasher.GetHash();
@@ -469,7 +470,7 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx)
std::unordered_map<uint64_t, size_t> BlockIndexToKeepChunksMap;
std::vector<std::unordered_map<IoHash, CasDiskLocation, IoHash::Hasher>> KeepChunks;
std::vector<IoHash> DeletedChunks;
- std::unordered_set<uint16_t> BlocksToReWrite;
+ std::unordered_set<uint32_t> BlocksToReWrite;
{
RwLock::ExclusiveLockScope _i(m_InsertLock);
RwLock::ExclusiveLockScope _l(m_LocationMapLock);
@@ -493,11 +494,12 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx)
for (const auto& Entry : m_LocationMap)
{
TotalChunkHashes.push_back(Entry.first);
- if (BlockIndexToKeepChunksMap.contains(Entry.second.BlockIndex))
+ const CasLocation Location = *Entry.second;
+ if (BlockIndexToKeepChunksMap.contains(Location.BlockIndex))
{
continue;
}
- BlockIndexToKeepChunksMap[Entry.second.BlockIndex] = KeepChunks.size();
+ BlockIndexToKeepChunksMap[Location.BlockIndex] = KeepChunks.size();
KeepChunks.resize(KeepChunks.size() + 1);
}
@@ -507,10 +509,10 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx)
GcCtx.FilterCas(TotalChunkHashes, [&](const IoHash& ChunkHash, bool Keep) {
if (Keep)
{
- auto KeyIt = m_LocationMap.find(ChunkHash);
- const auto& ChunkLocation = KeyIt->second;
- auto& ChunkMap = KeepChunks[BlockIndexToKeepChunksMap[ChunkLocation.BlockIndex]];
- ChunkMap[ChunkHash] = ChunkLocation;
+ auto KeyIt = m_LocationMap.find(ChunkHash);
+ const CasLocation ChunkLocation = *KeyIt->second;
+ auto& ChunkMap = KeepChunks[BlockIndexToKeepChunksMap[ChunkLocation.BlockIndex]];
+ ChunkMap[ChunkHash] = KeyIt->second;
NewTotalSize += ChunkLocation.Size;
}
else
@@ -532,10 +534,10 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx)
for (const auto& ChunkHash : DeletedChunks)
{
- auto KeyIt = m_LocationMap.find(ChunkHash);
- const auto& ChunkLocation = KeyIt->second;
+ auto KeyIt = m_LocationMap.find(ChunkHash);
+ const CasLocation& ChunkLocation = *KeyIt->second;
BlocksToReWrite.insert(ChunkLocation.BlockIndex);
- m_CasLog.Append({.Key = ChunkHash, .Location = ChunkLocation, .Flags = CasDiskIndexEntry::kTombstone});
+ m_CasLog.Append({.Key = ChunkHash, .Location = KeyIt->second, .Flags = CasDiskIndexEntry::kTombstone});
m_LocationMap.erase(ChunkHash);
m_TotalSize.fetch_sub(static_cast<uint64_t>(ChunkLocation.Size));
}
@@ -544,7 +546,7 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx)
// a small amount of bytes to gain.
if (BlocksToReWrite.contains(m_CurrentBlockIndex))
{
- uint16_t NewBlockIndex = m_CurrentBlockIndex + 1;
+ uint32_t NewBlockIndex = m_CurrentBlockIndex + 1;
while (m_OpenBlocks.contains(NewBlockIndex))
{
NewBlockIndex++;
@@ -552,7 +554,7 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx)
{
ZEN_ERROR("unable to allocate a new block in {}, count limit {} exeeded",
m_ContainerBaseName,
- static_cast<uint32_t>(std::numeric_limits<uint16_t>::max()) + 1);
+ static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()) + 1);
return;
}
}
@@ -569,7 +571,7 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx)
std::shared_ptr<ChunkBlock> NewBlockFile;
uint64_t WriteOffset = {};
- uint16_t NewBlockIndex = {};
+ uint32_t NewBlockIndex = {};
std::unordered_map<IoHash, CasDiskLocation> MovedBlocks;
for (auto BlockIndex : BlocksToReWrite)
@@ -604,7 +606,7 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx)
std::vector<uint8_t> Chunk;
for (auto& Entry : ChunkMap)
{
- const CasDiskLocation& ChunkLocation = Entry.second;
+ const CasLocation ChunkLocation = *Entry.second;
Chunk.resize(ChunkLocation.Size);
BlockFile->Read(Chunk.data(), Chunk.size(), ChunkLocation.Offset);
@@ -630,7 +632,7 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx)
{
ZEN_ERROR("unable to allocate a new block in {}, count limit {} exeeded",
m_ContainerBaseName,
- static_cast<uint32_t>(std::numeric_limits<uint16_t>::max()) + 1);
+ static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()) + 1);
return;
}
}
@@ -665,10 +667,8 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx)
}
NewBlockFile->Write(Chunk.data(), Chunk.size(), WriteOffset);
- CasDiskLocation NewChunkLocation(NewBlockIndex, gsl::narrow<uint32_t>(WriteOffset), gsl::narrow<uint32_t>(Chunk.size()));
- Entry.second = {.BlockIndex = NewBlockIndex,
- .Offset = gsl::narrow<uint32_t>(WriteOffset),
- .Size = gsl::narrow<uint32_t>(Chunk.size())};
+ CasLocation NewChunkLocation(NewBlockIndex, WriteOffset, Chunk.size());
+ Entry.second = CasDiskLocation(NewChunkLocation);
MovedBlocks[Entry.first] = Entry.second;
WriteOffset = AlignPositon(WriteOffset + Chunk.size(), m_PayloadAlignment);
}
@@ -914,7 +914,7 @@ CasContainerStrategy::OpenContainer(bool IsNewStore)
SmallObjectFile.Open(LegacySobsPath, false);
uint64_t MaxRequiredChunkCount = SmallObjectFile.FileSize() / m_MaxBlockSize;
- uint64_t MaxPossibleChunkCount = static_cast<uint64_t>(std::numeric_limits<std::uint16_t>::max()) + 1;
+ uint64_t MaxPossibleChunkCount = static_cast<uint64_t>(std::numeric_limits<std::uint32_t>::max()) + 1;
if (MaxRequiredChunkCount > MaxPossibleChunkCount)
{
ZEN_ERROR("legacy store migration from '{}' FAILED, required block count {}, possible {}",
@@ -958,7 +958,7 @@ CasContainerStrategy::OpenContainer(bool IsNewStore)
std::unique_ptr<ChunkBlock> NewBlockFile;
uint64_t WriteOffset = {};
- uint16_t NewBlockIndex = {};
+ uint32_t NewBlockIndex = {};
std::vector<uint8_t> Chunk;
for (const auto& ChunkHash : ChunkHashes)
@@ -982,8 +982,8 @@ CasContainerStrategy::OpenContainer(bool IsNewStore)
WriteOffset = 0;
}
NewBlockFile->Write(Chunk.data(), Chunk.size(), WriteOffset);
- CasDiskLocation NewChunkLocation(NewBlockIndex, gsl::narrow<uint32_t>(WriteOffset), gsl::narrow<uint32_t>(Chunk.size()));
- m_CasLog.Append({.Key = ChunkHash, .Location = NewChunkLocation});
+ CasLocation NewChunkLocation(NewBlockIndex, WriteOffset, Chunk.size());
+ m_CasLog.Append({.Key = ChunkHash, .Location = CasDiskLocation(NewChunkLocation)});
WriteOffset = AlignPositon(WriteOffset + Chunk.size(), m_PayloadAlignment);
}
m_CasLog.Close();
@@ -1033,10 +1033,10 @@ CasContainerStrategy::OpenContainer(bool IsNewStore)
});
}
- std::unordered_map<uint16_t, uint64_t> BlockUsage;
+ std::unordered_map<uint32_t, uint64_t> BlockUsage;
for (const auto& Entry : m_LocationMap)
{
- const auto& Location = Entry.second;
+ const CasLocation Location = *Entry.second;
m_TotalSize.fetch_add(Location.Size);
uint64_t NextBlockStart = Location.Offset + Location.Size;
auto It = BlockUsage.find(Location.BlockIndex);
@@ -1072,7 +1072,7 @@ CasContainerStrategy::OpenContainer(bool IsNewStore)
}
try
{
- uint16_t BlockIndex = static_cast<uint16_t>(std::stoi(FileName.substr(m_ContainerBaseName.length() + 1)));
+ uint32_t BlockIndex = static_cast<uint32_t>(std::stoi(FileName.substr(m_ContainerBaseName.length() + 1)));
if (!BlockUsage.contains(BlockIndex))
{
// Clear out unused blocks
@@ -1090,8 +1090,8 @@ CasContainerStrategy::OpenContainer(bool IsNewStore)
}
}
- uint32_t LargestSizeToUse = gsl::narrow<uint32_t>(m_MaxBlockSize - m_PayloadAlignment);
- uint32_t SmallestBlockSize = LargestSizeToUse;
+ uint64_t LargestSizeToUse = m_MaxBlockSize - m_PayloadAlignment;
+ uint64_t SmallestBlockSize = LargestSizeToUse;
bool CreateNewBlock = m_OpenBlocks.empty();
if (!CreateNewBlock)
{
@@ -1099,7 +1099,7 @@ CasContainerStrategy::OpenContainer(bool IsNewStore)
{
if (Entry.second < SmallestBlockSize)
{
- SmallestBlockSize = gsl::narrow<uint32_t>(Entry.second);
+ SmallestBlockSize = Entry.second;
m_CurrentBlockIndex = Entry.first;
CreateNewBlock = false;
}
@@ -1117,7 +1117,7 @@ CasContainerStrategy::OpenContainer(bool IsNewStore)
else
{
m_CurrentBlock = m_OpenBlocks[m_CurrentBlockIndex];
- m_CurrentInsertOffset = static_cast<uint32_t>(AlignPositon(SmallestBlockSize, m_PayloadAlignment));
+ m_CurrentInsertOffset = AlignPositon(SmallestBlockSize, m_PayloadAlignment);
}
// TODO: should validate integrity of container files here
@@ -1145,6 +1145,41 @@ namespace {
}
} // namespace
+bool  // Field-wise equality for CasLocation: true only when BlockIndex, Offset and Size all match
+operator==(const CasLocation& Lhs, const CasLocation& Rhs)
+{
+ return Lhs.BlockIndex == Rhs.BlockIndex && Lhs.Offset == Rhs.Offset && Lhs.Size == Rhs.Size;
+}
+
+TEST_CASE("cas.casdisklocation")  // Round-trip test: each CasLocation is packed into a CasDiskLocation and unpacked with operator*, and must compare equal to the input
+{
+ CasLocation Zero = CasLocation{.BlockIndex = 0, .Offset = 0, .Size = 0};  // all fields zero
+ CHECK(Zero == *CasDiskLocation(Zero));
+
+ CasLocation MaxBlockIndex = CasLocation{.BlockIndex = CasDiskLocation::MaxBlockIndex, .Offset = 0, .Size = 0};  // largest block index on its own
+ CHECK(MaxBlockIndex == *CasDiskLocation(MaxBlockIndex));
+
+ CasLocation MaxOffset = CasLocation{.BlockIndex = 0, .Offset = CasDiskLocation::MaxOffset, .Size = 0};  // largest offset on its own
+ CHECK(MaxOffset == *CasDiskLocation(MaxOffset));
+
+ CasLocation MaxSize = CasLocation{.BlockIndex = 0, .Offset = 0, .Size = std::numeric_limits<uint32_t>::max()};  // size at the uint32_t maximum, although CasLocation::Size itself is uint64_t
+ CHECK(MaxSize == *CasDiskLocation(MaxSize));
+
+ CasLocation MaxBlockIndexAndOffset =  // block index and offset both at their maxima; presumably guards against the two fields bleeding into each other in the shared high-bits word (TODO confirm intent)
+ CasLocation{.BlockIndex = CasDiskLocation::MaxBlockIndex, .Offset = CasDiskLocation::MaxOffset, .Size = 0};
+ CHECK(MaxBlockIndexAndOffset == *CasDiskLocation(MaxBlockIndexAndOffset));
+
+ CasLocation MaxAll = CasLocation{.BlockIndex = CasDiskLocation::MaxBlockIndex,  // every field at the largest value used by this test
+ .Offset = CasDiskLocation::MaxOffset,
+ .Size = std::numeric_limits<uint32_t>::max()};
+ CHECK(MaxAll == *CasDiskLocation(MaxAll));
+
+ CasLocation Middle = CasLocation{.BlockIndex = (CasDiskLocation::MaxBlockIndex) / 2,  // half of each maximum (integer division)
+ .Offset = (CasDiskLocation::MaxOffset) / 2,
+ .Size = std::numeric_limits<uint32_t>::max() / 2};
+ CHECK(Middle == *CasDiskLocation(Middle));
+}
+
TEST_CASE("cas.compact.gc")
{
ScopedTemporaryDirectory TempDir;
diff --git a/zenstore/compactcas.h b/zenstore/compactcas.h
index 5f58ccab1..91c7c6981 100644
--- a/zenstore/compactcas.h
+++ b/zenstore/compactcas.h
@@ -19,16 +19,35 @@ namespace zen {
//////////////////////////////////////////////////////////////////////////
+struct CasLocation  // Unpacked (block, offset, size) triple; CasDiskLocation is constructed from it and returns it from operator*
+{
+ uint32_t BlockIndex;  // block number; CasDiskLocation asserts it is below 1 << 20 when packing
+ uint64_t Offset;  // position inside the block, presumably in bytes (TODO confirm); CasDiskLocation asserts it is below 1 << 28 when packing
+ uint64_t Size;  // chunk size, presumably in bytes (TODO confirm); note CasDiskLocation stores it in a uint32_t m_Size
+};
+
#pragma pack(push)
#pragma pack(1)
struct CasDiskLocation
{
- // 20 bits blockindex
- // 28 bits offset
- // 32 bits size
+ constexpr static uint32_t MaxBlockIndexBits = 20;
+ constexpr static uint32_t MaxOffsetBits = 28;
+ constexpr static uint32_t MaxBlockIndex = (1ul << CasDiskLocation::MaxBlockIndexBits) - 1ul;
+ constexpr static uint32_t MaxOffset = (1ul << CasDiskLocation::MaxOffsetBits) - 1ul;
+
+ explicit CasDiskLocation(const CasLocation& Location) { Init(Location.BlockIndex, Location.Offset, Location.Size); }
+
+ CasDiskLocation() = default;
- CasDiskLocation(uint32_t BlockIndex, uint64_t Offset, uint64_t Size)
+ inline CasLocation operator*() const { return {.BlockIndex = BlockIndex(), .Offset = Offset(), .Size = Size()}; }
+
+private:
+ inline uint32_t BlockIndex() const { return (static_cast<uint32_t>(m_BlockIndexAndOffsetHighBits & 0xf) << 16) | m_BlockIndexLowBits; }
+ inline uint64_t Offset() const { return (static_cast<uint64_t>(m_BlockIndexAndOffsetHighBits & 0xfff0) << 12) | m_OffsetLowBits; }
+ inline uint64_t Size() const { return m_Size; }
+
+ inline void Init(uint32_t BlockIndex, uint64_t Offset, uint64_t Size)
{
ZEN_ASSERT(BlockIndex < (1L << 20));
ZEN_ASSERT(Offset < (1L << 28));
@@ -40,40 +59,10 @@ struct CasDiskLocation
m_BlockIndexAndOffsetHighBits = static_cast<uint16_t>(((BlockIndex >> 16) & 0xf) | (((Offset >> 16) & 0xfff) << 4));
}
- CasDiskLocation() = default;
-
- inline uint32_t GetBlockIndex() const
- {
- return (static_cast<uint32_t>(m_BlockIndexAndOffsetHighBits & 0xf) << 16) | m_BlockIndexLowBits;
- }
- inline uint64_t Offset() const { return (static_cast<uint64_t>(m_BlockIndexAndOffsetHighBits & 0xfff0) << 8) | m_OffsetLowBits; }
- inline uint64_t Size() const { return m_Size; }
-
-private:
uint32_t m_Size;
uint16_t m_BlockIndexLowBits;
uint16_t m_OffsetLowBits;
uint16_t m_BlockIndexAndOffsetHighBits;
-
-#if 0
- // 24 bits blockindex
- // 24 bits offset
- // 32 bits size
- inline uint32_t GetBlockIndex() const
- {
- return (static_cast<uint32_t>(BlockIndexHighBits) << 16) | BlockIndexLowBits;
- }
- inline uint32_t Offset() const
- {
- return (static_cast<uint32_t>(OffsetHighBits) << 16) | OffsetLowBits;
- }
-
- uint32_t Size;
- uint16_t BlockIndexLowBits;
- uint16_t OffsetLowBits;
- uint8_t BlockIndexHighBits;
- uint8_t OffsetHighBits;
-#endif
};
struct CasDiskIndexEntry
@@ -134,10 +123,10 @@ private:
std::unordered_map<IoHash, CasDiskLocation, IoHash::Hasher> m_LocationMap;
RwLock m_InsertLock; // used to serialize inserts
- std::unordered_map<uint16_t, std::shared_ptr<ChunkBlock>> m_OpenBlocks;
+ std::unordered_map<uint32_t, std::shared_ptr<ChunkBlock>> m_OpenBlocks;
std::weak_ptr<ChunkBlock> m_CurrentBlock;
- uint16_t m_CurrentBlockIndex = 0;
- std::atomic_uint32_t m_CurrentInsertOffset{};
+ uint32_t m_CurrentBlockIndex = 0;
+ std::atomic_uint64_t m_CurrentInsertOffset{};
std::atomic_uint64_t m_TotalSize{};
void MakeIndexSnapshot();