aboutsummaryrefslogtreecommitdiff
path: root/zenstore/compactcas.cpp
diff options
context:
space:
mode:
authorDan Engelbrecht <[email protected]>2022-03-18 15:33:09 +0100
committerDan Engelbrecht <[email protected]>2022-03-31 11:28:32 +0200
commitd9603de73189523fb9b5413fb83005ccfbbfa715 (patch)
tree76e794aab501603c38df7e1636904038bb6ac0a9 /zenstore/compactcas.cpp
parentClean up thread locking (diff)
downloadzen-d9603de73189523fb9b5413fb83005ccfbbfa715.tar.xz
zen-d9603de73189523fb9b5413fb83005ccfbbfa715.zip
Reworked storage structure and fixed race conditions
Diffstat (limited to 'zenstore/compactcas.cpp')
-rw-r--r--zenstore/compactcas.cpp236
1 files changed, 168 insertions, 68 deletions
diff --git a/zenstore/compactcas.cpp b/zenstore/compactcas.cpp
index 28f989bfb..435e63012 100644
--- a/zenstore/compactcas.cpp
+++ b/zenstore/compactcas.cpp
@@ -41,11 +41,45 @@ static_assert(sizeof(CasDiskIndexHeader) == 32);
namespace {
uint64_t AlignPositon(uint64_t Offset, uint64_t Alignment) { return (Offset + Alignment - 1) & ~(Alignment - 1); }
- std::filesystem::path BuildUcasPath(const std::filesystem::path& RootDirectory,
- const std::string_view ContainerBaseName,
- const uint32_t BlockIndex)
+ static const char* HexLUT = "0123456789abcdef";
+
+ bool ParseHex(const std::string HexString, uint32_t& OutValue)
+ {
+ if (HexString.length() != 8)
+ {
+ return false;
+ }
+ OutValue = strtoul(HexString.data(), 0, 16);
+ return true;
+ }
+
+ void FormatHex(uint32_t Value, char OutBlockHexString[9])
+ {
+ OutBlockHexString[0] = HexLUT[(Value >> 28) & 0xf];
+ OutBlockHexString[1] = HexLUT[(Value >> 24) & 0xf];
+ OutBlockHexString[2] = HexLUT[(Value >> 20) & 0xf];
+ OutBlockHexString[3] = HexLUT[(Value >> 16) & 0xf];
+ OutBlockHexString[4] = HexLUT[(Value >> 12) & 0xf];
+ OutBlockHexString[5] = HexLUT[(Value >> 8) & 0xf];
+ OutBlockHexString[6] = HexLUT[(Value >> 4) & 0xf];
+ OutBlockHexString[7] = HexLUT[(Value >> 0) & 0xf];
+ OutBlockHexString[8] = 0;
+ }
+
+ std::filesystem::path BuildUcasPath(const std::filesystem::path& BlocksBasePath, const uint32_t BlockIndex)
{
- return RootDirectory / (std::string(ContainerBaseName) + "." + (std::to_string(BlockIndex) + ".ucas"));
+ ExtendablePathBuilder<256> Path;
+
+ char BlockHexString[9];
+ FormatHex(BlockIndex, BlockHexString);
+
+ Path.Append(BlocksBasePath);
+ Path.AppendSeparator();
+ Path.AppendAsciiRange(BlockHexString, BlockHexString + 4);
+ Path.AppendSeparator();
+ Path.Append(BlockHexString);
+ Path.Append(".ucas");
+ return Path.ToPath();
}
void MarkFileAsDeleteOnClose(void* FileHandle)
@@ -75,7 +109,7 @@ namespace {
struct CasContainerStrategy::ChunkBlock
{
- ChunkBlock(const std::filesystem::path& RootDirectory, const std::string_view ContainerBaseName, uint32_t BlockIndex);
+ ChunkBlock(const std::filesystem::path& BlocksBasePath, uint32_t BlockIndex);
~ChunkBlock();
const std::filesystem::path GetPath() const;
void Open();
@@ -89,21 +123,24 @@ struct CasContainerStrategy::ChunkBlock
void StreamByteRange(uint64_t FileOffset, uint64_t Size, std::function<void(const void* Data, uint64_t Size)>&& ChunkFun);
private:
- std::filesystem::path m_Path;
- BasicFile m_SmallObjectFile;
- IoBuffer m_IoBuffer;
+ const std::filesystem::path m_Path;
+ std::atomic<bool> m_IsOpened;
+ RwLock m_OpenLock;
+ BasicFile m_SmallObjectFile;
+ IoBuffer m_IoBuffer;
};
-CasContainerStrategy::ChunkBlock::ChunkBlock(const std::filesystem::path& RootDirectory,
- const std::string_view ContainerBaseName,
- uint32_t BlockIndex)
-: m_Path(BuildUcasPath(RootDirectory, ContainerBaseName, BlockIndex))
+CasContainerStrategy::ChunkBlock::ChunkBlock(const std::filesystem::path& BlocksBasePath, uint32_t BlockIndex)
+: m_Path(BuildUcasPath(BlocksBasePath, BlockIndex))
{
}
CasContainerStrategy::ChunkBlock::~ChunkBlock()
{
- m_SmallObjectFile.Detach();
+ if (m_IsOpened.load())
+ {
+ m_SmallObjectFile.Detach();
+ }
}
const std::filesystem::path
@@ -115,73 +152,84 @@ CasContainerStrategy::ChunkBlock::GetPath() const
void
CasContainerStrategy::ChunkBlock::Open()
{
- void* FileHandle = m_SmallObjectFile.Handle();
- if (FileHandle != nullptr)
+ // Open can have a race if multiple requests wants to read the same block
+ // Create or ~ChunkBlock() can not have a race so we only need to guard Open()
+ if (m_IsOpened.load())
+ {
+ return;
+ }
+
+ RwLock::ExclusiveLockScope _(m_OpenLock);
+ if (m_IsOpened.load())
{
return;
}
+
m_SmallObjectFile.Open(m_Path, false);
- FileHandle = m_SmallObjectFile.Handle();
- m_IoBuffer = IoBuffer(IoBuffer::File, FileHandle, 0, m_SmallObjectFile.FileSize());
+ void* FileHandle = m_SmallObjectFile.Handle();
+ m_IoBuffer = IoBuffer(IoBuffer::File, FileHandle, 0, m_SmallObjectFile.FileSize());
+ m_IsOpened.store(true);
}
void
CasContainerStrategy::ChunkBlock::Create(uint64_t InitialSize)
{
- void* FileHandle = m_SmallObjectFile.Handle();
- if (FileHandle != nullptr)
- {
- return;
- }
+ ZEN_ASSERT(!m_IsOpened.load());
+
+ CreateDirectories(m_Path.parent_path());
m_SmallObjectFile.Open(m_Path, true);
if (InitialSize > 0)
{
m_SmallObjectFile.SetFileSize(InitialSize);
}
- FileHandle = m_SmallObjectFile.Handle();
- m_IoBuffer = IoBuffer(IoBuffer::File, FileHandle, 0, InitialSize);
+ void* FileHandle = m_SmallObjectFile.Handle();
+ m_IoBuffer = IoBuffer(IoBuffer::File, FileHandle, 0, InitialSize);
+ m_IsOpened.store(true);
}
uint64_t
CasContainerStrategy::ChunkBlock::FileSize()
{
+ ZEN_ASSERT(m_IsOpened.load());
return m_SmallObjectFile.FileSize();
}
void
CasContainerStrategy::ChunkBlock::MarkAsDeleteOnClose()
{
- void* FileHandle = m_SmallObjectFile.Handle();
- if (FileHandle == nullptr)
+ if (!m_IsOpened.load())
{
return;
}
+ void* FileHandle = m_SmallObjectFile.Handle();
MarkFileAsDeleteOnClose(FileHandle);
}
IoBuffer
CasContainerStrategy::ChunkBlock::GetRange(uint64_t Offset, uint64_t Size)
{
+ Open();
return IoBuffer(m_IoBuffer, Offset, Size);
}
void
CasContainerStrategy::ChunkBlock::Read(void* Data, uint64_t Size, uint64_t FileOffset)
{
+ ZEN_ASSERT(m_IsOpened.load());
m_SmallObjectFile.Read(Data, Size, FileOffset);
}
void
CasContainerStrategy::ChunkBlock::Write(const void* Data, uint64_t Size, uint64_t FileOffset)
{
+ ZEN_ASSERT(m_IsOpened.load());
m_SmallObjectFile.Write(Data, Size, FileOffset);
}
void
CasContainerStrategy::ChunkBlock::Flush()
{
- void* FileHandle = m_SmallObjectFile.Handle();
- if (FileHandle == nullptr)
+ if (!m_IsOpened.load())
{
return;
}
@@ -193,9 +241,12 @@ CasContainerStrategy::ChunkBlock::StreamByteRange(uint64_t FileOffse
uint64_t Size,
std::function<void(const void* Data, uint64_t Size)>&& ChunkFun)
{
+ ZEN_ASSERT(m_IsOpened.load());
m_SmallObjectFile.StreamByteRange(FileOffset, Size, std::move(ChunkFun));
}
+//////////////////////////////////////////////////////////////////////////
+
CasContainerStrategy::CasContainerStrategy(const CasStoreConfiguration& Config, CasGc& Gc)
: GcStorage(Gc)
, m_Config(Config)
@@ -217,6 +268,7 @@ CasContainerStrategy::Initialize(const std::string_view ContainerBaseName, uint3
m_ContainerBaseName = ContainerBaseName;
m_PayloadAlignment = Alignment;
m_MaxBlockSize = MaxBlockSize;
+ m_BlocksBasePath = m_Config.RootDirectory / m_ContainerBaseName / "blocks";
OpenContainer(IsNewStore);
@@ -255,7 +307,7 @@ CasContainerStrategy::InsertChunk(const void* ChunkData, size_t ChunkSize, const
{
WriteBlockIndex++;
}
- WriteBlock = std::make_shared<ChunkBlock>(m_Config.RootDirectory, m_ContainerBaseName, WriteBlockIndex);
+ WriteBlock = std::make_shared<ChunkBlock>(m_BlocksBasePath, WriteBlockIndex);
m_ChunkBlocks[WriteBlockIndex] = WriteBlock;
m_WriteBlockIndex.store(WriteBlockIndex);
}
@@ -299,7 +351,11 @@ CasContainerStrategy::FindChunk(const IoHash& ChunkHash)
if (auto BlockIt = m_ChunkBlocks.find(Location.BlockIndex); BlockIt != m_ChunkBlocks.end())
{
- return BlockIt->second->GetRange(Location.Offset, Location.Size);
+ if (BlockIt->second) // This happens if the data associated with the block is not found - ie the ucas file is deleted but the
+ // index not updated
+ {
+ return BlockIt->second->GetRange(Location.Offset, Location.Size);
+ }
}
}
@@ -361,11 +417,11 @@ CasContainerStrategy::Scrub(ScrubContext& Ctx)
for (const auto& Block : m_ChunkBlocks)
{
- uint64_t WindowStart = 0;
- uint64_t WindowEnd = WindowSize;
- auto& SmallObjectFile = *Block.second;
- const uint64_t FileSize = SmallObjectFile.FileSize();
+ uint64_t WindowStart = 0;
+ uint64_t WindowEnd = WindowSize;
+ auto& SmallObjectFile = *Block.second;
SmallObjectFile.Open();
+ const uint64_t FileSize = SmallObjectFile.FileSize();
do
{
@@ -394,7 +450,6 @@ CasContainerStrategy::Scrub(ScrubContext& Ctx)
if (Entry.first != ComputedHash)
{
// Hash mismatch
-
BadChunks.push_back({.Key = Entry.first, .Location = Entry.second});
}
}
@@ -639,7 +694,7 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx)
{
NewBlockIndex++;
}
- NewBlockFile = std::make_shared<ChunkBlock>(m_Config.RootDirectory, m_ContainerBaseName, NewBlockIndex);
+ NewBlockFile = std::make_shared<ChunkBlock>(m_BlocksBasePath, NewBlockIndex);
m_ChunkBlocks[NewBlockIndex] = NewBlockFile;
}
@@ -966,7 +1021,7 @@ CasContainerStrategy::OpenContainer(bool IsNewStore)
SmallObjectFile.Read(Chunk.data(), Chunk.size(), ChunkLocation.GetOffset());
if (!NewBlockFile)
{
- NewBlockFile = std::make_unique<ChunkBlock>(m_Config.RootDirectory, m_ContainerBaseName, NewBlockIndex);
+ NewBlockFile = std::make_unique<ChunkBlock>(m_BlocksBasePath, NewBlockIndex);
NewBlockFile->Create(m_MaxBlockSize);
}
else if (WriteOffset + Chunk.size() > m_MaxBlockSize)
@@ -974,7 +1029,7 @@ CasContainerStrategy::OpenContainer(bool IsNewStore)
uint64_t ChunkEnd = ChunkLocation.GetOffset() + Chunk.size();
SmallObjectFile.SetFileSize(ChunkEnd);
NewBlockIndex = NewBlockIndex + 1;
- NewBlockFile = std::make_unique<ChunkBlock>(m_Config.RootDirectory, m_ContainerBaseName, NewBlockIndex);
+ NewBlockFile = std::make_unique<ChunkBlock>(m_BlocksBasePath, NewBlockIndex);
NewBlockFile->Create(m_MaxBlockSize);
WriteOffset = 0;
}
@@ -996,7 +1051,7 @@ CasContainerStrategy::OpenContainer(bool IsNewStore)
BasicFile SmallObjectIndex;
SmallObjectIndex.Open(SidxPath, false);
uint64_t Size = SmallObjectIndex.FileSize();
- if (SmallObjectIndex.FileSize() >= sizeof(CasDiskIndexHeader))
+ if (Size >= sizeof(CasDiskIndexHeader))
{
uint64_t ExpectedEntryCount = (Size - sizeof(sizeof(CasDiskIndexHeader))) / sizeof(CasDiskIndexEntry);
CasDiskIndexHeader Header;
@@ -1047,42 +1102,50 @@ CasContainerStrategy::OpenContainer(bool IsNewStore)
}
}
- for (const std::filesystem::directory_entry& Entry : std::filesystem::directory_iterator(m_Config.RootDirectory))
+ if (std::filesystem::exists(m_BlocksBasePath))
{
- if (Entry.is_regular_file())
+ std::vector<std::filesystem::path> FoldersToScan;
+ FoldersToScan.push_back(m_BlocksBasePath);
+ size_t FolderOffset = 0;
+ while (FolderOffset < FoldersToScan.size())
{
- // TODO: Clean up naming/storage structure so we don't have to do this complicated parsing to find our ucas files
- if (Entry.path().extension() != ".ucas")
- {
- continue;
- }
- std::string FileName = Entry.path().stem().string();
- if (!FileName.starts_with(m_ContainerBaseName))
+ for (const std::filesystem::directory_entry& Entry : std::filesystem::directory_iterator(FoldersToScan[FolderOffset]))
{
- continue;
- }
- if (IsNewStore)
- {
- std::filesystem::remove(Entry.path());
- continue;
- }
- try
- {
- uint32_t BlockIndex = static_cast<uint32_t>(std::stoi(FileName.substr(m_ContainerBaseName.length() + 1)));
- if (!BlockUsage.contains(BlockIndex))
+ if (Entry.is_directory())
{
- // Clear out unused blocks
- std::filesystem::remove(Entry.path());
+ FoldersToScan.push_back(Entry.path());
continue;
}
- auto SmallObjectFile = std::make_shared<ChunkBlock>(m_Config.RootDirectory, m_ContainerBaseName, BlockIndex);
- SmallObjectFile->Open();
- m_ChunkBlocks[BlockIndex] = SmallObjectFile;
- }
- catch (const std::invalid_argument&)
- {
- // Non-valid file, skip it (or should we remove it?)
+ if (Entry.is_regular_file())
+ {
+ const std::filesystem::path Path = Entry.path();
+ if (Path.extension() != ".ucas")
+ {
+ continue;
+ }
+ if (IsNewStore)
+ {
+ std::filesystem::remove(Path);
+ continue;
+ }
+ std::string FileName = Path.stem().string();
+ uint32_t BlockIndex;
+ bool OK = ParseHex(FileName, BlockIndex);
+ if (!OK)
+ {
+ continue;
+ }
+ if (!BlockUsage.contains(BlockIndex))
+ {
+ // Clear out unused blocks
+ std::filesystem::remove(Path);
+ continue;
+ }
+ auto SmallObjectFile = std::make_shared<ChunkBlock>(m_BlocksBasePath, BlockIndex);
+ m_ChunkBlocks[BlockIndex] = SmallObjectFile;
+ }
}
+ ++FolderOffset;
}
}
@@ -1170,6 +1233,43 @@ TEST_CASE("compactcas.casdisklocation")
CHECK(Middle == CasDiskLocation(Middle, 4).Get(4));
}
+TEST_CASE("compactcas.hex")
+{
+ uint32_t Value;
+ std::string HexString;
+ CHECK(!ParseHex("", Value));
+ char Hex[9];
+
+ FormatHex(0, Hex);
+ HexString = std::string(Hex);
+ CHECK(ParseHex(HexString, Value));
+ CHECK(Value == 0);
+
+ FormatHex(std::numeric_limits<std::uint32_t>::max(), Hex);
+ HexString = std::string(Hex);
+ CHECK(HexString == "ffffffff");
+ CHECK(ParseHex(HexString, Value));
+ CHECK(Value == std::numeric_limits<std::uint32_t>::max());
+
+ FormatHex(0xadf14711, Hex);
+ HexString = std::string(Hex);
+ CHECK(HexString == "adf14711");
+ CHECK(ParseHex(HexString, Value));
+ CHECK(Value == 0xadf14711);
+
+ FormatHex(0x80000000, Hex);
+ HexString = std::string(Hex);
+ CHECK(HexString == "80000000");
+ CHECK(ParseHex(HexString, Value));
+ CHECK(Value == 0x80000000);
+
+ FormatHex(0x718293a4, Hex);
+ HexString = std::string(Hex);
+ CHECK(HexString == "718293a4");
+ CHECK(ParseHex(HexString, Value));
+ CHECK(Value == 0x718293a4);
+}
+
TEST_CASE("compactcas.compact.gc")
{
ScopedTemporaryDirectory TempDir;