diff options
| author | Dan Engelbrecht <[email protected]> | 2022-03-18 15:33:09 +0100 |
|---|---|---|
| committer | Dan Engelbrecht <[email protected]> | 2022-03-31 11:28:32 +0200 |
| commit | d9603de73189523fb9b5413fb83005ccfbbfa715 (patch) | |
| tree | 76e794aab501603c38df7e1636904038bb6ac0a9 /zenstore/compactcas.cpp | |
| parent | Clean up thread locking (diff) | |
| download | zen-d9603de73189523fb9b5413fb83005ccfbbfa715.tar.xz zen-d9603de73189523fb9b5413fb83005ccfbbfa715.zip | |
Reworked storage structure and fixed race conditions
Diffstat (limited to 'zenstore/compactcas.cpp')
| -rw-r--r-- | zenstore/compactcas.cpp | 236 |
1 files changed, 168 insertions, 68 deletions
diff --git a/zenstore/compactcas.cpp b/zenstore/compactcas.cpp index 28f989bfb..435e63012 100644 --- a/zenstore/compactcas.cpp +++ b/zenstore/compactcas.cpp @@ -41,11 +41,45 @@ static_assert(sizeof(CasDiskIndexHeader) == 32); namespace { uint64_t AlignPositon(uint64_t Offset, uint64_t Alignment) { return (Offset + Alignment - 1) & ~(Alignment - 1); } - std::filesystem::path BuildUcasPath(const std::filesystem::path& RootDirectory, - const std::string_view ContainerBaseName, - const uint32_t BlockIndex) + static const char* HexLUT = "0123456789abcdef"; + + bool ParseHex(const std::string HexString, uint32_t& OutValue) + { + if (HexString.length() != 8) + { + return false; + } + OutValue = strtoul(HexString.data(), 0, 16); + return true; + } + + void FormatHex(uint32_t Value, char OutBlockHexString[9]) + { + OutBlockHexString[0] = HexLUT[(Value >> 28) & 0xf]; + OutBlockHexString[1] = HexLUT[(Value >> 24) & 0xf]; + OutBlockHexString[2] = HexLUT[(Value >> 20) & 0xf]; + OutBlockHexString[3] = HexLUT[(Value >> 16) & 0xf]; + OutBlockHexString[4] = HexLUT[(Value >> 12) & 0xf]; + OutBlockHexString[5] = HexLUT[(Value >> 8) & 0xf]; + OutBlockHexString[6] = HexLUT[(Value >> 4) & 0xf]; + OutBlockHexString[7] = HexLUT[(Value >> 0) & 0xf]; + OutBlockHexString[8] = 0; + } + + std::filesystem::path BuildUcasPath(const std::filesystem::path& BlocksBasePath, const uint32_t BlockIndex) { - return RootDirectory / (std::string(ContainerBaseName) + "." + (std::to_string(BlockIndex) + ".ucas")); + ExtendablePathBuilder<256> Path; + + char BlockHexString[9]; + FormatHex(BlockIndex, BlockHexString); + + Path.Append(BlocksBasePath); + Path.AppendSeparator(); + Path.AppendAsciiRange(BlockHexString, BlockHexString + 4); + Path.AppendSeparator(); + Path.Append(BlockHexString); + Path.Append(".ucas"); + return Path.ToPath(); } void MarkFileAsDeleteOnClose(void* FileHandle) @@ -75,7 +109,7 @@ namespace { struct CasContainerStrategy::ChunkBlock { - ChunkBlock(const std::filesystem::path& RootDirectory, const std::string_view ContainerBaseName, uint32_t BlockIndex); + ChunkBlock(const std::filesystem::path& BlocksBasePath, uint32_t BlockIndex); ~ChunkBlock(); const std::filesystem::path GetPath() const; void Open(); @@ -89,21 +123,24 @@ struct CasContainerStrategy::ChunkBlock void StreamByteRange(uint64_t FileOffset, uint64_t Size, std::function<void(const void* Data, uint64_t Size)>&& ChunkFun); private: - std::filesystem::path m_Path; - BasicFile m_SmallObjectFile; - IoBuffer m_IoBuffer; + const std::filesystem::path m_Path; + std::atomic<bool> m_IsOpened; + RwLock m_OpenLock; + BasicFile m_SmallObjectFile; + IoBuffer m_IoBuffer; }; -CasContainerStrategy::ChunkBlock::ChunkBlock(const std::filesystem::path& RootDirectory, - const std::string_view ContainerBaseName, - uint32_t BlockIndex) -: m_Path(BuildUcasPath(RootDirectory, ContainerBaseName, BlockIndex)) +CasContainerStrategy::ChunkBlock::ChunkBlock(const std::filesystem::path& BlocksBasePath, uint32_t BlockIndex) +: m_Path(BuildUcasPath(BlocksBasePath, BlockIndex)) { } CasContainerStrategy::ChunkBlock::~ChunkBlock() { - m_SmallObjectFile.Detach(); + if (m_IsOpened.load()) + { + m_SmallObjectFile.Detach(); + } } const std::filesystem::path @@ -115,73 +152,84 @@ CasContainerStrategy::ChunkBlock::GetPath() const void CasContainerStrategy::ChunkBlock::Open() { - void* FileHandle = m_SmallObjectFile.Handle(); - if (FileHandle != nullptr) + // Open can have a race if multiple requests wants to read the same block + // Create or ~ChunkBlock() can not have a race so we only need to guard Open() + if (m_IsOpened.load()) + { + return; + } + + RwLock::ExclusiveLockScope _(m_OpenLock); + if (m_IsOpened.load()) { return; } + m_SmallObjectFile.Open(m_Path, false); - FileHandle = m_SmallObjectFile.Handle(); - m_IoBuffer = IoBuffer(IoBuffer::File, FileHandle, 0, m_SmallObjectFile.FileSize()); + void* FileHandle = m_SmallObjectFile.Handle(); + m_IoBuffer = IoBuffer(IoBuffer::File, FileHandle, 0, m_SmallObjectFile.FileSize()); + m_IsOpened.store(true); } void CasContainerStrategy::ChunkBlock::Create(uint64_t InitialSize) { - void* FileHandle = m_SmallObjectFile.Handle(); - if (FileHandle != nullptr) - { - return; - } + ZEN_ASSERT(!m_IsOpened.load()); + + CreateDirectories(m_Path.parent_path()); m_SmallObjectFile.Open(m_Path, true); if (InitialSize > 0) { m_SmallObjectFile.SetFileSize(InitialSize); } - FileHandle = m_SmallObjectFile.Handle(); - m_IoBuffer = IoBuffer(IoBuffer::File, FileHandle, 0, InitialSize); + void* FileHandle = m_SmallObjectFile.Handle(); + m_IoBuffer = IoBuffer(IoBuffer::File, FileHandle, 0, InitialSize); + m_IsOpened.store(true); } uint64_t CasContainerStrategy::ChunkBlock::FileSize() { + ZEN_ASSERT(m_IsOpened.load()); return m_SmallObjectFile.FileSize(); } void CasContainerStrategy::ChunkBlock::MarkAsDeleteOnClose() { - void* FileHandle = m_SmallObjectFile.Handle(); - if (FileHandle == nullptr) + if (!m_IsOpened.load()) { return; } + void* FileHandle = m_SmallObjectFile.Handle(); MarkFileAsDeleteOnClose(FileHandle); } IoBuffer CasContainerStrategy::ChunkBlock::GetRange(uint64_t Offset, uint64_t Size) { + Open(); return IoBuffer(m_IoBuffer, Offset, Size); } void CasContainerStrategy::ChunkBlock::Read(void* Data, uint64_t Size, uint64_t FileOffset) { + ZEN_ASSERT(m_IsOpened.load()); m_SmallObjectFile.Read(Data, Size, FileOffset); } void CasContainerStrategy::ChunkBlock::Write(const void* Data, uint64_t Size, uint64_t FileOffset) { + ZEN_ASSERT(m_IsOpened.load()); m_SmallObjectFile.Write(Data, Size, FileOffset); } void CasContainerStrategy::ChunkBlock::Flush() { - void* FileHandle = m_SmallObjectFile.Handle(); - if (FileHandle == nullptr) + if (!m_IsOpened.load()) { return; } @@ -193,9 +241,12 @@ CasContainerStrategy::ChunkBlock::StreamByteRange(uint64_t FileOffse uint64_t Size, std::function<void(const void* Data, uint64_t Size)>&& ChunkFun) { + ZEN_ASSERT(m_IsOpened.load()); m_SmallObjectFile.StreamByteRange(FileOffset, Size, std::move(ChunkFun)); } +////////////////////////////////////////////////////////////////////////// + CasContainerStrategy::CasContainerStrategy(const CasStoreConfiguration& Config, CasGc& Gc) : GcStorage(Gc) , m_Config(Config) @@ -217,6 +268,7 @@ CasContainerStrategy::Initialize(const std::string_view ContainerBaseName, uint3 m_ContainerBaseName = ContainerBaseName; m_PayloadAlignment = Alignment; m_MaxBlockSize = MaxBlockSize; + m_BlocksBasePath = m_Config.RootDirectory / m_ContainerBaseName / "blocks"; OpenContainer(IsNewStore); @@ -255,7 +307,7 @@ CasContainerStrategy::InsertChunk(const void* ChunkData, size_t ChunkSize, const { WriteBlockIndex++; } - WriteBlock = std::make_shared<ChunkBlock>(m_Config.RootDirectory, m_ContainerBaseName, WriteBlockIndex); + WriteBlock = std::make_shared<ChunkBlock>(m_BlocksBasePath, WriteBlockIndex); m_ChunkBlocks[WriteBlockIndex] = WriteBlock; m_WriteBlockIndex.store(WriteBlockIndex); } @@ -299,7 +351,11 @@ CasContainerStrategy::FindChunk(const IoHash& ChunkHash) if (auto BlockIt = m_ChunkBlocks.find(Location.BlockIndex); BlockIt != m_ChunkBlocks.end()) { - return BlockIt->second->GetRange(Location.Offset, Location.Size); + if (BlockIt->second) // This happens if the data associated with the block is not found - ie the ucas file is deleted but the + // index not updated + { + return BlockIt->second->GetRange(Location.Offset, Location.Size); + } } } @@ -361,11 +417,11 @@ CasContainerStrategy::Scrub(ScrubContext& Ctx) for (const auto& Block : m_ChunkBlocks) { - uint64_t WindowStart = 0; - uint64_t WindowEnd = WindowSize; - auto& SmallObjectFile = *Block.second; - const uint64_t FileSize = SmallObjectFile.FileSize(); + uint64_t WindowStart = 0; + uint64_t WindowEnd = WindowSize; + auto& SmallObjectFile = *Block.second; SmallObjectFile.Open(); + const uint64_t FileSize = SmallObjectFile.FileSize(); do { @@ -394,7 +450,6 @@ CasContainerStrategy::Scrub(ScrubContext& Ctx) if (Entry.first != ComputedHash) { // Hash mismatch - BadChunks.push_back({.Key = Entry.first, .Location = Entry.second}); } } @@ -639,7 +694,7 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx) { NewBlockIndex++; } - NewBlockFile = std::make_shared<ChunkBlock>(m_Config.RootDirectory, m_ContainerBaseName, NewBlockIndex); + NewBlockFile = std::make_shared<ChunkBlock>(m_BlocksBasePath, NewBlockIndex); m_ChunkBlocks[NewBlockIndex] = NewBlockFile; } @@ -966,7 +1021,7 @@ CasContainerStrategy::OpenContainer(bool IsNewStore) SmallObjectFile.Read(Chunk.data(), Chunk.size(), ChunkLocation.GetOffset()); if (!NewBlockFile) { - NewBlockFile = std::make_unique<ChunkBlock>(m_Config.RootDirectory, m_ContainerBaseName, NewBlockIndex); + NewBlockFile = std::make_unique<ChunkBlock>(m_BlocksBasePath, NewBlockIndex); NewBlockFile->Create(m_MaxBlockSize); } else if (WriteOffset + Chunk.size() > m_MaxBlockSize) @@ -974,7 +1029,7 @@ CasContainerStrategy::OpenContainer(bool IsNewStore) uint64_t ChunkEnd = ChunkLocation.GetOffset() + Chunk.size(); SmallObjectFile.SetFileSize(ChunkEnd); NewBlockIndex = NewBlockIndex + 1; - NewBlockFile = std::make_unique<ChunkBlock>(m_Config.RootDirectory, m_ContainerBaseName, NewBlockIndex); + NewBlockFile = std::make_unique<ChunkBlock>(m_BlocksBasePath, NewBlockIndex); NewBlockFile->Create(m_MaxBlockSize); WriteOffset = 0; } @@ -996,7 +1051,7 @@ CasContainerStrategy::OpenContainer(bool IsNewStore) BasicFile SmallObjectIndex; SmallObjectIndex.Open(SidxPath, false); uint64_t Size = SmallObjectIndex.FileSize(); - if (SmallObjectIndex.FileSize() >= sizeof(CasDiskIndexHeader)) + if (Size >= sizeof(CasDiskIndexHeader)) { uint64_t ExpectedEntryCount = (Size - sizeof(sizeof(CasDiskIndexHeader))) / sizeof(CasDiskIndexEntry); CasDiskIndexHeader Header; @@ -1047,42 +1102,50 @@ CasContainerStrategy::OpenContainer(bool IsNewStore) } } - for (const std::filesystem::directory_entry& Entry : std::filesystem::directory_iterator(m_Config.RootDirectory)) + if (std::filesystem::exists(m_BlocksBasePath)) { - if (Entry.is_regular_file()) + std::vector<std::filesystem::path> FoldersToScan; + FoldersToScan.push_back(m_BlocksBasePath); + size_t FolderOffset = 0; + while (FolderOffset < FoldersToScan.size()) { - // TODO: Clean up naming/storage structure so we don't have to do this complicated parsing to find our ucas files - if (Entry.path().extension() != ".ucas") - { - continue; - } - std::string FileName = Entry.path().stem().string(); - if (!FileName.starts_with(m_ContainerBaseName)) + for (const std::filesystem::directory_entry& Entry : std::filesystem::directory_iterator(FoldersToScan[FolderOffset])) { - continue; - } - if (IsNewStore) - { - std::filesystem::remove(Entry.path()); - continue; - } - try - { - uint32_t BlockIndex = static_cast<uint32_t>(std::stoi(FileName.substr(m_ContainerBaseName.length() + 1))); - if (!BlockUsage.contains(BlockIndex)) + if (Entry.is_directory()) { - // Clear out unused blocks - std::filesystem::remove(Entry.path()); + FoldersToScan.push_back(Entry.path()); continue; } - auto SmallObjectFile = std::make_shared<ChunkBlock>(m_Config.RootDirectory, m_ContainerBaseName, BlockIndex); - SmallObjectFile->Open(); - m_ChunkBlocks[BlockIndex] = SmallObjectFile; - } - catch (const std::invalid_argument&) - { - // Non-valid file, skip it (or should we remove it?) + if (Entry.is_regular_file()) + { + const std::filesystem::path Path = Entry.path(); + if (Path.extension() != ".ucas") + { + continue; + } + if (IsNewStore) + { + std::filesystem::remove(Path); + continue; + } + std::string FileName = Path.stem().string(); + uint32_t BlockIndex; + bool OK = ParseHex(FileName, BlockIndex); + if (!OK) + { + continue; + } + if (!BlockUsage.contains(BlockIndex)) + { + // Clear out unused blocks + std::filesystem::remove(Path); + continue; + } + auto SmallObjectFile = std::make_shared<ChunkBlock>(m_BlocksBasePath, BlockIndex); + m_ChunkBlocks[BlockIndex] = SmallObjectFile; + } } + ++FolderOffset; } } @@ -1170,6 +1233,43 @@ TEST_CASE("compactcas.casdisklocation") CHECK(Middle == CasDiskLocation(Middle, 4).Get(4)); } +TEST_CASE("compactcas.hex") +{ + uint32_t Value; + std::string HexString; + CHECK(!ParseHex("", Value)); + char Hex[9]; + + FormatHex(0, Hex); + HexString = std::string(Hex); + CHECK(ParseHex(HexString, Value)); + CHECK(Value == 0); + + FormatHex(std::numeric_limits<std::uint32_t>::max(), Hex); + HexString = std::string(Hex); + CHECK(HexString == "ffffffff"); + CHECK(ParseHex(HexString, Value)); + CHECK(Value == std::numeric_limits<std::uint32_t>::max()); + + FormatHex(0xadf14711, Hex); + HexString = std::string(Hex); + CHECK(HexString == "adf14711"); + CHECK(ParseHex(HexString, Value)); + CHECK(Value == 0xadf14711); + + FormatHex(0x80000000, Hex); + HexString = std::string(Hex); + CHECK(HexString == "80000000"); + CHECK(ParseHex(HexString, Value)); + CHECK(Value == 0x80000000); + + FormatHex(0x718293a4, Hex); + HexString = std::string(Hex); + CHECK(HexString == "718293a4"); + CHECK(ParseHex(HexString, Value)); + CHECK(Value == 0x718293a4); +} + TEST_CASE("compactcas.compact.gc") { ScopedTemporaryDirectory TempDir; |