diff options
| author | Dan Engelbrecht <[email protected]> | 2022-03-16 11:55:00 +0100 |
|---|---|---|
| committer | Dan Engelbrecht <[email protected]> | 2022-03-31 11:28:32 +0200 |
| commit | 1045a36d0117be5fd792548f58f0ea90e7643552 (patch) | |
| tree | 8b1610214c6205197dcbd3a0da55269b0032ebf6 /zenstore/compactcas.cpp | |
| parent | Check usage of block instead of size on disk (diff) | |
| download | zen-1045a36d0117be5fd792548f58f0ea90e7643552.tar.xz zen-1045a36d0117be5fd792548f58f0ea90e7643552.zip | |
Migrate old store format to blocks
Diffstat (limited to 'zenstore/compactcas.cpp')
| -rw-r--r-- | zenstore/compactcas.cpp | 217 |
1 files changed, 192 insertions, 25 deletions
diff --git a/zenstore/compactcas.cpp b/zenstore/compactcas.cpp index 6c74cac2e..126a18102 100644 --- a/zenstore/compactcas.cpp +++ b/zenstore/compactcas.cpp @@ -529,7 +529,7 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx) { ZEN_ERROR("unable to allocate a new block in {}, count limit {} exeeded", m_ContainerBaseName, - std::numeric_limits<uint16_t>::max() + 1); + static_cast<uint32_t>(std::numeric_limits<uint16_t>::max()) + 1); return; } } @@ -607,7 +607,7 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx) { ZEN_ERROR("unable to allocate a new block in {}, count limit {} exeeded", m_ContainerBaseName, - std::numeric_limits<uint16_t>::max() + 1); + static_cast<uint32_t>(std::numeric_limits<uint16_t>::max()) + 1); return; } } @@ -784,46 +784,213 @@ CasContainerStrategy::MakeIndexSnapshot() { fs::remove(STmpSidxPath); } + if (fs::exists(STmplogPath)) + { + fs::remove(STmplogPath); + } } +namespace { + struct LegacyCasDiskLocation + { + LegacyCasDiskLocation(uint64_t InOffset, uint64_t InSize) + { + ZEN_ASSERT(InOffset <= 0xff'ffff'ffff); + ZEN_ASSERT(InSize <= 0xff'ffff'ffff); + + memcpy(&m_Offset[0], &InOffset, sizeof m_Offset); + memcpy(&m_Size[0], &InSize, sizeof m_Size); + } + + LegacyCasDiskLocation() = default; + + inline uint64_t GetOffset() const + { + uint64_t Offset = 0; + memcpy(&Offset, &m_Offset, sizeof m_Offset); + return Offset; + } + + inline uint64_t GetSize() const + { + uint64_t Size = 0; + memcpy(&Size, &m_Size, sizeof m_Size); + return Size; + } + + private: + uint8_t m_Offset[5]; + uint8_t m_Size[5]; + }; + + struct LegacyCasDiskIndexEntry + { + static const uint8_t kTombstone = 0x01; + + IoHash Key; + LegacyCasDiskLocation Location; + ZenContentType ContentType = ZenContentType::kUnknownContentType; + uint8_t Flags = 0; + }; + +} // namespace + void CasContainerStrategy::OpenContainer(bool IsNewStore) { - // TODO: Pick up old Cas store format so we can use it in our store - - std::filesystem::path SlogPath = m_Config.RootDirectory / (m_ContainerBaseName + ".ulog"); - m_TotalSize = 0; m_LocationMap.clear(); - std::filesystem::path SidxPath = m_Config.RootDirectory / (m_ContainerBaseName + ".uidx"); - if (std::filesystem::exists(SidxPath)) + std::filesystem::path SidxPath = m_Config.RootDirectory / (m_ContainerBaseName + ".uidx"); + std::filesystem::path SlogPath = m_Config.RootDirectory / (m_ContainerBaseName + ".ulog"); + std::filesystem::path LegacySobsPath = m_Config.RootDirectory / (m_ContainerBaseName + ".ucas"); + if (IsNewStore) { - BasicFile SmallObjectIndex; - SmallObjectIndex.Open(SidxPath, false); - uint64_t Size = SmallObjectIndex.FileSize(); - uint64_t EntryCount = Size / sizeof(CasDiskIndexEntry); - std::vector<CasDiskIndexEntry> Entries{EntryCount}; - SmallObjectIndex.Read(Entries.data(), Size, 0); - for (const auto& Entry : Entries) + if (std::filesystem::exists(LegacySobsPath)) { - m_LocationMap[Entry.Key] = Entry.Location; + std::filesystem::remove(LegacySobsPath); } - SmallObjectIndex.Close(); + if (std::filesystem::exists(SlogPath)) + { + std::filesystem::remove(SlogPath); + } + if (std::filesystem::exists(SidxPath)) + { + std::filesystem::remove(SidxPath); + } + m_CasLog.Open(SlogPath, true); } - - m_CasLog.Open(SlogPath, IsNewStore); - m_CasLog.Replay([&](const CasDiskIndexEntry& Record) { - if (Record.Flags & CasDiskIndexEntry::kTombstone) + else + { + if (std::filesystem::exists(LegacySobsPath)) { - m_LocationMap.erase(Record.Key); + std::error_code Error; + DiskSpace Space = DiskSpaceInfo(m_Config.RootDirectory, Error); + if (Error) + { + ZEN_ERROR("get disk space in {} FAILED, reason '{}'", m_ContainerBaseName, Error.message()); + return; + } + + if (Space.Free < m_MaxBlockSize) // Never let GC steal the last block space + { + ZEN_ERROR("legacy store migration from '{}' FAILED, required disk space {}, free {}", + m_Config.RootDirectory / m_ContainerBaseName, + m_MaxBlockSize, + NiceBytes(Space.Free)); + return; + } + + BasicFile SmallObjectFile; + SmallObjectFile.Open(LegacySobsPath, false); + + uint64_t MaxRequiredChunkCount = SmallObjectFile.FileSize() / m_MaxBlockSize; + uint64_t MaxPossibleChunkCount = static_cast<uint64_t>(std::numeric_limits<std::uint16_t>::max()) + 1; + if (MaxRequiredChunkCount > MaxPossibleChunkCount) + { + ZEN_ERROR("legacy store migration from '{}' FAILED, required block count {}, possible {}", + m_Config.RootDirectory / m_ContainerBaseName, + MaxRequiredChunkCount, + MaxPossibleChunkCount); + return; + } + + std::unordered_map<IoHash, LegacyCasDiskIndexEntry, IoHash::Hasher> LegacyDiskIndex; + + TCasLogFile<LegacyCasDiskIndexEntry> LegacyCasLog; + LegacyCasLog.Open(SlogPath, false); + LegacyCasLog.Replay([&](const LegacyCasDiskIndexEntry& Record) { + if (Record.Flags & LegacyCasDiskIndexEntry::kTombstone) + { + m_LocationMap.erase(Record.Key); + } + else + { + LegacyDiskIndex[Record.Key] = Record; + } + }); + + std::vector<IoHash> ChunkHashes; + ChunkHashes.reserve(LegacyDiskIndex.size()); + for (const auto& Entry : LegacyDiskIndex) + { + ChunkHashes.push_back(Entry.first); + } + LegacyCasLog.Close(); + + // Sort from biggest position to smallest + std::sort(begin(ChunkHashes), end(ChunkHashes), [&](IoHash Lhs, IoHash Rhs) { + auto LhsKeyIt = LegacyDiskIndex.find(Lhs); + auto RhsKeyIt = LegacyDiskIndex.find(Rhs); + return RhsKeyIt->second.Location.GetOffset() < LhsKeyIt->second.Location.GetOffset(); + }); + + m_CasLog.Open(SlogPath, true); + + std::unique_ptr<ChunkBlock> NewBlockFile; + uint64_t WriteOffset = {}; + uint16_t NewBlockIndex = {}; + + std::vector<uint8_t> Chunk; + for (const auto& ChunkHash : ChunkHashes) + { + const auto& Entry = LegacyDiskIndex[ChunkHash]; + const LegacyCasDiskLocation& ChunkLocation = Entry.Location; + Chunk.resize(ChunkLocation.GetSize()); + SmallObjectFile.Read(Chunk.data(), Chunk.size(), ChunkLocation.GetOffset()); + if (!NewBlockFile) + { + NewBlockFile = std::make_unique<ChunkBlock>(m_Config.RootDirectory, m_ContainerBaseName, NewBlockIndex); + NewBlockFile->Create(m_MaxBlockSize); + } + else if (WriteOffset + Chunk.size() > m_MaxBlockSize) + { + uint64_t ChunkEnd = ChunkLocation.GetOffset() + Chunk.size(); + SmallObjectFile.SetFileSize(ChunkEnd); + NewBlockIndex = NewBlockIndex + 1; + NewBlockFile = std::make_unique<ChunkBlock>(m_Config.RootDirectory, m_ContainerBaseName, NewBlockIndex); + NewBlockFile->Create(m_MaxBlockSize); + WriteOffset = 0; + } + NewBlockFile->Write(Chunk.data(), Chunk.size(), WriteOffset); + CasDiskLocation NewChunkLocation(NewBlockIndex, gsl::narrow<uint32_t>(WriteOffset), gsl::narrow<uint32_t>(Chunk.size())); + m_CasLog.Append({.Key = ChunkHash, .Location = NewChunkLocation}); + WriteOffset = AlignPositon(WriteOffset + Chunk.size(), m_PayloadAlignment); + } + m_CasLog.Close(); + + SmallObjectFile.Close(); + std::filesystem::remove(LegacySobsPath); } - else + + if (std::filesystem::exists(SidxPath)) { - m_LocationMap[Record.Key] = Record.Location; + BasicFile SmallObjectIndex; + SmallObjectIndex.Open(SidxPath, false); + uint64_t Size = SmallObjectIndex.FileSize(); + uint64_t EntryCount = Size / sizeof(CasDiskIndexEntry); + std::vector<CasDiskIndexEntry> Entries{EntryCount}; + SmallObjectIndex.Read(Entries.data(), Size, 0); + for (const auto& Entry : Entries) + { + m_LocationMap[Entry.Key] = Entry.Location; + } + SmallObjectIndex.Close(); } - }); + + m_CasLog.Open(SlogPath, false); + m_CasLog.Replay([&](const CasDiskIndexEntry& Record) { + if (Record.Flags & CasDiskIndexEntry::kTombstone) + { + m_LocationMap.erase(Record.Key); + } + else + { + m_LocationMap[Record.Key] = Record.Location; + } + }); + } std::unordered_map<uint16_t, uint64_t> BlockUsage; for (const auto& Entry : m_LocationMap) |