aboutsummaryrefslogtreecommitdiff
path: root/zenstore/compactcas.cpp
diff options
context:
space:
mode:
author: Dan Engelbrecht <[email protected]> 2022-03-16 11:55:00 +0100
committer: Dan Engelbrecht <[email protected]> 2022-03-31 11:28:32 +0200
commit: 1045a36d0117be5fd792548f58f0ea90e7643552 (patch)
tree: 8b1610214c6205197dcbd3a0da55269b0032ebf6 /zenstore/compactcas.cpp
parent: Check usage of block instead of size on disk (diff)
download: zen-1045a36d0117be5fd792548f58f0ea90e7643552.tar.xz
zen-1045a36d0117be5fd792548f58f0ea90e7643552.zip
Migrate old store format to blocks
Diffstat (limited to 'zenstore/compactcas.cpp')
-rw-r--r-- zenstore/compactcas.cpp 217
1 files changed, 192 insertions, 25 deletions
diff --git a/zenstore/compactcas.cpp b/zenstore/compactcas.cpp
index 6c74cac2e..126a18102 100644
--- a/zenstore/compactcas.cpp
+++ b/zenstore/compactcas.cpp
@@ -529,7 +529,7 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx)
{
ZEN_ERROR("unable to allocate a new block in {}, count limit {} exeeded",
m_ContainerBaseName,
- std::numeric_limits<uint16_t>::max() + 1);
+ static_cast<uint32_t>(std::numeric_limits<uint16_t>::max()) + 1);
return;
}
}
@@ -607,7 +607,7 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx)
{
ZEN_ERROR("unable to allocate a new block in {}, count limit {} exeeded",
m_ContainerBaseName,
- std::numeric_limits<uint16_t>::max() + 1);
+ static_cast<uint32_t>(std::numeric_limits<uint16_t>::max()) + 1);
return;
}
}
@@ -784,46 +784,213 @@ CasContainerStrategy::MakeIndexSnapshot()
{
fs::remove(STmpSidxPath);
}
+ if (fs::exists(STmplogPath))
+ {
+ fs::remove(STmplogPath);
+ }
}
+namespace { // file-local record types for the legacy (pre-block) store format
+ struct LegacyCasDiskLocation // offset/size pair, each stored in a 5-byte field
+ {
+ LegacyCasDiskLocation(uint64_t InOffset, uint64_t InSize) // packs both values; asserts each fits in 5 bytes
+ {
+ ZEN_ASSERT(InOffset <= 0xff'ffff'ffff); // 40-bit limit imposed by the 5-byte m_Offset field
+ ZEN_ASSERT(InSize <= 0xff'ffff'ffff); // 40-bit limit imposed by the 5-byte m_Size field
+
+ memcpy(&m_Offset[0], &InOffset, sizeof m_Offset); // copies the first 5 bytes of the 64-bit value; NOTE(review): those are the low-order bytes only on little-endian hosts
+ memcpy(&m_Size[0], &InSize, sizeof m_Size); // same 5-byte copy for the size
+ }
+
+ LegacyCasDiskLocation() = default; // leaves m_Offset/m_Size without explicit initializers
+
+ inline uint64_t GetOffset() const // widens the stored 5 bytes back to a uint64_t
+ {
+ uint64_t Offset = 0; // zero-initialised so the 3 bytes not overwritten below stay 0
+ memcpy(&Offset, &m_Offset, sizeof m_Offset);
+ return Offset;
+ }
+
+ inline uint64_t GetSize() const // widens the stored 5 bytes back to a uint64_t
+ {
+ uint64_t Size = 0; // zero-initialised so the 3 bytes not overwritten below stay 0
+ memcpy(&Size, &m_Size, sizeof m_Size);
+ return Size;
+ }
+
+ private:
+ uint8_t m_Offset[5]; // packed offset bytes
+ uint8_t m_Size[5]; // packed size bytes
+ };
+
+ struct LegacyCasDiskIndexEntry // record type replayed from the legacy log via TCasLogFile<LegacyCasDiskIndexEntry>
+ {
+ static const uint8_t kTombstone = 0x01; // Flags bit tested during replay to recognise a removal record
+
+ IoHash Key; // chunk hash; used as the key of the legacy index map
+ LegacyCasDiskLocation Location; // offset/size of the chunk, read from the legacy container file during migration
+ ZenContentType ContentType = ZenContentType::kUnknownContentType;
+ uint8_t Flags = 0; // bit set; only kTombstone is defined in this struct
+ };
+
+} // namespace
+
void
CasContainerStrategy::OpenContainer(bool IsNewStore)
{
- // TODO: Pick up old Cas store format so we can use it in our store
-
- std::filesystem::path SlogPath = m_Config.RootDirectory / (m_ContainerBaseName + ".ulog");
-
m_TotalSize = 0;
m_LocationMap.clear();
- std::filesystem::path SidxPath = m_Config.RootDirectory / (m_ContainerBaseName + ".uidx");
- if (std::filesystem::exists(SidxPath))
+ std::filesystem::path SidxPath = m_Config.RootDirectory / (m_ContainerBaseName + ".uidx");
+ std::filesystem::path SlogPath = m_Config.RootDirectory / (m_ContainerBaseName + ".ulog");
+ std::filesystem::path LegacySobsPath = m_Config.RootDirectory / (m_ContainerBaseName + ".ucas");
+ if (IsNewStore)
{
- BasicFile SmallObjectIndex;
- SmallObjectIndex.Open(SidxPath, false);
- uint64_t Size = SmallObjectIndex.FileSize();
- uint64_t EntryCount = Size / sizeof(CasDiskIndexEntry);
- std::vector<CasDiskIndexEntry> Entries{EntryCount};
- SmallObjectIndex.Read(Entries.data(), Size, 0);
- for (const auto& Entry : Entries)
+ if (std::filesystem::exists(LegacySobsPath))
{
- m_LocationMap[Entry.Key] = Entry.Location;
+ std::filesystem::remove(LegacySobsPath);
}
- SmallObjectIndex.Close();
+ if (std::filesystem::exists(SlogPath))
+ {
+ std::filesystem::remove(SlogPath);
+ }
+ if (std::filesystem::exists(SidxPath))
+ {
+ std::filesystem::remove(SidxPath);
+ }
+ m_CasLog.Open(SlogPath, true);
}
-
- m_CasLog.Open(SlogPath, IsNewStore);
- m_CasLog.Replay([&](const CasDiskIndexEntry& Record) {
- if (Record.Flags & CasDiskIndexEntry::kTombstone)
+ else
+ {
+ if (std::filesystem::exists(LegacySobsPath))
{
- m_LocationMap.erase(Record.Key);
+ std::error_code Error;
+ DiskSpace Space = DiskSpaceInfo(m_Config.RootDirectory, Error);
+ if (Error)
+ {
+ ZEN_ERROR("get disk space in {} FAILED, reason '{}'", m_ContainerBaseName, Error.message());
+ return;
+ }
+
+ if (Space.Free < m_MaxBlockSize) // Never let GC steal the last block space
+ {
+ ZEN_ERROR("legacy store migration from '{}' FAILED, required disk space {}, free {}",
+ m_Config.RootDirectory / m_ContainerBaseName,
+ m_MaxBlockSize,
+ NiceBytes(Space.Free));
+ return;
+ }
+
+ BasicFile SmallObjectFile;
+ SmallObjectFile.Open(LegacySobsPath, false);
+
+ uint64_t MaxRequiredChunkCount = SmallObjectFile.FileSize() / m_MaxBlockSize;
+ uint64_t MaxPossibleChunkCount = static_cast<uint64_t>(std::numeric_limits<std::uint16_t>::max()) + 1;
+ if (MaxRequiredChunkCount > MaxPossibleChunkCount)
+ {
+ ZEN_ERROR("legacy store migration from '{}' FAILED, required block count {}, possible {}",
+ m_Config.RootDirectory / m_ContainerBaseName,
+ MaxRequiredChunkCount,
+ MaxPossibleChunkCount);
+ return;
+ }
+
+ std::unordered_map<IoHash, LegacyCasDiskIndexEntry, IoHash::Hasher> LegacyDiskIndex;
+
+ TCasLogFile<LegacyCasDiskIndexEntry> LegacyCasLog;
+ LegacyCasLog.Open(SlogPath, false);
+ LegacyCasLog.Replay([&](const LegacyCasDiskIndexEntry& Record) { // rebuild the set of live legacy chunks from the old log
+ if (Record.Flags & LegacyCasDiskIndexEntry::kTombstone)
+ {
+ LegacyDiskIndex.erase(Record.Key); // tombstone: drop the chunk from the index being migrated (m_LocationMap is still empty at this point, so erasing there was a no-op)
+ }
+ else
+ {
+ LegacyDiskIndex[Record.Key] = Record; // a later record for the same key replaces the earlier one
+ }
+ });
+
+ std::vector<IoHash> ChunkHashes;
+ ChunkHashes.reserve(LegacyDiskIndex.size());
+ for (const auto& Entry : LegacyDiskIndex)
+ {
+ ChunkHashes.push_back(Entry.first);
+ }
+ LegacyCasLog.Close();
+
+ // Sort from biggest position to smallest
+ std::sort(begin(ChunkHashes), end(ChunkHashes), [&](IoHash Lhs, IoHash Rhs) {
+ auto LhsKeyIt = LegacyDiskIndex.find(Lhs);
+ auto RhsKeyIt = LegacyDiskIndex.find(Rhs);
+ return RhsKeyIt->second.Location.GetOffset() < LhsKeyIt->second.Location.GetOffset();
+ });
+
+ m_CasLog.Open(SlogPath, true);
+
+ std::unique_ptr<ChunkBlock> NewBlockFile;
+ uint64_t WriteOffset = {};
+ uint16_t NewBlockIndex = {};
+
+ std::vector<uint8_t> Chunk;
+ for (const auto& ChunkHash : ChunkHashes)
+ {
+ const auto& Entry = LegacyDiskIndex[ChunkHash];
+ const LegacyCasDiskLocation& ChunkLocation = Entry.Location;
+ Chunk.resize(ChunkLocation.GetSize());
+ SmallObjectFile.Read(Chunk.data(), Chunk.size(), ChunkLocation.GetOffset());
+ if (!NewBlockFile)
+ {
+ NewBlockFile = std::make_unique<ChunkBlock>(m_Config.RootDirectory, m_ContainerBaseName, NewBlockIndex);
+ NewBlockFile->Create(m_MaxBlockSize);
+ }
+ else if (WriteOffset + Chunk.size() > m_MaxBlockSize)
+ {
+ uint64_t ChunkEnd = ChunkLocation.GetOffset() + Chunk.size();
+ SmallObjectFile.SetFileSize(ChunkEnd);
+ NewBlockIndex = NewBlockIndex + 1;
+ NewBlockFile = std::make_unique<ChunkBlock>(m_Config.RootDirectory, m_ContainerBaseName, NewBlockIndex);
+ NewBlockFile->Create(m_MaxBlockSize);
+ WriteOffset = 0;
+ }
+ NewBlockFile->Write(Chunk.data(), Chunk.size(), WriteOffset);
+ CasDiskLocation NewChunkLocation(NewBlockIndex, gsl::narrow<uint32_t>(WriteOffset), gsl::narrow<uint32_t>(Chunk.size()));
+ m_CasLog.Append({.Key = ChunkHash, .Location = NewChunkLocation});
+ WriteOffset = AlignPositon(WriteOffset + Chunk.size(), m_PayloadAlignment);
+ }
+ m_CasLog.Close();
+
+ SmallObjectFile.Close();
+ std::filesystem::remove(LegacySobsPath);
}
- else
+
+ if (std::filesystem::exists(SidxPath))
{
- m_LocationMap[Record.Key] = Record.Location;
+ BasicFile SmallObjectIndex;
+ SmallObjectIndex.Open(SidxPath, false);
+ uint64_t Size = SmallObjectIndex.FileSize();
+ uint64_t EntryCount = Size / sizeof(CasDiskIndexEntry);
+ std::vector<CasDiskIndexEntry> Entries{EntryCount};
+ SmallObjectIndex.Read(Entries.data(), Size, 0);
+ for (const auto& Entry : Entries)
+ {
+ m_LocationMap[Entry.Key] = Entry.Location;
+ }
+ SmallObjectIndex.Close();
}
- });
+
+ m_CasLog.Open(SlogPath, false);
+ m_CasLog.Replay([&](const CasDiskIndexEntry& Record) {
+ if (Record.Flags & CasDiskIndexEntry::kTombstone)
+ {
+ m_LocationMap.erase(Record.Key);
+ }
+ else
+ {
+ m_LocationMap[Record.Key] = Record.Location;
+ }
+ });
+ }
std::unordered_map<uint16_t, uint64_t> BlockUsage;
for (const auto& Entry : m_LocationMap)