aboutsummaryrefslogtreecommitdiff
path: root/zenstore/compactcas.cpp
diff options
context:
space:
mode:
authorDan Engelbrecht <[email protected]>2022-03-24 22:41:46 +0100
committerDan Engelbrecht <[email protected]>2022-03-31 11:29:27 +0200
commit52bf08afc4b9da9ccdd73089c8ebfc7bda859bd3 (patch)
tree87fdb172c98fd05b5c64398cce6b1c6be6db257e /zenstore/compactcas.cpp
parentclean up paths (diff)
downloadzen-52bf08afc4b9da9ccdd73089c8ebfc7bda859bd3.tar.xz
zen-52bf08afc4b9da9ccdd73089c8ebfc7bda859bd3.zip
Migration now works in larger disk IO chunks
BasicFile and CasLogFile now have new explicit modes instead of create true/false
Diffstat (limited to 'zenstore/compactcas.cpp')
-rw-r--r--zenstore/compactcas.cpp624
1 files changed, 380 insertions, 244 deletions
diff --git a/zenstore/compactcas.cpp b/zenstore/compactcas.cpp
index ec0a5a7ba..9d0f72442 100644
--- a/zenstore/compactcas.cpp
+++ b/zenstore/compactcas.cpp
@@ -27,15 +27,15 @@ namespace zen {
struct CasDiskIndexHeader // Header record stored at the start of the index snapshot file
{
- static constexpr uint32_t ExpectedMagic = 0x75696478; // 'uidx';
- static constexpr uint32_t CurrentVersion = 1;
- uint32_t Magic = ExpectedMagic;
- uint32_t Version = CurrentVersion;
- uint32_t PayloadAlignement = 0;
- uint32_t Reserved0 = 0;
- uint64_t EntryCount = 0;
- uint32_t Reserved1 = 0;
- uint32_t Reserved2 = 0;
+ static constexpr uint32_t ExpectedMagic = 0x75696478; // 'uidx';
+ static constexpr uint32_t CurrentVersion = 1;
+ uint32_t Magic = ExpectedMagic; // compared against ExpectedMagic when a snapshot is loaded
+ uint32_t Version = CurrentVersion; // compared against CurrentVersion when a snapshot is loaded
+ uint32_t PayloadAlignment = 0; // payload alignment of the store that wrote the snapshot; must be > 0 to be accepted
+ uint32_t Reserved0 = 0;
+ uint64_t EntryCount = 0; // number of CasDiskIndexEntry records written after the header
+ uint32_t Reserved1 = 0;
+ uint32_t Reserved2 = 0;
};
static_assert(sizeof(CasDiskIndexHeader) == 32); // the struct is written to and read from disk as raw bytes
@@ -122,10 +122,351 @@ namespace {
return RootPath / (ContainerBaseName + DataExtension);
}
+ // Location of a chunk inside the legacy payload file, stored as two packed
+ // 5-byte (40-bit) values: byte offset followed by byte size.
+ struct LegacyCasDiskLocation
+ {
+     LegacyCasDiskLocation() = default;
+
+     // Packs the low five bytes of each argument; both must fit in 40 bits.
+     LegacyCasDiskLocation(uint64_t InOffset, uint64_t InSize)
+     {
+         ZEN_ASSERT(InOffset <= 0xff'ffff'ffff);
+         ZEN_ASSERT(InSize <= 0xff'ffff'ffff);
+
+         memcpy(m_Offset, &InOffset, sizeof(m_Offset));
+         memcpy(m_Size, &InSize, sizeof(m_Size));
+     }
+
+     // Offset of the chunk, widened back to 64 bits.
+     inline uint64_t GetOffset() const { return Unpack(m_Offset); }
+
+     // Size of the chunk, widened back to 64 bits.
+     inline uint64_t GetSize() const { return Unpack(m_Size); }
+
+ private:
+     // Copies the five packed bytes into the low bytes of a zeroed 64-bit value.
+     static inline uint64_t Unpack(const uint8_t (&Packed)[5])
+     {
+         uint64_t Value = 0;
+         memcpy(&Value, Packed, sizeof(Packed));
+         return Value;
+     }
+
+     uint8_t m_Offset[5];
+     uint8_t m_Size[5];
+ };
+
+ struct LegacyCasDiskIndexEntry // One record of the legacy CAS log, as replayed by Migrate()
+ {
+ static const uint8_t kTombstone = 0x01; // Flags bit: the record removes Key from the index instead of adding it
+
+ IoHash Key; // hash identifying the chunk
+ LegacyCasDiskLocation Location; // offset and size of the chunk in the legacy payload file
+ ZenContentType ContentType = ZenContentType::kUnknownContentType; // carried over unchanged into the migrated entry
+ uint8_t Flags = 0; // kTombstone is the only bit tested by Migrate()
+ };
+
+ // Converts a legacy single-file CAS container into the block store layout.
+ //
+ // Replays the legacy log to rebuild the set of live chunks, then either moves the legacy
+ // payload file into place as block 0 (destructive mode, everything fits in one block) or
+ // copies it range by range into new block files, and writes a fresh CAS log describing the
+ // new chunk locations.
+ //
+ // RootPath          - directory that contains the container
+ // ContainerBaseName - base name from which the legacy and new file paths are derived
+ // MaxBlockSize      - upper bound, in bytes, used when partitioning chunks into blocks
+ // PayloadAlignment  - alignment applied to chunk offsets inside a new block
+ // Destructive       - when true the legacy data is consumed (payload file moved or truncated
+ //                     as blocks are written, legacy log removed at the end); when false the
+ //                     legacy files are left in place
+ // Overwrite         - when false the migration is skipped if the directory of the new log
+ //                     already exists
+ //
+ // Returns early (after logging) when there is no legacy data, when the target already exists
+ // and Overwrite is false, when disk space cannot be queried or is insufficient, or when more
+ // blocks would be needed than BlockStoreDiskLocation::MaxBlockIndex allows. Note that the
+ // scope guard logs its "migrated store" summary on every exit path taken after it is created.
+ void Migrate(const std::filesystem::path& RootPath,
+ const std::string& ContainerBaseName,
+ uint64_t MaxBlockSize,
+ uint64_t PayloadAlignment,
+ bool Destructive,
+ bool Overwrite)
+ {
+     std::filesystem::path BlocksBasePath = GetBlocksBasePath(RootPath, ContainerBaseName);
+     std::filesystem::path LegacyLogPath = GetLegacyLogPath(RootPath, ContainerBaseName);
+     std::filesystem::path LegacySobsPath = GetLegacyUcasPath(RootPath, ContainerBaseName);
+     if (!std::filesystem::is_regular_file(LegacyLogPath) || !std::filesystem::is_regular_file(LegacySobsPath))
+     {
+         ZEN_DEBUG("migrate of {} SKIPPED, no legacy data found", RootPath / ContainerBaseName);
+         return;
+     }
+
+     std::filesystem::path SlogPath = GetLogPath(RootPath, ContainerBaseName);
+     if (std::filesystem::is_directory(SlogPath.parent_path()))
+     {
+         if (!Overwrite)
+         {
+             ZEN_WARN("migrate of {} SKIPPED, new content already exists", RootPath / ContainerBaseName);
+             return;
+         }
+     }
+
+     uint32_t NewBlockIndex = 0;
+     Stopwatch MigrationTimer;
+     uint64_t TotalSize = 0;
+     const auto Guard = MakeGuard([RootPath, ContainerBaseName, &MigrationTimer, &NewBlockIndex, &TotalSize] {
+         ZEN_INFO("migrated store {} to {} blocks in {} ({})",
+                  RootPath / ContainerBaseName,
+                  NewBlockIndex + 1,
+                  NiceTimeSpanMs(MigrationTimer.GetElapsedTimeMs()),
+                  NiceBytes(TotalSize));
+     });
-// void Migrate(const std::filesystem::path& RootPath, const std::string_view ContainerBaseName, uint32_t MaxBlockSize, uint64_t Alignment)
-// {
-// }
+     std::error_code Error;
+     DiskSpace Space = DiskSpaceInfo(RootPath, Error);
+     if (Error)
+     {
+         ZEN_ERROR("get disk space in {} FAILED, reason '{}'", ContainerBaseName, Error.message());
+         return;
+     }
+
+     if (Space.Free < MaxBlockSize)
+     {
+         ZEN_ERROR("legacy store migration from '{}' FAILED, required disk space {}, free {}",
+                   RootPath / ContainerBaseName,
+                   MaxBlockSize,
+                   NiceBytes(Space.Free));
+         return;
+     }
+
+     // NOTE(review): both legacy files are opened with EMode::kRead, yet the destructive path
+     // below appends tombstones to the legacy log and truncates the payload file - confirm
+     // that kRead permits those operations.
+     BasicFile BlockFile;
+     BlockFile.Open(LegacySobsPath, BasicFile::EMode::kRead);
+     uint64_t FileSize = BlockFile.FileSize();
+
+     std::unordered_map<IoHash, LegacyCasDiskIndexEntry, IoHash::Hasher> LegacyDiskIndex;
+
+     TCasLogFile<LegacyCasDiskIndexEntry> LegacyCasLog;
+     LegacyCasLog.Open(LegacyLogPath, CasLogFile::EMode::kRead);
+     LegacyCasLog.Replay([&](const LegacyCasDiskIndexEntry& Record) {
+         if (Record.Flags & LegacyCasDiskIndexEntry::kTombstone)
+         {
+             LegacyDiskIndex.erase(Record.Key);
+             return;
+         }
+         // Drop records that point past the end of the payload file
+         uint64_t EntryEnd = Record.Location.GetOffset() + Record.Location.GetSize();
+         if (EntryEnd > FileSize)
+         {
+             return;
+         }
+         LegacyDiskIndex[Record.Key] = Record;
+     });
+
+     for (const auto& Entry : LegacyDiskIndex)
+     {
+         TotalSize += Entry.second.Location.GetSize();
+     }
+
+     // Worst case every chunk needs (PayloadAlignment - 1) bytes of padding
+     uint64_t RequiredDiskSpace = TotalSize + ((PayloadAlignment - 1) * LegacyDiskIndex.size());
+     uint64_t MaxRequiredBlockCount = RoundUp(RequiredDiskSpace, MaxBlockSize) / MaxBlockSize;
+     if (MaxRequiredBlockCount > BlockStoreDiskLocation::MaxBlockIndex)
+     {
+         ZEN_ERROR("legacy store migration from '{}' FAILED, required block count {}, possible {}",
+                   RootPath / ContainerBaseName,
+                   MaxRequiredBlockCount,
+                   BlockStoreDiskLocation::MaxBlockIndex);
+         return;
+     }
+     if (Destructive)
+     {
+         // The legacy file shrinks as blocks are written, so only one block of headroom is needed
+         if (Space.Free < (MaxBlockSize + (1 << 28)))
+         {
+             ZEN_INFO("legacy store migration from {} aborted, not enough disk space available {} ({})",
+                      RootPath / ContainerBaseName,
+                      NiceBytes(MaxBlockSize + (1 << 28)),
+                      NiceBytes(Space.Free));
+             return;
+         }
+     }
+     else
+     {
+         // The legacy file is kept, so the whole migrated payload must fit next to it
+         if (Space.Free < (RequiredDiskSpace + (1 << 28)))
+         {
+             ZEN_INFO("legacy store migration from {} aborted, not enough disk space available {} ({})",
+                      RootPath / ContainerBaseName,
+                      NiceBytes(RequiredDiskSpace + (1 << 28)),
+                      NiceBytes(Space.Free));
+             return;
+         }
+     }
+
+     CreateDirectories(SlogPath.parent_path());
+     TCasLogFile<CasDiskIndexEntry> CasLog;
+     CasLog.Open(SlogPath, CasLogFile::EMode::kTruncate);
+
+     if (Destructive && (MaxRequiredBlockCount < 2))
+     {
+         std::vector<CasDiskIndexEntry> LogEntries;
+         LogEntries.reserve(LegacyDiskIndex.size());
+
+         // We can use the block as is, just move it and add the blocks to our new log
+         // NOTE(review): the whole legacy file becomes block 0, so its on-disk size (not just
+         // the live TotalSize) is what ends up in the block - confirm it cannot exceed MaxBlockSize.
+         for (const auto& Entry : LegacyDiskIndex)
+         {
+             const LegacyCasDiskIndexEntry& Record(Entry.second);
+
+             BlockStoreLocation NewChunkLocation(0, Record.Location.GetOffset(), Record.Location.GetSize());
+             LogEntries.push_back({.Key = Entry.second.Key,
+                                   .Location = BlockStoreDiskLocation(NewChunkLocation, PayloadAlignment),
+                                   .ContentType = Record.ContentType,
+                                   .Flags = Record.Flags});
+         }
+         auto BlockPath = GetBlockPath(BlocksBasePath, 0);
+         CreateDirectories(BlockPath.parent_path());
+         BlockFile.Close();
+         std::filesystem::rename(LegacySobsPath, BlockPath);
+         // Record the moved chunks; without this the new log stays empty and the block is unindexed
+         CasLog.Append(LogEntries);
+     }
+     else
+     {
+         std::vector<IoHash> ChunkHashes;
+         ChunkHashes.reserve(LegacyDiskIndex.size());
+         for (const auto& Entry : LegacyDiskIndex)
+         {
+             ChunkHashes.push_back(Entry.first);
+         }
+
+         // Order chunks by their position in the legacy payload file
+         std::sort(begin(ChunkHashes), end(ChunkHashes), [&](const IoHash& Lhs, const IoHash& Rhs) {
+             auto LhsKeyIt = LegacyDiskIndex.find(Lhs);
+             auto RhsKeyIt = LegacyDiskIndex.find(Rhs);
+             return LhsKeyIt->second.Location.GetOffset() < RhsKeyIt->second.Location.GetOffset();
+         });
+
+         uint64_t BlockSize = 0;
+         uint64_t BlockOffset = 0;
+         uint32_t BlockIndex = 0;
+         // A contiguous range of the legacy file that becomes one new block
+         struct BlockData
+         {
+             std::vector<std::pair<IoHash, BlockStoreLocation>> Chunks;
+             uint64_t BlockOffset;
+             uint64_t BlockSize;
+             uint32_t BlockIndex;
+         };
+
+         std::vector<BlockData> BlockRanges;
+         std::vector<std::pair<IoHash, BlockStoreLocation>> Chunks;
+         BlockRanges.reserve(MaxRequiredBlockCount);
+         // NOTE(review): chunk offsets inside a block are computed by packing the chunks with
+         // PayloadAlignment, while the copy loop below transfers BlockSize bytes verbatim from
+         // BlockOffset. The two only agree if the legacy file has no gaps between live chunks
+         // and uses the same alignment - confirm.
+         for (const IoHash& ChunkHash : ChunkHashes)
+         {
+             const auto& LegacyEntry = LegacyDiskIndex.at(ChunkHash);
+             const LegacyCasDiskLocation& LegacyChunkLocation = LegacyEntry.Location;
+
+             uint64_t ChunkOffset = LegacyChunkLocation.GetOffset();
+             uint64_t ChunkSize = LegacyChunkLocation.GetSize();
+
+ #if 0
+             {
+                 std::vector<uint8_t> Data(ChunkSize);
+                 BlockFile.Read(Data.data(), ChunkSize, ChunkOffset);
+                 const IoHash ComputedHash = IoHash::HashBuffer(Data.data(), ChunkSize);
+                 if (ComputedHash != ChunkHash)
+                 {
+                     ZEN_ERROR("migrating store {}, invalid hash for chunk {}. Got {}",
+                               RootPath / ContainerBaseName,
+                               ChunkHash,
+                               ComputedHash);
+                 }
+             }
+ #endif // 0
+
+             if (BlockSize == 0)
+             {
+                 BlockOffset = ChunkOffset;
+             }
+             if ((BlockSize + ChunkSize) > MaxBlockSize)
+             {
+                 // Current block is full: close it and start the next one at this chunk
+                 BlockData BlockRange{.BlockOffset = BlockOffset, .BlockSize = BlockSize, .BlockIndex = BlockIndex};
+                 BlockRange.Chunks.swap(Chunks);
+                 BlockRanges.push_back(std::move(BlockRange));
+
+                 BlockIndex++;
+                 BlockOffset = ChunkOffset;
+                 BlockSize = 0;
+             }
+             BlockSize = RoundUp(BlockSize, PayloadAlignment);
+             BlockStoreLocation ChunkLocation = {.BlockIndex = BlockIndex, .Offset = BlockSize, .Size = ChunkSize};
+             Chunks.push_back({ChunkHash, ChunkLocation});
+             BlockSize += ChunkSize;
+         }
+         if (BlockSize > 0)
+         {
+             BlockRanges.push_back(
+                 {.Chunks = std::move(Chunks), .BlockOffset = BlockOffset, .BlockSize = BlockSize, .BlockIndex = BlockIndex});
+         }
+         // Let the summary logged by Guard report the real number of blocks
+         NewBlockIndex = BlockIndex;
+
+         // Process the ranges back to front so that, in destructive mode, the legacy file can
+         // be truncated after each block has been written
+         std::reverse(BlockRanges.begin(), BlockRanges.end());
+         std::vector<std::uint8_t> Buffer(1 << 28);
+         for (size_t Idx = 0; Idx < BlockRanges.size(); ++Idx)
+         {
+             const BlockData& BlockRange = BlockRanges[Idx];
+
+             ZEN_INFO("migrating store {} {}/{} blocks, remaining {} ({})",
+                      RootPath / ContainerBaseName,
+                      Idx,
+                      BlockRanges.size(),
+                      NiceBytes(BlockRange.BlockOffset),
+                      NiceBytes(TotalSize));
+
+             auto BlockPath = GetBlockPath(BlocksBasePath, BlockRange.BlockIndex);
+             BlockStoreFile ChunkBlock(BlockPath);
+             ChunkBlock.Create(BlockRange.BlockSize);
+             // Copy the range in pieces of at most Buffer.size() bytes
+             uint64_t Offset = 0;
+             while (Offset < BlockRange.BlockSize)
+             {
+                 uint64_t Size = BlockRange.BlockSize - Offset;
+                 if (Size > Buffer.size())
+                 {
+                     Size = Buffer.size();
+                 }
+                 BlockFile.Read(Buffer.data(), Size, BlockRange.BlockOffset + Offset);
+                 ChunkBlock.Write(Buffer.data(), Size, Offset);
+                 Offset += Size;
+             }
+             ChunkBlock.Flush();
+
+             std::vector<CasDiskIndexEntry> LogEntries;
+             LogEntries.reserve(BlockRange.Chunks.size());
+             for (const auto& Entry : BlockRange.Chunks)
+             {
+                 const LegacyCasDiskIndexEntry& LegacyEntry = LegacyDiskIndex.at(Entry.first);
+                 BlockStoreDiskLocation Location(Entry.second, PayloadAlignment);
+                 LogEntries.push_back(
+                     {.Key = Entry.first, .Location = Location, .ContentType = LegacyEntry.ContentType, .Flags = LegacyEntry.Flags});
+             }
+             CasLog.Append(LogEntries);
+ #if 0
+             for (const CasDiskIndexEntry& Entry : LogEntries)
+             {
+                 std::vector<uint8_t> Data(Entry.Location.GetSize());
+                 ChunkBlock.Read(Data.data(), Entry.Location.GetSize(), Entry.Location.GetOffset(PayloadAlignment));
+                 const IoHash ComputedHash = IoHash::HashBuffer(Data.data(), Entry.Location.GetSize());
+                 if (ComputedHash != Entry.Key)
+                 {
+                     ZEN_ERROR("migrating store {}, invalid hash for chunk {}. Got {}",
+                               RootPath / ContainerBaseName,
+                               Entry.Key,
+                               ComputedHash);
+                 }
+             }
+ #endif // 0
+
+             if (Destructive)
+             {
+                 // Tombstone the migrated chunks in the legacy log, then cut the copied range
+                 // off the end of the legacy payload file
+                 std::vector<LegacyCasDiskIndexEntry> LegacyLogEntries;
+                 LegacyLogEntries.reserve(BlockRange.Chunks.size());
+                 for (const auto& Entry : BlockRange.Chunks)
+                 {
+                     LegacyLogEntries.push_back({.Key = Entry.first, .Flags = LegacyCasDiskIndexEntry::kTombstone});
+                 }
+                 LegacyCasLog.Append(LegacyLogEntries);
+                 BlockFile.SetFileSize(BlockRange.BlockOffset);
+             }
+         }
+     }
+     LegacyCasLog.Close();
+     CasLog.Close();
+
+     if (Destructive)
+     {
+         std::filesystem::remove(LegacyLogPath);
+     }
+ }
} // namespace
@@ -219,7 +560,7 @@ CasContainerStrategy::InsertChunk(const void* ChunkData, size_t ChunkSize, const
BlockStoreDiskLocation Location({.BlockIndex = WriteBlockIndex, .Offset = InsertOffset, .Size = ChunkSize}, m_PayloadAlignment);
const CasDiskIndexEntry IndexEntry{.Key = ChunkHash, .Location = Location};
- m_TotalSize.fetch_add(static_cast<uint64_t>(ChunkSize));
+ m_TotalSize.fetch_add(static_cast<uint64_t>(ChunkSize), std::memory_order_release);
{
RwLock::ExclusiveLockScope __(m_LocationMapLock);
m_LocationMap.emplace(ChunkHash, Location);
@@ -508,8 +849,8 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx)
TotalChunkCount = LocationMap.size();
std::unordered_map<uint32_t, size_t> BlockIndexToChunkMapIndex;
- std::vector<std::vector<IoHash> > KeepChunks;
- std::vector<std::vector<IoHash> > DeleteChunks;
+ std::vector<std::vector<IoHash>> KeepChunks;
+ std::vector<std::vector<IoHash>> DeleteChunks;
BlockIndexToChunkMapIndex.reserve(BlockCount);
KeepChunks.reserve(BlockCount);
@@ -774,7 +1115,7 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx)
}
BasicFile GCReserveFile;
CreateDirectories(GCReservePath.parent_path());
- GCReserveFile.Open(GCReservePath, true);
+ GCReserveFile.Open(GCReservePath, BasicFile::EMode::kTruncate);
GCReserveFile.SetFileSize(m_MaxBlockSize);
ZEN_DEBUG("recreated garbage collect reserve '{}', {} bytes", m_Config.RootDirectory / m_ContainerBaseName, NiceBytes(Space.Free));
@@ -817,7 +1158,7 @@ CasContainerStrategy::MakeIndexSnapshot()
fs::rename(SlogPath, STmplogPath);
// Open an new log
- m_CasLog.Open(SlogPath, true);
+ m_CasLog.Open(SlogPath, CasLogFile::EMode::kTruncate);
}
try
@@ -839,8 +1180,8 @@ CasContainerStrategy::MakeIndexSnapshot()
}
BasicFile ObjectIndexFile;
- ObjectIndexFile.Open(SidxPath, true);
- CasDiskIndexHeader Header = {.PayloadAlignement = gsl::narrow<uint32_t>(m_PayloadAlignment), .EntryCount = Entries.size()};
+ ObjectIndexFile.Open(SidxPath, BasicFile::EMode::kTruncate);
+ CasDiskIndexHeader Header = {.PayloadAlignment = gsl::narrow<uint32_t>(m_PayloadAlignment), .EntryCount = Entries.size()};
ObjectIndexFile.Write(&Header, sizeof(CasDiskIndexEntry), 0);
ObjectIndexFile.Write(Entries.data(), Entries.size() * sizeof(CasDiskIndexEntry), sizeof(CasDiskIndexEntry));
ObjectIndexFile.Close();
@@ -857,7 +1198,7 @@ CasContainerStrategy::MakeIndexSnapshot()
Records.reserve(m_LocationMap.size());
{
TCasLogFile<CasDiskIndexEntry> OldCasLog;
- OldCasLog.Open(STmplogPath, false);
+ OldCasLog.Open(STmplogPath, CasLogFile::EMode::kRead);
OldCasLog.Replay([&](const CasDiskIndexEntry& Record) { Records.push_back(Record); });
}
{
@@ -865,7 +1206,7 @@ CasContainerStrategy::MakeIndexSnapshot()
}
TCasLogFile<CasDiskIndexEntry> RecoveredCasLog;
- RecoveredCasLog.Open(SRecoveredlogPath, true);
+ RecoveredCasLog.Open(SRecoveredlogPath, CasLogFile::EMode::kWrite);
RecoveredCasLog.Append(Records);
RecoveredCasLog.Close();
@@ -896,52 +1237,6 @@ CasContainerStrategy::MakeIndexSnapshot()
}
}
-namespace {
- struct LegacyCasDiskLocation
- {
- LegacyCasDiskLocation(uint64_t InOffset, uint64_t InSize)
- {
- ZEN_ASSERT(InOffset <= 0xff'ffff'ffff);
- ZEN_ASSERT(InSize <= 0xff'ffff'ffff);
-
- memcpy(&m_Offset[0], &InOffset, sizeof m_Offset);
- memcpy(&m_Size[0], &InSize, sizeof m_Size);
- }
-
- LegacyCasDiskLocation() = default;
-
- inline uint64_t GetOffset() const
- {
- uint64_t Offset = 0;
- memcpy(&Offset, &m_Offset, sizeof m_Offset);
- return Offset;
- }
-
- inline uint64_t GetSize() const
- {
- uint64_t Size = 0;
- memcpy(&Size, &m_Size, sizeof m_Size);
- return Size;
- }
-
- private:
- uint8_t m_Offset[5];
- uint8_t m_Size[5];
- };
-
- struct LegacyCasDiskIndexEntry
- {
- static const uint8_t kTombstone = 0x01;
-
- IoHash Key;
- LegacyCasDiskLocation Location;
- ZenContentType ContentType = ZenContentType::kUnknownContentType;
- uint8_t Flags = 0;
- };
-
-} // namespace
-
-
void
CasContainerStrategy::OpenContainer(bool IsNewStore)
{
@@ -975,179 +1270,17 @@ CasContainerStrategy::OpenContainer(bool IsNewStore)
std::filesystem::remove(SidxPath);
}
CreateDirectories(SlogPath.parent_path());
- m_CasLog.Open(SlogPath, true);
+ m_CasLog.Open(SlogPath, CasLogFile::EMode::kTruncate);
}
else
{
- if (std::filesystem::is_regular_file(LegacyLogPath) && std::filesystem::is_regular_file(LegacySobsPath))
- {
- uint32_t NewBlockIndex = 0;
- Stopwatch MigrationTimer;
- uint64_t TotalSize = 0;
- const auto Guard = MakeGuard([this, &MigrationTimer, &NewBlockIndex, &TotalSize] {
- ZEN_INFO("migrated store {} to {} blocks in {} ({})",
- m_Config.RootDirectory / m_ContainerBaseName,
- NewBlockIndex + 1,
- NiceTimeSpanMs(MigrationTimer.GetElapsedTimeMs()),
- NiceBytes(TotalSize));
- });
-
- std::error_code Error;
- DiskSpace Space = DiskSpaceInfo(m_Config.RootDirectory, Error);
- if (Error)
- {
- ZEN_ERROR("get disk space in {} FAILED, reason '{}'", m_ContainerBaseName, Error.message());
- return;
- }
-
- if (Space.Free < m_MaxBlockSize) // Never let GC steal the last block space
- {
- ZEN_ERROR("legacy store migration from '{}' FAILED, required disk space {}, free {}",
- m_Config.RootDirectory / m_ContainerBaseName,
- m_MaxBlockSize,
- NiceBytes(Space.Free));
- return;
- }
-
- BasicFile BlockFile;
- BlockFile.Open(LegacySobsPath, false);
-
- std::unordered_map<IoHash, LegacyCasDiskIndexEntry, IoHash::Hasher> LegacyDiskIndex;
-
- TCasLogFile<LegacyCasDiskIndexEntry> LegacyCasLog;
- LegacyCasLog.Open(LegacyLogPath, false);
- LegacyCasLog.Replay([&](const LegacyCasDiskIndexEntry& Record) {
- if (Record.Flags & LegacyCasDiskIndexEntry::kTombstone)
- {
- LegacyDiskIndex.erase(Record.Key);
- }
- else
- {
- LegacyDiskIndex[Record.Key] = Record;
- }
- });
-
- uint64_t MaxUsedSize = 0;
- for (const auto& Entry : LegacyDiskIndex)
- {
- const LegacyCasDiskIndexEntry& Record(Entry.second);
- uint64_t EntryEnd = Record.Location.GetOffset() + Record.Location.GetSize();
- if (EntryEnd > MaxUsedSize)
- {
- MaxUsedSize = EntryEnd;
- }
- TotalSize += Record.Location.GetSize();
- }
- LegacyCasLog.Close();
-
- BlockFile.SetFileSize(MaxUsedSize);
- uint64_t MaxRequiredBlockCount = RoundUp(MaxUsedSize, m_MaxBlockSize) / m_MaxBlockSize;
- if (MaxRequiredBlockCount > BlockStoreDiskLocation::MaxBlockIndex)
- {
- ZEN_ERROR("legacy store migration from '{}' FAILED, required block count {}, possible {}",
- m_Config.RootDirectory / m_ContainerBaseName,
- MaxRequiredBlockCount,
- BlockStoreDiskLocation::MaxBlockIndex);
- return;
- }
-
- CreateDirectories(SlogPath.parent_path());
- m_CasLog.Open(SlogPath, true);
-
- std::vector<CasDiskIndexEntry> LogEntries;
- LogEntries.reserve(LegacyDiskIndex.size());
- if (MaxRequiredBlockCount < 2)
- {
- for (const auto& Entry : LegacyDiskIndex)
- {
- const LegacyCasDiskIndexEntry& Record(Entry.second);
- BlockStoreLocation NewChunkLocation(0, Record.Location.GetOffset(), Record.Location.GetSize());
- LogEntries.push_back({.Key = Entry.second.Key,
- .Location = BlockStoreDiskLocation(NewChunkLocation, m_PayloadAlignment),
- .ContentType = Record.ContentType,
- .Flags = Record.Flags});
- }
- auto BlockPath = GetBlockPath(m_BlocksBasePath, 0);
- CreateDirectories(BlockPath.parent_path());
- BlockFile.Close();
- std::filesystem::rename(LegacySobsPath, BlockPath);
- }
- else
- {
- std::vector<IoHash> ChunkHashes;
- ChunkHashes.reserve(LegacyDiskIndex.size());
- for (const auto& Entry : LegacyDiskIndex)
- {
- ChunkHashes.push_back(Entry.first);
- }
-
- // Sort from biggest position to smallest
- std::sort(begin(ChunkHashes), end(ChunkHashes), [&](IoHash Lhs, IoHash Rhs) {
- auto LhsKeyIt = LegacyDiskIndex.find(Lhs);
- auto RhsKeyIt = LegacyDiskIndex.find(Rhs);
- return RhsKeyIt->second.Location.GetOffset() < LhsKeyIt->second.Location.GetOffset();
- });
-
- std::unique_ptr<BlockStoreFile> NewBlockFile;
- uint64_t WriteOffset = 0;
-
- std::vector<uint8_t> Chunk;
- for (const IoHash& ChunkHash : ChunkHashes)
- {
- const auto& Entry = LegacyDiskIndex[ChunkHash];
- const LegacyCasDiskLocation& ChunkLocation = Entry.Location;
- Chunk.resize(ChunkLocation.GetSize());
- BlockFile.Read(Chunk.data(), Chunk.size(), ChunkLocation.GetOffset());
- if (!NewBlockFile)
- {
- auto BlockPath = GetBlockPath(m_BlocksBasePath, NewBlockIndex);
- NewBlockFile = std::make_unique<BlockStoreFile>(BlockPath);
- NewBlockFile->Create(m_MaxBlockSize);
- }
- else if (WriteOffset + Chunk.size() > m_MaxBlockSize)
- {
- m_CasLog.Append(LogEntries);
- LogEntries.clear();
- NewBlockFile.reset();
- uint64_t ChunkEnd = ChunkLocation.GetOffset() + Chunk.size();
- BlockFile.SetFileSize(ChunkEnd);
- NewBlockIndex = NewBlockIndex + 1;
- ZEN_INFO("migrating store {} {}/{} blocks, remaining {} ({})",
- m_Config.RootDirectory / m_ContainerBaseName,
- NewBlockIndex,
- MaxRequiredBlockCount,
- NiceBytes(ChunkEnd),
- NiceBytes(TotalSize));
- auto BlockPath = GetBlockPath(m_BlocksBasePath, NewBlockIndex);
- NewBlockFile = std::make_unique<BlockStoreFile>(BlockPath);
- NewBlockFile->Create(m_MaxBlockSize);
- WriteOffset = 0;
- }
- NewBlockFile->Write(Chunk.data(), Chunk.size(), WriteOffset);
- BlockStoreLocation NewChunkLocation(NewBlockIndex, WriteOffset, Chunk.size());
- LogEntries.push_back({.Key = ChunkHash,
- .Location = BlockStoreDiskLocation(NewChunkLocation, m_PayloadAlignment),
- .ContentType = Entry.ContentType,
- .Flags = Entry.Flags});
- WriteOffset = RoundUp(WriteOffset + Chunk.size(), m_PayloadAlignment);
- }
- NewBlockFile.reset();
- BlockFile.Close();
- }
- if (!LogEntries.empty())
- {
- m_CasLog.Append(LogEntries);
- }
- m_CasLog.Close();
-
- std::filesystem::remove(LegacyLogPath);
- CasLogEmpty = false;
- }
+ // Keep the old cache intact for now
+ Migrate(m_Config.RootDirectory, m_ContainerBaseName, m_MaxBlockSize, m_PayloadAlignment, false, true);
if (std::filesystem::is_regular_file(SidxPath))
{
BasicFile ObjectIndexFile;
- ObjectIndexFile.Open(SidxPath, false);
+ ObjectIndexFile.Open(SidxPath, BasicFile::EMode::kRead);
uint64_t Size = ObjectIndexFile.FileSize();
if (Size >= sizeof(CasDiskIndexHeader))
{
@@ -1155,7 +1288,7 @@ CasContainerStrategy::OpenContainer(bool IsNewStore)
CasDiskIndexHeader Header;
ObjectIndexFile.Read(&Header, sizeof(Header), 0);
if (Header.Magic == CasDiskIndexHeader::ExpectedMagic && Header.Version == CasDiskIndexHeader::CurrentVersion &&
- Header.PayloadAlignement > 0 && Header.EntryCount == ExpectedEntryCount)
+ Header.PayloadAlignment > 0 && Header.EntryCount == ExpectedEntryCount)
{
std::vector<CasDiskIndexEntry> Entries{Header.EntryCount};
ObjectIndexFile.Read(Entries.data(), Header.EntryCount * sizeof(CasDiskIndexEntry), sizeof(CasDiskIndexHeader));
@@ -1164,12 +1297,12 @@ CasContainerStrategy::OpenContainer(bool IsNewStore)
{
m_LocationMap[Entry.Key] = Entry.Location;
}
- m_PayloadAlignment = Header.PayloadAlignement;
+ m_PayloadAlignment = Header.PayloadAlignment;
}
}
}
- m_CasLog.Open(SlogPath, false);
+ m_CasLog.Open(SlogPath, CasLogFile::EMode::kWrite);
m_CasLog.Replay([&](const CasDiskIndexEntry& Record) {
if (Record.Flags & CasDiskIndexEntry::kTombstone)
{
@@ -1187,7 +1320,7 @@ CasContainerStrategy::OpenContainer(bool IsNewStore)
for (const auto& Entry : m_LocationMap)
{
const BlockStoreDiskLocation& Location = Entry.second;
- m_TotalSize.fetch_add(Location.GetSize());
+ m_TotalSize.fetch_add(Location.GetSize(), std::memory_order_release);
BlockUsage.insert(Location.GetBlockIndex());
}
@@ -1253,7 +1386,7 @@ CasContainerStrategy::OpenContainer(bool IsNewStore)
BasicFile GCReserveFile;
if (std::filesystem::is_regular_file(GCReservePath))
{
- GCReserveFile.Open(GCReservePath, false);
+ GCReserveFile.Open(GCReservePath, BasicFile::EMode::kWrite);
std::uint64_t CurrentSize = GCReserveFile.FileSize();
if (CurrentSize != m_MaxBlockSize)
{
@@ -1281,7 +1414,7 @@ CasContainerStrategy::OpenContainer(bool IsNewStore)
if (Space.Free > m_MaxBlockSize)
{
CreateDirectories(GCReservePath.parent_path());
- GCReserveFile.Open(GCReservePath, true);
+ GCReserveFile.Open(GCReservePath, BasicFile::EMode::kTruncate);
GCReserveFile.SetFileSize(m_MaxBlockSize);
}
}
@@ -1938,7 +2071,7 @@ TEST_CASE("compactcas.legacyconversion")
if (std::filesystem::is_regular_file(SidxPath))
{
BasicFile ObjectIndexFile;
- ObjectIndexFile.Open(SidxPath, false);
+ ObjectIndexFile.Open(SidxPath, BasicFile::EMode::kRead);
uint64_t Size = ObjectIndexFile.FileSize();
if (Size >= sizeof(CasDiskIndexHeader))
{
@@ -1946,7 +2079,7 @@ TEST_CASE("compactcas.legacyconversion")
CasDiskIndexHeader Header;
ObjectIndexFile.Read(&Header, sizeof(Header), 0);
if (Header.Magic == CasDiskIndexHeader::ExpectedMagic && Header.Version == CasDiskIndexHeader::CurrentVersion &&
- Header.PayloadAlignement > 0 && Header.EntryCount == ExpectedEntryCount)
+ Header.PayloadAlignment > 0 && Header.EntryCount == ExpectedEntryCount)
{
LogEntries.resize(Header.EntryCount);
ObjectIndexFile.Read(LogEntries.data(), Header.EntryCount * sizeof(CasDiskIndexEntry), sizeof(CasDiskIndexHeader));
@@ -1959,12 +2092,12 @@ TEST_CASE("compactcas.legacyconversion")
std::filesystem::path SlogPath = GetLogPath(CasConfig.RootDirectory, "test");
{
TCasLogFile<CasDiskIndexEntry> CasLog;
- CasLog.Open(SlogPath, false);
+ CasLog.Open(SlogPath, CasLogFile::EMode::kRead);
CasLog.Replay([&](const CasDiskIndexEntry& Record) { LogEntries.push_back(Record); });
}
TCasLogFile<LegacyCasDiskIndexEntry> LegacyCasLog;
std::filesystem::path SLegacylogPath = GetLegacyLogPath(CasConfig.RootDirectory, "test");
- LegacyCasLog.Open(SLegacylogPath, true);
+ LegacyCasLog.Open(SLegacylogPath, CasLogFile::EMode::kTruncate);
for (const CasDiskIndexEntry& Entry : LogEntries)
{
BlockStoreLocation Location = Entry.Location.Get(16);
@@ -2186,7 +2319,10 @@ TEST_CASE("compactcas.threadedinsert") // * doctest::skip(true))
TEST_CASE("compactcas.migrate.large.data" * doctest::skip(true))
{
- const char* BigDataPath = "D:\\zen-data\\dc4-zen-cache-t\\cas";
+ const char* BigDataPath = "D:\\zen-data\\dc4-zen-cache-t\\cas";
+ Migrate(BigDataPath, "tobs", 1u << 28, 16, false, true);
+ Migrate(BigDataPath, "sobs", 1u << 30, 4096, false, true);
+
CasStoreConfiguration CasConfig;
CasConfig.RootDirectory = BigDataPath;