aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDan Engelbrecht <[email protected]>2022-03-25 14:08:58 +0100
committerDan Engelbrecht <[email protected]>2022-03-31 11:29:28 +0200
commit5facd2821026b30f28c4d87a98ea5c8cdc288531 (patch)
tree00b2c94ec111e8943259bc2995f451bc2079f9a1
parentcleanup (diff)
downloadzen-5facd2821026b30f28c4d87a98ea5c8cdc288531.tar.xz
zen-5facd2821026b30f28c4d87a98ea5c8cdc288531.zip
make code a bit easier to follow
-rw-r--r--zenstore/compactcas.cpp179
1 files changed, 104 insertions, 75 deletions
diff --git a/zenstore/compactcas.cpp b/zenstore/compactcas.cpp
index 244277330..8076bbbde 100644
--- a/zenstore/compactcas.cpp
+++ b/zenstore/compactcas.cpp
@@ -174,12 +174,12 @@ namespace {
uint8_t Flags = 0;
};
- bool ReadIndex(const std::filesystem::path& RootDirectory,
- const std::string& ContainerBaseName,
- uint64_t& InOutPayloadAlignment,
- std::unordered_map<IoHash, BlockStoreDiskLocation, IoHash::Hasher>& OutLocationMap)
+ std::vector<CasDiskIndexEntry> ReadIndexFile(const std::filesystem::path& RootDirectory,
+ const std::string& ContainerBaseName,
+ uint64_t& InOutPayloadAlignment)
{
- std::filesystem::path SidxPath = GetIndexPath(RootDirectory, ContainerBaseName);
+ std::vector<CasDiskIndexEntry> Entries;
+ std::filesystem::path SidxPath = GetIndexPath(RootDirectory, ContainerBaseName);
if (std::filesystem::is_regular_file(SidxPath))
{
BasicFile ObjectIndexFile;
@@ -193,54 +193,39 @@ namespace {
if (Header.Magic == CasDiskIndexHeader::ExpectedMagic && Header.Version == CasDiskIndexHeader::CurrentVersion &&
Header.PayloadAlignment > 0 && Header.EntryCount == ExpectedEntryCount)
{
- std::vector<CasDiskIndexEntry> Entries{Header.EntryCount};
+ Entries.resize(Header.EntryCount);
ObjectIndexFile.Read(Entries.data(), Header.EntryCount * sizeof(CasDiskIndexEntry), sizeof(CasDiskIndexHeader));
- for (const auto& Entry : Entries)
- {
- OutLocationMap[Entry.Key] = Entry.Location;
- }
InOutPayloadAlignment = Header.PayloadAlignment;
}
}
}
+ return Entries;
+ }
- bool AddedFromCasLog = false;
- TCasLogFile<CasDiskIndexEntry> CasLog;
+ std::vector<CasDiskIndexEntry> ReadLog(const std::filesystem::path& RootDirectory, const std::string& ContainerBaseName)
+ {
+ std::vector<CasDiskIndexEntry> Entries;
std::filesystem::path SlogPath = GetLogPath(RootDirectory, ContainerBaseName);
if (std::filesystem::is_regular_file(SlogPath))
{
+ TCasLogFile<CasDiskIndexEntry> CasLog;
CasLog.Open(SlogPath, CasLogFile::EMode::kRead);
- CasLog.Replay([&](const CasDiskIndexEntry& Record) {
- if (Record.Flags & CasDiskIndexEntry::kTombstone)
- {
- OutLocationMap.erase(Record.Key);
- }
- else
- {
- OutLocationMap[Record.Key] = Record.Location;
- }
- AddedFromCasLog = true;
- });
+ CasLog.Replay([&](const CasDiskIndexEntry& Record) { Entries.push_back(Record); });
}
- return AddedFromCasLog;
+ return Entries;
}
- uint64_t MigrateLegacyData(const std::filesystem::path& RootPath,
- const std::string& ContainerBaseName,
- uint64_t MaxBlockSize,
- uint64_t PayloadAlignment,
- bool CleanSource,
- std::unordered_map<IoHash, BlockStoreDiskLocation, IoHash::Hasher>& InOutLocationMap)
+ std::vector<CasDiskIndexEntry> MigrateLegacyData(const std::filesystem::path& RootPath,
+ const std::string& ContainerBaseName,
+ uint64_t MaxBlockSize,
+ uint64_t PayloadAlignment,
+ bool CleanSource,
+ const std::unordered_set<IoHash, IoHash::Hasher>& ExistingChunks)
{
std::filesystem::path BlocksBasePath = GetBlocksBasePath(RootPath, ContainerBaseName);
std::filesystem::path LegacyLogPath = GetLegacyLogPath(RootPath, ContainerBaseName);
std::filesystem::path LegacySobsPath = GetLegacyUcasPath(RootPath, ContainerBaseName);
std::filesystem::path LegacySidxPath = GetLegacyUidxPath(RootPath, ContainerBaseName);
- if (!std::filesystem::is_regular_file(LegacyLogPath) || !std::filesystem::is_regular_file(LegacySobsPath))
- {
- ZEN_DEBUG("migrate of {} SKIPPED, no legacy data found", RootPath / ContainerBaseName);
- return 0;
- }
uint64_t MigratedChunkCount = 0;
uint32_t MigratedBlockCount = 0;
@@ -256,6 +241,8 @@ namespace {
NiceBytes(TotalSize));
});
+ std::vector<CasDiskIndexEntry> Result;
+
uint32_t WriteBlockIndex = 0;
while (std::filesystem::exists(GetBlockPath(BlocksBasePath, WriteBlockIndex)))
{
@@ -267,7 +254,7 @@ namespace {
if (Error)
{
ZEN_ERROR("get disk space in {} FAILED, reason '{}'", ContainerBaseName, Error.message());
- return 0;
+ return Result;
}
if (Space.Free < MaxBlockSize)
@@ -276,7 +263,7 @@ namespace {
RootPath / ContainerBaseName,
MaxBlockSize,
NiceBytes(Space.Free));
- return 0;
+ return Result;
}
BasicFile BlockFile;
@@ -298,7 +285,7 @@ namespace {
{
return;
}
- if (InOutLocationMap.contains(Record.Key))
+ if (ExistingChunks.contains(Record.Key))
{
return;
}
@@ -315,7 +302,7 @@ namespace {
std::filesystem::remove(LegacySobsPath);
std::filesystem::remove(LegacySidxPath);
}
- return 0;
+ return Result;
}
uint64_t MaxUsedSize = 0;
@@ -338,7 +325,7 @@ namespace {
RootPath / ContainerBaseName,
MaxRequiredBlockCount,
BlockStoreDiskLocation::MaxBlockIndex);
- return 0;
+ return Result;
}
if (CleanSource)
@@ -350,7 +337,7 @@ namespace {
NewBlockIndex + 1,
NiceBytes(MaxBlockSize + (1 << 28)),
NiceBytes(Space.Free));
- return 0;
+ return Result;
}
}
else
@@ -362,7 +349,7 @@ namespace {
NewBlockIndex + 1,
NiceBytes(RequiredDiskSpace + (1 << 28)),
NiceBytes(Space.Free));
- return 0;
+ return Result;
}
}
@@ -373,8 +360,7 @@ namespace {
if (CleanSource && (MaxRequiredBlockCount < 2))
{
- std::vector<CasDiskIndexEntry> LogEntries;
- LogEntries.reserve(LegacyDiskIndex.size());
+ Result.reserve(LegacyDiskIndex.size());
// We can use the block as is, just move it and add the blocks to our new log
for (auto& Entry : LegacyDiskIndex)
@@ -383,16 +369,15 @@ namespace {
BlockStoreLocation NewChunkLocation(WriteBlockIndex, Record.Location.GetOffset(), Record.Location.GetSize());
BlockStoreDiskLocation NewLocation(NewChunkLocation, PayloadAlignment);
- LogEntries.push_back(
+ Result.push_back(
{.Key = Entry.second.Key, .Location = NewLocation, .ContentType = Record.ContentType, .Flags = Record.Flags});
- InOutLocationMap[Entry.second.Key] = NewLocation;
}
auto BlockPath = GetBlockPath(BlocksBasePath, WriteBlockIndex);
CreateDirectories(BlockPath.parent_path());
BlockFile.Close();
std::filesystem::rename(LegacySobsPath, BlockPath);
- CasLog.Append(LogEntries);
- MigratedChunkCount = LogEntries.size();
+ CasLog.Append(Result);
+ MigratedChunkCount += Result.size();
MigratedBlockCount++;
}
else
@@ -499,9 +484,9 @@ namespace {
BlockStoreDiskLocation Location(Entry.second, PayloadAlignment);
LogEntries.push_back(
{.Key = Entry.first, .Location = Location, .ContentType = LegacyEntry.ContentType, .Flags = LegacyEntry.Flags});
- InOutLocationMap[Entry.first] = Location;
}
CasLog.Append(LogEntries);
+ Result.insert(Result.end(), LogEntries.begin(), LogEntries.end());
MigratedChunkCount += LogEntries.size();
MigratedBlockCount++;
@@ -528,7 +513,7 @@ namespace {
std::filesystem::remove(LegacySobsPath);
std::filesystem::remove(LegacySidxPath);
}
- return MigratedChunkCount;
+ return Result;
}
} // namespace
@@ -1307,20 +1292,55 @@ CasContainerStrategy::OpenContainer(bool IsNewStore)
m_LocationMap.clear();
- std::filesystem::path BasePath = GetBasePath(m_Config.RootDirectory, m_ContainerBaseName);
+ std::filesystem::path BasePath = GetBasePath(m_Config.RootDirectory, m_ContainerBaseName);
+ std::filesystem::path LegacyLogPath = GetLegacyLogPath(m_Config.RootDirectory, m_ContainerBaseName);
if (IsNewStore)
{
- std::filesystem::path LegacyLogPath = GetLegacyLogPath(m_Config.RootDirectory, m_ContainerBaseName);
std::filesystem::path LegacySobsPath = GetLegacyUcasPath(m_Config.RootDirectory, m_ContainerBaseName);
std::filesystem::remove(LegacyLogPath);
std::filesystem::remove(LegacySobsPath);
std::filesystem::remove_all(BasePath);
}
- bool AddedFromCasLog = ReadIndex(m_Config.RootDirectory, m_ContainerBaseName, m_PayloadAlignment, m_LocationMap);
+ {
+ std::vector<CasDiskIndexEntry> IndexEntries = ReadIndexFile(m_Config.RootDirectory, m_ContainerBaseName, m_PayloadAlignment);
+ for (const auto& Entry : IndexEntries)
+ {
+ m_LocationMap[Entry.Key] = Entry.Location;
+ }
+ }
- MigrateLegacyData(m_Config.RootDirectory, m_ContainerBaseName, m_MaxBlockSize, m_PayloadAlignment, true, m_LocationMap);
+ bool MakeSnapshot = false;
+ {
+ std::vector<CasDiskIndexEntry> LogEntries = ReadLog(m_Config.RootDirectory, m_ContainerBaseName);
+ for (const auto& Entry : LogEntries)
+ {
+ if (Entry.Flags & CasDiskIndexEntry::kTombstone)
+ {
+ continue;
+ }
+ m_LocationMap[Entry.Key] = Entry.Location;
+ }
+ MakeSnapshot = !LogEntries.empty();
+ }
+
+ if (std::filesystem::is_regular_file(LegacyLogPath))
+ {
+ std::unordered_set<IoHash, IoHash::Hasher> ExistingChunks;
+ ExistingChunks.reserve(m_LocationMap.size());
+ for (const auto& Entry : m_LocationMap)
+ {
+ ExistingChunks.insert(Entry.first);
+ }
+ std::vector<CasDiskIndexEntry> LegacyEntries =
+ MigrateLegacyData(m_Config.RootDirectory, m_ContainerBaseName, m_MaxBlockSize, m_PayloadAlignment, true, ExistingChunks);
+ for (const auto& Entry : LegacyEntries)
+ {
+ m_LocationMap[Entry.Key] = Entry.Location;
+ }
+ MakeSnapshot |= !LegacyEntries.empty();
+ }
CreateDirectories(BasePath);
@@ -1431,7 +1451,7 @@ CasContainerStrategy::OpenContainer(bool IsNewStore)
}
}
- if (AddedFromCasLog)
+ if (MakeSnapshot)
{
MakeIndexSnapshot();
}
@@ -2329,32 +2349,33 @@ TEST_CASE("compactcas.threadedinsert") // * doctest::skip(true))
}
}
-TEST_CASE("compactcas.migrate.large.data" * doctest::skip(true))
+TEST_CASE("compactcas.migrate.large.data" * doctest::skip(false))
{
+ auto getChunkSet = [](const std::vector<CasDiskIndexEntry>& Entries) {
+ std::unordered_set<IoHash, IoHash::Hasher> ChunkHashes;
+ ChunkHashes.reserve(Entries.size());
+ for (const auto& Entry : Entries)
+ {
+ ChunkHashes.insert(Entry.Key);
+ }
+ return ChunkHashes;
+ };
+
const char* BigDataPath = "D:\\zen-data\\dc4-zen-cache-t\\cas";
std::filesystem::path TobsBasePath = GetBasePath(BigDataPath, "tobs");
std::filesystem::path SobsBasePath = GetBasePath(BigDataPath, "sobs");
std::filesystem::remove_all(TobsBasePath);
std::filesystem::remove_all(SobsBasePath);
- uint64_t TobsPayloadAlignment = 16;
- uint64_t TobsBlockSize = 1u << 28;
- std::unordered_map<IoHash, BlockStoreDiskLocation, IoHash::Hasher> TobsLocationMap;
- uint64_t MigratedTobsCount = MigrateLegacyData(BigDataPath, "tobs", TobsBlockSize, TobsPayloadAlignment, false, TobsLocationMap);
- CHECK(MigratedTobsCount > 0);
- TobsLocationMap.clear();
- ReadIndex(BigDataPath, "tobs", TobsPayloadAlignment, TobsLocationMap);
- uint64_t MigratedTobsCount2 = MigrateLegacyData(BigDataPath, "tobs", TobsBlockSize, TobsPayloadAlignment, false, TobsLocationMap);
- CHECK(MigratedTobsCount2 == 0);
-
- uint64_t SobsPayloadAlignment = 4096;
- uint64_t SobsBlockSize = 1u << 30;
- std::unordered_map<IoHash, BlockStoreDiskLocation, IoHash::Hasher> SobsLocationMap;
- uint64_t MigratedSobsCount = MigrateLegacyData(BigDataPath, "sobs", SobsBlockSize, SobsPayloadAlignment, false, SobsLocationMap);
- CHECK(MigratedSobsCount > 0);
- SobsLocationMap.clear();
- ReadIndex(BigDataPath, "sobs", SobsPayloadAlignment, SobsLocationMap);
- uint64_t MigratedSobsCount2 = MigrateLegacyData(BigDataPath, "sobs", SobsBlockSize, SobsPayloadAlignment, false, SobsLocationMap);
- CHECK(MigratedSobsCount2 == 0);
+ uint64_t TobsPayloadAlignment = 16;
+ uint64_t TobsBlockSize = 1u << 28;
+ auto TobsMigratedChunks = MigrateLegacyData(BigDataPath, "tobs", TobsBlockSize, TobsPayloadAlignment, false, {});
+ CHECK(TobsMigratedChunks.size() > 0);
+
+ uint64_t SobsPayloadAlignment = 4096;
+ uint64_t SobsBlockSize = 1u << 30;
+
+ auto SobsMigratedChunks = MigrateLegacyData(BigDataPath, "sobs", SobsBlockSize, SobsPayloadAlignment, false, {});
+ CHECK(SobsMigratedChunks.size() > 0);
CasStoreConfiguration CasConfig;
CasConfig.RootDirectory = BigDataPath;
@@ -2364,12 +2385,20 @@ TEST_CASE("compactcas.migrate.large.data" * doctest::skip(true))
TobsCas.Initialize("tobs", 1u << 28, 16, false);
GcContext TobsGcCtx;
TobsCas.CollectGarbage(TobsGcCtx);
+ for (const CasDiskIndexEntry& Entry : TobsMigratedChunks)
+ {
+ CHECK(TobsCas.HaveChunk(Entry.Key));
+ }
CasGc SobsCasGc;
CasContainerStrategy SobsCas(CasConfig, SobsCasGc);
SobsCas.Initialize("sobs", 1u << 30, 4096, false);
GcContext SobsGcCtx;
SobsCas.CollectGarbage(SobsGcCtx);
+ for (const CasDiskIndexEntry& Entry : SobsMigratedChunks)
+ {
+ CHECK(SobsCas.HaveChunk(Entry.Key));
+ }
}
#endif