diff options
| author | Dan Engelbrecht <[email protected]> | 2022-03-25 14:08:58 +0100 |
|---|---|---|
| committer | Dan Engelbrecht <[email protected]> | 2022-03-31 11:29:28 +0200 |
| commit | 5facd2821026b30f28c4d87a98ea5c8cdc288531 (patch) | |
| tree | 00b2c94ec111e8943259bc2995f451bc2079f9a1 /zenstore/compactcas.cpp | |
| parent | cleanup (diff) | |
| download | zen-5facd2821026b30f28c4d87a98ea5c8cdc288531.tar.xz zen-5facd2821026b30f28c4d87a98ea5c8cdc288531.zip | |
make code a bit easier to follow
Diffstat (limited to 'zenstore/compactcas.cpp')
| -rw-r--r-- | zenstore/compactcas.cpp | 179 |
1 files changed, 104 insertions, 75 deletions
diff --git a/zenstore/compactcas.cpp b/zenstore/compactcas.cpp index 244277330..8076bbbde 100644 --- a/zenstore/compactcas.cpp +++ b/zenstore/compactcas.cpp @@ -174,12 +174,12 @@ namespace { uint8_t Flags = 0; }; - bool ReadIndex(const std::filesystem::path& RootDirectory, - const std::string& ContainerBaseName, - uint64_t& InOutPayloadAlignment, - std::unordered_map<IoHash, BlockStoreDiskLocation, IoHash::Hasher>& OutLocationMap) + std::vector<CasDiskIndexEntry> ReadIndexFile(const std::filesystem::path& RootDirectory, + const std::string& ContainerBaseName, + uint64_t& InOutPayloadAlignment) { - std::filesystem::path SidxPath = GetIndexPath(RootDirectory, ContainerBaseName); + std::vector<CasDiskIndexEntry> Entries; + std::filesystem::path SidxPath = GetIndexPath(RootDirectory, ContainerBaseName); if (std::filesystem::is_regular_file(SidxPath)) { BasicFile ObjectIndexFile; @@ -193,54 +193,39 @@ namespace { if (Header.Magic == CasDiskIndexHeader::ExpectedMagic && Header.Version == CasDiskIndexHeader::CurrentVersion && Header.PayloadAlignment > 0 && Header.EntryCount == ExpectedEntryCount) { - std::vector<CasDiskIndexEntry> Entries{Header.EntryCount}; + Entries.resize(Header.EntryCount); ObjectIndexFile.Read(Entries.data(), Header.EntryCount * sizeof(CasDiskIndexEntry), sizeof(CasDiskIndexHeader)); - for (const auto& Entry : Entries) - { - OutLocationMap[Entry.Key] = Entry.Location; - } InOutPayloadAlignment = Header.PayloadAlignment; } } } + return Entries; + } - bool AddedFromCasLog = false; - TCasLogFile<CasDiskIndexEntry> CasLog; + std::vector<CasDiskIndexEntry> ReadLog(const std::filesystem::path& RootDirectory, const std::string& ContainerBaseName) + { + std::vector<CasDiskIndexEntry> Entries; std::filesystem::path SlogPath = GetLogPath(RootDirectory, ContainerBaseName); if (std::filesystem::is_regular_file(SlogPath)) { + TCasLogFile<CasDiskIndexEntry> CasLog; CasLog.Open(SlogPath, CasLogFile::EMode::kRead); - CasLog.Replay([&](const CasDiskIndexEntry& Record) { - if (Record.Flags & CasDiskIndexEntry::kTombstone) - { - OutLocationMap.erase(Record.Key); - } - else - { - OutLocationMap[Record.Key] = Record.Location; - } - AddedFromCasLog = true; - }); + CasLog.Replay([&](const CasDiskIndexEntry& Record) { Entries.push_back(Record); }); } - return AddedFromCasLog; + return Entries; } - uint64_t MigrateLegacyData(const std::filesystem::path& RootPath, - const std::string& ContainerBaseName, - uint64_t MaxBlockSize, - uint64_t PayloadAlignment, - bool CleanSource, - std::unordered_map<IoHash, BlockStoreDiskLocation, IoHash::Hasher>& InOutLocationMap) + std::vector<CasDiskIndexEntry> MigrateLegacyData(const std::filesystem::path& RootPath, + const std::string& ContainerBaseName, + uint64_t MaxBlockSize, + uint64_t PayloadAlignment, + bool CleanSource, + const std::unordered_set<IoHash, IoHash::Hasher>& ExistingChunks) { std::filesystem::path BlocksBasePath = GetBlocksBasePath(RootPath, ContainerBaseName); std::filesystem::path LegacyLogPath = GetLegacyLogPath(RootPath, ContainerBaseName); std::filesystem::path LegacySobsPath = GetLegacyUcasPath(RootPath, ContainerBaseName); std::filesystem::path LegacySidxPath = GetLegacyUidxPath(RootPath, ContainerBaseName); - if (!std::filesystem::is_regular_file(LegacyLogPath) || !std::filesystem::is_regular_file(LegacySobsPath)) - { - ZEN_DEBUG("migrate of {} SKIPPED, no legacy data found", RootPath / ContainerBaseName); - return 0; - } uint64_t MigratedChunkCount = 0; uint32_t MigratedBlockCount = 0; @@ -256,6 +241,8 @@ namespace { NiceBytes(TotalSize)); }); + std::vector<CasDiskIndexEntry> Result; + uint32_t WriteBlockIndex = 0; while (std::filesystem::exists(GetBlockPath(BlocksBasePath, WriteBlockIndex))) { @@ -267,7 +254,7 @@ namespace { if (Error) { ZEN_ERROR("get disk space in {} FAILED, reason '{}'", ContainerBaseName, Error.message()); - return 0; + return Result; } if (Space.Free < MaxBlockSize) @@ -276,7 +263,7 @@ namespace { RootPath / ContainerBaseName, MaxBlockSize, NiceBytes(Space.Free)); - return 0; + return Result; } BasicFile BlockFile; @@ -298,7 +285,7 @@ namespace { { return; } - if (InOutLocationMap.contains(Record.Key)) + if (ExistingChunks.contains(Record.Key)) { return; } @@ -315,7 +302,7 @@ namespace { std::filesystem::remove(LegacySobsPath); std::filesystem::remove(LegacySidxPath); } - return 0; + return Result; } uint64_t MaxUsedSize = 0; @@ -338,7 +325,7 @@ namespace { RootPath / ContainerBaseName, MaxRequiredBlockCount, BlockStoreDiskLocation::MaxBlockIndex); - return 0; + return Result; } if (CleanSource) @@ -350,7 +337,7 @@ namespace { NewBlockIndex + 1, NiceBytes(MaxBlockSize + (1 << 28)), NiceBytes(Space.Free)); - return 0; + return Result; } } else @@ -362,7 +349,7 @@ namespace { NewBlockIndex + 1, NiceBytes(RequiredDiskSpace + (1 << 28)), NiceBytes(Space.Free)); - return 0; + return Result; } } @@ -373,8 +360,7 @@ namespace { if (CleanSource && (MaxRequiredBlockCount < 2)) { - std::vector<CasDiskIndexEntry> LogEntries; - LogEntries.reserve(LegacyDiskIndex.size()); + Result.reserve(LegacyDiskIndex.size()); // We can use the block as is, just move it and add the blocks to our new log for (auto& Entry : LegacyDiskIndex) @@ -383,16 +369,15 @@ namespace { BlockStoreLocation NewChunkLocation(WriteBlockIndex, Record.Location.GetOffset(), Record.Location.GetSize()); BlockStoreDiskLocation NewLocation(NewChunkLocation, PayloadAlignment); - LogEntries.push_back( + Result.push_back( {.Key = Entry.second.Key, .Location = NewLocation, .ContentType = Record.ContentType, .Flags = Record.Flags}); - InOutLocationMap[Entry.second.Key] = NewLocation; } auto BlockPath = GetBlockPath(BlocksBasePath, WriteBlockIndex); CreateDirectories(BlockPath.parent_path()); BlockFile.Close(); std::filesystem::rename(LegacySobsPath, BlockPath); - CasLog.Append(LogEntries); - MigratedChunkCount = LogEntries.size(); + CasLog.Append(Result); + MigratedChunkCount += Result.size(); MigratedBlockCount++; } else @@ -499,9 +484,9 @@ namespace { BlockStoreDiskLocation Location(Entry.second, PayloadAlignment); LogEntries.push_back( {.Key = Entry.first, .Location = Location, .ContentType = LegacyEntry.ContentType, .Flags = LegacyEntry.Flags}); - InOutLocationMap[Entry.first] = Location; } CasLog.Append(LogEntries); + Result.insert(Result.end(), LogEntries.begin(), LogEntries.end()); MigratedChunkCount += LogEntries.size(); MigratedBlockCount++; @@ -528,7 +513,7 @@ namespace { std::filesystem::remove(LegacySobsPath); std::filesystem::remove(LegacySidxPath); } - return MigratedChunkCount; + return Result; } } // namespace @@ -1307,20 +1292,55 @@ CasContainerStrategy::OpenContainer(bool IsNewStore) m_LocationMap.clear(); - std::filesystem::path BasePath = GetBasePath(m_Config.RootDirectory, m_ContainerBaseName); + std::filesystem::path BasePath = GetBasePath(m_Config.RootDirectory, m_ContainerBaseName); + std::filesystem::path LegacyLogPath = GetLegacyLogPath(m_Config.RootDirectory, m_ContainerBaseName); if (IsNewStore) { - std::filesystem::path LegacyLogPath = GetLegacyLogPath(m_Config.RootDirectory, m_ContainerBaseName); std::filesystem::path LegacySobsPath = GetLegacyUcasPath(m_Config.RootDirectory, m_ContainerBaseName); std::filesystem::remove(LegacyLogPath); std::filesystem::remove(LegacySobsPath); std::filesystem::remove_all(BasePath); } - bool AddedFromCasLog = ReadIndex(m_Config.RootDirectory, m_ContainerBaseName, m_PayloadAlignment, m_LocationMap); + { + std::vector<CasDiskIndexEntry> IndexEntries = ReadIndexFile(m_Config.RootDirectory, m_ContainerBaseName, m_PayloadAlignment); + for (const auto& Entry : IndexEntries) + { + m_LocationMap[Entry.Key] = Entry.Location; + } + } - MigrateLegacyData(m_Config.RootDirectory, m_ContainerBaseName, m_MaxBlockSize, m_PayloadAlignment, true, m_LocationMap); + bool MakeSnapshot = false; + { + std::vector<CasDiskIndexEntry> LogEntries = ReadLog(m_Config.RootDirectory, m_ContainerBaseName); + for (const auto& Entry : LogEntries) + { + if (Entry.Flags & CasDiskIndexEntry::kTombstone) + { + continue; + } + m_LocationMap[Entry.Key] = Entry.Location; + } + MakeSnapshot = !LogEntries.empty(); + } + + if (std::filesystem::is_regular_file(LegacyLogPath)) + { + std::unordered_set<IoHash, IoHash::Hasher> ExistingChunks; + ExistingChunks.reserve(m_LocationMap.size()); + for (const auto& Entry : m_LocationMap) + { + ExistingChunks.insert(Entry.first); + } + std::vector<CasDiskIndexEntry> LegacyEntries = + MigrateLegacyData(m_Config.RootDirectory, m_ContainerBaseName, m_MaxBlockSize, m_PayloadAlignment, true, ExistingChunks); + for (const auto& Entry : LegacyEntries) + { + m_LocationMap[Entry.Key] = Entry.Location; + } + MakeSnapshot |= !LegacyEntries.empty(); + } CreateDirectories(BasePath); @@ -1431,7 +1451,7 @@ CasContainerStrategy::OpenContainer(bool IsNewStore) } } - if (AddedFromCasLog) + if (MakeSnapshot) { MakeIndexSnapshot(); } @@ -2329,32 +2349,33 @@ TEST_CASE("compactcas.threadedinsert") // * doctest::skip(true)) } } -TEST_CASE("compactcas.migrate.large.data" * doctest::skip(true)) +TEST_CASE("compactcas.migrate.large.data" * doctest::skip(false)) { + auto getChunkSet = [](const std::vector<CasDiskIndexEntry>& Entries) { + std::unordered_set<IoHash, IoHash::Hasher> ChunkHashes; + ChunkHashes.reserve(Entries.size()); + for (const auto& Entry : Entries) + { + ChunkHashes.insert(Entry.Key); + } + return ChunkHashes; + }; + const char* BigDataPath = "D:\\zen-data\\dc4-zen-cache-t\\cas"; std::filesystem::path TobsBasePath = GetBasePath(BigDataPath, "tobs"); std::filesystem::path SobsBasePath = GetBasePath(BigDataPath, "sobs"); std::filesystem::remove_all(TobsBasePath); std::filesystem::remove_all(SobsBasePath); - uint64_t TobsPayloadAlignment = 16; - uint64_t TobsBlockSize = 1u << 28; - std::unordered_map<IoHash, BlockStoreDiskLocation, IoHash::Hasher> TobsLocationMap; - uint64_t MigratedTobsCount = MigrateLegacyData(BigDataPath, "tobs", TobsBlockSize, TobsPayloadAlignment, false, TobsLocationMap); - CHECK(MigratedTobsCount > 0); - TobsLocationMap.clear(); - ReadIndex(BigDataPath, "tobs", TobsPayloadAlignment, TobsLocationMap); - uint64_t MigratedTobsCount2 = MigrateLegacyData(BigDataPath, "tobs", TobsBlockSize, TobsPayloadAlignment, false, TobsLocationMap); - CHECK(MigratedTobsCount2 == 0); - - uint64_t SobsPayloadAlignment = 4096; - uint64_t SobsBlockSize = 1u << 30; - std::unordered_map<IoHash, BlockStoreDiskLocation, IoHash::Hasher> SobsLocationMap; - uint64_t MigratedSobsCount = MigrateLegacyData(BigDataPath, "sobs", SobsBlockSize, SobsPayloadAlignment, false, SobsLocationMap); - CHECK(MigratedSobsCount > 0); - SobsLocationMap.clear(); - ReadIndex(BigDataPath, "sobs", SobsPayloadAlignment, SobsLocationMap); - uint64_t MigratedSobsCount2 = MigrateLegacyData(BigDataPath, "sobs", SobsBlockSize, SobsPayloadAlignment, false, SobsLocationMap); - CHECK(MigratedSobsCount2 == 0); + uint64_t TobsPayloadAlignment = 16; + uint64_t TobsBlockSize = 1u << 28; + auto TobsMigratedChunks = MigrateLegacyData(BigDataPath, "tobs", TobsBlockSize, TobsPayloadAlignment, false, {}); + CHECK(TobsMigratedChunks.size() > 0); + + uint64_t SobsPayloadAlignment = 4096; + uint64_t SobsBlockSize = 1u << 30; + + auto SobsMigratedChunks = MigrateLegacyData(BigDataPath, "sobs", SobsBlockSize, SobsPayloadAlignment, false, {}); + CHECK(SobsMigratedChunks.size() > 0); CasStoreConfiguration CasConfig; CasConfig.RootDirectory = BigDataPath; @@ -2364,12 +2385,20 @@ TEST_CASE("compactcas.migrate.large.data" * doctest::skip(true)) TobsCas.Initialize("tobs", 1u << 28, 16, false); GcContext TobsGcCtx; TobsCas.CollectGarbage(TobsGcCtx); + for (const CasDiskIndexEntry& Entry : TobsMigratedChunks) + { + CHECK(TobsCas.HaveChunk(Entry.Key)); + } CasGc SobsCasGc; CasContainerStrategy SobsCas(CasConfig, SobsCasGc); SobsCas.Initialize("sobs", 1u << 30, 4096, false); GcContext SobsGcCtx; SobsCas.CollectGarbage(SobsGcCtx); + for (const CasDiskIndexEntry& Entry : SobsMigratedChunks) + { + CHECK(SobsCas.HaveChunk(Entry.Key)); + } } #endif |