diff options
| author | Dan Engelbrecht <[email protected]> | 2022-06-17 07:06:21 -0700 |
|---|---|---|
| committer | GitHub <[email protected]> | 2022-06-17 07:06:21 -0700 |
| commit | c7e22a4ef1cce7103b9afbeec487461cb32f8dbe (patch) | |
| tree | 8b99d51bf496c96f82161c18fbdcfd5c6f8f31fd /zenstore/compactcas.cpp | |
| parent | fixed merge mistake which caused a build error (diff) | |
| download | zen-0.1.4-pre6.tar.xz zen-0.1.4-pre6.zip | |
Make cas storage a hidden implementation detail of CidStore (#130) v0.1.4-pre6 v0.1.4-pre5
- Bumped ZEN_SCHEMA_VERSION
- CasStore is no longer a public API; it is hidden behind CidStore
- Moved cas.h from public header folder
- CidStore no longer maps from Cid -> Cas; we store entries in Cas under RawHash
- CasStore now decompresses data to validate content (matching against RawHash)
- CasChunkSet renamed to HashKeySet and moved to a separate header/cpp file
- Disabled "Chunk" command for now as it relied on CAS being exposed as a service
- Changed CAS http service to Cid http server
- Moved "Run" command completely inside ZEN_WITH_EXEC_SERVICES define
- Removed "cas.basic" test
- Uncommented ".exec.basic" test and added return-skip at start of test
- Moved ScrubContext to separate header file
- Renamed CasGC to GcManager
- Cleaned up configuration passing in cas store classes
- Removed CAS stuff from GcContext and clarified naming in class
- Removed migration code
Diffstat (limited to 'zenstore/compactcas.cpp')
| -rw-r--r-- | zenstore/compactcas.cpp | 745 |
1 files changed, 149 insertions, 596 deletions
diff --git a/zenstore/compactcas.cpp b/zenstore/compactcas.cpp index 5aed02e7f..a7fdfa1f5 100644 --- a/zenstore/compactcas.cpp +++ b/zenstore/compactcas.cpp @@ -2,13 +2,16 @@ #include "compactcas.h" -#include <zenstore/cas.h> +#include "cas.h" +#include <zencore/compress.h> #include <zencore/except.h> #include <zencore/filesystem.h> #include <zencore/fmtutils.h> #include <zencore/logging.h> #include <zencore/scopeguard.h> +#include <zenstore/scrubcontext.h> + #include <gsl/gsl-lite.hpp> #include <xxhash.h> @@ -76,94 +79,6 @@ namespace { return GetBasePath(RootPath, ContainerBaseName) / "blocks"; } - std::filesystem::path GetLegacyLogPath(const std::filesystem::path& RootPath, const std::string& ContainerBaseName) - { - return RootPath / (ContainerBaseName + LogExtension); - } - - std::filesystem::path GetLegacyDataPath(const std::filesystem::path& RootPath, const std::string& ContainerBaseName) - { - return RootPath / (ContainerBaseName + ".ucas"); - } - - std::filesystem::path GetLegacyIndexPath(const std::filesystem::path& RootPath, const std::string& ContainerBaseName) - { - return RootPath / (ContainerBaseName + IndexExtension); - } - - struct LegacyCasDiskLocation - { - LegacyCasDiskLocation(uint64_t InOffset, uint64_t InSize) - { - ZEN_ASSERT(InOffset <= 0xff'ffff'ffff); - ZEN_ASSERT(InSize <= 0xff'ffff'ffff); - - memcpy(&m_Offset[0], &InOffset, sizeof m_Offset); - memcpy(&m_Size[0], &InSize, sizeof m_Size); - } - - LegacyCasDiskLocation() = default; - - inline uint64_t GetOffset() const - { - uint64_t Offset = 0; - memcpy(&Offset, &m_Offset, sizeof m_Offset); - return Offset; - } - - inline uint64_t GetSize() const - { - uint64_t Size = 0; - memcpy(&Size, &m_Size, sizeof m_Size); - return Size; - } - - private: - uint8_t m_Offset[5]; - uint8_t m_Size[5]; - }; - - struct LegacyCasDiskIndexEntry - { - static const uint8_t kTombstone = 0x01; - - IoHash Key; - LegacyCasDiskLocation Location; - ZenContentType ContentType = ZenContentType::kUnknownContentType; - 
uint8_t Flags = 0; - }; - - bool ValidateLegacyEntry(const LegacyCasDiskIndexEntry& Entry, std::string& OutReason) - { - if (Entry.Key == IoHash::Zero) - { - OutReason = fmt::format("Invalid hash key {}", Entry.Key.ToHexString()); - return false; - } - if ((Entry.Flags & ~LegacyCasDiskIndexEntry::kTombstone) != 0) - { - OutReason = fmt::format("Invalid flags {} for entry {}", Entry.Flags, Entry.Key.ToHexString()); - return false; - } - if (Entry.Flags & LegacyCasDiskIndexEntry::kTombstone) - { - return true; - } - if (Entry.ContentType != ZenContentType::kUnknownContentType) - { - OutReason = - fmt::format("Invalid content type {} for entry {}", static_cast<uint8_t>(Entry.ContentType), Entry.Key.ToHexString()); - return false; - } - uint64_t Size = Entry.Location.GetSize(); - if (Size == 0) - { - OutReason = fmt::format("Invalid size {} for entry {}", Size, Entry.Key.ToHexString()); - return false; - } - return true; - } - bool ValidateEntry(const CasDiskIndexEntry& Entry, std::string& OutReason) { if (Entry.Key == IoHash::Zero) @@ -199,10 +114,7 @@ namespace { ////////////////////////////////////////////////////////////////////////// -CasContainerStrategy::CasContainerStrategy(const CasStoreConfiguration& Config, CasGc& Gc) -: GcStorage(Gc) -, m_Config(Config) -, m_Log(logging::Get("containercas")) +CasContainerStrategy::CasContainerStrategy(GcManager& Gc) : GcStorage(Gc), m_Log(logging::Get("containercas")) { } @@ -211,16 +123,21 @@ CasContainerStrategy::~CasContainerStrategy() } void -CasContainerStrategy::Initialize(const std::string_view ContainerBaseName, uint32_t MaxBlockSize, uint64_t Alignment, bool IsNewStore) +CasContainerStrategy::Initialize(const std::filesystem::path& RootDirectory, + const std::string_view ContainerBaseName, + uint32_t MaxBlockSize, + uint64_t Alignment, + bool IsNewStore) { ZEN_ASSERT(IsPow2(Alignment)); ZEN_ASSERT(!m_IsInitialized); ZEN_ASSERT(MaxBlockSize > 0); + m_RootDirectory = RootDirectory; m_ContainerBaseName = 
ContainerBaseName; m_PayloadAlignment = Alignment; m_MaxBlockSize = MaxBlockSize; - m_BlocksBasePath = GetBlocksBasePath(m_Config.RootDirectory, m_ContainerBaseName); + m_BlocksBasePath = GetBlocksBasePath(m_RootDirectory, m_ContainerBaseName); OpenContainer(IsNewStore); @@ -267,6 +184,9 @@ CasContainerStrategy::InsertChunk(const void* ChunkData, size_t ChunkSize, const CasStore::InsertResult CasContainerStrategy::InsertChunk(IoBuffer Chunk, const IoHash& ChunkHash) { +#if !ZEN_WITH_TESTS + ZEN_ASSERT(Chunk.GetContentType() == ZenContentType::kCompressedBinary); +#endif return InsertChunk(Chunk.Data(), Chunk.Size(), ChunkHash); } @@ -293,7 +213,7 @@ CasContainerStrategy::HaveChunk(const IoHash& ChunkHash) } void -CasContainerStrategy::FilterChunks(CasChunkSet& InOutChunks) +CasContainerStrategy::FilterChunks(HashKeySet& InOutChunks) { // This implementation is good enough for relatively small // chunk sets (in terms of chunk identifiers), but would @@ -302,7 +222,7 @@ CasContainerStrategy::FilterChunks(CasChunkSet& InOutChunks) // we're likely to already have a large proportion of the // chunks in the set - InOutChunks.RemoveChunksIf([&](const IoHash& Hash) { return HaveChunk(Hash); }); + InOutChunks.RemoveHashesIf([&](const IoHash& Hash) { return HaveChunk(Hash); }); } void @@ -316,6 +236,7 @@ void CasContainerStrategy::Scrub(ScrubContext& Ctx) { std::vector<IoHash> BadKeys; + uint64_t ChunkCount{0}, ChunkBytes{0}; std::vector<BlockStoreLocation> ChunkLocations; std::vector<IoHash> ChunkIndexToChunkHash; @@ -337,6 +258,9 @@ CasContainerStrategy::Scrub(ScrubContext& Ctx) } const auto ValidateSmallChunk = [&](size_t ChunkIndex, const void* Data, uint64_t Size) { + ++ChunkCount; + ChunkBytes += Size; + const IoHash& Hash = ChunkIndexToChunkHash[ChunkIndex]; if (!Data) { @@ -344,66 +268,97 @@ CasContainerStrategy::Scrub(ScrubContext& Ctx) BadKeys.push_back(Hash); return; } - const IoHash ComputedHash = IoHash::HashBuffer(Data, Size); - if (ComputedHash != Hash) + + 
IoBuffer Buffer(IoBuffer::Wrap, Data, Size); + if (CompressedBuffer Compressed = CompressedBuffer::FromCompressed(SharedBuffer(Buffer)); Compressed) + { + if (IoHash::FromBLAKE3(Compressed.GetRawHash()) != Hash) + { + // Hash mismatch + BadKeys.push_back(Hash); + return; + } + return; + } +#if ZEN_WITH_TESTS + IoHash ComputedHash = IoHash::HashBuffer(Data, Size); + if (ComputedHash == Hash) { - // Hash mismatch - BadKeys.push_back(Hash); return; } +#endif + BadKeys.push_back(Hash); }; const auto ValidateLargeChunk = [&](size_t ChunkIndex, BlockStoreFile& File, uint64_t Offset, uint64_t Size) { + ++ChunkCount; + ChunkBytes += Size; + + const IoHash& Hash = ChunkIndexToChunkHash[ChunkIndex]; + IoBuffer Buffer(IoBuffer::BorrowedFile, File.GetBasicFile().Handle(), Offset, Size); + // TODO: Add API to verify compressed buffer without having to memorymap the whole file + if (CompressedBuffer Compressed = CompressedBuffer::FromCompressed(SharedBuffer(Buffer)); Compressed) + { + if (IoHash::FromBLAKE3(Compressed.GetRawHash()) != Hash) + { + // Hash mismatch + BadKeys.push_back(Hash); + return; + } + return; + } +#if ZEN_WITH_TESTS IoHashStream Hasher; - File.StreamByteRange(Offset, Size, [&](const void* Data, uint64_t Size) { Hasher.Append(Data, Size); }); - IoHash ComputedHash = Hasher.GetHash(); - const IoHash& Hash = ChunkIndexToChunkHash[ChunkIndex]; - if (ComputedHash != Hash) + File.StreamByteRange(Offset, Size, [&](const void* Data, size_t Size) { Hasher.Append(Data, Size); }); + IoHash ComputedHash = Hasher.GetHash(); + if (ComputedHash == Hash) { - // Hash mismatch - BadKeys.push_back(Hash); return; } +#endif + BadKeys.push_back(Hash); }; m_BlockStore.IterateChunks(ChunkLocations, ValidateSmallChunk, ValidateLargeChunk); _.ReleaseNow(); - if (BadKeys.empty()) - { - return; - } - - ZEN_ERROR("Scrubbing found #{} bad chunks in '{}'", BadKeys.size(), m_Config.RootDirectory / m_ContainerBaseName); + Ctx.ReportScrubbed(ChunkCount, ChunkBytes); - if (Ctx.RunRecovery()) 
+ if (!BadKeys.empty()) { - // Deal with bad chunks by removing them from our lookup map + ZEN_ERROR("Scrubbing found #{} bad chunks in '{}'", BadKeys.size(), m_RootDirectory / m_ContainerBaseName); - std::vector<CasDiskIndexEntry> LogEntries; - LogEntries.reserve(BadKeys.size()); + if (Ctx.RunRecovery()) { - RwLock::ExclusiveLockScope __(m_LocationMapLock); - for (const IoHash& ChunkHash : BadKeys) + // Deal with bad chunks by removing them from our lookup map + + std::vector<CasDiskIndexEntry> LogEntries; + LogEntries.reserve(BadKeys.size()); { - const auto KeyIt = m_LocationMap.find(ChunkHash); - if (KeyIt == m_LocationMap.end()) + RwLock::ExclusiveLockScope __(m_LocationMapLock); + for (const IoHash& ChunkHash : BadKeys) { - // Might have been GC'd - continue; + const auto KeyIt = m_LocationMap.find(ChunkHash); + if (KeyIt == m_LocationMap.end()) + { + // Might have been GC'd + continue; + } + LogEntries.push_back({.Key = KeyIt->first, .Location = KeyIt->second, .Flags = CasDiskIndexEntry::kTombstone}); + m_LocationMap.erase(KeyIt); } - LogEntries.push_back({.Key = KeyIt->first, .Location = KeyIt->second, .Flags = CasDiskIndexEntry::kTombstone}); - m_LocationMap.erase(KeyIt); } + m_CasLog.Append(LogEntries); } - m_CasLog.Append(LogEntries); } // Let whomever it concerns know about the bad chunks. This could // be used to invalidate higher level data structures more efficiently // than a full validation pass might be able to do - Ctx.ReportBadCasChunks(BadKeys); + Ctx.ReportBadCidChunks(BadKeys); + + ZEN_INFO("compact cas scrubbed: {} chunks ({})", ChunkCount, NiceBytes(ChunkBytes)); } void @@ -432,7 +387,7 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx) // do a blocking operation and update the m_LocationMap after each new block is // written and figuring out the path to the next new block. 
- ZEN_INFO("collecting garbage from '{}'", m_Config.RootDirectory / m_ContainerBaseName); + ZEN_INFO("collecting garbage from '{}'", m_RootDirectory / m_ContainerBaseName); uint64_t WriteBlockTimeUs = 0; uint64_t WriteBlockLongestTimeUs = 0; @@ -468,7 +423,7 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx) ChunkLocations.reserve(TotalChunkCount); ChunkIndexToChunkHash.reserve(TotalChunkCount); - GcCtx.FilterCas(TotalChunkHashes, [&](const IoHash& ChunkHash, bool Keep) { + GcCtx.FilterCids(TotalChunkHashes, [&](const IoHash& ChunkHash, bool Keep) { auto KeyIt = LocationMap.find(ChunkHash); const BlockStoreDiskLocation& DiskLocation = KeyIt->second; BlockStoreLocation Location = DiskLocation.Get(m_PayloadAlignment); @@ -539,26 +494,26 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx) }, [&GcCtx]() { return GcCtx.CollectSmallObjects(); }); - GcCtx.DeletedCas(DeletedChunks); + GcCtx.AddDeletedCids(DeletedChunks); } void CasContainerStrategy::MakeIndexSnapshot() { - ZEN_INFO("write store snapshot for '{}'", m_Config.RootDirectory / m_ContainerBaseName); + ZEN_INFO("write store snapshot for '{}'", m_RootDirectory / m_ContainerBaseName); uint64_t EntryCount = 0; Stopwatch Timer; const auto _ = MakeGuard([&] { ZEN_INFO("wrote store snapshot for '{}' containing #{} entries in {}", - m_Config.RootDirectory / m_ContainerBaseName, + m_RootDirectory / m_ContainerBaseName, EntryCount, NiceTimeSpanMs(Timer.GetElapsedTimeMs())); }); namespace fs = std::filesystem; - fs::path IndexPath = GetIndexPath(m_Config.RootDirectory, m_ContainerBaseName); - fs::path TempIndexPath = GetTempIndexPath(m_Config.RootDirectory, m_ContainerBaseName); + fs::path IndexPath = GetIndexPath(m_RootDirectory, m_ContainerBaseName); + fs::path TempIndexPath = GetTempIndexPath(m_RootDirectory, m_ContainerBaseName); // Move index away, we keep it if something goes wrong if (fs::is_regular_file(TempIndexPath)) @@ -629,13 +584,13 @@ uint64_t CasContainerStrategy::ReadIndexFile() { 
std::vector<CasDiskIndexEntry> Entries; - std::filesystem::path IndexPath = GetIndexPath(m_Config.RootDirectory, m_ContainerBaseName); + std::filesystem::path IndexPath = GetIndexPath(m_RootDirectory, m_ContainerBaseName); if (std::filesystem::is_regular_file(IndexPath)) { Stopwatch Timer; const auto _ = MakeGuard([&] { ZEN_INFO("read store '{}' index containing #{} entries in {}", - m_Config.RootDirectory / m_ContainerBaseName, + m_RootDirectory / m_ContainerBaseName, Entries.size(), NiceTimeSpanMs(Timer.GetElapsedTimeMs())); }); @@ -682,13 +637,13 @@ uint64_t CasContainerStrategy::ReadLog(uint64_t SkipEntryCount) { std::vector<CasDiskIndexEntry> Entries; - std::filesystem::path LogPath = GetLogPath(m_Config.RootDirectory, m_ContainerBaseName); + std::filesystem::path LogPath = GetLogPath(m_RootDirectory, m_ContainerBaseName); if (std::filesystem::is_regular_file(LogPath)) { Stopwatch Timer; const auto _ = MakeGuard([&] { ZEN_INFO("read store '{}' log containing #{} entries in {}", - m_Config.RootDirectory / m_ContainerBaseName, + m_RootDirectory / m_ContainerBaseName, Entries.size(), NiceTimeSpanMs(Timer.GetElapsedTimeMs())); }); @@ -727,208 +682,6 @@ CasContainerStrategy::ReadLog(uint64_t SkipEntryCount) return 0; } -uint64_t -CasContainerStrategy::MigrateLegacyData(bool CleanSource) -{ - std::filesystem::path LegacyLogPath = GetLegacyLogPath(m_Config.RootDirectory, m_ContainerBaseName); - - if (!std::filesystem::is_regular_file(LegacyLogPath) || std::filesystem::file_size(LegacyLogPath) == 0) - { - return 0; - } - - ZEN_INFO("migrating store '{}'", m_Config.RootDirectory / m_ContainerBaseName); - - std::filesystem::path LegacyDataPath = GetLegacyDataPath(m_Config.RootDirectory, m_ContainerBaseName); - std::filesystem::path LegacyIndexPath = GetLegacyIndexPath(m_Config.RootDirectory, m_ContainerBaseName); - - uint64_t MigratedChunkCount = 0; - uint32_t MigratedBlockCount = 0; - Stopwatch MigrationTimer; - uint64_t TotalSize = 0; - const auto _ = MakeGuard([&] { 
- ZEN_INFO("migrated store '{}' to #{} chunks in #{} blocks in {} ({})", - m_Config.RootDirectory / m_ContainerBaseName, - MigratedChunkCount, - MigratedBlockCount, - NiceTimeSpanMs(MigrationTimer.GetElapsedTimeMs()), - NiceBytes(TotalSize)); - }); - - uint64_t BlockFileSize = 0; - { - BasicFile BlockFile; - BlockFile.Open(LegacyDataPath, CleanSource ? BasicFile::Mode::kWrite : BasicFile::Mode::kRead); - BlockFileSize = BlockFile.FileSize(); - } - - std::unordered_map<IoHash, LegacyCasDiskIndexEntry, IoHash::Hasher> LegacyDiskIndex; - uint64_t InvalidEntryCount = 0; - - TCasLogFile<LegacyCasDiskIndexEntry> LegacyCasLog; - LegacyCasLog.Open(LegacyLogPath, CleanSource ? CasLogFile::Mode::kWrite : CasLogFile::Mode::kRead); - { - Stopwatch Timer; - const auto __ = MakeGuard([&] { - ZEN_INFO("read store '{}' legacy log containing #{} entries in {}", - m_Config.RootDirectory / m_ContainerBaseName, - LegacyDiskIndex.size(), - NiceTimeSpanMs(Timer.GetElapsedTimeMs())); - }); - if (LegacyCasLog.Initialize()) - { - LegacyDiskIndex.reserve(LegacyCasLog.GetLogCount()); - LegacyCasLog.Replay( - [&](const LegacyCasDiskIndexEntry& Record) { - std::string InvalidEntryReason; - if (Record.Flags & LegacyCasDiskIndexEntry::kTombstone) - { - LegacyDiskIndex.erase(Record.Key); - return; - } - if (!ValidateLegacyEntry(Record, InvalidEntryReason)) - { - ZEN_WARN("skipping invalid entry in '{}', reason: '{}'", LegacyLogPath, InvalidEntryReason); - InvalidEntryCount++; - return; - } - LegacyDiskIndex.insert_or_assign(Record.Key, Record); - }, - 0); - - std::vector<IoHash> BadEntries; - for (const auto& Entry : LegacyDiskIndex) - { - const LegacyCasDiskIndexEntry& Record(Entry.second); - if (Record.Location.GetOffset() + Record.Location.GetSize() <= BlockFileSize) - { - continue; - } - ZEN_WARN("skipping invalid entry in '{}', reason: location is outside of file", LegacyLogPath); - BadEntries.push_back(Entry.first); - } - for (const IoHash& BadHash : BadEntries) - { - 
LegacyDiskIndex.erase(BadHash); - } - InvalidEntryCount += BadEntries.size(); - } - } - - if (InvalidEntryCount) - { - ZEN_WARN("found #{} invalid entries in '{}'", InvalidEntryCount, m_Config.RootDirectory / m_ContainerBaseName); - } - - if (LegacyDiskIndex.empty()) - { - LegacyCasLog.Close(); - if (CleanSource) - { - // Older versions of CasContainerStrategy expects the legacy files to exist if it can find - // a CAS manifest and crashes on startup if they don't. - // In order to not break startup when switching back an older version, lets just reset - // the legacy data files to zero length. - - BasicFile LegacyLog; - LegacyLog.Open(LegacyLogPath, BasicFile::Mode::kTruncate); - BasicFile LegacySobs; - LegacySobs.Open(LegacyDataPath, BasicFile::Mode::kTruncate); - BasicFile LegacySidx; - LegacySidx.Open(LegacyIndexPath, BasicFile::Mode::kTruncate); - } - return 0; - } - - std::filesystem::path LogPath = GetLogPath(m_Config.RootDirectory, m_ContainerBaseName); - CreateDirectories(LogPath.parent_path()); - TCasLogFile<CasDiskIndexEntry> CasLog; - CasLog.Open(LogPath, CasLogFile::Mode::kWrite); - - std::unordered_map<size_t, IoHash> ChunkIndexToChunkHash; - std::vector<BlockStoreLocation> ChunkLocations; - ChunkIndexToChunkHash.reserve(LegacyDiskIndex.size()); - ChunkLocations.reserve(LegacyDiskIndex.size()); - for (const auto& Entry : LegacyDiskIndex) - { - const LegacyCasDiskLocation& Location = Entry.second.Location; - const IoHash& ChunkHash = Entry.first; - size_t ChunkIndex = ChunkLocations.size(); - ChunkLocations.push_back({.BlockIndex = 0, .Offset = Location.GetOffset(), .Size = Location.GetSize()}); - ChunkIndexToChunkHash[ChunkIndex] = ChunkHash; - TotalSize += Location.GetSize(); - } - m_BlockStore.Split( - ChunkLocations, - LegacyDataPath, - m_BlocksBasePath, - m_MaxBlockSize, - BlockStoreDiskLocation::MaxBlockIndex + 1, - m_PayloadAlignment, - CleanSource, - [this, &LegacyDiskIndex, &ChunkIndexToChunkHash, &LegacyCasLog, &CasLog, CleanSource, 
&MigratedBlockCount, &MigratedChunkCount]( - const BlockStore::MovedChunksArray& MovedChunks) { - std::vector<CasDiskIndexEntry> LogEntries; - LogEntries.reserve(MovedChunks.size()); - for (const auto& Entry : MovedChunks) - { - size_t ChunkIndex = Entry.first; - const BlockStoreLocation& NewLocation = Entry.second; - const IoHash& ChunkHash = ChunkIndexToChunkHash[ChunkIndex]; - const LegacyCasDiskIndexEntry& OldEntry = LegacyDiskIndex[ChunkHash]; - LogEntries.push_back({.Key = ChunkHash, - .Location = {NewLocation, m_PayloadAlignment}, - .ContentType = OldEntry.ContentType, - .Flags = OldEntry.Flags}); - } - for (const CasDiskIndexEntry& Entry : LogEntries) - { - m_LocationMap.insert_or_assign(Entry.Key, Entry.Location); - } - CasLog.Append(LogEntries); - CasLog.Flush(); - if (CleanSource) - { - std::vector<LegacyCasDiskIndexEntry> LegacyLogEntries; - LegacyLogEntries.reserve(MovedChunks.size()); - for (const auto& Entry : MovedChunks) - { - size_t ChunkIndex = Entry.first; - const IoHash& ChunkHash = ChunkIndexToChunkHash[ChunkIndex]; - const LegacyCasDiskIndexEntry& OldEntry = LegacyDiskIndex[ChunkHash]; - LegacyLogEntries.push_back( - LegacyCasDiskIndexEntry{.Key = ChunkHash, - .Location = OldEntry.Location, - .ContentType = OldEntry.ContentType, - .Flags = (uint8_t)(OldEntry.Flags | LegacyCasDiskIndexEntry::kTombstone)}); - } - LegacyCasLog.Append(LegacyLogEntries); - LegacyCasLog.Flush(); - } - MigratedBlockCount++; - MigratedChunkCount += MovedChunks.size(); - }); - - LegacyCasLog.Close(); - CasLog.Close(); - - if (CleanSource) - { - // Older versions of CasContainerStrategy expects the legacy files to exist if it can find - // a CAS manifest and crashes on startup if they don't. - // In order to not break startup when switching back an older version, lets just reset - // the legacy data files to zero length. 
- - BasicFile LegacyLog; - LegacyLog.Open(LegacyLogPath, BasicFile::Mode::kTruncate); - BasicFile LegacySobs; - LegacySobs.Open(LegacyDataPath, BasicFile::Mode::kTruncate); - BasicFile LegacySidx; - LegacySidx.Open(LegacyIndexPath, BasicFile::Mode::kTruncate); - } - return MigratedChunkCount; -} - void CasContainerStrategy::OpenContainer(bool IsNewStore) { @@ -937,25 +690,19 @@ CasContainerStrategy::OpenContainer(bool IsNewStore) m_LocationMap.clear(); - std::filesystem::path BasePath = GetBasePath(m_Config.RootDirectory, m_ContainerBaseName); + std::filesystem::path BasePath = GetBasePath(m_RootDirectory, m_ContainerBaseName); if (IsNewStore) { - std::filesystem::path LegacyDataPath = GetLegacyDataPath(m_Config.RootDirectory, m_ContainerBaseName); - std::filesystem::path LegacyLogPath = GetLegacyLogPath(m_Config.RootDirectory, m_ContainerBaseName); - - std::filesystem::remove(LegacyLogPath); - std::filesystem::remove(LegacyDataPath); std::filesystem::remove_all(BasePath); } - uint64_t LogPosition = ReadIndexFile(); - uint64_t LogEntryCount = ReadLog(LogPosition); - uint64_t LegacyLogEntryCount = MigrateLegacyData(true); + uint64_t LogPosition = ReadIndexFile(); + uint64_t LogEntryCount = ReadLog(LogPosition); CreateDirectories(BasePath); - std::filesystem::path LogPath = GetLogPath(m_Config.RootDirectory, m_ContainerBaseName); + std::filesystem::path LogPath = GetLogPath(m_RootDirectory, m_ContainerBaseName); m_CasLog.Open(LogPath, CasLogFile::Mode::kWrite); std::vector<BlockStoreLocation> KnownLocations; @@ -969,7 +716,7 @@ CasContainerStrategy::OpenContainer(bool IsNewStore) m_BlockStore.Initialize(m_BlocksBasePath, m_MaxBlockSize, BlockStoreDiskLocation::MaxBlockIndex + 1, KnownLocations); - if (IsNewStore || ((LogEntryCount + LegacyLogEntryCount) > 0)) + if (IsNewStore || (LogEntryCount > 0)) { MakeIndexSnapshot(); } @@ -1040,18 +787,14 @@ TEST_CASE("compactcas.compact.gc") { ScopedTemporaryDirectory TempDir; - CasStoreConfiguration CasConfig; - 
CasConfig.RootDirectory = TempDir.Path(); - CreateDirectories(CasConfig.RootDirectory); - const int kIterationCount = 1000; std::vector<IoHash> Keys(kIterationCount); { - CasGc Gc; - CasContainerStrategy Cas(CasConfig, Gc); - Cas.Initialize("test", 65536, 16, true); + GcManager Gc; + CasContainerStrategy Cas(Gc); + Cas.Initialize(TempDir.Path(), "test", 65536, 16, true); for (int i = 0; i < kIterationCount; ++i) { @@ -1083,9 +826,9 @@ TEST_CASE("compactcas.compact.gc") // the original cas store { - CasGc Gc; - CasContainerStrategy Cas(CasConfig, Gc); - Cas.Initialize("test", 65536, 16, false); + GcManager Gc; + CasContainerStrategy Cas(Gc); + Cas.Initialize(TempDir.Path(), "test", 65536, 16, false); for (int i = 0; i < kIterationCount; ++i) { @@ -1109,18 +852,13 @@ TEST_CASE("compactcas.compact.totalsize") { ScopedTemporaryDirectory TempDir; - CasStoreConfiguration CasConfig; - CasConfig.RootDirectory = TempDir.Path(); - - CreateDirectories(CasConfig.RootDirectory); - const uint64_t kChunkSize = 1024; const int32_t kChunkCount = 16; { - CasGc Gc; - CasContainerStrategy Cas(CasConfig, Gc); - Cas.Initialize("test", 65536, 16, true); + GcManager Gc; + CasContainerStrategy Cas(Gc); + Cas.Initialize(TempDir.Path(), "test", 65536, 16, true); for (int32_t Idx = 0; Idx < kChunkCount; ++Idx) { @@ -1135,9 +873,9 @@ TEST_CASE("compactcas.compact.totalsize") } { - CasGc Gc; - CasContainerStrategy Cas(CasConfig, Gc); - Cas.Initialize("test", 65536, 16, false); + GcManager Gc; + CasContainerStrategy Cas(Gc); + Cas.Initialize(TempDir.Path(), "test", 65536, 16, false); const uint64_t TotalSize = Cas.StorageSize().DiskSize; CHECK_EQ(kChunkSize * kChunkCount, TotalSize); @@ -1145,9 +883,9 @@ TEST_CASE("compactcas.compact.totalsize") // Re-open again, this time we should have a snapshot { - CasGc Gc; - CasContainerStrategy Cas(CasConfig, Gc); - Cas.Initialize("test", 65536, 16, false); + GcManager Gc; + CasContainerStrategy Cas(Gc); + Cas.Initialize(TempDir.Path(), "test", 65536, 16, 
false); const uint64_t TotalSize = Cas.StorageSize().DiskSize; CHECK_EQ(kChunkSize * kChunkCount, TotalSize); @@ -1159,13 +897,9 @@ TEST_CASE("compactcas.gc.basic") { ScopedTemporaryDirectory TempDir; - CasStoreConfiguration CasConfig; - CasConfig.RootDirectory = TempDir.Path(); - CreateDirectories(CasConfig.RootDirectory); - - CasGc Gc; - CasContainerStrategy Cas(CasConfig, Gc); - Cas.Initialize("cb", 65536, 1 << 4, true); + GcManager Gc; + CasContainerStrategy Cas(Gc); + Cas.Initialize(TempDir.Path(), "cb", 65536, 1 << 4, true); IoBuffer Chunk = CreateChunk(128); IoHash ChunkHash = IoHash::HashBuffer(Chunk); @@ -1186,16 +920,12 @@ TEST_CASE("compactcas.gc.removefile") { ScopedTemporaryDirectory TempDir; - CasStoreConfiguration CasConfig; - CasConfig.RootDirectory = TempDir.Path(); - CreateDirectories(CasConfig.RootDirectory); - IoBuffer Chunk = CreateChunk(128); IoHash ChunkHash = IoHash::HashBuffer(Chunk); { - CasGc Gc; - CasContainerStrategy Cas(CasConfig, Gc); - Cas.Initialize("cb", 65536, 1 << 4, true); + GcManager Gc; + CasContainerStrategy Cas(Gc); + Cas.Initialize(TempDir.Path(), "cb", 65536, 1 << 4, true); const CasStore::InsertResult InsertResult = Cas.InsertChunk(Chunk, ChunkHash); CHECK(InsertResult.New); @@ -1204,9 +934,9 @@ TEST_CASE("compactcas.gc.removefile") Cas.Flush(); } - CasGc Gc; - CasContainerStrategy Cas(CasConfig, Gc); - Cas.Initialize("cb", 65536, 1 << 4, false); + GcManager Gc; + CasContainerStrategy Cas(Gc); + Cas.Initialize(TempDir.Path(), "cb", 65536, 1 << 4, false); GcContext GcCtx; GcCtx.CollectSmallObjects(true); @@ -1222,13 +952,9 @@ TEST_CASE("compactcas.gc.compact") { ScopedTemporaryDirectory TempDir; - CasStoreConfiguration CasConfig; - CasConfig.RootDirectory = TempDir.Path(); - CreateDirectories(CasConfig.RootDirectory); - - CasGc Gc; - CasContainerStrategy Cas(CasConfig, Gc); - Cas.Initialize("cb", 2048, 1 << 4, true); + GcManager Gc; + CasContainerStrategy Cas(Gc); + Cas.Initialize(TempDir.Path(), "cb", 2048, 1 << 4, true); 
uint64_t ChunkSizes[9] = {128, 541, 1023, 781, 218, 37, 4, 997, 5}; std::vector<IoBuffer> Chunks; @@ -1275,7 +1001,7 @@ TEST_CASE("compactcas.gc.compact") std::vector<IoHash> KeepChunks; KeepChunks.push_back(ChunkHashes[0]); KeepChunks.push_back(ChunkHashes[8]); - GcCtx.ContributeCas(KeepChunks); + GcCtx.AddRetainedCids(KeepChunks); Cas.Flush(); Cas.CollectGarbage(GcCtx); @@ -1308,7 +1034,7 @@ TEST_CASE("compactcas.gc.compact") GcCtx.CollectSmallObjects(true); std::vector<IoHash> KeepChunks; KeepChunks.push_back(ChunkHashes[8]); - GcCtx.ContributeCas(KeepChunks); + GcCtx.AddRetainedCids(KeepChunks); Cas.Flush(); Cas.CollectGarbage(GcCtx); @@ -1342,7 +1068,7 @@ TEST_CASE("compactcas.gc.compact") KeepChunks.push_back(ChunkHashes[1]); KeepChunks.push_back(ChunkHashes[4]); KeepChunks.push_back(ChunkHashes[7]); - GcCtx.ContributeCas(KeepChunks); + GcCtx.AddRetainedCids(KeepChunks); Cas.Flush(); Cas.CollectGarbage(GcCtx); @@ -1377,7 +1103,7 @@ TEST_CASE("compactcas.gc.compact") KeepChunks.push_back(ChunkHashes[6]); KeepChunks.push_back(ChunkHashes[7]); KeepChunks.push_back(ChunkHashes[8]); - GcCtx.ContributeCas(KeepChunks); + GcCtx.AddRetainedCids(KeepChunks); Cas.Flush(); Cas.CollectGarbage(GcCtx); @@ -1414,7 +1140,7 @@ TEST_CASE("compactcas.gc.compact") KeepChunks.push_back(ChunkHashes[4]); KeepChunks.push_back(ChunkHashes[6]); KeepChunks.push_back(ChunkHashes[8]); - GcCtx.ContributeCas(KeepChunks); + GcCtx.AddRetainedCids(KeepChunks); Cas.Flush(); Cas.CollectGarbage(GcCtx); @@ -1476,13 +1202,10 @@ TEST_CASE("compactcas.gc.deleteblockonopen") ChunkHashes.push_back(IoHash::HashBuffer(Chunk.Data(), Chunk.Size())); } - CasStoreConfiguration CasConfig; - CasConfig.RootDirectory = TempDir.Path(); - CreateDirectories(CasConfig.RootDirectory); { - CasGc Gc; - CasContainerStrategy Cas(CasConfig, Gc); - Cas.Initialize("test", 1024, 16, true); + GcManager Gc; + CasContainerStrategy Cas(Gc); + Cas.Initialize(TempDir.Path(), "test", 1024, 16, true); for (size_t i = 0; i < 20; i++) 
{ @@ -1498,7 +1221,7 @@ TEST_CASE("compactcas.gc.deleteblockonopen") { KeepChunks.push_back(ChunkHashes[i]); } - GcCtx.ContributeCas(KeepChunks); + GcCtx.AddRetainedCids(KeepChunks); Cas.Flush(); Cas.CollectGarbage(GcCtx); @@ -1513,9 +1236,9 @@ TEST_CASE("compactcas.gc.deleteblockonopen") } { // Re-open - CasGc Gc; - CasContainerStrategy Cas(CasConfig, Gc); - Cas.Initialize("test", 1024, 16, false); + GcManager Gc; + CasContainerStrategy Cas(Gc); + Cas.Initialize(TempDir.Path(), "test", 1024, 16, false); for (size_t i = 0; i < 20; i += 2) { @@ -1545,13 +1268,9 @@ TEST_CASE("compactcas.gc.handleopeniobuffer") ChunkHashes.push_back(IoHash::HashBuffer(Chunk.Data(), Chunk.Size())); } - CasStoreConfiguration CasConfig; - CasConfig.RootDirectory = TempDir.Path(); - CreateDirectories(CasConfig.RootDirectory); - - CasGc Gc; - CasContainerStrategy Cas(CasConfig, Gc); - Cas.Initialize("test", 1024, 16, true); + GcManager Gc; + CasContainerStrategy Cas(Gc); + Cas.Initialize(TempDir.Path(), "test", 1024, 16, true); for (size_t i = 0; i < 20; i++) { @@ -1574,131 +1293,12 @@ TEST_CASE("compactcas.gc.handleopeniobuffer") CHECK(ChunkHashes[5] == IoHash::HashBuffer(RetainChunk)); } -TEST_CASE("compactcas.legacyconversion") -{ - ScopedTemporaryDirectory TempDir; - - uint64_t ChunkSizes[] = {2041, 1123, 1223, 1239, 341, 1412, 912, 774, 341, 431, 554, 1098, 2048, 339, 561, 16, 16, 2048, 2048}; - size_t ChunkCount = sizeof(ChunkSizes) / sizeof(uint64_t); - size_t SingleBlockSize = 0; - std::vector<IoBuffer> Chunks; - Chunks.reserve(ChunkCount); - for (uint64_t Size : ChunkSizes) - { - Chunks.push_back(CreateChunk(Size)); - SingleBlockSize += Size; - } - - std::vector<IoHash> ChunkHashes; - ChunkHashes.reserve(ChunkCount); - for (const IoBuffer& Chunk : Chunks) - { - ChunkHashes.push_back(IoHash::HashBuffer(Chunk.Data(), Chunk.Size())); - } - - CasStoreConfiguration CasConfig; - CasConfig.RootDirectory = TempDir.Path(); - CreateDirectories(CasConfig.RootDirectory); - - { - CasGc Gc; - 
CasContainerStrategy Cas(CasConfig, Gc); - Cas.Initialize("test", gsl::narrow<uint32_t>(SingleBlockSize * 2), 16, true); - - for (size_t i = 0; i < ChunkCount; i++) - { - CHECK(Cas.InsertChunk(Chunks[i], ChunkHashes[i]).New); - } - - std::vector<IoHash> KeepChunks; - for (size_t i = 0; i < ChunkCount; i += 2) - { - KeepChunks.push_back(ChunkHashes[i]); - } - GcContext GcCtx; - GcCtx.CollectSmallObjects(true); - GcCtx.ContributeCas(KeepChunks); - Cas.Flush(); - Gc.CollectGarbage(GcCtx); - } - - std::filesystem::path BlockPath = BlockStore::GetBlockPath(GetBlocksBasePath(CasConfig.RootDirectory, "test"), 1); - std::filesystem::path LegacyDataPath = GetLegacyDataPath(CasConfig.RootDirectory, "test"); - std::filesystem::rename(BlockPath, LegacyDataPath); - - std::vector<CasDiskIndexEntry> LogEntries; - std::filesystem::path IndexPath = GetIndexPath(CasConfig.RootDirectory, "test"); - if (std::filesystem::is_regular_file(IndexPath)) - { - BasicFile ObjectIndexFile; - ObjectIndexFile.Open(IndexPath, BasicFile::Mode::kRead); - uint64_t Size = ObjectIndexFile.FileSize(); - if (Size >= sizeof(CasDiskIndexHeader)) - { - uint64_t ExpectedEntryCount = (Size - sizeof(sizeof(CasDiskIndexHeader))) / sizeof(CasDiskIndexEntry); - CasDiskIndexHeader Header; - ObjectIndexFile.Read(&Header, sizeof(Header), 0); - if (Header.Magic == CasDiskIndexHeader::ExpectedMagic && Header.Version == CasDiskIndexHeader::CurrentVersion && - Header.PayloadAlignment > 0 && Header.EntryCount == ExpectedEntryCount) - { - LogEntries.resize(Header.EntryCount); - ObjectIndexFile.Read(LogEntries.data(), Header.EntryCount * sizeof(CasDiskIndexEntry), sizeof(CasDiskIndexHeader)); - } - } - ObjectIndexFile.Close(); - std::filesystem::remove(IndexPath); - } - - std::filesystem::path LogPath = GetLogPath(CasConfig.RootDirectory, "test"); - { - TCasLogFile<CasDiskIndexEntry> CasLog; - CasLog.Open(LogPath, CasLogFile::Mode::kRead); - LogEntries.reserve(CasLog.GetLogCount()); - CasLog.Replay([&](const 
CasDiskIndexEntry& Record) { LogEntries.push_back(Record); }, 0); - } - TCasLogFile<LegacyCasDiskIndexEntry> LegacyCasLog; - std::filesystem::path LegacylogPath = GetLegacyLogPath(CasConfig.RootDirectory, "test"); - LegacyCasLog.Open(LegacylogPath, CasLogFile::Mode::kTruncate); - - for (const CasDiskIndexEntry& Entry : LogEntries) - { - BlockStoreLocation Location = Entry.Location.Get(16); - LegacyCasDiskLocation LegacyLocation(Location.Offset, Location.Size); - LegacyCasDiskIndexEntry LegacyEntry = {.Key = Entry.Key, - .Location = LegacyLocation, - .ContentType = Entry.ContentType, - .Flags = Entry.Flags}; - LegacyCasLog.Append(LegacyEntry); - } - LegacyCasLog.Close(); - - std::filesystem::remove_all(CasConfig.RootDirectory / "test"); - - { - CasGc Gc; - CasContainerStrategy Cas(CasConfig, Gc); - Cas.Initialize("test", 2048, 16, false); - - for (size_t i = 0; i < ChunkCount; i += 2) - { - CHECK(Cas.HaveChunk(ChunkHashes[i])); - CHECK(!Cas.HaveChunk(ChunkHashes[i + 1])); - CHECK(ChunkHashes[i] == IoHash::HashBuffer(Cas.FindChunk(ChunkHashes[i]))); - } - } -} - TEST_CASE("compactcas.threadedinsert") { // for (uint32_t i = 0; i < 100; ++i) { ScopedTemporaryDirectory TempDir; - CasStoreConfiguration CasConfig; - CasConfig.RootDirectory = TempDir.Path(); - - CreateDirectories(CasConfig.RootDirectory); - const uint64_t kChunkSize = 1048; const int32_t kChunkCount = 4096; uint64_t ExpectedSize = 0; @@ -1724,9 +1324,9 @@ TEST_CASE("compactcas.threadedinsert") std::atomic<size_t> WorkCompleted = 0; WorkerThreadPool ThreadPool(4); - CasGc Gc; - CasContainerStrategy Cas(CasConfig, Gc); - Cas.Initialize("test", 32768, 16, true); + GcManager Gc; + CasContainerStrategy Cas(Gc); + Cas.Initialize(TempDir.Path(), "test", 32768, 16, true); { for (const auto& Chunk : Chunks) { @@ -1838,10 +1438,10 @@ TEST_CASE("compactcas.threadedinsert") GcContext GcCtx; GcCtx.CollectSmallObjects(true); - GcCtx.ContributeCas(KeepHashes); + GcCtx.AddRetainedCids(KeepHashes); 
Cas.CollectGarbage(GcCtx); - CasChunkSet& Deleted = GcCtx.DeletedCas(); - Deleted.IterateChunks([&GcChunkHashes](const IoHash& ChunkHash) { GcChunkHashes.erase(ChunkHash); }); + const HashKeySet& Deleted = GcCtx.DeletedCids(); + Deleted.IterateHashes([&GcChunkHashes](const IoHash& ChunkHash) { GcChunkHashes.erase(ChunkHash); }); } while (WorkCompleted < NewChunks.size() + Chunks.size()) @@ -1879,10 +1479,10 @@ TEST_CASE("compactcas.threadedinsert") GcContext GcCtx; GcCtx.CollectSmallObjects(true); - GcCtx.ContributeCas(KeepHashes); + GcCtx.AddRetainedCids(KeepHashes); Cas.CollectGarbage(GcCtx); - CasChunkSet& Deleted = GcCtx.DeletedCas(); - Deleted.IterateChunks([&GcChunkHashes](const IoHash& ChunkHash) { GcChunkHashes.erase(ChunkHash); }); + const HashKeySet& Deleted = GcCtx.DeletedCids(); + Deleted.IterateHashes([&GcChunkHashes](const IoHash& ChunkHash) { GcChunkHashes.erase(ChunkHash); }); } { WorkCompleted = 0; @@ -1902,53 +1502,6 @@ TEST_CASE("compactcas.threadedinsert") } } -TEST_CASE("compactcas.migrate.large.data") // * doctest::skip(true)) -{ - if (true) - { - return; - } - const char* BigDataPath = "D:\\zen-data\\dc4-zen-cache-t\\cas"; - std::filesystem::path TobsBasePath = GetBasePath(BigDataPath, "tobs"); - std::filesystem::path SobsBasePath = GetBasePath(BigDataPath, "sobs"); - std::filesystem::remove_all(TobsBasePath); - std::filesystem::remove_all(SobsBasePath); - - CasStoreConfiguration CasConfig; - CasConfig.RootDirectory = BigDataPath; - uint64_t TObsSize = 0; - { - CasGc TobsCasGc; - CasContainerStrategy TobsCas(CasConfig, TobsCasGc); - TobsCas.Initialize("tobs", 1u << 28, 16, false); - TObsSize = TobsCas.StorageSize().DiskSize; - CHECK(TObsSize > 0); - } - - uint64_t SObsSize = 0; - { - CasGc SobsCasGc; - CasContainerStrategy SobsCas(CasConfig, SobsCasGc); - SobsCas.Initialize("sobs", 1u << 30, 4096, false); - SObsSize = SobsCas.StorageSize().DiskSize; - CHECK(SObsSize > 0); - } - - CasGc TobsCasGc; - CasContainerStrategy TobsCas(CasConfig, 
TobsCasGc); - TobsCas.Initialize("tobs", 1u << 28, 16, false); - GcContext TobsGcCtx; - TobsCas.CollectGarbage(TobsGcCtx); - CHECK(TobsCas.StorageSize().DiskSize == TObsSize); - - CasGc SobsCasGc; - CasContainerStrategy SobsCas(CasConfig, SobsCasGc); - SobsCas.Initialize("sobs", 1u << 30, 4096, false); - GcContext SobsGcCtx; - SobsCas.CollectGarbage(SobsGcCtx); - CHECK(SobsCas.StorageSize().DiskSize == SObsSize); -} - #endif void |