aboutsummaryrefslogtreecommitdiff
path: root/zenstore/compactcas.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'zenstore/compactcas.cpp')
-rw-r--r--zenstore/compactcas.cpp745
1 files changed, 149 insertions, 596 deletions
diff --git a/zenstore/compactcas.cpp b/zenstore/compactcas.cpp
index 5aed02e7f..a7fdfa1f5 100644
--- a/zenstore/compactcas.cpp
+++ b/zenstore/compactcas.cpp
@@ -2,13 +2,16 @@
#include "compactcas.h"
-#include <zenstore/cas.h>
+#include "cas.h"
+#include <zencore/compress.h>
#include <zencore/except.h>
#include <zencore/filesystem.h>
#include <zencore/fmtutils.h>
#include <zencore/logging.h>
#include <zencore/scopeguard.h>
+#include <zenstore/scrubcontext.h>
+
#include <gsl/gsl-lite.hpp>
#include <xxhash.h>
@@ -76,94 +79,6 @@ namespace {
return GetBasePath(RootPath, ContainerBaseName) / "blocks";
}
- std::filesystem::path GetLegacyLogPath(const std::filesystem::path& RootPath, const std::string& ContainerBaseName)
- {
- return RootPath / (ContainerBaseName + LogExtension);
- }
-
- std::filesystem::path GetLegacyDataPath(const std::filesystem::path& RootPath, const std::string& ContainerBaseName)
- {
- return RootPath / (ContainerBaseName + ".ucas");
- }
-
- std::filesystem::path GetLegacyIndexPath(const std::filesystem::path& RootPath, const std::string& ContainerBaseName)
- {
- return RootPath / (ContainerBaseName + IndexExtension);
- }
-
- struct LegacyCasDiskLocation
- {
- LegacyCasDiskLocation(uint64_t InOffset, uint64_t InSize)
- {
- ZEN_ASSERT(InOffset <= 0xff'ffff'ffff);
- ZEN_ASSERT(InSize <= 0xff'ffff'ffff);
-
- memcpy(&m_Offset[0], &InOffset, sizeof m_Offset);
- memcpy(&m_Size[0], &InSize, sizeof m_Size);
- }
-
- LegacyCasDiskLocation() = default;
-
- inline uint64_t GetOffset() const
- {
- uint64_t Offset = 0;
- memcpy(&Offset, &m_Offset, sizeof m_Offset);
- return Offset;
- }
-
- inline uint64_t GetSize() const
- {
- uint64_t Size = 0;
- memcpy(&Size, &m_Size, sizeof m_Size);
- return Size;
- }
-
- private:
- uint8_t m_Offset[5];
- uint8_t m_Size[5];
- };
-
- struct LegacyCasDiskIndexEntry
- {
- static const uint8_t kTombstone = 0x01;
-
- IoHash Key;
- LegacyCasDiskLocation Location;
- ZenContentType ContentType = ZenContentType::kUnknownContentType;
- uint8_t Flags = 0;
- };
-
- bool ValidateLegacyEntry(const LegacyCasDiskIndexEntry& Entry, std::string& OutReason)
- {
- if (Entry.Key == IoHash::Zero)
- {
- OutReason = fmt::format("Invalid hash key {}", Entry.Key.ToHexString());
- return false;
- }
- if ((Entry.Flags & ~LegacyCasDiskIndexEntry::kTombstone) != 0)
- {
- OutReason = fmt::format("Invalid flags {} for entry {}", Entry.Flags, Entry.Key.ToHexString());
- return false;
- }
- if (Entry.Flags & LegacyCasDiskIndexEntry::kTombstone)
- {
- return true;
- }
- if (Entry.ContentType != ZenContentType::kUnknownContentType)
- {
- OutReason =
- fmt::format("Invalid content type {} for entry {}", static_cast<uint8_t>(Entry.ContentType), Entry.Key.ToHexString());
- return false;
- }
- uint64_t Size = Entry.Location.GetSize();
- if (Size == 0)
- {
- OutReason = fmt::format("Invalid size {} for entry {}", Size, Entry.Key.ToHexString());
- return false;
- }
- return true;
- }
-
bool ValidateEntry(const CasDiskIndexEntry& Entry, std::string& OutReason)
{
if (Entry.Key == IoHash::Zero)
@@ -199,10 +114,7 @@ namespace {
//////////////////////////////////////////////////////////////////////////
-CasContainerStrategy::CasContainerStrategy(const CasStoreConfiguration& Config, CasGc& Gc)
-: GcStorage(Gc)
-, m_Config(Config)
-, m_Log(logging::Get("containercas"))
+CasContainerStrategy::CasContainerStrategy(GcManager& Gc) : GcStorage(Gc), m_Log(logging::Get("containercas"))
{
}
@@ -211,16 +123,21 @@ CasContainerStrategy::~CasContainerStrategy()
}
void
-CasContainerStrategy::Initialize(const std::string_view ContainerBaseName, uint32_t MaxBlockSize, uint64_t Alignment, bool IsNewStore)
+CasContainerStrategy::Initialize(const std::filesystem::path& RootDirectory,
+ const std::string_view ContainerBaseName,
+ uint32_t MaxBlockSize,
+ uint64_t Alignment,
+ bool IsNewStore)
{
ZEN_ASSERT(IsPow2(Alignment));
ZEN_ASSERT(!m_IsInitialized);
ZEN_ASSERT(MaxBlockSize > 0);
+ m_RootDirectory = RootDirectory;
m_ContainerBaseName = ContainerBaseName;
m_PayloadAlignment = Alignment;
m_MaxBlockSize = MaxBlockSize;
- m_BlocksBasePath = GetBlocksBasePath(m_Config.RootDirectory, m_ContainerBaseName);
+ m_BlocksBasePath = GetBlocksBasePath(m_RootDirectory, m_ContainerBaseName);
OpenContainer(IsNewStore);
@@ -267,6 +184,9 @@ CasContainerStrategy::InsertChunk(const void* ChunkData, size_t ChunkSize, const
CasStore::InsertResult
CasContainerStrategy::InsertChunk(IoBuffer Chunk, const IoHash& ChunkHash)
{
+#if !ZEN_WITH_TESTS
+ ZEN_ASSERT(Chunk.GetContentType() == ZenContentType::kCompressedBinary);
+#endif
return InsertChunk(Chunk.Data(), Chunk.Size(), ChunkHash);
}
@@ -293,7 +213,7 @@ CasContainerStrategy::HaveChunk(const IoHash& ChunkHash)
}
void
-CasContainerStrategy::FilterChunks(CasChunkSet& InOutChunks)
+CasContainerStrategy::FilterChunks(HashKeySet& InOutChunks)
{
// This implementation is good enough for relatively small
// chunk sets (in terms of chunk identifiers), but would
@@ -302,7 +222,7 @@ CasContainerStrategy::FilterChunks(CasChunkSet& InOutChunks)
// we're likely to already have a large proportion of the
// chunks in the set
- InOutChunks.RemoveChunksIf([&](const IoHash& Hash) { return HaveChunk(Hash); });
+ InOutChunks.RemoveHashesIf([&](const IoHash& Hash) { return HaveChunk(Hash); });
}
void
@@ -316,6 +236,7 @@ void
CasContainerStrategy::Scrub(ScrubContext& Ctx)
{
std::vector<IoHash> BadKeys;
+ uint64_t ChunkCount{0}, ChunkBytes{0};
std::vector<BlockStoreLocation> ChunkLocations;
std::vector<IoHash> ChunkIndexToChunkHash;
@@ -337,6 +258,9 @@ CasContainerStrategy::Scrub(ScrubContext& Ctx)
}
const auto ValidateSmallChunk = [&](size_t ChunkIndex, const void* Data, uint64_t Size) {
+ ++ChunkCount;
+ ChunkBytes += Size;
+
const IoHash& Hash = ChunkIndexToChunkHash[ChunkIndex];
if (!Data)
{
@@ -344,66 +268,97 @@ CasContainerStrategy::Scrub(ScrubContext& Ctx)
BadKeys.push_back(Hash);
return;
}
- const IoHash ComputedHash = IoHash::HashBuffer(Data, Size);
- if (ComputedHash != Hash)
+
+ IoBuffer Buffer(IoBuffer::Wrap, Data, Size);
+ if (CompressedBuffer Compressed = CompressedBuffer::FromCompressed(SharedBuffer(Buffer)); Compressed)
+ {
+ if (IoHash::FromBLAKE3(Compressed.GetRawHash()) != Hash)
+ {
+ // Hash mismatch
+ BadKeys.push_back(Hash);
+ return;
+ }
+ return;
+ }
+#if ZEN_WITH_TESTS
+ IoHash ComputedHash = IoHash::HashBuffer(Data, Size);
+ if (ComputedHash == Hash)
{
- // Hash mismatch
- BadKeys.push_back(Hash);
return;
}
+#endif
+ BadKeys.push_back(Hash);
};
const auto ValidateLargeChunk = [&](size_t ChunkIndex, BlockStoreFile& File, uint64_t Offset, uint64_t Size) {
+ ++ChunkCount;
+ ChunkBytes += Size;
+
+ const IoHash& Hash = ChunkIndexToChunkHash[ChunkIndex];
+ IoBuffer Buffer(IoBuffer::BorrowedFile, File.GetBasicFile().Handle(), Offset, Size);
+ // TODO: Add API to verify compressed buffer without having to memorymap the whole file
+ if (CompressedBuffer Compressed = CompressedBuffer::FromCompressed(SharedBuffer(Buffer)); Compressed)
+ {
+ if (IoHash::FromBLAKE3(Compressed.GetRawHash()) != Hash)
+ {
+ // Hash mismatch
+ BadKeys.push_back(Hash);
+ return;
+ }
+ return;
+ }
+#if ZEN_WITH_TESTS
IoHashStream Hasher;
- File.StreamByteRange(Offset, Size, [&](const void* Data, uint64_t Size) { Hasher.Append(Data, Size); });
- IoHash ComputedHash = Hasher.GetHash();
- const IoHash& Hash = ChunkIndexToChunkHash[ChunkIndex];
- if (ComputedHash != Hash)
+ File.StreamByteRange(Offset, Size, [&](const void* Data, size_t Size) { Hasher.Append(Data, Size); });
+ IoHash ComputedHash = Hasher.GetHash();
+ if (ComputedHash == Hash)
{
- // Hash mismatch
- BadKeys.push_back(Hash);
return;
}
+#endif
+ BadKeys.push_back(Hash);
};
m_BlockStore.IterateChunks(ChunkLocations, ValidateSmallChunk, ValidateLargeChunk);
_.ReleaseNow();
- if (BadKeys.empty())
- {
- return;
- }
-
- ZEN_ERROR("Scrubbing found #{} bad chunks in '{}'", BadKeys.size(), m_Config.RootDirectory / m_ContainerBaseName);
+ Ctx.ReportScrubbed(ChunkCount, ChunkBytes);
- if (Ctx.RunRecovery())
+ if (!BadKeys.empty())
{
- // Deal with bad chunks by removing them from our lookup map
+ ZEN_ERROR("Scrubbing found #{} bad chunks in '{}'", BadKeys.size(), m_RootDirectory / m_ContainerBaseName);
- std::vector<CasDiskIndexEntry> LogEntries;
- LogEntries.reserve(BadKeys.size());
+ if (Ctx.RunRecovery())
{
- RwLock::ExclusiveLockScope __(m_LocationMapLock);
- for (const IoHash& ChunkHash : BadKeys)
+ // Deal with bad chunks by removing them from our lookup map
+
+ std::vector<CasDiskIndexEntry> LogEntries;
+ LogEntries.reserve(BadKeys.size());
{
- const auto KeyIt = m_LocationMap.find(ChunkHash);
- if (KeyIt == m_LocationMap.end())
+ RwLock::ExclusiveLockScope __(m_LocationMapLock);
+ for (const IoHash& ChunkHash : BadKeys)
{
- // Might have been GC'd
- continue;
+ const auto KeyIt = m_LocationMap.find(ChunkHash);
+ if (KeyIt == m_LocationMap.end())
+ {
+ // Might have been GC'd
+ continue;
+ }
+ LogEntries.push_back({.Key = KeyIt->first, .Location = KeyIt->second, .Flags = CasDiskIndexEntry::kTombstone});
+ m_LocationMap.erase(KeyIt);
}
- LogEntries.push_back({.Key = KeyIt->first, .Location = KeyIt->second, .Flags = CasDiskIndexEntry::kTombstone});
- m_LocationMap.erase(KeyIt);
}
+ m_CasLog.Append(LogEntries);
}
- m_CasLog.Append(LogEntries);
}
// Let whomever it concerns know about the bad chunks. This could
// be used to invalidate higher level data structures more efficiently
// than a full validation pass might be able to do
- Ctx.ReportBadCasChunks(BadKeys);
+ Ctx.ReportBadCidChunks(BadKeys);
+
+ ZEN_INFO("compact cas scrubbed: {} chunks ({})", ChunkCount, NiceBytes(ChunkBytes));
}
void
@@ -432,7 +387,7 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx)
// do a blocking operation and update the m_LocationMap after each new block is
// written and figuring out the path to the next new block.
- ZEN_INFO("collecting garbage from '{}'", m_Config.RootDirectory / m_ContainerBaseName);
+ ZEN_INFO("collecting garbage from '{}'", m_RootDirectory / m_ContainerBaseName);
uint64_t WriteBlockTimeUs = 0;
uint64_t WriteBlockLongestTimeUs = 0;
@@ -468,7 +423,7 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx)
ChunkLocations.reserve(TotalChunkCount);
ChunkIndexToChunkHash.reserve(TotalChunkCount);
- GcCtx.FilterCas(TotalChunkHashes, [&](const IoHash& ChunkHash, bool Keep) {
+ GcCtx.FilterCids(TotalChunkHashes, [&](const IoHash& ChunkHash, bool Keep) {
auto KeyIt = LocationMap.find(ChunkHash);
const BlockStoreDiskLocation& DiskLocation = KeyIt->second;
BlockStoreLocation Location = DiskLocation.Get(m_PayloadAlignment);
@@ -539,26 +494,26 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx)
},
[&GcCtx]() { return GcCtx.CollectSmallObjects(); });
- GcCtx.DeletedCas(DeletedChunks);
+ GcCtx.AddDeletedCids(DeletedChunks);
}
void
CasContainerStrategy::MakeIndexSnapshot()
{
- ZEN_INFO("write store snapshot for '{}'", m_Config.RootDirectory / m_ContainerBaseName);
+ ZEN_INFO("write store snapshot for '{}'", m_RootDirectory / m_ContainerBaseName);
uint64_t EntryCount = 0;
Stopwatch Timer;
const auto _ = MakeGuard([&] {
ZEN_INFO("wrote store snapshot for '{}' containing #{} entries in {}",
- m_Config.RootDirectory / m_ContainerBaseName,
+ m_RootDirectory / m_ContainerBaseName,
EntryCount,
NiceTimeSpanMs(Timer.GetElapsedTimeMs()));
});
namespace fs = std::filesystem;
- fs::path IndexPath = GetIndexPath(m_Config.RootDirectory, m_ContainerBaseName);
- fs::path TempIndexPath = GetTempIndexPath(m_Config.RootDirectory, m_ContainerBaseName);
+ fs::path IndexPath = GetIndexPath(m_RootDirectory, m_ContainerBaseName);
+ fs::path TempIndexPath = GetTempIndexPath(m_RootDirectory, m_ContainerBaseName);
// Move index away, we keep it if something goes wrong
if (fs::is_regular_file(TempIndexPath))
@@ -629,13 +584,13 @@ uint64_t
CasContainerStrategy::ReadIndexFile()
{
std::vector<CasDiskIndexEntry> Entries;
- std::filesystem::path IndexPath = GetIndexPath(m_Config.RootDirectory, m_ContainerBaseName);
+ std::filesystem::path IndexPath = GetIndexPath(m_RootDirectory, m_ContainerBaseName);
if (std::filesystem::is_regular_file(IndexPath))
{
Stopwatch Timer;
const auto _ = MakeGuard([&] {
ZEN_INFO("read store '{}' index containing #{} entries in {}",
- m_Config.RootDirectory / m_ContainerBaseName,
+ m_RootDirectory / m_ContainerBaseName,
Entries.size(),
NiceTimeSpanMs(Timer.GetElapsedTimeMs()));
});
@@ -682,13 +637,13 @@ uint64_t
CasContainerStrategy::ReadLog(uint64_t SkipEntryCount)
{
std::vector<CasDiskIndexEntry> Entries;
- std::filesystem::path LogPath = GetLogPath(m_Config.RootDirectory, m_ContainerBaseName);
+ std::filesystem::path LogPath = GetLogPath(m_RootDirectory, m_ContainerBaseName);
if (std::filesystem::is_regular_file(LogPath))
{
Stopwatch Timer;
const auto _ = MakeGuard([&] {
ZEN_INFO("read store '{}' log containing #{} entries in {}",
- m_Config.RootDirectory / m_ContainerBaseName,
+ m_RootDirectory / m_ContainerBaseName,
Entries.size(),
NiceTimeSpanMs(Timer.GetElapsedTimeMs()));
});
@@ -727,208 +682,6 @@ CasContainerStrategy::ReadLog(uint64_t SkipEntryCount)
return 0;
}
-uint64_t
-CasContainerStrategy::MigrateLegacyData(bool CleanSource)
-{
- std::filesystem::path LegacyLogPath = GetLegacyLogPath(m_Config.RootDirectory, m_ContainerBaseName);
-
- if (!std::filesystem::is_regular_file(LegacyLogPath) || std::filesystem::file_size(LegacyLogPath) == 0)
- {
- return 0;
- }
-
- ZEN_INFO("migrating store '{}'", m_Config.RootDirectory / m_ContainerBaseName);
-
- std::filesystem::path LegacyDataPath = GetLegacyDataPath(m_Config.RootDirectory, m_ContainerBaseName);
- std::filesystem::path LegacyIndexPath = GetLegacyIndexPath(m_Config.RootDirectory, m_ContainerBaseName);
-
- uint64_t MigratedChunkCount = 0;
- uint32_t MigratedBlockCount = 0;
- Stopwatch MigrationTimer;
- uint64_t TotalSize = 0;
- const auto _ = MakeGuard([&] {
- ZEN_INFO("migrated store '{}' to #{} chunks in #{} blocks in {} ({})",
- m_Config.RootDirectory / m_ContainerBaseName,
- MigratedChunkCount,
- MigratedBlockCount,
- NiceTimeSpanMs(MigrationTimer.GetElapsedTimeMs()),
- NiceBytes(TotalSize));
- });
-
- uint64_t BlockFileSize = 0;
- {
- BasicFile BlockFile;
- BlockFile.Open(LegacyDataPath, CleanSource ? BasicFile::Mode::kWrite : BasicFile::Mode::kRead);
- BlockFileSize = BlockFile.FileSize();
- }
-
- std::unordered_map<IoHash, LegacyCasDiskIndexEntry, IoHash::Hasher> LegacyDiskIndex;
- uint64_t InvalidEntryCount = 0;
-
- TCasLogFile<LegacyCasDiskIndexEntry> LegacyCasLog;
- LegacyCasLog.Open(LegacyLogPath, CleanSource ? CasLogFile::Mode::kWrite : CasLogFile::Mode::kRead);
- {
- Stopwatch Timer;
- const auto __ = MakeGuard([&] {
- ZEN_INFO("read store '{}' legacy log containing #{} entries in {}",
- m_Config.RootDirectory / m_ContainerBaseName,
- LegacyDiskIndex.size(),
- NiceTimeSpanMs(Timer.GetElapsedTimeMs()));
- });
- if (LegacyCasLog.Initialize())
- {
- LegacyDiskIndex.reserve(LegacyCasLog.GetLogCount());
- LegacyCasLog.Replay(
- [&](const LegacyCasDiskIndexEntry& Record) {
- std::string InvalidEntryReason;
- if (Record.Flags & LegacyCasDiskIndexEntry::kTombstone)
- {
- LegacyDiskIndex.erase(Record.Key);
- return;
- }
- if (!ValidateLegacyEntry(Record, InvalidEntryReason))
- {
- ZEN_WARN("skipping invalid entry in '{}', reason: '{}'", LegacyLogPath, InvalidEntryReason);
- InvalidEntryCount++;
- return;
- }
- LegacyDiskIndex.insert_or_assign(Record.Key, Record);
- },
- 0);
-
- std::vector<IoHash> BadEntries;
- for (const auto& Entry : LegacyDiskIndex)
- {
- const LegacyCasDiskIndexEntry& Record(Entry.second);
- if (Record.Location.GetOffset() + Record.Location.GetSize() <= BlockFileSize)
- {
- continue;
- }
- ZEN_WARN("skipping invalid entry in '{}', reason: location is outside of file", LegacyLogPath);
- BadEntries.push_back(Entry.first);
- }
- for (const IoHash& BadHash : BadEntries)
- {
- LegacyDiskIndex.erase(BadHash);
- }
- InvalidEntryCount += BadEntries.size();
- }
- }
-
- if (InvalidEntryCount)
- {
- ZEN_WARN("found #{} invalid entries in '{}'", InvalidEntryCount, m_Config.RootDirectory / m_ContainerBaseName);
- }
-
- if (LegacyDiskIndex.empty())
- {
- LegacyCasLog.Close();
- if (CleanSource)
- {
- // Older versions of CasContainerStrategy expects the legacy files to exist if it can find
- // a CAS manifest and crashes on startup if they don't.
- // In order to not break startup when switching back an older version, lets just reset
- // the legacy data files to zero length.
-
- BasicFile LegacyLog;
- LegacyLog.Open(LegacyLogPath, BasicFile::Mode::kTruncate);
- BasicFile LegacySobs;
- LegacySobs.Open(LegacyDataPath, BasicFile::Mode::kTruncate);
- BasicFile LegacySidx;
- LegacySidx.Open(LegacyIndexPath, BasicFile::Mode::kTruncate);
- }
- return 0;
- }
-
- std::filesystem::path LogPath = GetLogPath(m_Config.RootDirectory, m_ContainerBaseName);
- CreateDirectories(LogPath.parent_path());
- TCasLogFile<CasDiskIndexEntry> CasLog;
- CasLog.Open(LogPath, CasLogFile::Mode::kWrite);
-
- std::unordered_map<size_t, IoHash> ChunkIndexToChunkHash;
- std::vector<BlockStoreLocation> ChunkLocations;
- ChunkIndexToChunkHash.reserve(LegacyDiskIndex.size());
- ChunkLocations.reserve(LegacyDiskIndex.size());
- for (const auto& Entry : LegacyDiskIndex)
- {
- const LegacyCasDiskLocation& Location = Entry.second.Location;
- const IoHash& ChunkHash = Entry.first;
- size_t ChunkIndex = ChunkLocations.size();
- ChunkLocations.push_back({.BlockIndex = 0, .Offset = Location.GetOffset(), .Size = Location.GetSize()});
- ChunkIndexToChunkHash[ChunkIndex] = ChunkHash;
- TotalSize += Location.GetSize();
- }
- m_BlockStore.Split(
- ChunkLocations,
- LegacyDataPath,
- m_BlocksBasePath,
- m_MaxBlockSize,
- BlockStoreDiskLocation::MaxBlockIndex + 1,
- m_PayloadAlignment,
- CleanSource,
- [this, &LegacyDiskIndex, &ChunkIndexToChunkHash, &LegacyCasLog, &CasLog, CleanSource, &MigratedBlockCount, &MigratedChunkCount](
- const BlockStore::MovedChunksArray& MovedChunks) {
- std::vector<CasDiskIndexEntry> LogEntries;
- LogEntries.reserve(MovedChunks.size());
- for (const auto& Entry : MovedChunks)
- {
- size_t ChunkIndex = Entry.first;
- const BlockStoreLocation& NewLocation = Entry.second;
- const IoHash& ChunkHash = ChunkIndexToChunkHash[ChunkIndex];
- const LegacyCasDiskIndexEntry& OldEntry = LegacyDiskIndex[ChunkHash];
- LogEntries.push_back({.Key = ChunkHash,
- .Location = {NewLocation, m_PayloadAlignment},
- .ContentType = OldEntry.ContentType,
- .Flags = OldEntry.Flags});
- }
- for (const CasDiskIndexEntry& Entry : LogEntries)
- {
- m_LocationMap.insert_or_assign(Entry.Key, Entry.Location);
- }
- CasLog.Append(LogEntries);
- CasLog.Flush();
- if (CleanSource)
- {
- std::vector<LegacyCasDiskIndexEntry> LegacyLogEntries;
- LegacyLogEntries.reserve(MovedChunks.size());
- for (const auto& Entry : MovedChunks)
- {
- size_t ChunkIndex = Entry.first;
- const IoHash& ChunkHash = ChunkIndexToChunkHash[ChunkIndex];
- const LegacyCasDiskIndexEntry& OldEntry = LegacyDiskIndex[ChunkHash];
- LegacyLogEntries.push_back(
- LegacyCasDiskIndexEntry{.Key = ChunkHash,
- .Location = OldEntry.Location,
- .ContentType = OldEntry.ContentType,
- .Flags = (uint8_t)(OldEntry.Flags | LegacyCasDiskIndexEntry::kTombstone)});
- }
- LegacyCasLog.Append(LegacyLogEntries);
- LegacyCasLog.Flush();
- }
- MigratedBlockCount++;
- MigratedChunkCount += MovedChunks.size();
- });
-
- LegacyCasLog.Close();
- CasLog.Close();
-
- if (CleanSource)
- {
- // Older versions of CasContainerStrategy expects the legacy files to exist if it can find
- // a CAS manifest and crashes on startup if they don't.
- // In order to not break startup when switching back an older version, lets just reset
- // the legacy data files to zero length.
-
- BasicFile LegacyLog;
- LegacyLog.Open(LegacyLogPath, BasicFile::Mode::kTruncate);
- BasicFile LegacySobs;
- LegacySobs.Open(LegacyDataPath, BasicFile::Mode::kTruncate);
- BasicFile LegacySidx;
- LegacySidx.Open(LegacyIndexPath, BasicFile::Mode::kTruncate);
- }
- return MigratedChunkCount;
-}
-
void
CasContainerStrategy::OpenContainer(bool IsNewStore)
{
@@ -937,25 +690,19 @@ CasContainerStrategy::OpenContainer(bool IsNewStore)
m_LocationMap.clear();
- std::filesystem::path BasePath = GetBasePath(m_Config.RootDirectory, m_ContainerBaseName);
+ std::filesystem::path BasePath = GetBasePath(m_RootDirectory, m_ContainerBaseName);
if (IsNewStore)
{
- std::filesystem::path LegacyDataPath = GetLegacyDataPath(m_Config.RootDirectory, m_ContainerBaseName);
- std::filesystem::path LegacyLogPath = GetLegacyLogPath(m_Config.RootDirectory, m_ContainerBaseName);
-
- std::filesystem::remove(LegacyLogPath);
- std::filesystem::remove(LegacyDataPath);
std::filesystem::remove_all(BasePath);
}
- uint64_t LogPosition = ReadIndexFile();
- uint64_t LogEntryCount = ReadLog(LogPosition);
- uint64_t LegacyLogEntryCount = MigrateLegacyData(true);
+ uint64_t LogPosition = ReadIndexFile();
+ uint64_t LogEntryCount = ReadLog(LogPosition);
CreateDirectories(BasePath);
- std::filesystem::path LogPath = GetLogPath(m_Config.RootDirectory, m_ContainerBaseName);
+ std::filesystem::path LogPath = GetLogPath(m_RootDirectory, m_ContainerBaseName);
m_CasLog.Open(LogPath, CasLogFile::Mode::kWrite);
std::vector<BlockStoreLocation> KnownLocations;
@@ -969,7 +716,7 @@ CasContainerStrategy::OpenContainer(bool IsNewStore)
m_BlockStore.Initialize(m_BlocksBasePath, m_MaxBlockSize, BlockStoreDiskLocation::MaxBlockIndex + 1, KnownLocations);
- if (IsNewStore || ((LogEntryCount + LegacyLogEntryCount) > 0))
+ if (IsNewStore || (LogEntryCount > 0))
{
MakeIndexSnapshot();
}
@@ -1040,18 +787,14 @@ TEST_CASE("compactcas.compact.gc")
{
ScopedTemporaryDirectory TempDir;
- CasStoreConfiguration CasConfig;
- CasConfig.RootDirectory = TempDir.Path();
- CreateDirectories(CasConfig.RootDirectory);
-
const int kIterationCount = 1000;
std::vector<IoHash> Keys(kIterationCount);
{
- CasGc Gc;
- CasContainerStrategy Cas(CasConfig, Gc);
- Cas.Initialize("test", 65536, 16, true);
+ GcManager Gc;
+ CasContainerStrategy Cas(Gc);
+ Cas.Initialize(TempDir.Path(), "test", 65536, 16, true);
for (int i = 0; i < kIterationCount; ++i)
{
@@ -1083,9 +826,9 @@ TEST_CASE("compactcas.compact.gc")
// the original cas store
{
- CasGc Gc;
- CasContainerStrategy Cas(CasConfig, Gc);
- Cas.Initialize("test", 65536, 16, false);
+ GcManager Gc;
+ CasContainerStrategy Cas(Gc);
+ Cas.Initialize(TempDir.Path(), "test", 65536, 16, false);
for (int i = 0; i < kIterationCount; ++i)
{
@@ -1109,18 +852,13 @@ TEST_CASE("compactcas.compact.totalsize")
{
ScopedTemporaryDirectory TempDir;
- CasStoreConfiguration CasConfig;
- CasConfig.RootDirectory = TempDir.Path();
-
- CreateDirectories(CasConfig.RootDirectory);
-
const uint64_t kChunkSize = 1024;
const int32_t kChunkCount = 16;
{
- CasGc Gc;
- CasContainerStrategy Cas(CasConfig, Gc);
- Cas.Initialize("test", 65536, 16, true);
+ GcManager Gc;
+ CasContainerStrategy Cas(Gc);
+ Cas.Initialize(TempDir.Path(), "test", 65536, 16, true);
for (int32_t Idx = 0; Idx < kChunkCount; ++Idx)
{
@@ -1135,9 +873,9 @@ TEST_CASE("compactcas.compact.totalsize")
}
{
- CasGc Gc;
- CasContainerStrategy Cas(CasConfig, Gc);
- Cas.Initialize("test", 65536, 16, false);
+ GcManager Gc;
+ CasContainerStrategy Cas(Gc);
+ Cas.Initialize(TempDir.Path(), "test", 65536, 16, false);
const uint64_t TotalSize = Cas.StorageSize().DiskSize;
CHECK_EQ(kChunkSize * kChunkCount, TotalSize);
@@ -1145,9 +883,9 @@ TEST_CASE("compactcas.compact.totalsize")
// Re-open again, this time we should have a snapshot
{
- CasGc Gc;
- CasContainerStrategy Cas(CasConfig, Gc);
- Cas.Initialize("test", 65536, 16, false);
+ GcManager Gc;
+ CasContainerStrategy Cas(Gc);
+ Cas.Initialize(TempDir.Path(), "test", 65536, 16, false);
const uint64_t TotalSize = Cas.StorageSize().DiskSize;
CHECK_EQ(kChunkSize * kChunkCount, TotalSize);
@@ -1159,13 +897,9 @@ TEST_CASE("compactcas.gc.basic")
{
ScopedTemporaryDirectory TempDir;
- CasStoreConfiguration CasConfig;
- CasConfig.RootDirectory = TempDir.Path();
- CreateDirectories(CasConfig.RootDirectory);
-
- CasGc Gc;
- CasContainerStrategy Cas(CasConfig, Gc);
- Cas.Initialize("cb", 65536, 1 << 4, true);
+ GcManager Gc;
+ CasContainerStrategy Cas(Gc);
+ Cas.Initialize(TempDir.Path(), "cb", 65536, 1 << 4, true);
IoBuffer Chunk = CreateChunk(128);
IoHash ChunkHash = IoHash::HashBuffer(Chunk);
@@ -1186,16 +920,12 @@ TEST_CASE("compactcas.gc.removefile")
{
ScopedTemporaryDirectory TempDir;
- CasStoreConfiguration CasConfig;
- CasConfig.RootDirectory = TempDir.Path();
- CreateDirectories(CasConfig.RootDirectory);
-
IoBuffer Chunk = CreateChunk(128);
IoHash ChunkHash = IoHash::HashBuffer(Chunk);
{
- CasGc Gc;
- CasContainerStrategy Cas(CasConfig, Gc);
- Cas.Initialize("cb", 65536, 1 << 4, true);
+ GcManager Gc;
+ CasContainerStrategy Cas(Gc);
+ Cas.Initialize(TempDir.Path(), "cb", 65536, 1 << 4, true);
const CasStore::InsertResult InsertResult = Cas.InsertChunk(Chunk, ChunkHash);
CHECK(InsertResult.New);
@@ -1204,9 +934,9 @@ TEST_CASE("compactcas.gc.removefile")
Cas.Flush();
}
- CasGc Gc;
- CasContainerStrategy Cas(CasConfig, Gc);
- Cas.Initialize("cb", 65536, 1 << 4, false);
+ GcManager Gc;
+ CasContainerStrategy Cas(Gc);
+ Cas.Initialize(TempDir.Path(), "cb", 65536, 1 << 4, false);
GcContext GcCtx;
GcCtx.CollectSmallObjects(true);
@@ -1222,13 +952,9 @@ TEST_CASE("compactcas.gc.compact")
{
ScopedTemporaryDirectory TempDir;
- CasStoreConfiguration CasConfig;
- CasConfig.RootDirectory = TempDir.Path();
- CreateDirectories(CasConfig.RootDirectory);
-
- CasGc Gc;
- CasContainerStrategy Cas(CasConfig, Gc);
- Cas.Initialize("cb", 2048, 1 << 4, true);
+ GcManager Gc;
+ CasContainerStrategy Cas(Gc);
+ Cas.Initialize(TempDir.Path(), "cb", 2048, 1 << 4, true);
uint64_t ChunkSizes[9] = {128, 541, 1023, 781, 218, 37, 4, 997, 5};
std::vector<IoBuffer> Chunks;
@@ -1275,7 +1001,7 @@ TEST_CASE("compactcas.gc.compact")
std::vector<IoHash> KeepChunks;
KeepChunks.push_back(ChunkHashes[0]);
KeepChunks.push_back(ChunkHashes[8]);
- GcCtx.ContributeCas(KeepChunks);
+ GcCtx.AddRetainedCids(KeepChunks);
Cas.Flush();
Cas.CollectGarbage(GcCtx);
@@ -1308,7 +1034,7 @@ TEST_CASE("compactcas.gc.compact")
GcCtx.CollectSmallObjects(true);
std::vector<IoHash> KeepChunks;
KeepChunks.push_back(ChunkHashes[8]);
- GcCtx.ContributeCas(KeepChunks);
+ GcCtx.AddRetainedCids(KeepChunks);
Cas.Flush();
Cas.CollectGarbage(GcCtx);
@@ -1342,7 +1068,7 @@ TEST_CASE("compactcas.gc.compact")
KeepChunks.push_back(ChunkHashes[1]);
KeepChunks.push_back(ChunkHashes[4]);
KeepChunks.push_back(ChunkHashes[7]);
- GcCtx.ContributeCas(KeepChunks);
+ GcCtx.AddRetainedCids(KeepChunks);
Cas.Flush();
Cas.CollectGarbage(GcCtx);
@@ -1377,7 +1103,7 @@ TEST_CASE("compactcas.gc.compact")
KeepChunks.push_back(ChunkHashes[6]);
KeepChunks.push_back(ChunkHashes[7]);
KeepChunks.push_back(ChunkHashes[8]);
- GcCtx.ContributeCas(KeepChunks);
+ GcCtx.AddRetainedCids(KeepChunks);
Cas.Flush();
Cas.CollectGarbage(GcCtx);
@@ -1414,7 +1140,7 @@ TEST_CASE("compactcas.gc.compact")
KeepChunks.push_back(ChunkHashes[4]);
KeepChunks.push_back(ChunkHashes[6]);
KeepChunks.push_back(ChunkHashes[8]);
- GcCtx.ContributeCas(KeepChunks);
+ GcCtx.AddRetainedCids(KeepChunks);
Cas.Flush();
Cas.CollectGarbage(GcCtx);
@@ -1476,13 +1202,10 @@ TEST_CASE("compactcas.gc.deleteblockonopen")
ChunkHashes.push_back(IoHash::HashBuffer(Chunk.Data(), Chunk.Size()));
}
- CasStoreConfiguration CasConfig;
- CasConfig.RootDirectory = TempDir.Path();
- CreateDirectories(CasConfig.RootDirectory);
{
- CasGc Gc;
- CasContainerStrategy Cas(CasConfig, Gc);
- Cas.Initialize("test", 1024, 16, true);
+ GcManager Gc;
+ CasContainerStrategy Cas(Gc);
+ Cas.Initialize(TempDir.Path(), "test", 1024, 16, true);
for (size_t i = 0; i < 20; i++)
{
@@ -1498,7 +1221,7 @@ TEST_CASE("compactcas.gc.deleteblockonopen")
{
KeepChunks.push_back(ChunkHashes[i]);
}
- GcCtx.ContributeCas(KeepChunks);
+ GcCtx.AddRetainedCids(KeepChunks);
Cas.Flush();
Cas.CollectGarbage(GcCtx);
@@ -1513,9 +1236,9 @@ TEST_CASE("compactcas.gc.deleteblockonopen")
}
{
// Re-open
- CasGc Gc;
- CasContainerStrategy Cas(CasConfig, Gc);
- Cas.Initialize("test", 1024, 16, false);
+ GcManager Gc;
+ CasContainerStrategy Cas(Gc);
+ Cas.Initialize(TempDir.Path(), "test", 1024, 16, false);
for (size_t i = 0; i < 20; i += 2)
{
@@ -1545,13 +1268,9 @@ TEST_CASE("compactcas.gc.handleopeniobuffer")
ChunkHashes.push_back(IoHash::HashBuffer(Chunk.Data(), Chunk.Size()));
}
- CasStoreConfiguration CasConfig;
- CasConfig.RootDirectory = TempDir.Path();
- CreateDirectories(CasConfig.RootDirectory);
-
- CasGc Gc;
- CasContainerStrategy Cas(CasConfig, Gc);
- Cas.Initialize("test", 1024, 16, true);
+ GcManager Gc;
+ CasContainerStrategy Cas(Gc);
+ Cas.Initialize(TempDir.Path(), "test", 1024, 16, true);
for (size_t i = 0; i < 20; i++)
{
@@ -1574,131 +1293,12 @@ TEST_CASE("compactcas.gc.handleopeniobuffer")
CHECK(ChunkHashes[5] == IoHash::HashBuffer(RetainChunk));
}
-TEST_CASE("compactcas.legacyconversion")
-{
- ScopedTemporaryDirectory TempDir;
-
- uint64_t ChunkSizes[] = {2041, 1123, 1223, 1239, 341, 1412, 912, 774, 341, 431, 554, 1098, 2048, 339, 561, 16, 16, 2048, 2048};
- size_t ChunkCount = sizeof(ChunkSizes) / sizeof(uint64_t);
- size_t SingleBlockSize = 0;
- std::vector<IoBuffer> Chunks;
- Chunks.reserve(ChunkCount);
- for (uint64_t Size : ChunkSizes)
- {
- Chunks.push_back(CreateChunk(Size));
- SingleBlockSize += Size;
- }
-
- std::vector<IoHash> ChunkHashes;
- ChunkHashes.reserve(ChunkCount);
- for (const IoBuffer& Chunk : Chunks)
- {
- ChunkHashes.push_back(IoHash::HashBuffer(Chunk.Data(), Chunk.Size()));
- }
-
- CasStoreConfiguration CasConfig;
- CasConfig.RootDirectory = TempDir.Path();
- CreateDirectories(CasConfig.RootDirectory);
-
- {
- CasGc Gc;
- CasContainerStrategy Cas(CasConfig, Gc);
- Cas.Initialize("test", gsl::narrow<uint32_t>(SingleBlockSize * 2), 16, true);
-
- for (size_t i = 0; i < ChunkCount; i++)
- {
- CHECK(Cas.InsertChunk(Chunks[i], ChunkHashes[i]).New);
- }
-
- std::vector<IoHash> KeepChunks;
- for (size_t i = 0; i < ChunkCount; i += 2)
- {
- KeepChunks.push_back(ChunkHashes[i]);
- }
- GcContext GcCtx;
- GcCtx.CollectSmallObjects(true);
- GcCtx.ContributeCas(KeepChunks);
- Cas.Flush();
- Gc.CollectGarbage(GcCtx);
- }
-
- std::filesystem::path BlockPath = BlockStore::GetBlockPath(GetBlocksBasePath(CasConfig.RootDirectory, "test"), 1);
- std::filesystem::path LegacyDataPath = GetLegacyDataPath(CasConfig.RootDirectory, "test");
- std::filesystem::rename(BlockPath, LegacyDataPath);
-
- std::vector<CasDiskIndexEntry> LogEntries;
- std::filesystem::path IndexPath = GetIndexPath(CasConfig.RootDirectory, "test");
- if (std::filesystem::is_regular_file(IndexPath))
- {
- BasicFile ObjectIndexFile;
- ObjectIndexFile.Open(IndexPath, BasicFile::Mode::kRead);
- uint64_t Size = ObjectIndexFile.FileSize();
- if (Size >= sizeof(CasDiskIndexHeader))
- {
- uint64_t ExpectedEntryCount = (Size - sizeof(sizeof(CasDiskIndexHeader))) / sizeof(CasDiskIndexEntry);
- CasDiskIndexHeader Header;
- ObjectIndexFile.Read(&Header, sizeof(Header), 0);
- if (Header.Magic == CasDiskIndexHeader::ExpectedMagic && Header.Version == CasDiskIndexHeader::CurrentVersion &&
- Header.PayloadAlignment > 0 && Header.EntryCount == ExpectedEntryCount)
- {
- LogEntries.resize(Header.EntryCount);
- ObjectIndexFile.Read(LogEntries.data(), Header.EntryCount * sizeof(CasDiskIndexEntry), sizeof(CasDiskIndexHeader));
- }
- }
- ObjectIndexFile.Close();
- std::filesystem::remove(IndexPath);
- }
-
- std::filesystem::path LogPath = GetLogPath(CasConfig.RootDirectory, "test");
- {
- TCasLogFile<CasDiskIndexEntry> CasLog;
- CasLog.Open(LogPath, CasLogFile::Mode::kRead);
- LogEntries.reserve(CasLog.GetLogCount());
- CasLog.Replay([&](const CasDiskIndexEntry& Record) { LogEntries.push_back(Record); }, 0);
- }
- TCasLogFile<LegacyCasDiskIndexEntry> LegacyCasLog;
- std::filesystem::path LegacylogPath = GetLegacyLogPath(CasConfig.RootDirectory, "test");
- LegacyCasLog.Open(LegacylogPath, CasLogFile::Mode::kTruncate);
-
- for (const CasDiskIndexEntry& Entry : LogEntries)
- {
- BlockStoreLocation Location = Entry.Location.Get(16);
- LegacyCasDiskLocation LegacyLocation(Location.Offset, Location.Size);
- LegacyCasDiskIndexEntry LegacyEntry = {.Key = Entry.Key,
- .Location = LegacyLocation,
- .ContentType = Entry.ContentType,
- .Flags = Entry.Flags};
- LegacyCasLog.Append(LegacyEntry);
- }
- LegacyCasLog.Close();
-
- std::filesystem::remove_all(CasConfig.RootDirectory / "test");
-
- {
- CasGc Gc;
- CasContainerStrategy Cas(CasConfig, Gc);
- Cas.Initialize("test", 2048, 16, false);
-
- for (size_t i = 0; i < ChunkCount; i += 2)
- {
- CHECK(Cas.HaveChunk(ChunkHashes[i]));
- CHECK(!Cas.HaveChunk(ChunkHashes[i + 1]));
- CHECK(ChunkHashes[i] == IoHash::HashBuffer(Cas.FindChunk(ChunkHashes[i])));
- }
- }
-}
-
TEST_CASE("compactcas.threadedinsert")
{
// for (uint32_t i = 0; i < 100; ++i)
{
ScopedTemporaryDirectory TempDir;
- CasStoreConfiguration CasConfig;
- CasConfig.RootDirectory = TempDir.Path();
-
- CreateDirectories(CasConfig.RootDirectory);
-
const uint64_t kChunkSize = 1048;
const int32_t kChunkCount = 4096;
uint64_t ExpectedSize = 0;
@@ -1724,9 +1324,9 @@ TEST_CASE("compactcas.threadedinsert")
std::atomic<size_t> WorkCompleted = 0;
WorkerThreadPool ThreadPool(4);
- CasGc Gc;
- CasContainerStrategy Cas(CasConfig, Gc);
- Cas.Initialize("test", 32768, 16, true);
+ GcManager Gc;
+ CasContainerStrategy Cas(Gc);
+ Cas.Initialize(TempDir.Path(), "test", 32768, 16, true);
{
for (const auto& Chunk : Chunks)
{
@@ -1838,10 +1438,10 @@ TEST_CASE("compactcas.threadedinsert")
GcContext GcCtx;
GcCtx.CollectSmallObjects(true);
- GcCtx.ContributeCas(KeepHashes);
+ GcCtx.AddRetainedCids(KeepHashes);
Cas.CollectGarbage(GcCtx);
- CasChunkSet& Deleted = GcCtx.DeletedCas();
- Deleted.IterateChunks([&GcChunkHashes](const IoHash& ChunkHash) { GcChunkHashes.erase(ChunkHash); });
+ const HashKeySet& Deleted = GcCtx.DeletedCids();
+ Deleted.IterateHashes([&GcChunkHashes](const IoHash& ChunkHash) { GcChunkHashes.erase(ChunkHash); });
}
while (WorkCompleted < NewChunks.size() + Chunks.size())
@@ -1879,10 +1479,10 @@ TEST_CASE("compactcas.threadedinsert")
GcContext GcCtx;
GcCtx.CollectSmallObjects(true);
- GcCtx.ContributeCas(KeepHashes);
+ GcCtx.AddRetainedCids(KeepHashes);
Cas.CollectGarbage(GcCtx);
- CasChunkSet& Deleted = GcCtx.DeletedCas();
- Deleted.IterateChunks([&GcChunkHashes](const IoHash& ChunkHash) { GcChunkHashes.erase(ChunkHash); });
+ const HashKeySet& Deleted = GcCtx.DeletedCids();
+ Deleted.IterateHashes([&GcChunkHashes](const IoHash& ChunkHash) { GcChunkHashes.erase(ChunkHash); });
}
{
WorkCompleted = 0;
@@ -1902,53 +1502,6 @@ TEST_CASE("compactcas.threadedinsert")
}
}
-TEST_CASE("compactcas.migrate.large.data") // * doctest::skip(true))
-{
- if (true)
- {
- return;
- }
- const char* BigDataPath = "D:\\zen-data\\dc4-zen-cache-t\\cas";
- std::filesystem::path TobsBasePath = GetBasePath(BigDataPath, "tobs");
- std::filesystem::path SobsBasePath = GetBasePath(BigDataPath, "sobs");
- std::filesystem::remove_all(TobsBasePath);
- std::filesystem::remove_all(SobsBasePath);
-
- CasStoreConfiguration CasConfig;
- CasConfig.RootDirectory = BigDataPath;
- uint64_t TObsSize = 0;
- {
- CasGc TobsCasGc;
- CasContainerStrategy TobsCas(CasConfig, TobsCasGc);
- TobsCas.Initialize("tobs", 1u << 28, 16, false);
- TObsSize = TobsCas.StorageSize().DiskSize;
- CHECK(TObsSize > 0);
- }
-
- uint64_t SObsSize = 0;
- {
- CasGc SobsCasGc;
- CasContainerStrategy SobsCas(CasConfig, SobsCasGc);
- SobsCas.Initialize("sobs", 1u << 30, 4096, false);
- SObsSize = SobsCas.StorageSize().DiskSize;
- CHECK(SObsSize > 0);
- }
-
- CasGc TobsCasGc;
- CasContainerStrategy TobsCas(CasConfig, TobsCasGc);
- TobsCas.Initialize("tobs", 1u << 28, 16, false);
- GcContext TobsGcCtx;
- TobsCas.CollectGarbage(TobsGcCtx);
- CHECK(TobsCas.StorageSize().DiskSize == TObsSize);
-
- CasGc SobsCasGc;
- CasContainerStrategy SobsCas(CasConfig, SobsCasGc);
- SobsCas.Initialize("sobs", 1u << 30, 4096, false);
- GcContext SobsGcCtx;
- SobsCas.CollectGarbage(SobsGcCtx);
- CHECK(SobsCas.StorageSize().DiskSize == SObsSize);
-}
-
#endif
void