aboutsummaryrefslogtreecommitdiff
path: root/zenstore/filecas.cpp
diff options
context:
space:
mode:
author: Stefan Boberg <[email protected]> 2021-10-21 21:13:13 +0200
committer: Stefan Boberg <[email protected]> 2021-10-21 21:13:13 +0200
commit: cb1a2f52e37d9d92a908a8761a7f69d6d33cc4b1 (patch)
tree: a96e89cfc076d8e8a2f1ceca5d3985cdd3ba0c37 /zenstore/filecas.cpp
parent: Removed accidentally committed test code (diff)
download: zen-cb1a2f52e37d9d92a908a8761a7f69d6d33cc4b1.tar.xz
zen-cb1a2f52e37d9d92a908a8761a7f69d6d33cc4b1.zip
filecas: Added commit log, chunk gc
Diffstat (limited to 'zenstore/filecas.cpp')
-rw-r--r-- zenstore/filecas.cpp 95
1 files changed, 89 insertions, 6 deletions
diff --git a/zenstore/filecas.cpp b/zenstore/filecas.cpp
index 9cb6e5c79..8c4df4029 100644
--- a/zenstore/filecas.cpp
+++ b/zenstore/filecas.cpp
@@ -70,7 +70,10 @@ FileCasStrategy::ShardingHelper::ShardingHelper(const std::filesystem::path& Roo
//////////////////////////////////////////////////////////////////////////
-FileCasStrategy::FileCasStrategy(const CasStoreConfiguration& Config, CasGc& Gc) : GcStorage(Gc), m_Config(Config), m_Log(logging::Get("filecas"))
+FileCasStrategy::FileCasStrategy(const CasStoreConfiguration& Config, CasGc& Gc)
+: GcStorage(Gc)
+, m_Config(Config)
+, m_Log(logging::Get("filecas"))
{
}
@@ -78,9 +81,23 @@ FileCasStrategy::~FileCasStrategy()
{
}
+// Prepares the strategy for use: creates the configured root directory, opens
+// the "cas.ulog" log inside it and replays the log once.
+//
+// IsNewStore is forwarded unchanged to m_CasLog.Open().
+//
+// Operations guarded by ZEN_ASSERT(m_IsInitialized) require this to have run.
+void
+FileCasStrategy::Initialize(bool IsNewStore)
+{
+    CreateDirectories(m_Config.RootDirectory);
+
+    m_CasLog.Open(m_Config.RootDirectory / "cas.ulog", IsNewStore);
+
+    // The replay callback intentionally ignores every entry for now; the
+    // parameter is left unnamed so it does not trigger unused-parameter warnings.
+    m_CasLog.Replay([](const FileCasIndexEntry&) {});
+
+    // Flag the store as ready only after every step above has completed, so a
+    // step that fails by throwing cannot leave a half-initialized store marked usable.
+    m_IsInitialized = true;
+}
+
CasStore::InsertResult
FileCasStrategy::InsertChunk(IoBuffer Chunk, const IoHash& ChunkHash)
{
+ ZEN_ASSERT(m_IsInitialized);
+
// File-based chunks have special case handling whereby we move the file into
// place in the file store directory, thus avoiding unnecessary copying
@@ -212,6 +229,8 @@ FileCasStrategy::InsertChunk(IoBuffer Chunk, const IoHash& ChunkHash)
if (Success)
{
+ m_CasLog.Append({.Key = ChunkHash, .Size = Chunk.Size()});
+
return CasStore::InsertResult{.New = true};
}
@@ -237,6 +256,8 @@ FileCasStrategy::InsertChunk(IoBuffer Chunk, const IoHash& ChunkHash)
CasStore::InsertResult
FileCasStrategy::InsertChunk(const void* const ChunkData, const size_t ChunkSize, const IoHash& ChunkHash)
{
+ ZEN_ASSERT(m_IsInitialized);
+
ShardingHelper Name(m_Config.RootDirectory.c_str(), ChunkHash);
// See if file already exists
@@ -309,12 +330,16 @@ FileCasStrategy::InsertChunk(const void* const ChunkData, const size_t ChunkSize
// *after* the lock is released due to the initialization order
PayloadFile.Close();
+ m_CasLog.Append({.Key = ChunkHash, .Size = ChunkSize});
+
return {.New = true};
}
IoBuffer
FileCasStrategy::FindChunk(const IoHash& ChunkHash)
{
+ ZEN_ASSERT(m_IsInitialized);
+
ShardingHelper Name(m_Config.RootDirectory.c_str(), ChunkHash);
RwLock::SharedLockScope _(LockForHash(ChunkHash));
@@ -325,6 +350,8 @@ FileCasStrategy::FindChunk(const IoHash& ChunkHash)
bool
FileCasStrategy::HaveChunk(const IoHash& ChunkHash)
{
+ ZEN_ASSERT(m_IsInitialized);
+
ShardingHelper Name(m_Config.RootDirectory.c_str(), ChunkHash);
RwLock::SharedLockScope _(LockForHash(ChunkHash));
@@ -345,11 +372,18 @@ FileCasStrategy::DeleteChunk(const IoHash& ChunkHash, std::error_code& Ec)
ZEN_DEBUG("deleting CAS payload file '{}'", WideToUtf8(Name.ShardedPath));
std::filesystem::remove(Name.ShardedPath.c_str(), Ec);
+
+ if (!Ec)
+ {
+ m_CasLog.Append({.Key = ChunkHash, .Size = ~(0ull)});
+ }
}
void
FileCasStrategy::FilterChunks(CasChunkSet& InOutChunks)
{
+ ZEN_ASSERT(m_IsInitialized);
+
// NOTE: it's not a problem now, but in the future if a GC should happen while this
// is in flight, the result could be wrong since chunks could go away in the meantime.
//
@@ -364,6 +398,8 @@ FileCasStrategy::FilterChunks(CasChunkSet& InOutChunks)
void
FileCasStrategy::IterateChunks(std::function<void(const IoHash& Hash, BasicFile& PayloadFile)>&& Callback)
{
+ ZEN_ASSERT(m_IsInitialized);
+
struct Visitor : public FileSystemTraversal::TreeVisitor
{
Visitor(const std::filesystem::path& RootDir) : RootDirectory(RootDir) {}
@@ -435,6 +471,8 @@ FileCasStrategy::Flush()
void
FileCasStrategy::Scrub(ScrubContext& Ctx)
{
+ ZEN_ASSERT(m_IsInitialized);
+
std::vector<IoHash> BadHashes;
std::atomic<uint64_t> ChunkCount{0}, ChunkBytes{0};
@@ -483,7 +521,53 @@ FileCasStrategy::Scrub(ScrubContext& Ctx)
+// Deletes every chunk file that the GC context does not ask to keep.
+//
+// Visits all payload files through IterateChunks(), offers each hash to
+// GcCtx.FilterCas() and queues the chunk for deletion when the keep-callback
+// was never invoked for it. Queued chunks are then removed via DeleteChunk();
+// a failed deletion is logged as a warning and does not stop the sweep.
void
FileCasStrategy::CollectGarbage(GcContext& GcCtx)
{
- ZEN_UNUSED(GcCtx);
+    ZEN_ASSERT(m_IsInitialized);
+
+    ZEN_INFO("collecting garbage from {}", m_Config.RootDirectory);
+
+    std::vector<IoHash>   ChunksToDelete;
+    std::atomic<uint64_t> ChunksToDeleteBytes{0};
+    std::atomic<uint64_t> ChunkCount{0}, ChunkBytes{0};
+
+    // Scratch vector reused for every chunk so FilterCas() can be fed one hash at a time
+    std::vector<IoHash> CandidateCas;
+
+    // NOTE(review): the counters are atomic but ChunksToDelete/CandidateCas are plain
+    // vectors - this is only safe if IterateChunks() invokes the callback serially; confirm.
+    IterateChunks([&](const IoHash& Hash, BasicFile& Payload) {
+        bool KeepThis = false;
+        CandidateCas.clear();
+        CandidateCas.push_back(Hash);
+
+        // The callback argument is unnamed: only the fact that it fired matters, and
+        // naming it 'Hash' would shadow the enclosing lambda's parameter.
+        GcCtx.FilterCas(CandidateCas, [&](const IoHash&) { KeepThis = true; });
+
+        const uint64_t FileSize = Payload.FileSize();
+
+        if (!KeepThis)
+        {
+            ChunksToDelete.push_back(Hash);
+            ChunksToDeleteBytes.fetch_add(FileSize);
+        }
+
+        ++ChunkCount;
+        ChunkBytes.fetch_add(FileSize);
+    });
+
+    ZEN_INFO("file CAS gc scanned: {} chunks ({})", ChunkCount.load(), NiceBytes(ChunkBytes));
+
+    if (ChunksToDelete.empty())
+    {
+        return;
+    }
+
+    // Report the number of chunks actually queued for deletion, not the total scanned
+    ZEN_INFO("deleting file CAS garbage: {} chunks ({})", ChunksToDelete.size(), NiceBytes(ChunksToDeleteBytes));
+
+    for (const IoHash& Hash : ChunksToDelete)
+    {
+        std::error_code Ec;
+        DeleteChunk(Hash, Ec);
+
+        if (Ec)
+        {
+            ZEN_WARN("failed to delete file for chunk {}: '{}'", Hash, Ec.message());
+        }
+    }
}
//////////////////////////////////////////////////////////////////////////
@@ -503,6 +587,7 @@ TEST_CASE("cas.file.move")
CasConfig.RootDirectory = TempDir.Path() / "cas";
FileCasStrategy FileCas(CasConfig, Gc);
+ FileCas.Initialize(/* IsNewStore */true);
{
std::filesystem::path Payload1Path{TempDir.Path() / "payload_1"};
@@ -577,12 +662,12 @@ TEST_CASE("cas.file.gc")
// specifying an absolute path here can be helpful when using procmon to dig into things
ScopedTemporaryDirectory TempDir; // {"d:\\filecas_testdir"};
- CasGc Gc;
-
CasStoreConfiguration CasConfig;
CasConfig.RootDirectory = TempDir.Path() / "cas";
+ CasGc Gc;
FileCasStrategy FileCas(CasConfig, Gc);
+ FileCas.Initialize(/* IsNewStore */ true);
for (int i = 0; i < 1000; ++i)
{
@@ -594,11 +679,9 @@ TEST_CASE("cas.file.gc")
IoHash Hash = HashBuffer(ObjBuffer);
FileCas.InsertChunk(ObjBuffer, Hash);
- ;
}
GcContext Ctx;
-
FileCas.CollectGarbage(Ctx);
}