aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPer Larsson <[email protected]>2021-12-01 16:17:30 +0100
committerPer Larsson <[email protected]>2021-12-01 16:17:30 +0100
commit9045ebeb0f1bf4290013749482a8ee8f9c007088 (patch)
treeb9f7345ea400e8e8c3d1d493dd23299442593cb6
parentAdded CacheStore and CAS store sizes to status endpoint. (diff)
downloadzen-9045ebeb0f1bf4290013749482a8ee8f9c007088.tar.xz
zen-9045ebeb0f1bf4290013749482a8ee8f9c007088.zip
Added naive container CAS GC support.
-rw-r--r--zenserver/cache/structuredcache.cpp1
-rw-r--r--zenstore/CAS.cpp4
-rw-r--r--zenstore/compactcas.cpp228
-rw-r--r--zenstore/compactcas.h14
-rw-r--r--zenstore/gc.cpp17
-rw-r--r--zenstore/include/zenstore/gc.h3
6 files changed, 221 insertions, 46 deletions
diff --git a/zenserver/cache/structuredcache.cpp b/zenserver/cache/structuredcache.cpp
index fe3f44e00..cf3915363 100644
--- a/zenserver/cache/structuredcache.cpp
+++ b/zenserver/cache/structuredcache.cpp
@@ -1201,7 +1201,6 @@ HttpStructuredCacheService::HandleStatsRequest(zen::HttpServerRequest& Request)
Cbo << "memory" << CacheSize.MemorySize;
Cbo.EndObject();
Cbo << "upstream_ratio" << (HitCount > 0 ? (double(UpstreamHitCount) / double(HitCount)) : 0.0);
- Cbo << "cas_tiny_size" << CasSize.TinySize;
Cbo << "hits" << HitCount << "misses" << MissCount;
Cbo << "hit_ratio" << (TotalCount > 0 ? (double(HitCount) / double(TotalCount)) : 0.0);
Cbo << "upstream_hits" << m_CacheStats.UpstreamHitCount;
diff --git a/zenstore/CAS.cpp b/zenstore/CAS.cpp
index d2ff1514e..a0d47c213 100644
--- a/zenstore/CAS.cpp
+++ b/zenstore/CAS.cpp
@@ -125,7 +125,7 @@ private:
void UpdateManifest();
};
-CasImpl::CasImpl(CasGc& Gc) : m_TinyStrategy(m_Config), m_SmallStrategy(m_Config), m_LargeStrategy(m_Config, Gc)
+CasImpl::CasImpl(CasGc& Gc) : m_TinyStrategy(m_Config, Gc), m_SmallStrategy(m_Config, Gc), m_LargeStrategy(m_Config, Gc)
{
}
@@ -316,6 +316,8 @@ CasImpl::Scrub(ScrubContext& Ctx)
+// Forwards the GC context to the small, tiny and large strategies, in that
+// order, so each one can collect its own garbage.
void
CasImpl::GarbageCollect(GcContext& GcCtx)
{
+ m_SmallStrategy.CollectGarbage(GcCtx);
+ m_TinyStrategy.CollectGarbage(GcCtx);
m_LargeStrategy.CollectGarbage(GcCtx);
}
diff --git a/zenstore/compactcas.cpp b/zenstore/compactcas.cpp
index 584db496b..fd4b9441e 100644
--- a/zenstore/compactcas.cpp
+++ b/zenstore/compactcas.cpp
@@ -7,6 +7,7 @@
#include <zencore/compactbinarybuilder.h>
#include <zencore/except.h>
#include <zencore/filesystem.h>
+#include <zencore/fmtutils.h>
#include <zencore/logging.h>
#include <zencore/memory.h>
#include <zencore/string.h>
@@ -30,7 +31,12 @@
namespace zen {
-CasContainerStrategy::CasContainerStrategy(const CasStoreConfiguration& Config) : m_Config(Config)
+using namespace fmt::literals;
+
+// Hands Gc to the GcStorage base, binds m_Config to the caller's configuration
+// object (the member is a reference, so Config must outlive this strategy) and
+// fetches the "containercas" logger.
+CasContainerStrategy::CasContainerStrategy(const CasStoreConfiguration& Config, CasGc& Gc)
+: GcStorage(Gc)
+, m_Config(Config)
+, m_Log(logging::Get("containercas"))
{
}
@@ -47,42 +53,9 @@ CasContainerStrategy::Initialize(const std::string_view ContainerBaseName, uint6
m_ContainerBaseName = ContainerBaseName;
m_PayloadAlignment = Alignment;
- std::string BaseName(ContainerBaseName);
- std::filesystem::path SobsPath = m_Config.RootDirectory / (BaseName + ".ucas");
- std::filesystem::path SidxPath = m_Config.RootDirectory / (BaseName + ".uidx");
- std::filesystem::path SlogPath = m_Config.RootDirectory / (BaseName + ".ulog");
-
- m_SmallObjectFile.Open(SobsPath, IsNewStore);
- m_SmallObjectIndex.Open(SidxPath, IsNewStore);
- m_CasLog.Open(SlogPath, IsNewStore);
-
- // TODO: should validate integrity of container files here
+ OpenContainer(IsNewStore);
- uint64_t MaxFileOffset = 0;
-
- {
- // This is not technically necessary (nobody should be accessing us from
- // another thread at this stage) but may help static analysis
-
- RwLock::ExclusiveLockScope _(m_LocationMapLock);
-
- m_CasLog.Replay([&](const CasDiskIndexEntry& Record) {
- if (Record.Flags & CasDiskIndexEntry::kTombstone)
- {
- m_TotalSize.fetch_sub(Record.Location.GetSize());
- }
- else
- {
- m_TotalSize.fetch_add(Record.Location.GetSize());
- m_LocationMap[Record.Key] = Record.Location;
- MaxFileOffset = std::max<uint64_t>(MaxFileOffset, Record.Location.GetOffset() + Record.Location.GetSize());
- }
- });
- }
-
- m_CurrentInsertOffset = (MaxFileOffset + m_PayloadAlignment - 1) & ~(m_PayloadAlignment - 1);
- m_CurrentIndexOffset = m_SmallObjectIndex.FileSize();
- m_IsInitialized = true;
+ m_IsInitialized = true;
}
CasStore::InsertResult
@@ -282,7 +255,130 @@ CasContainerStrategy::Scrub(ScrubContext& Ctx)
+// Compacts this container: every chunk that GcCtx.FilterCas() does not hand
+// back is dropped by copying the surviving chunks into fresh ".gc.ucas" /
+// ".gc.ulog" files and swapping those in for the live container files.
+//
+// The rewrite only happens when GcCtx is in deletion mode AND container GC is
+// enabled; otherwise the function only reports how much it would reclaim.
+// m_LocationMapLock is held exclusively for the whole pass.
+//
+// NOTE(review): the live files are removed before the temporaries are renamed
+// into place, so a failure in between ends in the catch block, which falls
+// back to OpenContainer(true). Confirm that losing the container contents in
+// that case is acceptable for this "naive" implementation.
void
CasContainerStrategy::CollectGarbage(GcContext& GcCtx)
{
- ZEN_UNUSED(GcCtx);
+    namespace fs = std::filesystem;
+
+    ZEN_INFO("collecting garbage from '{}'", m_Config.RootDirectory / m_ContainerBaseName);
+
+    RwLock::ExclusiveLockScope _(m_LocationMapLock);
+
+    Flush();
+
+    std::vector<IoHash> Candidates;
+    std::vector<IoHash> Keep;
+    const uint64_t      ChunkCount = m_LocationMap.size();
+    uint64_t            TotalSize{};
+
+    Candidates.reserve(m_LocationMap.size());
+
+    // Every chunk currently in the container is a candidate for removal.
+    for (const auto& Entry : m_LocationMap)
+    {
+        Candidates.push_back(Entry.first);
+        TotalSize += Entry.second.GetSize();
+    }
+
+    // The hashes passed to the callback are the ones that survive this pass.
+    Keep.reserve(Candidates.size());
+    GcCtx.FilterCas(Candidates, [&](const IoHash& Hash) { Keep.push_back(Hash); });
+
+    if (m_LocationMap.empty() || Keep.size() == m_LocationMap.size())
+    {
+        ZEN_INFO("garbage collect DONE, scanned #{} {} chunks from '{}', nothing to delete",
+                 ChunkCount,
+                 NiceBytes(TotalSize),
+                 m_Config.RootDirectory / m_ContainerBaseName);
+        return;
+    }
+
+    const uint64_t NewChunkCount = Keep.size();
+    uint64_t       NewTotalSize  = 0;
+
+    for (const IoHash& Key : Keep)
+    {
+        // find() rather than operator[]: a size lookup must never insert a
+        // default-constructed location into the map.
+        const auto Entry = m_LocationMap.find(Key);
+        if (Entry != m_LocationMap.end())
+        {
+            NewTotalSize += Entry->second.GetSize();
+        }
+    }
+
+    const bool GcEnabled = GcCtx.IsDeletionMode() && GcCtx.IsContainerGcEnabled();
+
+    // Bail out (report only) when collection is NOT enabled. The condition was
+    // previously inverted, which made the destructive rewrite below run exactly
+    // when the context asked for it to be disabled.
+    if (!GcEnabled)
+    {
+        ZEN_INFO("garbage collect from '{}' DISABLED, found #{} {} chunks of total #{} {}",
+                 m_Config.RootDirectory / m_ContainerBaseName,
+                 ChunkCount - NewChunkCount,
+                 NiceBytes(TotalSize - NewTotalSize),
+                 ChunkCount,
+                 NiceBytes(TotalSize));
+        return;
+    }
+
+    fs::path TmpSobsPath = m_Config.RootDirectory / (m_ContainerBaseName + ".gc.ucas");
+    fs::path TmpSlogPath = m_Config.RootDirectory / (m_ContainerBaseName + ".gc.ulog");
+
+    {
+        ZEN_DEBUG("creating temporary container cas '{}'...", TmpSobsPath);
+
+        // Scoped so both temporary files go out of scope before the swap below.
+        TCasLogFile<CasDiskIndexEntry> TmpLog;
+        BasicFile                      TmpObjectFile;
+        bool                           IsNew = true;
+
+        TmpLog.Open(TmpSlogPath, IsNew);
+        TmpObjectFile.Open(TmpSobsPath, IsNew);
+
+        std::vector<uint8_t> Chunk;
+        uint64_t             NextInsertOffset{};
+
+        for (const IoHash& Key : Keep)
+        {
+            const auto Entry = m_LocationMap.find(Key);
+            if (Entry == m_LocationMap.end())
+            {
+                // Not expected (Keep should be a subset of the map), but never
+                // dereference end().
+                continue;
+            }
+
+            const auto& Loc = Entry->second;
+
+            Chunk.resize(Loc.GetSize());
+            m_SmallObjectFile.Read(Chunk.data(), Chunk.size(), Loc.GetOffset());
+
+            const uint64_t InsertOffset = NextInsertOffset;
+            TmpObjectFile.Write(Chunk.data(), Chunk.size(), InsertOffset);
+            TmpLog.Append({.Key = Key, .Location = {InsertOffset, Chunk.size()}});
+
+            // Round the next write position up to the payload alignment.
+            NextInsertOffset = (NextInsertOffset + Chunk.size() + m_PayloadAlignment - 1) & ~(m_PayloadAlignment - 1);
+        }
+    }
+
+    try
+    {
+        CloseContainer();
+
+        fs::path SobsPath = m_Config.RootDirectory / (m_ContainerBaseName + ".ucas");
+        fs::path SidxPath = m_Config.RootDirectory / (m_ContainerBaseName + ".uidx");
+        fs::path SlogPath = m_Config.RootDirectory / (m_ContainerBaseName + ".ulog");
+
+        fs::remove(SobsPath);
+        fs::remove(SidxPath);
+        fs::remove(SlogPath);
+
+        fs::rename(TmpSobsPath, SobsPath);
+        fs::rename(TmpSlogPath, SlogPath);
+
+        {
+            // Create a new empty index file
+            BasicFile SidxFile;
+            SidxFile.Open(SidxPath, true);
+        }
+
+        // Replays the compacted log and rebuilds the in-memory state.
+        OpenContainer(false /* IsNewStore */);
+
+        ZEN_INFO("garbage collect from '{}' DONE, collected #{} {} chunks of total #{} {}",
+                 m_Config.RootDirectory / m_ContainerBaseName,
+                 ChunkCount - NewChunkCount,
+                 NiceBytes(TotalSize - NewTotalSize),
+                 ChunkCount,
+                 NiceBytes(TotalSize));
+    }
+    catch (const std::exception& Err)
+    {
+        ZEN_ERROR("garbage collection FAILED, reason '{}'", Err.what());
+
+        // Something went wrong, try create a new container
+        OpenContainer(true /* IsNewStore */);
+    }
}
void
@@ -303,6 +399,52 @@ CasContainerStrategy::MakeSnapshot()
m_SmallObjectIndex.Write(Entries.data(), Entries.size() * sizeof(CasDiskIndexEntry), 0);
}
+// Opens the container's three backing files (<base>.ucas, <base>.uidx and
+// <base>.ulog under the configured root directory) and rebuilds the in-memory
+// state - location map, total size and insert/index offsets - by replaying the
+// log.
+//
+// IsNewStore is passed straight through to each file's Open().
+//
+// This function does not take m_LocationMapLock itself; CollectGarbage calls
+// it with the lock already held.
+void
+CasContainerStrategy::OpenContainer(bool IsNewStore)
+{
+    std::filesystem::path SobsPath = m_Config.RootDirectory / (m_ContainerBaseName + ".ucas");
+    std::filesystem::path SidxPath = m_Config.RootDirectory / (m_ContainerBaseName + ".uidx");
+    std::filesystem::path SlogPath = m_Config.RootDirectory / (m_ContainerBaseName + ".ulog");
+
+    m_SmallObjectFile.Open(SobsPath, IsNewStore);
+    m_SmallObjectIndex.Open(SidxPath, IsNewStore);
+    m_CasLog.Open(SlogPath, IsNewStore);
+
+    // TODO: should validate integrity of container files here
+
+    // Start from a clean slate so a re-open does not accumulate state from the
+    // previously opened container.
+    m_CurrentInsertOffset = 0;
+    m_CurrentIndexOffset  = 0;
+    m_TotalSize           = 0;
+
+    m_LocationMap.clear();
+
+    uint64_t MaxFileOffset = 0;
+
+    m_CasLog.Replay([&](const CasDiskIndexEntry& Record) {
+        if (Record.Flags & CasDiskIndexEntry::kTombstone)
+        {
+            // A tombstone retires the chunk: drop it from the map as well as
+            // from the size total, otherwise the chunk stays resolvable and the
+            // map no longer matches m_TotalSize.
+            m_LocationMap.erase(Record.Key);
+            m_TotalSize.fetch_sub(Record.Location.GetSize());
+        }
+        else
+        {
+            m_TotalSize.fetch_add(Record.Location.GetSize());
+            m_LocationMap[Record.Key] = Record.Location;
+            MaxFileOffset = std::max<uint64_t>(MaxFileOffset, Record.Location.GetOffset() + Record.Location.GetSize());
+        }
+    });
+
+    // New chunks are appended after the furthest payload seen in the log,
+    // rounded up to the payload alignment.
+    m_CurrentInsertOffset = (MaxFileOffset + m_PayloadAlignment - 1) & ~(m_PayloadAlignment - 1);
+    m_CurrentIndexOffset  = m_SmallObjectIndex.FileSize();
+}
+
+// Closes the object file, the index file and the log, in that order.
+// CollectGarbage calls this before it deletes and replaces the files on disk.
+void
+CasContainerStrategy::CloseContainer()
+{
+ m_SmallObjectFile.Close();
+ m_SmallObjectIndex.Close();
+ m_CasLog.Close();
+}
+
//////////////////////////////////////////////////////////////////////////
#if ZEN_WITH_TESTS
@@ -321,7 +463,8 @@ TEST_CASE("cas.compact.gc")
std::vector<IoHash> Keys(kIterationCount);
{
- CasContainerStrategy Cas(CasConfig);
+ CasGc Gc;
+ CasContainerStrategy Cas(CasConfig, Gc);
Cas.Initialize("test", 16, true);
for (int i = 0; i < kIterationCount; ++i)
@@ -354,7 +497,8 @@ TEST_CASE("cas.compact.gc")
// the original cas store
{
- CasContainerStrategy Cas(CasConfig);
+ CasGc Gc;
+ CasContainerStrategy Cas(CasConfig, Gc);
Cas.Initialize("test", 16, false);
for (int i = 0; i < kIterationCount; ++i)
@@ -402,7 +546,8 @@ TEST_CASE("cas.compact.totalsize")
const int32_t kChunkCount = 16;
{
- CasContainerStrategy Cas(CasConfig);
+ CasGc Gc;
+ CasContainerStrategy Cas(CasConfig, Gc);
Cas.Initialize("test", 16, true);
for (int32_t Idx = 0; Idx < kChunkCount; ++Idx)
@@ -418,7 +563,8 @@ TEST_CASE("cas.compact.totalsize")
}
{
- CasContainerStrategy Cas(CasConfig);
+ CasGc Gc;
+ CasContainerStrategy Cas(CasConfig, Gc);
Cas.Initialize("test", 16, false);
const uint64_t TotalSize = Cas.TotalSize();
diff --git a/zenstore/compactcas.h b/zenstore/compactcas.h
index 9154768b3..1d3a2beff 100644
--- a/zenstore/compactcas.h
+++ b/zenstore/compactcas.h
@@ -13,6 +13,11 @@
#include <zenstore/basicfile.h>
#include <zenstore/cas.h>
#include <zenstore/caslog.h>
+#include <zenstore/gc.h>
+
+namespace spdlog {
+class logger;
+}
namespace zen {
@@ -75,9 +80,9 @@ static_assert(sizeof(CasDiskIndexEntry) == 32);
*
*/
-struct CasContainerStrategy
+struct CasContainerStrategy : public GcStorage
{
- CasContainerStrategy(const CasStoreConfiguration& Config);
+ CasContainerStrategy(const CasStoreConfiguration& Config, CasGc& Gc);
~CasContainerStrategy();
CasStore::InsertResult InsertChunk(const void* ChunkData, size_t ChunkSize, const IoHash& ChunkHash);
@@ -92,7 +97,12 @@ struct CasContainerStrategy
uint64_t TotalSize() const { return m_TotalSize; }
private:
+ void OpenContainer(bool IsNewStore);
+ void CloseContainer();
+ spdlog::logger& Log() { return m_Log; }
+
const CasStoreConfiguration& m_Config;
+ spdlog::logger& m_Log;
uint64_t m_PayloadAlignment = 1 << 4;
bool m_IsInitialized = false;
BasicFile m_SmallObjectFile;
diff --git a/zenstore/gc.cpp b/zenstore/gc.cpp
index 5c2ee2daa..1b987ca08 100644
--- a/zenstore/gc.cpp
+++ b/zenstore/gc.cpp
@@ -16,7 +16,8 @@ struct GcContext::GcState
{
CasChunkSet m_CasChunks;
CasChunkSet m_CidChunks;
- bool m_DeletionMode = true;
+ bool m_DeletionMode = true;
+ bool m_ContainerGcEnabled = false;
};
GcContext::GcContext() : m_State(std::make_unique<GcState>())
@@ -62,12 +63,25 @@ GcContext::IsDeletionMode() const
{
return m_State->m_DeletionMode;
}
+
void
GcContext::SetDeletionMode(bool NewState)
{
m_State->m_DeletionMode = NewState;
}
+// Returns the container-GC flag stored in the context state. The flag starts
+// out false; CasContainerStrategy::CollectGarbage consults it together with
+// IsDeletionMode().
+bool
+GcContext::IsContainerGcEnabled() const
+{
+ return m_State->m_ContainerGcEnabled;
+}
+
+// Stores NewState as the container-GC flag reported by IsContainerGcEnabled().
+void
+GcContext::SetContainerGcEnabled(bool NewState)
+{
+ m_State->m_ContainerGcEnabled = NewState;
+}
+
//////////////////////////////////////////////////////////////////////////
GcContributor::GcContributor(CasGc& Gc) : m_Gc(Gc)
@@ -139,6 +153,7 @@ CasGc::CollectGarbage()
GcContext GcCtx;
GcCtx.SetDeletionMode(true);
+ GcCtx.SetContainerGcEnabled(false);
for (GcContributor* Contributor : m_GcContribs)
{
diff --git a/zenstore/include/zenstore/gc.h b/zenstore/include/zenstore/gc.h
index 8efe933a0..6b00f1ffb 100644
--- a/zenstore/include/zenstore/gc.h
+++ b/zenstore/include/zenstore/gc.h
@@ -38,6 +38,9 @@ public:
bool IsDeletionMode() const;
void SetDeletionMode(bool NewState);
+ bool IsContainerGcEnabled() const;
+ void SetContainerGcEnabled(bool NewState);
+
private:
struct GcState;