// Copyright Epic Games, Inc. All Rights Reserved. #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if ZEN_WITH_TESTS # include # include # include #endif namespace zen { using namespace std::literals; namespace fs = std::filesystem; ////////////////////////////////////////////////////////////////////////// CbObject LoadCompactBinaryObject(const fs::path& Path) { FileContents Result = ReadFile(Path); if (!Result.ErrorCode) { IoBuffer Buffer = Result.Flatten(); if (CbValidateError Error = ValidateCompactBinary(Buffer, CbValidateMode::All); Error == CbValidateError::None) { return LoadCompactBinaryObject(Buffer); } } return CbObject(); } void SaveCompactBinaryObject(const fs::path& Path, const CbObject& Object) { WriteFile(Path, Object.GetBuffer().AsIoBuffer()); } ////////////////////////////////////////////////////////////////////////// struct GcContext::GcState { struct CacheBucket { std::vector ValidKeys; std::vector ExpiredKeys; }; using CacheBuckets = std::unordered_map; CacheBuckets m_CacheBuckets; CasChunkSet m_CasChunks; CasChunkSet m_DeletedCasChunks; CasChunkSet m_CidChunks; GcClock::TimePoint m_GcTime; GcClock::Duration m_MaxCacheDuration = std::chrono::hours(24); bool m_DeletionMode = true; bool m_CollectSmallObjects = false; }; GcContext::GcContext(GcClock::TimePoint Time) : m_State(std::make_unique()) { m_State->m_GcTime = Time; } GcContext::~GcContext() { } void GcContext::ContributeCids(std::span Cids) { m_State->m_CidChunks.AddChunksToSet(Cids); } void GcContext::ContributeCas(std::span Cas) { m_State->m_CasChunks.AddChunksToSet(Cas); } void GcContext::ContributeCacheKeys(const std::string& Bucket, std::vector ValidKeys, std::vector ExpiredKeys) { m_State->m_CacheBuckets[Bucket].ValidKeys = std::move(ValidKeys); m_State->m_CacheBuckets[Bucket].ExpiredKeys = std::move(ExpiredKeys); } void GcContext::IterateCids(std::function Callback) { m_State->m_CidChunks.IterateChunks([&](const IoHash& Hash) { Callback(Hash); }); } void GcContext::FilterCids(std::span Cid, std::function KeepFunc) { m_State->m_CidChunks.FilterChunks(Cid, [&](const IoHash& Hash) { KeepFunc(Hash); }); } void GcContext::FilterCas(std::span Cas, std::function KeepFunc) { m_State->m_CasChunks.FilterChunks(Cas, [&](const IoHash& Hash) { KeepFunc(Hash); }); } void GcContext::FilterCas(std::span Cas, std::function&& FilterFunc) { m_State->m_CasChunks.FilterChunks(Cas, std::move(FilterFunc)); } void GcContext::DeletedCas(std::span Cas) { m_State->m_DeletedCasChunks.AddChunksToSet(Cas); } CasChunkSet& GcContext::DeletedCas() { return m_State->m_DeletedCasChunks; } std::span GcContext::ValidCacheKeys(const std::string& Bucket) const { return m_State->m_CacheBuckets[Bucket].ValidKeys; } std::span GcContext::ExpiredCacheKeys(const std::string& Bucket) const { return m_State->m_CacheBuckets[Bucket].ExpiredKeys; } bool GcContext::IsDeletionMode() const { return m_State->m_DeletionMode; } void GcContext::SetDeletionMode(bool NewState) { m_State->m_DeletionMode = NewState; } bool GcContext::CollectSmallObjects() const { return m_State->m_CollectSmallObjects; } void GcContext::CollectSmallObjects(bool NewState) { m_State->m_CollectSmallObjects = NewState; } GcClock::TimePoint GcContext::Time() const { return m_State->m_GcTime; } GcClock::Duration GcContext::MaxCacheDuration() const { return m_State->m_MaxCacheDuration; } void GcContext::MaxCacheDuration(GcClock::Duration Duration) { m_State->m_MaxCacheDuration = Duration; } ////////////////////////////////////////////////////////////////////////// GcContributor::GcContributor(CasGc& Gc) : m_Gc(Gc) { m_Gc.AddGcContributor(this); } GcContributor::~GcContributor() { m_Gc.RemoveGcContributor(this); } ////////////////////////////////////////////////////////////////////////// GcStorage::GcStorage(CasGc& Gc) : m_Gc(Gc) { m_Gc.AddGcStorage(this); } GcStorage::~GcStorage() { m_Gc.AddGcStorage(this); } ////////////////////////////////////////////////////////////////////////// CasGc::CasGc() { } CasGc::~CasGc() { } void CasGc::AddGcContributor(GcContributor* Contributor) { RwLock::ExclusiveLockScope _(m_Lock); m_GcContribs.push_back(Contributor); } void CasGc::RemoveGcContributor(GcContributor* Contributor) { RwLock::ExclusiveLockScope _(m_Lock); std::erase_if(m_GcContribs, [&](GcContributor* $) { return $ == Contributor; }); } void CasGc::AddGcStorage(GcStorage* Storage) { RwLock::ExclusiveLockScope _(m_Lock); m_GcStorage.push_back(Storage); } void CasGc::RemoveGcStorage(GcStorage* Storage) { RwLock::ExclusiveLockScope _(m_Lock); std::erase_if(m_GcStorage, [&](GcStorage* $) { return $ == Storage; }); } void CasGc::CollectGarbage(GcContext& GcCtx) { RwLock::SharedLockScope _(m_Lock); // First gather reference set for (GcContributor* Contributor : m_GcContribs) { Contributor->GatherReferences(GcCtx); } // Cache records reference CAS chunks with the uncompressed // raw hash (Cid). Map the content ID to CAS hash to enable // the CAS storage backends to filter valid chunks. if (CidStore* CidStore = m_CidStore) { std::vector CasHashes; uint64_t UnknownChunks = 0; GcCtx.IterateCids([&](const IoHash& Cid) { IoHash Cas = CidStore->RemapCid(Cid); if (Cas == IoHash::Zero) { ++UnknownChunks; } else { CasHashes.push_back(Cas); } }); if (UnknownChunks) { ZEN_WARN("found {} unknown CIDs", UnknownChunks); } GcCtx.ContributeCas(CasHashes); } // Then trim storage for (GcStorage* Storage : m_GcStorage) { Storage->CollectGarbage(GcCtx); } // Remove Cid to CAS hash mappings. Scrub? if (CidStore* CidStore = m_CidStore) { CidStore->RemoveCids(GcCtx.DeletedCas()); } } void CasGc::SetCidStore(CidStore* Cids) { m_CidStore = Cids; } void CasGc::OnNewCidReferences(std::span Hashes) { ZEN_UNUSED(Hashes); } void CasGc::OnCommittedCidReferences(std::span Hashes) { ZEN_UNUSED(Hashes); } void CasGc::OnDroppedCidReferences(std::span Hashes) { ZEN_UNUSED(Hashes); } GcStorageSize CasGc::TotalStorageSize() const { RwLock::SharedLockScope _(m_Lock); GcStorageSize TotalSize; for (GcStorage* Storage : m_GcStorage) { const auto Size = Storage->StorageSize(); TotalSize.DiskSize += Size.DiskSize; TotalSize.MemorySize += Size.MemorySize; } return TotalSize; } ////////////////////////////////////////////////////////////////////////// GcScheduler::GcScheduler(CasGc& CasGc) : m_Log(logging::Get("gc")), m_CasGc(CasGc) { } GcScheduler::~GcScheduler() { Shutdown(); } void GcScheduler::Initialize(const GcSchedulerConfig& Config) { using namespace std::chrono; m_Config = Config; if (m_Config.Interval.count() && m_Config.Interval < m_Config.MonitorInterval) { m_Config.Interval = m_Config.MonitorInterval; } std::filesystem::create_directories(Config.RootDirectory); m_LastGcTime = GcClock::Now(); if (CbObject SchedulerState = LoadCompactBinaryObject(Config.RootDirectory / "gc_state")) { m_LastGcTime = GcClock::TimePoint(GcClock::Duration(SchedulerState["LastGcTime"sv].AsInt64())); if (m_LastGcTime + m_Config.Interval < GcClock::Now()) { // TODO: Trigger GC? m_LastGcTime = GcClock::Now(); } } m_NextGcTime = NextGcTime(m_LastGcTime); m_GcThread = std::thread(&GcScheduler::SchedulerThread, this); } void GcScheduler::Shutdown() { if (static_cast(GcSchedulerStatus::kStopped) != m_Status) { m_Status = static_cast(GcSchedulerStatus::kStopped); m_GcSignal.notify_one(); if (m_GcThread.joinable()) { m_GcThread.join(); } } } bool GcScheduler::Trigger(const GcScheduler::TriggerParams& Params) { if (m_Config.Enabled) { std::unique_lock Lock(m_GcMutex); if (static_cast(GcSchedulerStatus::kIdle) == m_Status) { m_TriggerParams = Params; m_Status = static_cast(GcSchedulerStatus::kRunning); m_GcSignal.notify_one(); return true; } } return false; } void GcScheduler::SchedulerThread() { std::chrono::seconds WaitTime = m_Config.MonitorInterval; for (;;) { bool Timeout = false; { ZEN_ASSERT(WaitTime.count() >= 0); std::unique_lock Lock(m_GcMutex); Timeout = std::cv_status::timeout == m_GcSignal.wait_for(Lock, WaitTime); } if (Status() == GcSchedulerStatus::kStopped) { break; } if (!m_Config.Enabled || (!Timeout && Status() == GcSchedulerStatus::kIdle)) { continue; } if (Timeout && Status() == GcSchedulerStatus::kIdle) { std::error_code Ec; DiskSpace Space = DiskSpaceInfo(m_Config.RootDirectory, Ec); GcStorageSize TotalSize = m_CasGc.TotalStorageSize(); std::chrono::seconds RemaingTime = std::chrono::duration_cast(m_NextGcTime - GcClock::Now()); if (RemaingTime < std::chrono::seconds::zero()) { RemaingTime = std::chrono::seconds::zero(); } if (Ec) { ZEN_WARN("get disk space info FAILED, reason '{}'", Ec.message()); } ZEN_INFO("{} in use, {} of total {} free disk space, {}", NiceBytes(TotalSize.DiskSize), NiceBytes(Space.Free), NiceBytes(Space.Total), m_Config.Interval.count() ? fmt::format("{} until next GC", NiceTimeSpanMs(uint64_t(std::chrono::milliseconds(RemaingTime).count()))) : std::string("next scheduled GC no set")); // TODO: Trigger GC if max disk usage water mark is reached if (RemaingTime.count() > 0) { WaitTime = m_Config.MonitorInterval < RemaingTime ? m_Config.MonitorInterval : RemaingTime; continue; } WaitTime = m_Config.MonitorInterval; m_Status = static_cast(GcSchedulerStatus::kRunning); } ZEN_ASSERT(Status() == GcSchedulerStatus::kRunning); GcContext GcCtx; GcCtx.SetDeletionMode(true); GcCtx.CollectSmallObjects(m_Config.CollectSmallObjects); GcCtx.MaxCacheDuration(m_Config.MaxCacheDuration); if (m_TriggerParams) { const auto TriggerParams = m_TriggerParams.value(); m_TriggerParams.reset(); GcCtx.CollectSmallObjects(TriggerParams.CollectSmallObjects); if (TriggerParams.MaxCacheDuration != std::chrono::seconds::max()) { GcCtx.MaxCacheDuration(TriggerParams.MaxCacheDuration); } } Stopwatch Timer; ZEN_INFO("garbage collection STARTING, small objects gc {}, max cache duration {}", GcCtx.CollectSmallObjects() ? "ENABLED"sv : "DISABLED"sv, NiceTimeSpanMs(uint64_t(std::chrono::duration_cast(GcCtx.MaxCacheDuration()).count()))); m_CasGc.CollectGarbage(GcCtx); m_LastGcTime = GcClock::Now(); m_NextGcTime = NextGcTime(m_LastGcTime); WaitTime = m_Config.MonitorInterval; { const fs::path Path = m_Config.RootDirectory / "gc_state"; ZEN_DEBUG("saving scheduler state to '{}'", Path); CbObjectWriter SchedulderState; SchedulderState << "LastGcTime"sv << static_cast(m_LastGcTime.time_since_epoch().count()); SaveCompactBinaryObject(Path, SchedulderState.Save()); } ZEN_INFO("garbage collection DONE after {}", NiceTimeSpanMs(Timer.GetElapsedTimeMs())); m_Status = static_cast(GcSchedulerStatus::kIdle); } } GcClock::TimePoint GcScheduler::NextGcTime(GcClock::TimePoint CurrentTime) { if (m_Config.Interval.count()) { return CurrentTime + m_Config.Interval; } else { return GcClock::TimePoint::max(); } } ////////////////////////////////////////////////////////////////////////// #if ZEN_WITH_TESTS namespace { static IoBuffer CreateChunk(uint64_t Size) { static std::random_device rd; static std::mt19937 g(rd()); const size_t Count = static_cast(Size / sizeof(uint32_t)); std::vector Values; Values.resize(Count); for (size_t Idx = 0; Idx < Count; ++Idx) { Values[Idx] = static_cast(Idx); } std::shuffle(Values.begin(), Values.end(), g); return IoBufferBuilder::MakeCloneFromMemory(Values.data(), Values.size() * sizeof(uint32_t)); } static CompressedBuffer Compress(IoBuffer Buffer) { return CompressedBuffer::Compress(SharedBuffer::MakeView(Buffer.GetData(), Buffer.GetSize())); } } // namespace TEST_CASE("gc.basic") { ScopedTemporaryDirectory TempDir; CasStoreConfiguration CasConfig; CasConfig.RootDirectory = TempDir.Path() / "cas"; CasGc Gc; std::unique_ptr CasStore = CreateCasStore(Gc); CidStore CidStore{*CasStore, TempDir.Path() / "cid"}; CasStore->Initialize(CasConfig); Gc.SetCidStore(&CidStore); IoBuffer Chunk = CreateChunk(128); auto CompressedChunk = Compress(Chunk); const auto InsertResult = CidStore.AddChunk(CompressedChunk); GcContext GcCtx; GcCtx.CollectSmallObjects(true); Gc.CollectGarbage(GcCtx); CHECK(!CidStore.ContainsChunk(InsertResult.DecompressedId)); } #endif void gc_forcelink() { } } // namespace zen