// Copyright Epic Games, Inc. All Rights Reserved. #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "cas.h" #include #include #include #if ZEN_PLATFORM_WINDOWS # include #else # include # include # include # include #endif #if ZEN_WITH_TESTS # include # include # include #endif namespace zen { using namespace std::literals; namespace fs = std::filesystem; ////////////////////////////////////////////////////////////////////////// namespace { std::error_code CreateGCReserve(const std::filesystem::path& Path, uint64_t Size) { if (Size == 0) { std::filesystem::remove(Path); return std::error_code{}; } CreateDirectories(Path.parent_path()); if (std::filesystem::is_regular_file(Path) && std::filesystem::file_size(Path) == Size) { return std::error_code(); } #if ZEN_PLATFORM_WINDOWS DWORD dwCreationDisposition = CREATE_ALWAYS; DWORD dwDesiredAccess = GENERIC_READ | GENERIC_WRITE; const DWORD dwShareMode = 0; const DWORD dwFlagsAndAttributes = FILE_ATTRIBUTE_NORMAL; HANDLE hTemplateFile = nullptr; HANDLE FileHandle = CreateFile(Path.c_str(), dwDesiredAccess, dwShareMode, /* lpSecurityAttributes */ nullptr, dwCreationDisposition, dwFlagsAndAttributes, hTemplateFile); if (FileHandle == INVALID_HANDLE_VALUE) { return MakeErrorCodeFromLastError(); } bool Keep = true; auto _ = MakeGuard([&]() { ::CloseHandle(FileHandle); if (!Keep) { ::DeleteFile(Path.c_str()); } }); LARGE_INTEGER liFileSize; liFileSize.QuadPart = Size; BOOL OK = ::SetFilePointerEx(FileHandle, liFileSize, 0, FILE_BEGIN); if (!OK) { return MakeErrorCodeFromLastError(); } OK = ::SetEndOfFile(FileHandle); if (!OK) { return MakeErrorCodeFromLastError(); } Keep = true; #else int OpenFlags = O_CLOEXEC | O_RDWR | O_CREAT; int Fd = open(Path.c_str(), OpenFlags, 0666); if (Fd < 0) { return MakeErrorCodeFromLastError(); } bool Keep = true; auto _ = MakeGuard([&]() { close(Fd); if (!Keep) { unlink(Path.c_str()); } }); if (fchmod(Fd, 0666) < 0) { return MakeErrorCodeFromLastError(); } # if ZEN_PLATFORM_MAC if (ftruncate(Fd, (off_t)Size) < 0) { return MakeErrorCodeFromLastError(); } # else if (ftruncate64(Fd, (off64_t)Size) < 0) { return MakeErrorCodeFromLastError(); } int Error = posix_fallocate64(Fd, 0, (off64_t)Size); if (Error) { return MakeErrorCode(Error); } # endif Keep = true; #endif return std::error_code{}; } } // namespace ////////////////////////////////////////////////////////////////////////// CbObject LoadCompactBinaryObject(const fs::path& Path) { FileContents Result = ReadFile(Path); if (!Result.ErrorCode) { IoBuffer Buffer = Result.Flatten(); if (CbValidateError Error = ValidateCompactBinary(Buffer, CbValidateMode::All); Error == CbValidateError::None) { return LoadCompactBinaryObject(Buffer); } } return CbObject(); } void SaveCompactBinaryObject(const fs::path& Path, const CbObject& Object) { WriteFile(Path, Object.GetBuffer().AsIoBuffer()); } ////////////////////////////////////////////////////////////////////////// struct GcContext::GcState { using CacheKeyContexts = std::unordered_map>; CacheKeyContexts m_ExpiredCacheKeys; HashKeySet m_RetainedCids; HashKeySet m_DeletedCids; GcClock::TimePoint m_CacheExpireTime; GcClock::TimePoint m_ProjectStoreExpireTime; bool m_DeletionMode = true; bool m_CollectSmallObjects = false; bool m_SkipCid = false; std::filesystem::path DiskReservePath; }; GcContext::GcContext(const GcClock::TimePoint& CacheExpireTime, const GcClock::TimePoint& ProjectStoreExpireTime) : m_State(std::make_unique()) { m_State->m_CacheExpireTime = CacheExpireTime; m_State->m_ProjectStoreExpireTime = ProjectStoreExpireTime; } GcContext::~GcContext() { } void GcContext::AddRetainedCids(std::span Cids) { m_State->m_RetainedCids.AddHashesToSet(Cids); } void GcContext::SetExpiredCacheKeys(const std::string& CacheKeyContext, std::vector&& ExpiredKeys) { m_State->m_ExpiredCacheKeys[CacheKeyContext] = std::move(ExpiredKeys); } void GcContext::IterateCids(std::function Callback) { m_State->m_RetainedCids.IterateHashes([&](const IoHash& Hash) { Callback(Hash); }); } void GcContext::FilterCids(std::span Cid, std::function KeepFunc) { m_State->m_RetainedCids.FilterHashes(Cid, [&](const IoHash& Hash) { KeepFunc(Hash); }); } void GcContext::FilterCids(std::span Cid, std::function&& FilterFunc) { m_State->m_RetainedCids.FilterHashes(Cid, std::move(FilterFunc)); } void GcContext::AddDeletedCids(std::span Cas) { m_State->m_DeletedCids.AddHashesToSet(Cas); } const HashKeySet& GcContext::DeletedCids() { return m_State->m_DeletedCids; } std::span GcContext::ExpiredCacheKeys(const std::string& CacheKeyContext) const { return m_State->m_ExpiredCacheKeys[CacheKeyContext]; } bool GcContext::SkipCid() const { return m_State->m_SkipCid; } void GcContext::SetSkipCid(bool NewState) { m_State->m_SkipCid = NewState; } bool GcContext::IsDeletionMode() const { return m_State->m_DeletionMode; } void GcContext::SetDeletionMode(bool NewState) { m_State->m_DeletionMode = NewState; } bool GcContext::CollectSmallObjects() const { return m_State->m_CollectSmallObjects; } void GcContext::CollectSmallObjects(bool NewState) { m_State->m_CollectSmallObjects = NewState; } GcClock::TimePoint GcContext::CacheExpireTime() const { return m_State->m_CacheExpireTime; } GcClock::TimePoint GcContext::ProjectStoreExpireTime() const { return m_State->m_ProjectStoreExpireTime; } void GcContext::DiskReservePath(const std::filesystem::path& Path) { m_State->DiskReservePath = Path; } uint64_t GcContext::ClaimGCReserve() { if (!std::filesystem::is_regular_file(m_State->DiskReservePath)) { return 0; } uint64_t ReclaimedSize = std::filesystem::file_size(m_State->DiskReservePath); if (std::filesystem::remove(m_State->DiskReservePath)) { return ReclaimedSize; } return 0; } ////////////////////////////////////////////////////////////////////////// GcManager::GcManager() : m_Log(logging::Get("gc")) { } GcManager::~GcManager() { } //////// Begin GC V2 void WriteGcStats(CbObjectWriter& Writer, const GcStats& Stats, bool HumanReadable) { Writer << "Checked" << Stats.CheckedCount; Writer << "Found" << Stats.FoundCount; Writer << "Deleted" << Stats.DeletedCount; if (HumanReadable) { Writer << "FreedMemory" << NiceBytes(Stats.FreedMemory); } else { Writer << "FreedMemoryBytes" << Stats.FreedMemory; } Writer << "Elapsed" << ToTimeSpan(Stats.ElapsedMS); } void WriteCompactStoreStats(CbObjectWriter& Writer, const GcCompactStoreStats& Stats, bool HumanReadable) { if (HumanReadable) { Writer << "RemovedDisk" << NiceBytes(Stats.RemovedDisk); } else { Writer << "RemovedDiskBytes" << Stats.RemovedDisk; } Writer << "Elapsed" << ToTimeSpan(Stats.ElapsedMS); } void WriteReferencerStats(CbObjectWriter& Writer, const GcReferencerStats& Stats, bool HumanReadable) { if (Stats.RemoveExpiredDataStats.CheckedCount == 0) { return; } Writer.BeginObject("RemoveExpired"); { WriteGcStats(Writer, Stats.RemoveExpiredDataStats, HumanReadable); } Writer.EndObject(); Writer.BeginObject("Compact"); { WriteCompactStoreStats(Writer, Stats.CompactStoreStats, HumanReadable); } Writer.EndObject(); Writer << "CreateReferenceCheckers" << ToTimeSpan(Stats.CreateReferenceCheckersMS); Writer << "PreCacheState" << ToTimeSpan(Stats.PreCacheStateMS); Writer << "LockState" << ToTimeSpan(Stats.LockStateMS); Writer << "Elapsed" << ToTimeSpan(Stats.ElapsedMS); }; void WriteReferenceStoreStats(CbObjectWriter& Writer, const GcReferenceStoreStats& Stats, bool HumanReadable) { if (Stats.RemoveUnreferencedDataStats.CheckedCount == 0) { return; } Writer.BeginObject("RemoveUnreferenced"); { WriteGcStats(Writer, Stats.RemoveUnreferencedDataStats, HumanReadable); } Writer.EndObject(); Writer.BeginObject("Compact"); { WriteCompactStoreStats(Writer, Stats.CompactStoreStats, HumanReadable); } Writer.EndObject(); Writer << "CreateReferencePruners" << ToTimeSpan(Stats.CreateReferencePrunersMS); Writer << "Elapsed" << ToTimeSpan(Stats.ElapsedMS); }; void WriteGCResult(CbObjectWriter& Writer, const GcResult& Result, bool HumanReadable, bool IncludeDetails) { if (!IncludeDetails) { if (HumanReadable) { Writer << "RemovedDisk" << NiceBytes(Result.CompactStoresStatSum.RemovedDisk); Writer << "FreedMemory" << NiceBytes(Result.ReferencerStatSum.RemoveExpiredDataStats.FreedMemory); } else { Writer << "RemovedDiskBytes" << gsl::narrow(Result.CompactStoresStatSum.RemovedDisk); Writer << "RemovedMemoryBytes" << gsl::narrow(Result.ReferencerStatSum.RemoveExpiredDataStats.FreedMemory); } Writer << "WriteBlock" << ToTimeSpan(Result.WriteBlockMS); Writer << "Elapsed" << ToTimeSpan(Result.ElapsedMS); Writer << "Cancelled" << Result.WasCancelled; return; } Writer.BeginObject("Referencer"); { WriteReferencerStats(Writer, Result.ReferencerStatSum, HumanReadable); } Writer.EndObject(); Writer.BeginObject("ReferenceStore"); { WriteReferenceStoreStats(Writer, Result.ReferenceStoreStatSum, HumanReadable); } Writer.EndObject(); Writer.BeginObject("Compact"); { WriteCompactStoreStats(Writer, Result.CompactStoresStatSum, HumanReadable); } Writer.EndObject(); Writer << "RemoveExpiredData" << ToTimeSpan(Result.RemoveExpiredDataMS); Writer << "CreateReferenceCheckers" << ToTimeSpan(Result.CreateReferenceCheckersMS); Writer << "PreCacheState" << ToTimeSpan(Result.PreCacheStateMS); Writer << "LockState" << ToTimeSpan(Result.LockStateMS); Writer << "CreateReferencePruners" << ToTimeSpan(Result.CreateReferencePrunersMS); Writer << "RemoveUnreferencedData" << ToTimeSpan(Result.RemoveUnreferencedDataMS); Writer << "CompactStores" << ToTimeSpan(Result.CompactStoresMS); Writer << "WriteBlock" << ToTimeSpan(Result.WriteBlockMS); Writer << "Elapsed" << ToTimeSpan(Result.ElapsedMS); if (!Result.ReferencerStats.empty()) { Writer.BeginArray("Referencers"); { for (const std::pair& It : Result.ReferencerStats) { Writer.BeginObject(); Writer << "Name" << It.first; WriteReferencerStats(Writer, It.second, HumanReadable); Writer.EndObject(); } } Writer.EndArray(); } if (!Result.ReferenceStoreStats.empty()) { Writer.BeginArray("ReferenceStores"); for (const std::pair& It : Result.ReferenceStoreStats) { Writer.BeginObject(); Writer << "Name" << It.first; WriteReferenceStoreStats(Writer, It.second, HumanReadable); Writer.EndObject(); } Writer.EndArray(); } }; void Add(GcCompactStoreStats& Sum, const GcCompactStoreStats& Sub) { Sum.RemovedDisk += Sub.RemovedDisk; Sum.ElapsedMS += Sub.ElapsedMS; } void Add(GcStats& Sum, const GcStats& Sub) { Sum.CheckedCount += Sub.CheckedCount; Sum.FoundCount += Sub.FoundCount; Sum.DeletedCount += Sub.DeletedCount; Sum.FreedMemory += Sub.FreedMemory; Sum.ElapsedMS += Sub.ElapsedMS; } void Sum(GcReferencerStats& Stat) { Stat.ElapsedMS = Stat.RemoveExpiredDataStats.ElapsedMS + Stat.CompactStoreStats.ElapsedMS + Stat.CreateReferenceCheckersMS + Stat.PreCacheStateMS + Stat.LockStateMS; } void Add(GcReferencerStats& Sum, const GcReferencerStats& Sub) { Add(Sum.RemoveExpiredDataStats, Sub.RemoveExpiredDataStats); Add(Sum.CompactStoreStats, Sub.CompactStoreStats); Sum.CreateReferenceCheckersMS += Sub.CreateReferenceCheckersMS; Sum.PreCacheStateMS += Sub.PreCacheStateMS; Sum.LockStateMS += Sub.LockStateMS; Sum.ElapsedMS += Sub.ElapsedMS; } void Sum(GcReferenceStoreStats& Stat) { Stat.ElapsedMS = Stat.RemoveUnreferencedDataStats.ElapsedMS + Stat.CompactStoreStats.ElapsedMS + Stat.CreateReferencePrunersMS; } void Add(GcReferenceStoreStats& Sum, const GcReferenceStoreStats& Sub) { Add(Sum.RemoveUnreferencedDataStats, Sub.RemoveUnreferencedDataStats); Add(Sum.CompactStoreStats, Sub.CompactStoreStats); Sum.CreateReferencePrunersMS += Sub.CreateReferencePrunersMS; Sum.ElapsedMS += Sub.ElapsedMS; } GcResult& Sum(GcResult& Stat, bool Cancelled = false) { for (std::pair& Referencer : Stat.ReferencerStats) { GcReferencerStats& SubStat = Referencer.second; Sum(SubStat); Add(Stat.ReferencerStatSum, SubStat); } for (std::pair& ReferenceStore : Stat.ReferenceStoreStats) { GcReferenceStoreStats& SubStat = ReferenceStore.second; Sum(SubStat); Add(Stat.ReferenceStoreStatSum, SubStat); } Sum(Stat.ReferencerStatSum); Sum(Stat.ReferenceStoreStatSum); Add(Stat.CompactStoresStatSum, Stat.ReferencerStatSum.CompactStoreStats); Add(Stat.CompactStoresStatSum, Stat.ReferenceStoreStatSum.CompactStoreStats); Stat.WasCancelled = Cancelled; return Stat; } void GcManager::AddGcReferencer(GcReferencer& Referencer) { RwLock::ExclusiveLockScope _(m_Lock); m_GcReferencers.push_back(&Referencer); } void GcManager::RemoveGcReferencer(GcReferencer& Referencer) { RwLock::ExclusiveLockScope _(m_Lock); std::erase_if(m_GcReferencers, [&](GcReferencer* $) { return $ == &Referencer; }); } void GcManager::AddGcReferenceStore(GcReferenceStore& ReferenceStore) { RwLock::ExclusiveLockScope _(m_Lock); m_GcReferenceStores.push_back(&ReferenceStore); } void GcManager::RemoveGcReferenceStore(GcReferenceStore& ReferenceStore) { RwLock::ExclusiveLockScope _(m_Lock); std::erase_if(m_GcReferenceStores, [&](GcReferenceStore* $) { return $ == &ReferenceStore; }); } #define SCOPED_TIMER(closure) \ Stopwatch $Timer##__LINE__; \ auto $Guard##__LINE = MakeGuard([&, &Timer = $Timer##__LINE__]() { closure }) GcResult GcManager::CollectGarbage(const GcSettings& Settings) { ZEN_TRACE_CPU("GcV2::CollectGarbage"); GcCtx Ctx{.Settings = Settings, .IsCancelledFlag = m_CancelGC}; GcResult Result; { Stopwatch TotalTimer; auto __ = MakeGuard([&]() { Result.ElapsedMS = std::chrono::milliseconds(TotalTimer.GetElapsedTimeMs()); }); RwLock::SharedLockScope GcLock(m_Lock); Result.ReferencerStats.resize(m_GcReferencers.size()); std::unordered_map, GcCompactStoreStats*> StoreCompactors; RwLock StoreCompactorsLock; WorkerThreadPool& ThreadPool = Settings.SingleThread ? GetSyncWorkerPool() : GetMediumWorkerPool(); ZEN_INFO("GCV2: Removing expired data from {} referencers", m_GcReferencers.size()); if (!m_GcReferencers.empty()) { if (CheckGCCancel()) { return Sum(Result, true); } ZEN_TRACE_CPU("GcV2::RemoveExpiredData"); Latch WorkLeft(1); { // First remove any cache keys that may own references SCOPED_TIMER(Result.RemoveExpiredDataMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs()); if (Ctx.Settings.Verbose) { ZEN_INFO("GCV2: Removed expired data for {} referenceners in {}", m_GcReferencers.size(), NiceTimeSpanMs(Result.RemoveExpiredDataMS.count())); }); for (size_t Index = 0; Index < m_GcReferencers.size(); Index++) { if (CheckGCCancel()) { WorkLeft.CountDown(); WorkLeft.Wait(); return Sum(Result, true); } GcReferencer* Owner = m_GcReferencers[Index]; std::pair* Stats = &Result.ReferencerStats[Index]; WorkLeft.AddCount(1); ThreadPool.ScheduleWork([this, &Ctx, &WorkLeft, Owner, Stats, &StoreCompactorsLock, &StoreCompactors]() { auto _ = MakeGuard([&WorkLeft]() { WorkLeft.CountDown(); }); try { Stats->first = Owner->GetGcName(Ctx); SCOPED_TIMER(Stats->second.RemoveExpiredDataStats.ElapsedMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs());); std::unique_ptr StoreCompactor( Owner->RemoveExpiredData(Ctx, Stats->second.RemoveExpiredDataStats)); if (StoreCompactor) { RwLock::ExclusiveLockScope __(StoreCompactorsLock); StoreCompactors.insert_or_assign(std::move(StoreCompactor), &Stats->second.CompactStoreStats); } } catch (const std::exception& Ex) { ZEN_ERROR("GCV2: Failed removing expired data for {}. Reason: '{}'", Owner->GetGcName(Ctx), Ex.what()); } }); } WorkLeft.CountDown(); WorkLeft.Wait(); } } if (!Ctx.Settings.SkipCidDelete) { if (CheckGCCancel()) { return Sum(Result, true); } Result.ReferenceStoreStats.resize(m_GcReferenceStores.size()); ZEN_INFO("GCV2: Creating reference pruners from {} reference stores", m_GcReferenceStores.size()); std::unordered_map> ReferencePruners; if (!m_GcReferenceStores.empty()) { ZEN_TRACE_CPU("GcV2::CreateReferencePruners"); ReferencePruners.reserve(m_GcReferenceStores.size()); Latch WorkLeft(1); RwLock ReferencePrunersLock; { // CreateReferencePruner is usually not very heavy but big data sets change that SCOPED_TIMER(Result.CreateReferencePrunersMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs()); if (Ctx.Settings.Verbose) { ZEN_INFO("GCV2: Created {} reference pruners using {} referencer stores in {}", ReferencePruners.size(), m_GcReferenceStores.size(), NiceTimeSpanMs(Result.CreateReferencePrunersMS.count())); }); for (size_t Index = 0; Index < m_GcReferenceStores.size(); Index++) { if (CheckGCCancel()) { WorkLeft.CountDown(); WorkLeft.Wait(); return Sum(Result, true); } GcReferenceStore* ReferenceStore = m_GcReferenceStores[Index]; std::pair* Stats = &Result.ReferenceStoreStats[Index]; WorkLeft.AddCount(1); ThreadPool.ScheduleWork( [this, &Ctx, ReferenceStore, Stats, Index, &WorkLeft, &ReferencePrunersLock, &ReferencePruners]() { auto _ = MakeGuard([&WorkLeft]() { WorkLeft.CountDown(); }); try { Stats->first = ReferenceStore->GetGcName(Ctx); std::unique_ptr ReferencePruner; { SCOPED_TIMER(Stats->second.CreateReferencePrunersMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs());); // The ReferenceStore will pick a list of CId entries to check, returning a collector ReferencePruner = std::unique_ptr(ReferenceStore->CreateReferencePruner(Ctx, Stats->second)); } if (ReferencePruner) { RwLock::ExclusiveLockScope __(ReferencePrunersLock); ReferencePruners.insert_or_assign(Index, std::move(ReferencePruner)); } } catch (const std::exception& Ex) { ZEN_ERROR("GCV2: Failed creating reference pruners for {}. Reason: '{}'", ReferenceStore->GetGcName(Ctx), Ex.what()); } }); } WorkLeft.CountDown(); WorkLeft.Wait(); } } if (!ReferencePruners.empty()) { if (CheckGCCancel()) { return Sum(Result, true); } ZEN_INFO("GCV2: Creating reference checkers from {} referencers", m_GcReferencers.size()); std::unordered_map, size_t> ReferenceCheckers; if (!m_GcReferencers.empty()) { ZEN_TRACE_CPU("GcV2::CreateReferenceCheckers"); ReferenceCheckers.reserve(m_GcReferencers.size()); Latch WorkLeft(1); RwLock ReferenceCheckersLock; { SCOPED_TIMER(Result.CreateReferenceCheckersMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs()); if (Ctx.Settings.Verbose) { ZEN_INFO("GCV2: Created {} reference checkers using {} referencers in {}", ReferenceCheckers.size(), m_GcReferencers.size(), NiceTimeSpanMs(Result.CreateReferenceCheckersMS.count())); }); // Lock all reference owners from changing the reference data and get access to check for referenced data for (size_t Index = 0; Index < m_GcReferencers.size(); Index++) { if (CheckGCCancel()) { WorkLeft.CountDown(); WorkLeft.Wait(); return Sum(Result, true); } GcReferencer* Referencer = m_GcReferencers[Index]; std::pair* Stats = &Result.ReferencerStats[Index]; WorkLeft.AddCount(1); ThreadPool.ScheduleWork( [this, &Ctx, &WorkLeft, Referencer, Index, Stats, &ReferenceCheckersLock, &ReferenceCheckers]() { auto _ = MakeGuard([&WorkLeft]() { WorkLeft.CountDown(); }); // The Referencer will create a reference checker that guarantees that the references do not change as // long as it lives std::vector Checkers; try { { SCOPED_TIMER(Stats->second.CreateReferenceCheckersMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs());); Checkers = Referencer->CreateReferenceCheckers(Ctx); } if (!Checkers.empty()) { RwLock::ExclusiveLockScope __(ReferenceCheckersLock); for (auto& Checker : Checkers) { ReferenceCheckers.insert_or_assign(std::unique_ptr(Checker), Index); Checker = nullptr; } } } catch (const std::exception& Ex) { ZEN_ERROR("GCV2: Failed creating reference checkers for {}. Reason: '{}'", Referencer->GetGcName(Ctx), Ex.what()); while (!Checkers.empty()) { delete Checkers.back(); Checkers.pop_back(); } } }); } WorkLeft.CountDown(); WorkLeft.Wait(); } } { ZEN_INFO("GCV2: Precaching state for {} reference checkers", ReferenceCheckers.size()); if (!ReferenceCheckers.empty()) { if (CheckGCCancel()) { return Sum(Result, true); } ZEN_TRACE_CPU("GcV2::PreCache"); Latch WorkLeft(1); { SCOPED_TIMER(Result.PreCacheStateMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs()); if (Ctx.Settings.Verbose) { ZEN_INFO("GCV2: Precached state using {} reference checkers in {}", ReferenceCheckers.size(), NiceTimeSpanMs(Result.PreCacheStateMS.count())); }); for (auto& It : ReferenceCheckers) { if (CheckGCCancel()) { WorkLeft.CountDown(); WorkLeft.Wait(); return Sum(Result, true); } GcReferenceChecker* Checker = It.first.get(); size_t Index = It.second; std::pair* Stats = &Result.ReferencerStats[Index]; WorkLeft.AddCount(1); ThreadPool.ScheduleWork([this, &Ctx, Checker, Index, Stats, &WorkLeft]() { auto _ = MakeGuard([&WorkLeft]() { WorkLeft.CountDown(); }); try { SCOPED_TIMER(Stats->second.PreCacheStateMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs());); Checker->PreCache(Ctx); } catch (const std::exception& Ex) { ZEN_ERROR("GCV2: Failed precaching for {}. Reason: '{}'", Checker->GetGcName(Ctx), Ex.what()); } }); } WorkLeft.CountDown(); WorkLeft.Wait(); } } } SCOPED_TIMER(uint64_t ElapsedMS = Timer.GetElapsedTimeMs(); Result.WriteBlockMS = std::chrono::milliseconds(ElapsedMS); ZEN_INFO("GCV2: Writes blocked for {}", NiceTimeSpanMs(ElapsedMS))); { ZEN_INFO("GCV2: Locking state for {} reference checkers", ReferenceCheckers.size()); if (!ReferenceCheckers.empty()) { if (CheckGCCancel()) { return Sum(Result, true); } ZEN_TRACE_CPU("GcV2::LockState"); // Locking all references checkers so we have a steady state of which references are used // From this point we have blocked all writes to all References (DiskBucket/ProjectStore) until // we delete the ReferenceCheckers Latch WorkLeft(1); { SCOPED_TIMER(Result.LockStateMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs()); if (Ctx.Settings.Verbose) { ZEN_INFO("GCV2: Locked state using {} reference checkers in {}", ReferenceCheckers.size(), NiceTimeSpanMs(Result.LockStateMS.count())); }); for (auto& It : ReferenceCheckers) { if (CheckGCCancel()) { WorkLeft.CountDown(); WorkLeft.Wait(); return Sum(Result, true); } GcReferenceChecker* Checker = It.first.get(); size_t Index = It.second; std::pair* Stats = &Result.ReferencerStats[Index]; WorkLeft.AddCount(1); ThreadPool.ScheduleWork([this, &Ctx, Checker, Index, Stats, &WorkLeft]() { auto _ = MakeGuard([&WorkLeft]() { WorkLeft.CountDown(); }); try { SCOPED_TIMER(Stats->second.LockStateMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs());); Checker->LockState(Ctx); } catch (const std::exception& Ex) { ZEN_ERROR("GCV2: Failed locking state for {}. Reason: '{}'", Checker->GetGcName(Ctx), Ex.what()); } }); } WorkLeft.CountDown(); WorkLeft.Wait(); } } } { ZEN_INFO("GCV2: Removing unreferenced data for {} reference pruners", ReferencePruners.size()); { const auto GetUnusedReferences = [&ReferenceCheckers, &Ctx](std::span References) -> std::vector { HashSet UnusedCids(References.begin(), References.end()); for (const auto& It : ReferenceCheckers) { GcReferenceChecker* ReferenceChecker = It.first.get(); ReferenceChecker->RemoveUsedReferencesFromSet(Ctx, UnusedCids); if (UnusedCids.empty()) { return {}; } } return std::vector(UnusedCids.begin(), UnusedCids.end()); }; // checking all Cids agains references in cache // Ask stores to remove data that the ReferenceCheckers says are not referenced - this should be a lightweight // operation that only updates in-memory index, actual disk changes should be done by the ReferenceStoreCompactors ZEN_TRACE_CPU("GcV2::RemoveUnreferencedData"); Latch WorkLeft(1); { SCOPED_TIMER(Result.RemoveUnreferencedDataMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs()); if (Ctx.Settings.Verbose) { ZEN_INFO("GCV2: Removed unused data using {} pruners in {}", ReferencePruners.size(), NiceTimeSpanMs(Result.RemoveUnreferencedDataMS.count())); }); for (auto& It : ReferencePruners) { if (CheckGCCancel()) { WorkLeft.CountDown(); WorkLeft.Wait(); return Sum(Result, true); } GcReferencePruner* Pruner = It.second.get(); size_t Index = It.first; GcReferenceStoreStats* Stats = &Result.ReferenceStoreStats[Index].second; WorkLeft.AddCount(1); ThreadPool.ScheduleWork( [this, &Ctx, Pruner, Stats, &WorkLeft, &GetUnusedReferences, &StoreCompactorsLock, &StoreCompactors]() { auto _ = MakeGuard([&WorkLeft]() { WorkLeft.CountDown(); }); // Go through all the ReferenceCheckers to see if the list of Cids the collector selected are // referenced or not. try { std::unique_ptr StoreCompactor; { SCOPED_TIMER(Stats->RemoveUnreferencedDataStats.ElapsedMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs());); StoreCompactor = std::unique_ptr( Pruner->RemoveUnreferencedData(Ctx, Stats->RemoveUnreferencedDataStats, GetUnusedReferences)); } if (StoreCompactor) { RwLock::ExclusiveLockScope __(StoreCompactorsLock); StoreCompactors.insert_or_assign(std::move(StoreCompactor), &Stats->CompactStoreStats); } } catch (const std::exception& Ex) { ZEN_ERROR("GCV2: Failed locking state for {}. Reason: '{}'", Pruner->GetGcName(Ctx), Ex.what()); } }); } WorkLeft.CountDown(); WorkLeft.Wait(); } } // Let the GcReferencers add new data, we will only change on-disk data at this point, adding new data is allowed ReferenceCheckers.clear(); ReferencePruners.clear(); } } } ZEN_INFO("GCV2: Compacting using {} store compactors", StoreCompactors.size()); if (!StoreCompactors.empty()) { if (CheckGCCancel()) { return Sum(Result, true); } ZEN_TRACE_CPU("GcV2::CompactStores"); auto ClaimDiskReserve = [&]() -> uint64_t { if (!std::filesystem::is_regular_file(Settings.DiskReservePath)) { return 0; } uint64_t ReclaimedSize = std::filesystem::file_size(Settings.DiskReservePath); if (std::filesystem::remove(Settings.DiskReservePath)) { return ReclaimedSize; } return 0; }; // Remove the stuff we deemed unreferenced from disk - may be heavy operation // Don't do in parallel, we don't want to steal CPU/Disk from regular operation { SCOPED_TIMER(Result.CompactStoresMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs()); if (Ctx.Settings.Verbose) { ZEN_INFO("GCV2: Compacted {} stores in {}", StoreCompactors.size(), NiceTimeSpanMs(Result.CompactStoresMS.count())); }); for (auto& It : StoreCompactors) { if (CheckGCCancel()) { return Sum(Result, true); } GcStoreCompactor* Compactor = It.first.get(); GcCompactStoreStats& Stats = *It.second; { // Go through all the ReferenceCheckers to see if the list of Cids the collector selected are referenced or not. SCOPED_TIMER(Stats.ElapsedMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs());); Compactor->CompactStore(Ctx, Stats, ClaimDiskReserve); } } } StoreCompactors.clear(); } ZEN_INFO("GCV2: Completed in {}", NiceTimeSpanMs(TotalTimer.GetElapsedTimeMs())); } return Sum(Result); } #undef SCOPED_TIMER //////// End GC V2 void GcManager::SetCancelGC(bool CancelFlag) { m_CancelGC.store(CancelFlag); } void GcManager::AddGcContributor(GcContributor* Contributor) { RwLock::ExclusiveLockScope _(m_Lock); m_GcContribs.push_back(Contributor); } void GcManager::RemoveGcContributor(GcContributor* Contributor) { RwLock::ExclusiveLockScope _(m_Lock); std::erase_if(m_GcContribs, [&](GcContributor* $) { return $ == Contributor; }); } void GcManager::AddGcStorage(GcStorage* Storage) { ZEN_ASSERT(Storage != nullptr); RwLock::ExclusiveLockScope _(m_Lock); m_GcStorage.push_back(Storage); } void GcManager::RemoveGcStorage(GcStorage* Storage) { RwLock::ExclusiveLockScope _(m_Lock); std::erase_if(m_GcStorage, [&](GcStorage* $) { return $ == Storage; }); } void GcManager::ScrubStorage(ScrubContext& GcCtx) { RwLock::SharedLockScope _(m_Lock); for (GcStorage* Storage : m_GcStorage) { Storage->ScrubStorage(GcCtx); } } GcStorageSize GcManager::CollectGarbage(GcContext& GcCtx) { ZEN_TRACE_CPU("Gc::CollectGarbage"); GcStorageSize GCTotalSizeDiff; RwLock::SharedLockScope _(m_Lock); // First gather reference set { ZEN_TRACE_CPU("Gc::CollectGarbage::GatherReferences"); Stopwatch Timer; const auto Guard = MakeGuard([&] { ZEN_INFO("gathered references in {}", NiceTimeSpanMs(Timer.GetElapsedTimeMs())); }); for (GcContributor* Contributor : m_GcContribs) { if (CheckGCCancel()) { return GCTotalSizeDiff; } Contributor->GatherReferences(GcCtx); } } // Then trim storage { ZEN_TRACE_CPU("Gc::CollectGarbage::CollectGarbage"); Stopwatch Timer; const auto Guard = MakeGuard([&] { ZEN_INFO("collected garbage in {}. Removed {} disk space, {} memory", NiceTimeSpanMs(Timer.GetElapsedTimeMs()), NiceBytes(GCTotalSizeDiff.DiskSize), NiceBytes(GCTotalSizeDiff.MemorySize)); }); for (GcStorage* Storage : m_GcStorage) { if (CheckGCCancel()) { break; } const auto PreSize = Storage->StorageSize(); Storage->CollectGarbage(GcCtx); const auto PostSize = Storage->StorageSize(); GCTotalSizeDiff.DiskSize += PreSize.DiskSize > PostSize.DiskSize ? PreSize.DiskSize - PostSize.DiskSize : 0; GCTotalSizeDiff.MemorySize += PreSize.MemorySize > PostSize.MemorySize ? PreSize.MemorySize - PostSize.MemorySize : 0; } } return GCTotalSizeDiff; } GcStorageSize GcManager::TotalStorageSize() const { ZEN_TRACE_CPU("Gc::TotalStorageSize"); RwLock::SharedLockScope _(m_Lock); GcStorageSize TotalSize; for (GcStorage* Storage : m_GcStorage) { const auto Size = Storage->StorageSize(); TotalSize.DiskSize += Size.DiskSize; TotalSize.MemorySize += Size.MemorySize; } return TotalSize; } ////////////////////////////////////////////////////////////////////////// void DiskUsageWindow::KeepRange(GcClock::Tick StartTick, GcClock::Tick EndTick) { auto It = m_LogWindow.begin(); if (It == m_LogWindow.end()) { return; } while (It->SampleTime < StartTick) { ++It; if (It == m_LogWindow.end()) { m_LogWindow.clear(); return; } } m_LogWindow.erase(m_LogWindow.begin(), It); It = m_LogWindow.begin(); while (It != m_LogWindow.end()) { if (It->SampleTime >= EndTick) { m_LogWindow.erase(It, m_LogWindow.end()); return; } It++; } } std::vector DiskUsageWindow::GetDiskDeltas(GcClock::Tick StartTick, GcClock::Tick EndTick, GcClock::Tick DeltaWidth, uint64_t& OutMaxDelta) const { ZEN_ASSERT(StartTick != -1); ZEN_ASSERT(DeltaWidth > 0); std::vector Result; Result.reserve((EndTick - StartTick + DeltaWidth - 1) / DeltaWidth); size_t WindowSize = m_LogWindow.size(); GcClock::Tick FirstWindowTick = WindowSize < 2 ? EndTick : m_LogWindow[1].SampleTime; GcClock::Tick RangeStart = StartTick; while (FirstWindowTick >= RangeStart + DeltaWidth && RangeStart < EndTick) { Result.push_back(0); RangeStart += DeltaWidth; } uint64_t DeltaSum = 0; size_t WindowIndex = 1; while (WindowIndex < WindowSize && RangeStart < EndTick) { const DiskUsageEntry& Entry = m_LogWindow[WindowIndex]; if (Entry.SampleTime < RangeStart) { ++WindowIndex; continue; } GcClock::Tick RangeEnd = Min(EndTick, RangeStart + DeltaWidth); ZEN_ASSERT(Entry.SampleTime >= RangeStart); if (Entry.SampleTime >= RangeEnd) { Result.push_back(DeltaSum); OutMaxDelta = Max(DeltaSum, OutMaxDelta); DeltaSum = 0; RangeStart = RangeEnd; continue; } const DiskUsageEntry& PrevEntry = m_LogWindow[WindowIndex - 1]; if (Entry.DiskUsage > PrevEntry.DiskUsage) { uint64_t Delta = Entry.DiskUsage - PrevEntry.DiskUsage; DeltaSum += Delta; } WindowIndex++; } while (RangeStart < EndTick) { Result.push_back(DeltaSum); OutMaxDelta = Max(DeltaSum, OutMaxDelta); DeltaSum = 0; RangeStart += DeltaWidth; } return Result; } GcClock::Tick DiskUsageWindow::FindTimepointThatRemoves(uint64_t Amount, GcClock::Tick EndTick) const { ZEN_ASSERT(Amount > 0); uint64_t RemainingToFind = Amount; size_t Offset = 1; while (Offset < m_LogWindow.size()) { const DiskUsageEntry& Entry = m_LogWindow[Offset]; if (Entry.SampleTime >= EndTick) { return EndTick; } const DiskUsageEntry& PreviousEntry = m_LogWindow[Offset - 1]; uint64_t Delta = Entry.DiskUsage > PreviousEntry.DiskUsage ? Entry.DiskUsage - PreviousEntry.DiskUsage : 0; if (Delta >= RemainingToFind) { return m_LogWindow[Offset].SampleTime + 1; } RemainingToFind -= Delta; Offset++; } return EndTick; } ////////////////////////////////////////////////////////////////////////// GcScheduler::GcScheduler(GcManager& GcManager) : m_Log(logging::Get("gc")), m_GcManager(GcManager) { m_GcManager.SetDiskWriteBlocker(this); } GcScheduler::~GcScheduler() { m_GcManager.SetDiskWriteBlocker(nullptr); Shutdown(); } void GcScheduler::Initialize(const GcSchedulerConfig& Config) { using namespace std::chrono; m_Config = Config; if (m_Config.Interval.count() && m_Config.Interval < m_Config.MonitorInterval) { m_Config.Interval = m_Config.MonitorInterval; } if (m_Config.LightweightInterval.count() && m_Config.LightweightInterval < m_Config.MonitorInterval) { m_Config.LightweightInterval = m_Config.MonitorInterval; } std::filesystem::create_directories(Config.RootDirectory); std::error_code Ec = CreateGCReserve(m_Config.RootDirectory / "reserve.gc", m_Config.DiskReserveSize); if (Ec) { ZEN_WARN("unable to create GC reserve at '{}' with size {}, reason '{}'", m_Config.RootDirectory / "reserve.gc", NiceBytes(m_Config.DiskReserveSize), Ec.message()); } CheckDiskSpace(); m_LastGcTime = GcClock::Now(); m_LastLightweightGcTime = m_LastGcTime; m_LastGcExpireTime = GcClock::TimePoint::min(); if (CbObject SchedulerState = LoadCompactBinaryObject(Config.RootDirectory / "gc_state")) { m_LastGcTime = GcClock::TimePoint(GcClock::Duration(SchedulerState["LastGcTime"sv].AsInt64())); m_LastGcExpireTime = GcClock::TimePoint(GcClock::Duration(SchedulerState["LastGcExpireTime"].AsInt64(GcClock::Duration::min().count()))); if (m_LastGcTime + m_Config.Interval < GcClock::Now()) { // TODO: Trigger GC? m_LastGcTime = GcClock::Now(); m_LastLightweightGcTime = m_LastGcTime; } } m_DiskUsageLog.Open(m_Config.RootDirectory / "gc.dlog", CasLogFile::Mode::kWrite); m_DiskUsageLog.Initialize(); const GcClock::Tick LastGCTick = m_LastGcTime.time_since_epoch().count(); m_DiskUsageLog.Replay( [this, LastGCTick](const DiskUsageWindow::DiskUsageEntry& Entry) { if (Entry.SampleTime >= m_LastGcExpireTime.time_since_epoch().count()) { m_DiskUsageWindow.Append(Entry); } }, 0); m_GcThread = std::thread(&GcScheduler::SchedulerThread, this); } void GcScheduler::Shutdown() { if (static_cast(GcSchedulerStatus::kStopped) != m_Status) { bool GcIsRunning = m_Status == static_cast(GcSchedulerStatus::kRunning); if (GcIsRunning) { ZEN_INFO("Requesting cancel running garbage collection"); } m_GcManager.SetCancelGC(true); m_Status = static_cast(GcSchedulerStatus::kStopped); m_GcSignal.notify_one(); if (m_GcThread.joinable()) { if (GcIsRunning) { ZEN_INFO("Waiting for garbage collection to complete"); } m_GcThread.join(); } } m_DiskUsageLog.Flush(); m_DiskUsageLog.Close(); } bool GcScheduler::TriggerGc(const GcScheduler::TriggerGcParams& Params) { std::unique_lock Lock(m_GcMutex); if (static_cast(GcSchedulerStatus::kIdle) == m_Status) { m_TriggerGcParams = Params; uint32_t IdleState = static_cast(GcSchedulerStatus::kIdle); if (m_Status.compare_exchange_strong(/* expected */ IdleState, /* desired */ static_cast(GcSchedulerStatus::kRunning))) { m_GcSignal.notify_one(); return true; } } return false; } bool GcScheduler::TriggerScrub(const TriggerScrubParams& Params) { std::unique_lock Lock(m_GcMutex); if (static_cast(GcSchedulerStatus::kIdle) == m_Status) { m_TriggerScrubParams = Params; uint32_t IdleState = static_cast(GcSchedulerStatus::kIdle); if (m_Status.compare_exchange_strong(/* expected */ IdleState, /* desired */ static_cast(GcSchedulerStatus::kRunning))) { m_GcSignal.notify_one(); return true; } } return false; } bool GcScheduler::CancelGC() { std::unique_lock Lock(m_GcMutex); if (static_cast(GcSchedulerStatus::kRunning) == m_Status) { ZEN_INFO("Cancel requested for running garbage collection"); m_GcManager.SetCancelGC(true); return true; } return false; } DiskSpace GcScheduler::CheckDiskSpace() { std::error_code Ec; DiskSpace Space = DiskSpaceInfo(m_Config.RootDirectory, Ec); if (Ec) { m_AreDiskWritesBlocked.store(true); ZEN_WARN("get disk space info for path '{}' FAILED, reason: '{}'", m_Config.RootDirectory, Ec.message()); return {0, 0}; } bool AreDiskWritesBlocked = m_AreDiskWritesBlocked; bool IsLowOnDiskSpace = (m_Config.MinimumFreeDiskSpaceToAllowWrites) != 0 && (Space.Free < m_Config.MinimumFreeDiskSpaceToAllowWrites); if (IsLowOnDiskSpace != AreDiskWritesBlocked) { m_AreDiskWritesBlocked.store(IsLowOnDiskSpace); if (IsLowOnDiskSpace) { ZEN_WARN("Writing to disk is blocked, free disk space: {}, minimum required {}", NiceBytes(Space.Free), NiceBytes(m_Config.MinimumFreeDiskSpaceToAllowWrites)); } else { ZEN_INFO("Writing to disk is unblocked, free disk space: {}, minimum required {}", NiceBytes(Space.Free), NiceBytes(m_Config.MinimumFreeDiskSpaceToAllowWrites)); } } return Space; } void GcScheduler::AppendGCLog(GcClock::TimePoint StartTime, const GcSettings& Settings, const GcResult& Result) { try { std::vector Blob; { CbObjectWriter Writer; std::string Id = fmt::format("{}", gsl::narrow(StartTime.time_since_epoch().count())); Writer.BeginObject(Id); { Writer << "StartTime"sv << ToDateTime(StartTime); Writer.BeginObject("Settings"sv); { Writer << "CacheExpireTime"sv << ToDateTime(Settings.CacheExpireTime); Writer << "ProjectStoreExpireTime"sv << ToDateTime(Settings.ProjectStoreExpireTime); Writer << "CollectSmallObjects"sv << Settings.CollectSmallObjects; Writer << "IsDeleteMode"sv << Settings.IsDeleteMode; Writer << "SkipCidDelete"sv << Settings.SkipCidDelete; Writer << "Verbose"sv << Settings.Verbose; Writer << "SingleThread"sv << Settings.SingleThread; Writer << "CompactBlockUsageThresholdPercent"sv << Settings.CompactBlockUsageThresholdPercent; } Writer.EndObject(); const bool HumanReadable = false; const bool IncludeDetails = true; Writer.BeginObject("Result"sv); { WriteGCResult(Writer, Result, HumanReadable, IncludeDetails); } Writer.EndObject(); } Writer.EndObject(); CbObject Entry = Writer.Save(); Entry.AsFieldView().WriteToStream([&](const void* Data, size_t Size) { const uint8_t* BlobData(reinterpret_cast(Data)); Blob.insert(Blob.end(), BlobData, BlobData + Size); }); } BasicFile GcLogFile; const fs::path Path = m_Config.RootDirectory / "gc.log"; MemoryView EntryBuffer(Blob.data(), Blob.size()); { RwLock::ExclusiveLockScope _(m_GcLogLock); GcLogFile.Open(Path, BasicFile::Mode::kWrite); uint64_t AppendPos = GcLogFile.FileSize(); const uint64_t MaxGcLogFileSize = 1024 * 1024 * 8; const uint64_t MaxGcLogFileCount = 16; if (AppendPos + EntryBuffer.GetSize() > MaxGcLogFileSize) { GcLogFile.Close(); std::error_code Err = RotateFiles(Path, MaxGcLogFileCount); if (Err) { ZEN_WARN("Failed to rotate gc log files at '{}'. Reason: '{}'", Path, Err.message()); } GcLogFile.Open(Path, BasicFile::Mode::kWrite); AppendPos = 0; } GcLogFile.Write(EntryBuffer, AppendPos); } } catch (const std::system_error& SystemError) { if (IsOOM(SystemError.code())) { ZEN_WARN("writing gc result ran out of memory: '{}'", SystemError.what()); } else if (IsOOD(SystemError.code())) { ZEN_WARN("writing gc result ran out of disk space: '{}'", SystemError.what()); } else { ZEN_ERROR("writing gc result failed with system error exception: '{}'", SystemError.what()); } } catch (const std::bad_alloc& BadAlloc) { ZEN_WARN("writing gc result ran out of memory: '{}'", BadAlloc.what()); } catch (const std::exception& Ex) { ZEN_ERROR("writing gc result failed with: '{}'", Ex.what()); } } GcSchedulerState GcScheduler::GetState() const { GcClock::TimePoint Now = GcClock::Now(); const GcStorageSize TotalSize = m_GcManager.TotalStorageSize(); GcSchedulerState Result{.Status = Status(), .Config = m_Config, .AreDiskWritesBlocked = m_AreDiskWritesBlocked.load()}; std::error_code Ec; DiskSpace Space = DiskSpaceInfo(Result.Config.RootDirectory, Ec); if (!Ec) { Result.DiskSize = Space.Total; Result.DiskUsed = Space.Total - Space.Free; Result.DiskFree = Space.Free; if (Result.Config.DiskSizeSoftLimit != 0) { Result.RemainingSpaceUntilFullGC = Result.Config.DiskSizeSoftLimit > TotalSize.DiskSize ? (Result.Config.DiskSizeSoftLimit - TotalSize.DiskSize) : 0; } } if (Result.Config.DiskReserveSize != 0) { Ec.clear(); Result.HasDiskReserve = std::filesystem::is_regular_file(Result.Config.RootDirectory / "reserve.gc", Ec) && !Ec; } if (Result.Status != GcSchedulerStatus::kRunning) { { std::unique_lock Lock(m_GcMutex); Result.LastFullGcTime = m_LastGcTime; Result.LastFullGCDiff = m_LastFullGCDiff; Result.LastFullGcDuration = m_LastFullGcDuration; Result.LastLightweightGcTime = m_LastLightweightGcTime; Result.LastLightweightGCDiff = m_LastLightweightGCDiff; Result.LastLightweightGcDuration = m_LastLightweightGcDuration; Result.LastLightweightGCV2Result = m_LastLightweightGCV2Result; Result.LastFullGCV2Result = m_LastFullGCV2Result; } Result.RemainingTimeUntilFullGc = Result.Config.Interval.count() == 0 ? std::chrono::seconds::max() : std::chrono::duration_cast(Result.LastFullGcTime + Result.Config.Interval - Now); if (Result.RemainingTimeUntilFullGc < std::chrono::seconds::zero()) { Result.RemainingTimeUntilFullGc = std::chrono::seconds::zero(); } Result.RemainingTimeUntilLightweightGc = Result.Config.LightweightInterval.count() == 0 ? std::chrono::seconds::max() : std::chrono::duration_cast(Result.LastLightweightGcTime + Result.Config.LightweightInterval - Now); if (Result.RemainingTimeUntilLightweightGc < std::chrono::seconds::zero()) { Result.RemainingTimeUntilLightweightGc = std::chrono::seconds::zero(); } } return Result; } void GcScheduler::SchedulerThread() { SetCurrentThreadName("GcScheduler"); std::chrono::seconds WaitTime{0}; for (;;) { bool Timeout = false; { ZEN_ASSERT(WaitTime.count() >= 0); std::unique_lock Lock(m_GcMutex); Timeout = std::cv_status::timeout == m_GcSignal.wait_for(Lock, WaitTime); } if (Status() == GcSchedulerStatus::kStopped) { break; } if (!m_Config.Enabled && !m_TriggerScrubParams && !m_TriggerGcParams) { WaitTime = std::chrono::seconds::max(); continue; } if (!Timeout && Status() == GcSchedulerStatus::kIdle) { continue; } try { bool DoGc = m_Config.Enabled; bool DoScrubbing = false; std::chrono::seconds ScrubTimeslice = std::chrono::seconds::max(); bool DoDelete = true; bool CollectSmallObjects = m_Config.CollectSmallObjects; std::chrono::seconds GcInterval = m_Config.Interval; std::chrono::seconds LightweightGcInterval = m_Config.LightweightInterval; std::chrono::seconds MaxCacheDuration = m_Config.MaxCacheDuration; std::chrono::seconds MaxProjectStoreDuration = m_Config.MaxProjectStoreDuration; uint64_t DiskSizeSoftLimit = m_Config.DiskSizeSoftLimit; bool SkipCid = false; GcVersion UseGCVersion = m_Config.UseGCVersion; uint32_t CompactBlockUsageThresholdPercent = m_Config.CompactBlockUsageThresholdPercent; bool Verbose = m_Config.Verbose; bool DiskSpaceGCTriggered = false; bool TimeBasedGCTriggered = false; GcClock::TimePoint Now = GcClock::Now(); if (m_TriggerGcParams) { const auto TriggerParams = m_TriggerGcParams.value(); m_TriggerGcParams.reset(); CollectSmallObjects = TriggerParams.CollectSmallObjects; if (TriggerParams.MaxCacheDuration != std::chrono::seconds::max()) { MaxCacheDuration = TriggerParams.MaxCacheDuration; } if (TriggerParams.MaxProjectStoreDuration != std::chrono::seconds::max()) { MaxProjectStoreDuration = TriggerParams.MaxProjectStoreDuration; } if (TriggerParams.DiskSizeSoftLimit != 0) { DiskSizeSoftLimit = TriggerParams.DiskSizeSoftLimit; } if (TriggerParams.SkipCid) { SkipCid = true; } if (TriggerParams.SkipDelete) { DoDelete = false; } UseGCVersion = TriggerParams.ForceGCVersion.value_or(UseGCVersion); CompactBlockUsageThresholdPercent = TriggerParams.CompactBlockUsageThresholdPercent.value_or(CompactBlockUsageThresholdPercent); Verbose = TriggerParams.Verbose.value_or(Verbose); DoGc = true; } if (m_TriggerScrubParams) { DoScrubbing = true; if (m_TriggerScrubParams->SkipGc) { DoGc = false; } if (m_TriggerScrubParams->SkipCas) { SkipCid = true; } DoDelete = !m_TriggerScrubParams->SkipDelete; ScrubTimeslice = m_TriggerScrubParams->MaxTimeslice; } if (DoScrubbing) { ScrubStorage(DoDelete, SkipCid, ScrubTimeslice); m_TriggerScrubParams.reset(); } if (!DoGc) { continue; } GcClock::TimePoint CacheExpireTime = MaxCacheDuration == GcClock::Duration::max() ? GcClock::TimePoint::min() : Now - MaxCacheDuration; GcClock::TimePoint ProjectStoreExpireTime = MaxProjectStoreDuration == GcClock::Duration::max() ? GcClock::TimePoint::min() : Now - MaxProjectStoreDuration; const GcStorageSize TotalSize = m_GcManager.TotalStorageSize(); if (Timeout && Status() == GcSchedulerStatus::kIdle) { DiskSpace Space = CheckDiskSpace(); const int64_t PressureGraphLength = 30; const std::chrono::duration LoadGraphTime = PressureGraphLength * m_Config.MonitorInterval; std::vector DiskDeltas; uint64_t MaxLoad = 0; { const GcClock::Tick EpochTickCount = GcClock::Now().time_since_epoch().count(); std::unique_lock Lock(m_GcMutex); if (AreDiskWritesAllowed()) { m_DiskUsageLog.Append({.SampleTime = EpochTickCount, .DiskUsage = TotalSize.DiskSize}); } m_DiskUsageWindow.Append({.SampleTime = EpochTickCount, .DiskUsage = TotalSize.DiskSize}); const GcClock::TimePoint LoadGraphStartTime = Now - LoadGraphTime; const GcClock::Tick Start = LoadGraphStartTime.time_since_epoch().count(); const GcClock::Tick End = Now.time_since_epoch().count(); DiskDeltas = m_DiskUsageWindow.GetDiskDeltas(Start, End, Max(1, (End - Start + PressureGraphLength - 1) / PressureGraphLength), MaxLoad); } std::string LoadGraph; LoadGraph.resize(DiskDeltas.size(), '0'); if (DiskDeltas.size() > 0 && MaxLoad > 0) { static const char LoadIndicator[11] = "0123456789"; for (size_t Index = 0; Index < DiskDeltas.size(); ++Index) { size_t LoadIndex = (9 * DiskDeltas[Index] + MaxLoad - 1) / MaxLoad; LoadGraph[Index] = LoadIndicator[LoadIndex]; } } uint64_t GcDiskSpaceGoal = 0; if (DiskSizeSoftLimit != 0 && TotalSize.DiskSize > DiskSizeSoftLimit) { GcDiskSpaceGoal = TotalSize.DiskSize - DiskSizeSoftLimit; std::unique_lock Lock(m_GcMutex); GcClock::Tick AgeTick = m_DiskUsageWindow.FindTimepointThatRemoves(GcDiskSpaceGoal, Now.time_since_epoch().count()); GcClock::TimePoint SizeBasedExpireTime = GcClock::TimePointFromTick(AgeTick); if (SizeBasedExpireTime > CacheExpireTime) { CacheExpireTime = SizeBasedExpireTime; } if (SizeBasedExpireTime > ProjectStoreExpireTime) { ProjectStoreExpireTime = SizeBasedExpireTime; } } std::chrono::seconds RemainingTimeUntilGc = GcInterval.count() == 0 ? std::chrono::seconds::max() : std::chrono::duration_cast(m_LastGcTime + GcInterval - GcClock::Now()); if (RemainingTimeUntilGc < std::chrono::seconds::zero()) { RemainingTimeUntilGc = std::chrono::seconds::zero(); } std::chrono::seconds RemainingTimeUntilLightweightGc = LightweightGcInterval.count() == 0 ? std::chrono::seconds::max() : std::chrono::duration_cast( m_LastLightweightGcTime + LightweightGcInterval - GcClock::Now()); if (RemainingTimeUntilLightweightGc < std::chrono::seconds::zero()) { RemainingTimeUntilLightweightGc = std::chrono::seconds::zero(); } // Don't schedule a lightweight GC if a full GC is // due quite soon anyway if (RemainingTimeUntilGc < LightweightGcInterval) { RemainingTimeUntilLightweightGc = RemainingTimeUntilGc; } if (GcDiskSpaceGoal > 0) { DiskSpaceGCTriggered = true; } else if (RemainingTimeUntilGc.count() == 0) { TimeBasedGCTriggered = true; } else if (RemainingTimeUntilLightweightGc.count() == 0) { TimeBasedGCTriggered = true; SkipCid = true; } std::string NextTriggerStatus; if (GcInterval.count() != 0 || LightweightGcInterval.count() != 0 || DiskSizeSoftLimit != 0) { ExtendableStringBuilder<256> Sb; if (DiskSpaceGCTriggered) { Sb.Append(fmt::format(" Disk space exceeds {}, trying to reclaim {}.", NiceBytes(DiskSizeSoftLimit), NiceBytes(GcDiskSpaceGoal))); } else if (TimeBasedGCTriggered) { if (SkipCid) { Sb.Append(fmt::format(" Lightweight GC schedule triggered.")); } else { Sb.Append(fmt::format(" GC schedule triggered.")); } } else { if (GcInterval.count() != 0) { Sb.Append(fmt::format(" Full GC in {}.", NiceTimeSpanMs(uint64_t(std::chrono::milliseconds(RemainingTimeUntilGc).count())))); } if (LightweightGcInterval.count() != 0) { Sb.Append( fmt::format(" Lightweight GC in {}.", NiceTimeSpanMs(uint64_t(std::chrono::milliseconds(RemainingTimeUntilLightweightGc).count())))); } if (DiskSizeSoftLimit != 0 && DiskSizeSoftLimit > TotalSize.DiskSize) { Sb.Append(fmt::format(" Disk usage GC in {}.", NiceBytes(DiskSizeSoftLimit - TotalSize.DiskSize))); } } NextTriggerStatus = Sb; } ZEN_INFO("{} used{}. '{}': {} in use, {} free. Disk writes last {} per {} [{}], peak {}/s.{}", NiceBytes(TotalSize.DiskSize), DiskSizeSoftLimit == 0 ? "" : fmt::format(", {} soft limit", NiceBytes(DiskSizeSoftLimit)), m_Config.RootDirectory, NiceBytes(Space.Total - Space.Free), NiceBytes(Space.Free), NiceTimeSpanMs(uint64_t(std::chrono::milliseconds(LoadGraphTime).count())), NiceTimeSpanMs(uint64_t(std::chrono::milliseconds(LoadGraphTime).count() / PressureGraphLength)), LoadGraph, NiceBytes(MaxLoad / uint64_t(std::chrono::seconds(m_Config.MonitorInterval).count())), NextTriggerStatus); if (!DiskSpaceGCTriggered && !TimeBasedGCTriggered) { WaitTime = m_Config.MonitorInterval; if (RemainingTimeUntilGc < WaitTime) { WaitTime = RemainingTimeUntilGc; } if (RemainingTimeUntilLightweightGc < WaitTime) { WaitTime = RemainingTimeUntilLightweightGc; } continue; } uint32_t IdleState = static_cast(GcSchedulerStatus::kIdle); if (!m_Status.compare_exchange_strong(IdleState, static_cast(GcSchedulerStatus::kRunning))) { WaitTime = m_Config.MonitorInterval; continue; } } CollectGarbage(CacheExpireTime, ProjectStoreExpireTime, DoDelete, CollectSmallObjects, SkipCid, UseGCVersion, CompactBlockUsageThresholdPercent, Verbose); m_GcManager.SetCancelGC(false); uint32_t RunningState = static_cast(GcSchedulerStatus::kRunning); if (!m_Status.compare_exchange_strong(RunningState, static_cast(GcSchedulerStatus::kIdle))) { ZEN_ASSERT(m_Status == static_cast(GcSchedulerStatus::kStopped)); break; } WaitTime = std::chrono::seconds(0); } catch (const std::system_error& SystemError) { if (IsOOM(SystemError.code())) { ZEN_WARN("scheduling garbage collection ran out of memory: '{}'", SystemError.what()); } else if (IsOOD(SystemError.code())) { ZEN_WARN("scheduling garbage collection ran out of disk space: '{}'", SystemError.what()); } else { ZEN_ERROR("scheduling garbage collection failed with system error exception: '{}'", SystemError.what()); } m_LastGcTime = GcClock::Now(); m_LastLightweightGcTime = m_LastGcTime; WaitTime = m_Config.MonitorInterval; } catch (const std::bad_alloc& BadAlloc) { ZEN_WARN("scheduling garbage collection ran out of memory: '{}'", BadAlloc.what()); m_LastGcTime = GcClock::Now(); m_LastLightweightGcTime = m_LastGcTime; WaitTime = m_Config.MonitorInterval; } catch (const std::exception& Ex) { ZEN_ERROR("scheduling garbage collection failed with: '{}'", Ex.what()); m_LastGcTime = GcClock::Now(); m_LastLightweightGcTime = m_LastGcTime; WaitTime = m_Config.MonitorInterval; } } } void GcScheduler::ScrubStorage(bool DoDelete, bool SkipCid, std::chrono::seconds TimeSlice) { const std::chrono::steady_clock::time_point TimeNow = std::chrono::steady_clock::now(); std::chrono::steady_clock::time_point Deadline = TimeNow + TimeSlice; // there really should be a saturating add in std::chrono if (Deadline < TimeNow) { Deadline = std::chrono::steady_clock::time_point::max(); } Stopwatch Timer; ZEN_INFO("scrubbing STARTING (delete mode => {}, skip CID => {})", DoDelete, SkipCid); WorkerThreadPool& ThreadPool = GetMediumWorkerPool(); ScrubContext Ctx{ThreadPool, Deadline}; try { Ctx.SetSkipCas(SkipCid); Ctx.SetShouldDelete(DoDelete); m_GcManager.ScrubStorage(Ctx); } catch (const ScrubDeadlineExpiredException&) { ZEN_INFO("scrubbing deadline expired (top level), operation incomplete!"); } ZEN_INFO("scrubbing DONE (in {})", NiceTimeSpanMs(Timer.GetElapsedTimeMs())); } void GcScheduler::CollectGarbage(const GcClock::TimePoint& CacheExpireTime, const GcClock::TimePoint& ProjectStoreExpireTime, bool Delete, bool CollectSmallObjects, bool SkipCid, GcVersion UseGCVersion, uint32_t CompactBlockUsageThresholdPercent, bool Verbose) { ZEN_TRACE_CPU("GcScheduler::CollectGarbage"); GcContext GcCtx(CacheExpireTime, ProjectStoreExpireTime); GcCtx.SetDeletionMode(Delete); GcCtx.SetSkipCid(SkipCid); GcCtx.CollectSmallObjects(CollectSmallObjects); GcCtx.DiskReservePath(m_Config.RootDirectory / "reserve.gc"); auto ReclaimDiskReserve = [&]() { std::error_code Ec = CreateGCReserve(m_Config.RootDirectory / "reserve.gc", m_Config.DiskReserveSize); if (Ec) { ZEN_WARN("unable to create GC reserve at '{}' with size {}, reason: '{}'", m_Config.RootDirectory / "reserve.gc", NiceBytes(m_Config.DiskReserveSize), Ec.message()); } }; ReclaimDiskReserve(); const auto _ = MakeGuard([&] { ReclaimDiskReserve(); }); CheckDiskSpace(); if (m_AreDiskWritesBlocked.load()) { // We are low on disk, check if we can release our extra storage reserve, if we can't bail from doing GC uint64_t ReleasedSpace = GcCtx.ClaimGCReserve(); if (ReleasedSpace == 0) { ZEN_WARN("Disk space is very low and we have no GC reserve, skipping GC as this requires at least some space to write to '{}'", m_Config.RootDirectory); return; } } ZEN_INFO("garbage collection STARTING, small objects gc {}, {} CAS. Cache cutoff time {}, project store cutoff time {}", GcCtx.CollectSmallObjects() ? "ENABLED"sv : "DISABLED"sv, SkipCid ? "skip"sv : "include"sv, CacheExpireTime, ProjectStoreExpireTime); { Stopwatch Timer; const auto __ = MakeGuard([&] { ZEN_INFO("garbage collection DONE in {}", NiceTimeSpanMs(Timer.GetElapsedTimeMs())); }); GcStorageSize Diff; switch (UseGCVersion) { case GcVersion::kV1: Diff = m_GcManager.CollectGarbage(GcCtx); if (SkipCid) { m_LastLightweightGCV2Result.reset(); } else { m_LastFullGCV2Result.reset(); } break; case GcVersion::kV2: { const GcSettings Settings = {.CacheExpireTime = CacheExpireTime, .ProjectStoreExpireTime = ProjectStoreExpireTime, .CollectSmallObjects = CollectSmallObjects, .IsDeleteMode = Delete, .SkipCidDelete = SkipCid, .Verbose = Verbose, .CompactBlockUsageThresholdPercent = CompactBlockUsageThresholdPercent, .DiskReservePath = m_Config.RootDirectory / "reserve.gc"}; GcClock::TimePoint GcStartTime = GcClock::Now(); GcResult Result = m_GcManager.CollectGarbage(Settings); ZEN_INFO( "GCV2: Found {} expired items out of {}, deleted {}. " "Found {} unreferenced Cid entries out of {}, deleted {}. " "Freed {} on disk and {} of memory in {}. " "CacheExpireTime: {}, ProjectStoreExpireTime: {}, CollectSmallObjects: {}, " "IsDeleteMode: {}, SkipCidDelete: {}", Result.ReferencerStatSum.RemoveExpiredDataStats.FoundCount, Result.ReferencerStatSum.RemoveExpiredDataStats.CheckedCount, Result.ReferencerStatSum.RemoveExpiredDataStats.DeletedCount, Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.FoundCount, Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.CheckedCount, Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.DeletedCount, NiceBytes(Result.CompactStoresStatSum.RemovedDisk), NiceBytes(Result.ReferencerStatSum.RemoveExpiredDataStats.FreedMemory), NiceTimeSpanMs(Result.ElapsedMS.count()), Settings.CacheExpireTime, Settings.ProjectStoreExpireTime, Settings.CollectSmallObjects, Settings.IsDeleteMode, Settings.SkipCidDelete); AppendGCLog(GcStartTime, Settings, Result); if (SkipCid) { m_LastLightweightGCV2Result = Result; } else { m_LastFullGCV2Result = Result; } Diff.DiskSize = Result.CompactStoresStatSum.RemovedDisk; Diff.MemorySize = Result.ReferencerStatSum.RemoveExpiredDataStats.FreedMemory; } break; } std::chrono::milliseconds ElapsedMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs()); if (SkipCid) { m_LastLightweightGcTime = GcClock::Now(); m_LastLightweightGcDuration = ElapsedMS; m_LastLightweightGCDiff = Diff; } else { if (Delete) { GcClock::TimePoint KeepRangeStart = Min(CacheExpireTime, ProjectStoreExpireTime); m_LastGcExpireTime = KeepRangeStart; std::unique_lock Lock(m_GcMutex); m_DiskUsageWindow.KeepRange(KeepRangeStart.time_since_epoch().count(), GcClock::Duration::max().count()); } m_LastGcTime = GcClock::Now(); m_LastLightweightGcTime = m_LastGcTime; m_LastFullGcDuration = ElapsedMS; m_LastFullGCDiff = Diff; } try { const fs::path Path = m_Config.RootDirectory / "gc_state"; ZEN_DEBUG("saving scheduler state to '{}'", Path); CbObjectWriter SchedulerState; SchedulerState << "LastGcTime"sv << static_cast(m_LastGcTime.time_since_epoch().count()); SchedulerState << "LastGcExpireTime"sv << static_cast(m_LastGcExpireTime.time_since_epoch().count()); SaveCompactBinaryObject(Path, SchedulerState.Save()); } catch (const std::system_error& SystemError) { if (IsOOM(SystemError.code())) { ZEN_WARN("writing gc scheduler state ran out of memory: '{}'", SystemError.what()); } else if (IsOOD(SystemError.code())) { ZEN_WARN("writing gc scheduler state ran out of disk space: '{}'", SystemError.what()); } else { ZEN_ERROR("writing gc scheduler state failed with system error exception: '{}'", SystemError.what()); } } catch (const std::bad_alloc& BadAlloc) { ZEN_WARN("writing gc scheduler state ran out of memory: '{}'", BadAlloc.what()); } catch (const std::exception& Ex) { ZEN_ERROR("writing gc scheduler state failed with: '{}'", Ex.what()); } } } ////////////////////////////////////////////////////////////////////////// #if ZEN_WITH_TESTS namespace gc::impl { static CompressedBuffer Compress(IoBuffer Buffer) { return CompressedBuffer::Compress(SharedBuffer::MakeView(Buffer.GetData(), Buffer.GetSize())); } } // namespace gc::impl TEST_CASE("gc.basic") { using namespace gc::impl; ScopedTemporaryDirectory TempDir; CidStoreConfiguration CasConfig; CasConfig.RootDirectory = TempDir.Path() / "cas"; GcManager Gc; CidStore CidStore(Gc); CidStore.Initialize(CasConfig); IoBuffer Chunk = CreateRandomBlob(128); auto CompressedChunk = Compress(Chunk); const auto InsertResult = CidStore.AddChunk(CompressedChunk.GetCompressed().Flatten().AsIoBuffer(), CompressedChunk.DecodeRawHash()); CHECK(InsertResult.New); GcContext GcCtx(GcClock::Now() - std::chrono::hours(24), GcClock::Now() - std::chrono::hours(24)); GcCtx.CollectSmallObjects(true); CidStore.Flush(); Gc.CollectGarbage(GcCtx); CHECK(!CidStore.ContainsChunk(CompressedChunk.DecodeRawHash())); } TEST_CASE("gc.full") { using namespace gc::impl; ScopedTemporaryDirectory TempDir; CidStoreConfiguration CasConfig; CasConfig.RootDirectory = TempDir.Path() / "cas"; GcManager Gc; std::unique_ptr CasStore = CreateCasStore(Gc); CasStore->Initialize(CasConfig); uint64_t ChunkSizes[9] = {128, 541, 1023, 781, 218, 37, 4, 997, 5}; IoBuffer Chunks[9] = {CreateRandomBlob(ChunkSizes[0]), CreateRandomBlob(ChunkSizes[1]), CreateRandomBlob(ChunkSizes[2]), CreateRandomBlob(ChunkSizes[3]), CreateRandomBlob(ChunkSizes[4]), CreateRandomBlob(ChunkSizes[5]), CreateRandomBlob(ChunkSizes[6]), CreateRandomBlob(ChunkSizes[7]), CreateRandomBlob(ChunkSizes[8])}; IoHash ChunkHashes[9] = { IoHash::HashBuffer(Chunks[0].Data(), Chunks[0].Size()), IoHash::HashBuffer(Chunks[1].Data(), Chunks[1].Size()), IoHash::HashBuffer(Chunks[2].Data(), Chunks[2].Size()), IoHash::HashBuffer(Chunks[3].Data(), Chunks[3].Size()), IoHash::HashBuffer(Chunks[4].Data(), Chunks[4].Size()), IoHash::HashBuffer(Chunks[5].Data(), Chunks[5].Size()), IoHash::HashBuffer(Chunks[6].Data(), Chunks[6].Size()), IoHash::HashBuffer(Chunks[7].Data(), Chunks[7].Size()), IoHash::HashBuffer(Chunks[8].Data(), Chunks[8].Size()), }; CasStore->InsertChunk(Chunks[0], ChunkHashes[0]); CasStore->InsertChunk(Chunks[1], ChunkHashes[1]); CasStore->InsertChunk(Chunks[2], ChunkHashes[2]); CasStore->InsertChunk(Chunks[3], ChunkHashes[3]); CasStore->InsertChunk(Chunks[4], ChunkHashes[4]); CasStore->InsertChunk(Chunks[5], ChunkHashes[5]); CasStore->InsertChunk(Chunks[6], ChunkHashes[6]); CasStore->InsertChunk(Chunks[7], ChunkHashes[7]); CasStore->InsertChunk(Chunks[8], ChunkHashes[8]); CidStoreSize InitialSize = CasStore->TotalSize(); // Keep first and last { GcContext GcCtx(GcClock::Now() - std::chrono::hours(24), GcClock::Now() - std::chrono::hours(24)); GcCtx.CollectSmallObjects(true); std::vector KeepChunks; KeepChunks.push_back(ChunkHashes[0]); KeepChunks.push_back(ChunkHashes[8]); GcCtx.AddRetainedCids(KeepChunks); CasStore->Flush(); Gc.CollectGarbage(GcCtx); CHECK(CasStore->ContainsChunk(ChunkHashes[0])); CHECK(!CasStore->ContainsChunk(ChunkHashes[1])); CHECK(!CasStore->ContainsChunk(ChunkHashes[2])); CHECK(!CasStore->ContainsChunk(ChunkHashes[3])); CHECK(!CasStore->ContainsChunk(ChunkHashes[4])); CHECK(!CasStore->ContainsChunk(ChunkHashes[5])); CHECK(!CasStore->ContainsChunk(ChunkHashes[6])); CHECK(!CasStore->ContainsChunk(ChunkHashes[7])); CHECK(CasStore->ContainsChunk(ChunkHashes[8])); CHECK(ChunkHashes[0] == IoHash::HashBuffer(CasStore->FindChunk(ChunkHashes[0]))); CHECK(ChunkHashes[8] == IoHash::HashBuffer(CasStore->FindChunk(ChunkHashes[8]))); } CasStore->InsertChunk(Chunks[1], ChunkHashes[1]); CasStore->InsertChunk(Chunks[2], ChunkHashes[2]); CasStore->InsertChunk(Chunks[3], ChunkHashes[3]); CasStore->InsertChunk(Chunks[4], ChunkHashes[4]); CasStore->InsertChunk(Chunks[5], ChunkHashes[5]); CasStore->InsertChunk(Chunks[6], ChunkHashes[6]); CasStore->InsertChunk(Chunks[7], ChunkHashes[7]); // Keep last { GcContext GcCtx(GcClock::Now() - std::chrono::hours(24), GcClock::Now() - std::chrono::hours(24)); GcCtx.CollectSmallObjects(true); std::vector KeepChunks; KeepChunks.push_back(ChunkHashes[8]); GcCtx.AddRetainedCids(KeepChunks); CasStore->Flush(); Gc.CollectGarbage(GcCtx); CHECK(!CasStore->ContainsChunk(ChunkHashes[0])); CHECK(!CasStore->ContainsChunk(ChunkHashes[1])); CHECK(!CasStore->ContainsChunk(ChunkHashes[2])); CHECK(!CasStore->ContainsChunk(ChunkHashes[3])); CHECK(!CasStore->ContainsChunk(ChunkHashes[4])); CHECK(!CasStore->ContainsChunk(ChunkHashes[5])); CHECK(!CasStore->ContainsChunk(ChunkHashes[6])); CHECK(!CasStore->ContainsChunk(ChunkHashes[7])); CHECK(CasStore->ContainsChunk(ChunkHashes[8])); CHECK(ChunkHashes[8] == IoHash::HashBuffer(CasStore->FindChunk(ChunkHashes[8]))); CasStore->InsertChunk(Chunks[1], ChunkHashes[1]); CasStore->InsertChunk(Chunks[2], ChunkHashes[2]); CasStore->InsertChunk(Chunks[3], ChunkHashes[3]); CasStore->InsertChunk(Chunks[4], ChunkHashes[4]); CasStore->InsertChunk(Chunks[5], ChunkHashes[5]); CasStore->InsertChunk(Chunks[6], ChunkHashes[6]); CasStore->InsertChunk(Chunks[7], ChunkHashes[7]); } // Keep mixed { GcContext GcCtx(GcClock::Now() - std::chrono::hours(24), GcClock::Now() - std::chrono::hours(24)); GcCtx.CollectSmallObjects(true); std::vector KeepChunks; KeepChunks.push_back(ChunkHashes[1]); KeepChunks.push_back(ChunkHashes[4]); KeepChunks.push_back(ChunkHashes[7]); GcCtx.AddRetainedCids(KeepChunks); CasStore->Flush(); Gc.CollectGarbage(GcCtx); CHECK(!CasStore->ContainsChunk(ChunkHashes[0])); CHECK(CasStore->ContainsChunk(ChunkHashes[1])); CHECK(!CasStore->ContainsChunk(ChunkHashes[2])); CHECK(!CasStore->ContainsChunk(ChunkHashes[3])); CHECK(CasStore->ContainsChunk(ChunkHashes[4])); CHECK(!CasStore->ContainsChunk(ChunkHashes[5])); CHECK(!CasStore->ContainsChunk(ChunkHashes[6])); CHECK(CasStore->ContainsChunk(ChunkHashes[7])); CHECK(!CasStore->ContainsChunk(ChunkHashes[8])); CHECK(ChunkHashes[1] == IoHash::HashBuffer(CasStore->FindChunk(ChunkHashes[1]))); CHECK(ChunkHashes[4] == IoHash::HashBuffer(CasStore->FindChunk(ChunkHashes[4]))); CHECK(ChunkHashes[7] == IoHash::HashBuffer(CasStore->FindChunk(ChunkHashes[7]))); CasStore->InsertChunk(Chunks[0], ChunkHashes[0]); CasStore->InsertChunk(Chunks[2], ChunkHashes[2]); CasStore->InsertChunk(Chunks[3], ChunkHashes[3]); CasStore->InsertChunk(Chunks[5], ChunkHashes[5]); CasStore->InsertChunk(Chunks[6], ChunkHashes[6]); CasStore->InsertChunk(Chunks[8], ChunkHashes[8]); } // Keep multiple at end { GcContext GcCtx(GcClock::Now() - std::chrono::hours(24), GcClock::Now() - std::chrono::hours(24)); GcCtx.CollectSmallObjects(true); std::vector KeepChunks; KeepChunks.push_back(ChunkHashes[6]); KeepChunks.push_back(ChunkHashes[7]); KeepChunks.push_back(ChunkHashes[8]); GcCtx.AddRetainedCids(KeepChunks); CasStore->Flush(); Gc.CollectGarbage(GcCtx); CHECK(!CasStore->ContainsChunk(ChunkHashes[0])); CHECK(!CasStore->ContainsChunk(ChunkHashes[1])); CHECK(!CasStore->ContainsChunk(ChunkHashes[2])); CHECK(!CasStore->ContainsChunk(ChunkHashes[3])); CHECK(!CasStore->ContainsChunk(ChunkHashes[4])); CHECK(!CasStore->ContainsChunk(ChunkHashes[5])); CHECK(CasStore->ContainsChunk(ChunkHashes[6])); CHECK(CasStore->ContainsChunk(ChunkHashes[7])); CHECK(CasStore->ContainsChunk(ChunkHashes[8])); CHECK(ChunkHashes[6] == IoHash::HashBuffer(CasStore->FindChunk(ChunkHashes[6]))); CHECK(ChunkHashes[7] == IoHash::HashBuffer(CasStore->FindChunk(ChunkHashes[7]))); CHECK(ChunkHashes[8] == IoHash::HashBuffer(CasStore->FindChunk(ChunkHashes[8]))); CasStore->InsertChunk(Chunks[0], ChunkHashes[0]); CasStore->InsertChunk(Chunks[1], ChunkHashes[1]); CasStore->InsertChunk(Chunks[2], ChunkHashes[2]); CasStore->InsertChunk(Chunks[3], ChunkHashes[3]); CasStore->InsertChunk(Chunks[4], ChunkHashes[4]); CasStore->InsertChunk(Chunks[5], ChunkHashes[5]); } // Verify that we nicely appended blocks even after all GC operations CHECK(ChunkHashes[0] == IoHash::HashBuffer(CasStore->FindChunk(ChunkHashes[0]))); CHECK(ChunkHashes[1] == IoHash::HashBuffer(CasStore->FindChunk(ChunkHashes[1]))); CHECK(ChunkHashes[2] == IoHash::HashBuffer(CasStore->FindChunk(ChunkHashes[2]))); CHECK(ChunkHashes[3] == IoHash::HashBuffer(CasStore->FindChunk(ChunkHashes[3]))); CHECK(ChunkHashes[4] == IoHash::HashBuffer(CasStore->FindChunk(ChunkHashes[4]))); CHECK(ChunkHashes[5] == IoHash::HashBuffer(CasStore->FindChunk(ChunkHashes[5]))); CHECK(ChunkHashes[6] == IoHash::HashBuffer(CasStore->FindChunk(ChunkHashes[6]))); CHECK(ChunkHashes[7] == IoHash::HashBuffer(CasStore->FindChunk(ChunkHashes[7]))); CHECK(ChunkHashes[8] == IoHash::HashBuffer(CasStore->FindChunk(ChunkHashes[8]))); auto FinalSize = CasStore->TotalSize(); CHECK_LE(InitialSize.TinySize, FinalSize.TinySize); CHECK_GE(InitialSize.TinySize + (1u << 28), FinalSize.TinySize); } TEST_CASE("gc.diskusagewindow") { using namespace gc::impl; DiskUsageWindow Stats; Stats.Append({.SampleTime = 0, .DiskUsage = 0}); // 0 0 Stats.Append({.SampleTime = 10, .DiskUsage = 10}); // 1 10 Stats.Append({.SampleTime = 20, .DiskUsage = 20}); // 2 10 Stats.Append({.SampleTime = 30, .DiskUsage = 20}); // 3 0 Stats.Append({.SampleTime = 40, .DiskUsage = 15}); // 4 0 Stats.Append({.SampleTime = 50, .DiskUsage = 25}); // 5 10 Stats.Append({.SampleTime = 60, .DiskUsage = 30}); // 6 5 Stats.Append({.SampleTime = 70, .DiskUsage = 45}); // 7 15 SUBCASE("Truncate start") { Stats.KeepRange(-15, 31); CHECK(Stats.m_LogWindow.size() == 4); CHECK(Stats.m_LogWindow[0].SampleTime == 0); CHECK(Stats.m_LogWindow[3].SampleTime == 30); } SUBCASE("Truncate end") { Stats.KeepRange(70, 71); CHECK(Stats.m_LogWindow.size() == 1); CHECK(Stats.m_LogWindow[0].SampleTime == 70); } SUBCASE("Truncate middle") { Stats.KeepRange(29, 69); CHECK(Stats.m_LogWindow.size() == 4); CHECK(Stats.m_LogWindow[0].SampleTime == 30); CHECK(Stats.m_LogWindow[3].SampleTime == 60); } SUBCASE("Full range") { uint64_t MaxDelta = 0; // 0-10, 10-20, 20-30, 30-40, 40-50, 50-60, 60-70, 70-80 std::vector DiskDeltas = Stats.GetDiskDeltas(0, 80, 10, MaxDelta); CHECK(DiskDeltas.size() == 8); CHECK(MaxDelta == 15); CHECK(DiskDeltas[0] == 0); CHECK(DiskDeltas[1] == 10); CHECK(DiskDeltas[2] == 10); CHECK(DiskDeltas[3] == 0); CHECK(DiskDeltas[4] == 0); CHECK(DiskDeltas[5] == 10); CHECK(DiskDeltas[6] == 5); CHECK(DiskDeltas[7] == 15); } SUBCASE("Sub range") { uint64_t MaxDelta = 0; std::vector DiskDeltas = Stats.GetDiskDeltas(20, 40, 10, MaxDelta); CHECK(DiskDeltas.size() == 2); CHECK(MaxDelta == 10); CHECK(DiskDeltas[0] == 10); // [20:30] CHECK(DiskDeltas[1] == 0); // [30:40] } SUBCASE("Unaligned sub range 1") { uint64_t MaxDelta = 0; std::vector DiskDeltas = Stats.GetDiskDeltas(21, 51, 10, MaxDelta); CHECK(DiskDeltas.size() == 3); CHECK(MaxDelta == 10); CHECK(DiskDeltas[0] == 0); // [21:31] CHECK(DiskDeltas[1] == 0); // [31:41] CHECK(DiskDeltas[2] == 10); // [41:51] } SUBCASE("Unaligned end range") { uint64_t MaxDelta = 0; std::vector DiskDeltas = Stats.GetDiskDeltas(29, 79, 10, MaxDelta); CHECK(DiskDeltas.size() == 5); CHECK(MaxDelta == 15); CHECK(DiskDeltas[0] == 0); // [29:39] CHECK(DiskDeltas[1] == 0); // [39:49] CHECK(DiskDeltas[2] == 10); // [49:59] CHECK(DiskDeltas[3] == 5); // [59:69] CHECK(DiskDeltas[4] == 15); // [69:79] } SUBCASE("Ahead of window") { uint64_t MaxDelta = 0; std::vector DiskDeltas = Stats.GetDiskDeltas(-40, 0, 10, MaxDelta); CHECK(DiskDeltas.size() == 4); CHECK(MaxDelta == 0); CHECK(DiskDeltas[0] == 0); // [-40:-30] CHECK(DiskDeltas[1] == 0); // [-30:-20] CHECK(DiskDeltas[2] == 0); // [-20:-10] CHECK(DiskDeltas[3] == 0); // [-10:0] } SUBCASE("After of window") { uint64_t MaxDelta = 0; std::vector DiskDeltas = Stats.GetDiskDeltas(90, 120, 10, MaxDelta); CHECK(DiskDeltas.size() == 3); CHECK(MaxDelta == 0); CHECK(DiskDeltas[0] == 0); // [90:100] CHECK(DiskDeltas[1] == 0); // [100:110] CHECK(DiskDeltas[2] == 0); // [110:120] } SUBCASE("Encapsulating window") { uint64_t MaxDelta = 0; std::vector DiskDeltas = Stats.GetDiskDeltas(-20, 100, 10, MaxDelta); CHECK(DiskDeltas.size() == 12); CHECK(MaxDelta == 15); CHECK(DiskDeltas[0] == 0); // [-20:-10] CHECK(DiskDeltas[1] == 0); // [ -10:0] CHECK(DiskDeltas[2] == 0); // [0:10] CHECK(DiskDeltas[3] == 10); // [10:20] CHECK(DiskDeltas[4] == 10); // [20:30] CHECK(DiskDeltas[5] == 0); // [30:40] CHECK(DiskDeltas[6] == 0); // [40:50] CHECK(DiskDeltas[7] == 10); // [50:60] CHECK(DiskDeltas[8] == 5); // [60:70] CHECK(DiskDeltas[9] == 15); // [70:80] CHECK(DiskDeltas[10] == 0); // [80:90] CHECK(DiskDeltas[11] == 0); // [90:100] } SUBCASE("Full range half stride") { uint64_t MaxDelta = 0; std::vector DiskDeltas = Stats.GetDiskDeltas(0, 80, 20, MaxDelta); CHECK(DiskDeltas.size() == 4); CHECK(MaxDelta == 20); CHECK(DiskDeltas[0] == 10); // [0:20] CHECK(DiskDeltas[1] == 10); // [20:40] CHECK(DiskDeltas[2] == 10); // [40:60] CHECK(DiskDeltas[3] == 20); // [60:80] } SUBCASE("Partial odd stride") { uint64_t MaxDelta = 0; std::vector DiskDeltas = Stats.GetDiskDeltas(13, 67, 18, MaxDelta); CHECK(DiskDeltas.size() == 3); CHECK(MaxDelta == 15); CHECK(DiskDeltas[0] == 10); // [13:31] CHECK(DiskDeltas[1] == 0); // [31:49] CHECK(DiskDeltas[2] == 15); // [49:67] } SUBCASE("Find size window") { DiskUsageWindow Empty; CHECK(Empty.FindTimepointThatRemoves(15u, 10000) == 10000); CHECK(Stats.FindTimepointThatRemoves(15u, 40) == 21); CHECK(Stats.FindTimepointThatRemoves(15u, 20) == 20); CHECK(Stats.FindTimepointThatRemoves(100000u, 50) == 50); CHECK(Stats.FindTimepointThatRemoves(100000u, 1000)); } } TEST_CASE("scrub.basic") { using namespace gc::impl; ScopedTemporaryDirectory TempDir; CidStoreConfiguration CasConfig; CasConfig.RootDirectory = TempDir.Path() / "cas"; GcManager Gc; CidStore CidStore(Gc); CidStore.Initialize(CasConfig); IoBuffer Chunk = CreateRandomBlob(128); auto CompressedChunk = Compress(Chunk); const auto InsertResult = CidStore.AddChunk(CompressedChunk.GetCompressed().Flatten().AsIoBuffer(), CompressedChunk.DecodeRawHash()); CHECK(InsertResult.New); GcContext GcCtx(GcClock::Now() - std::chrono::hours(24), GcClock::Now() - std::chrono::hours(24)); GcCtx.CollectSmallObjects(true); CidStore.Flush(); Gc.CollectGarbage(GcCtx); CHECK(!CidStore.ContainsChunk(CompressedChunk.DecodeRawHash())); } #endif void gc_forcelink() { } } // namespace zen