diff options
Diffstat (limited to 'zenstore/gc.cpp')
| -rw-r--r-- | zenstore/gc.cpp | 419 |
1 files changed, 393 insertions, 26 deletions
Review notes for this patch (free text ahead of the first "diff --git" line):

  * CreateGCReserve (Windows and POSIX branches): the cleanup guard deletes the
    half-created file only when Keep is false, but Keep started out true, so a
    failed resize/allocation left a wrongly sized reserve file behind. Keep now
    starts false and becomes true only after the file has its final size.
  * NOTE(review): the Size == 0 path and the "already the right size" check use the
    throwing std::filesystem overloads although the function reports failures through
    its std::error_code return value - confirm callers are fine with that.
  * NOTE(review): confirm posix_fallocate is actually available in the
    ZEN_PLATFORM_MAC build; the call is kept exactly as submitted.
  * Only the contents of added ('+') lines changed; hunk line counts are untouched.
    Indentation and blank context lines were restored from the hunk headers.

diff --git a/zenstore/gc.cpp b/zenstore/gc.cpp
index 3b090cae9..287dfb48a 100644
--- a/zenstore/gc.cpp
+++ b/zenstore/gc.cpp
@@ -5,9 +5,11 @@
 #include <zencore/compactbinary.h>
 #include <zencore/compactbinarybuilder.h>
 #include <zencore/compactbinaryvalidation.h>
+#include <zencore/except.h>
 #include <zencore/filesystem.h>
 #include <zencore/fmtutils.h>
 #include <zencore/logging.h>
+#include <zencore/scopeguard.h>
 #include <zencore/string.h>
 #include <zencore/testing.h>
 #include <zencore/testutils.h>
@@ -18,6 +20,15 @@
 #include <fmt/format.h>
 #include <filesystem>
 
+#if ZEN_PLATFORM_WINDOWS
+#	include <zencore/windows.h>
+#else
+#	include <fcntl.h>
+#	include <sys/file.h>
+#	include <sys/stat.h>
+#	include <unistd.h>
+#endif
+
 #if ZEN_WITH_TESTS
 #	include <zencore/compress.h>
 #	include <algorithm>
@@ -31,6 +42,112 @@ namespace fs = std::filesystem;
 
 //////////////////////////////////////////////////////////////////////////
 
+namespace {
+	std::error_code CreateGCReserve(const std::filesystem::path& Path, uint64_t Size)  // creates/resizes the reserve file at Path; Size == 0 removes it
+	{
+		if (Size == 0)
+		{
+			std::filesystem::remove(Path);
+			return std::error_code{};
+		}
+		CreateDirectories(Path.parent_path());
+		if (std::filesystem::is_regular_file(Path) && std::filesystem::file_size(Path) == Size)
+		{
+			return std::error_code();  // a reserve of exactly the requested size already exists
+		}
+#if ZEN_PLATFORM_WINDOWS
+		DWORD dwCreationDisposition = CREATE_ALWAYS;
+		DWORD dwDesiredAccess = GENERIC_READ | GENERIC_WRITE;
+
+		const DWORD dwShareMode = 0;
+		const DWORD dwFlagsAndAttributes = FILE_ATTRIBUTE_NORMAL;
+		HANDLE hTemplateFile = nullptr;
+
+		HANDLE FileHandle = CreateFile(Path.c_str(),
+									   dwDesiredAccess,
+									   dwShareMode,
+									   /* lpSecurityAttributes */ nullptr,
+									   dwCreationDisposition,
+									   dwFlagsAndAttributes,
+									   hTemplateFile);
+
+		if (FileHandle == INVALID_HANDLE_VALUE)
+		{
+			return MakeErrorCodeFromLastError();
+		}
+		bool Keep = false;	// stays false until the file has its final size, so every early return below deletes the partial file
+		auto _ = MakeGuard([FileHandle, &Keep, Path]() {
+			::CloseHandle(FileHandle);
+			if (!Keep)
+			{
+				::DeleteFile(Path.c_str());
+			}
+		});
+		LARGE_INTEGER liFileSize;
+		liFileSize.QuadPart = Size;
+		BOOL OK = ::SetFilePointerEx(FileHandle, liFileSize, 0, FILE_BEGIN);
+		if (!OK)
+		{
+			return MakeErrorCodeFromLastError();
+		}
+		OK = ::SetEndOfFile(FileHandle);
+		if (!OK)
+		{
+			return MakeErrorCodeFromLastError();
+		}
+		Keep = true;  // fully sized: the guard now only closes the handle
+#else
+		int OpenFlags = O_CLOEXEC | O_RDWR | O_CREAT;
+		int Fd = open(Path.c_str(), OpenFlags, 0666);
+		if (Fd < 0)
+		{
+			return MakeErrorCodeFromLastError();
+		}
+
+		bool Keep = false;	// stays false until the file has its final size, so every early return below unlinks the partial file
+		auto _ = MakeGuard([Fd, &Keep, Path]() {
+			close(Fd);
+			if (!Keep)
+			{
+				unlink(Path.c_str());
+			}
+		});
+
+		if (fchmod(Fd, 0666) < 0)
+		{
+			return MakeErrorCodeFromLastError();
+		}
+
+#	if ZEN_PLATFORM_MAC
+		if (ftruncate(Fd, (off_t)Size) < 0)
+		{
+			return MakeErrorCodeFromLastError();
+		}
+		int Error = posix_fallocate(Fd, 0, (off_t)Size);
+		if (Error)
+		{
+			return MakeErrorCode(Error);
+		}
+#	else
+		if (ftruncate64(Fd, (off64_t)Size) < 0)
+		{
+			return MakeErrorCodeFromLastError();
+		}
+		int Error = posix_fallocate64(Fd, 0, (off64_t)Size);
+		if (Error)
+		{
+			return MakeErrorCode(Error);
+		}
+#	endif
+		Keep = true;  // fully sized: the guard now only closes the descriptor
+#endif
+		return std::error_code{};
+	}
+
+}  // namespace
+
+//////////////////////////////////////////////////////////////////////////
+
 CbObject
 LoadCompactBinaryObject(const fs::path& Path)
 {
@@ -74,6 +191,8 @@ struct GcContext::GcState
 	GcClock::Duration m_MaxCacheDuration = std::chrono::hours(24);
 	bool m_DeletionMode = true;
 	bool m_CollectSmallObjects = false;
+
+	std::filesystem::path DiskReservePath;
 };
 
 GcContext::GcContext(GcClock::TimePoint Time) : m_State(std::make_unique<GcState>())
@@ -194,6 +313,27 @@ GcContext::MaxCacheDuration(GcClock::Duration Duration)
 	m_State->m_MaxCacheDuration = Duration;
 }
 
+void
+GcContext::DiskReservePath(const std::filesystem::path& Path)
+{
+	m_State->DiskReservePath = Path;
+}
+
+uint64_t
+GcContext::ClaimGCReserve()
+{
+	if (!std::filesystem::is_regular_file(m_State->DiskReservePath))
+	{
+		return 0;
+	}
+	uint64_t ReclaimedSize = std::filesystem::file_size(m_State->DiskReservePath);
+	if (std::filesystem::remove(m_State->DiskReservePath))
+	{
+		return ReclaimedSize;
+	}
+	return 0;
+}
+
 //////////////////////////////////////////////////////////////////////////
 
 GcContributor::GcContributor(CasGc& Gc) : m_Gc(Gc)
@@ -262,10 +402,13 @@ CasGc::CollectGarbage(GcContext& GcCtx)
 	RwLock::SharedLockScope _(m_Lock);
 
 	// First gather reference set
-
-	for (GcContributor* Contributor : m_GcContribs)
 	{
-		Contributor->GatherReferences(GcCtx);
+		Stopwatch Timer;
+		const auto Guard = MakeGuard([this, &Timer] { ZEN_INFO("gathered references in {}", NiceTimeSpanMs(Timer.GetElapsedTimeMs())); });
+		for (GcContributor* Contributor : m_GcContribs)
+		{
+			Contributor->GatherReferences(GcCtx);
+		}
 	}
 
 	// Cache records reference CAS chunks with the uncompressed
@@ -300,15 +443,22 @@ CasGc::CollectGarbage(GcContext& GcCtx)
 
 	// Then trim storage
 
-	for (GcStorage* Storage : m_GcStorage)
 	{
-		Storage->CollectGarbage(GcCtx);
+		Stopwatch Timer;
+		const auto Guard = MakeGuard([this, &Timer] { ZEN_INFO("collected garbage in {}", NiceTimeSpanMs(Timer.GetElapsedTimeMs())); });
+		for (GcStorage* Storage : m_GcStorage)
+		{
+			Storage->CollectGarbage(GcCtx);
+		}
 	}
 
 	// Remove Cid to CAS hash mappings. Scrub?
 
 	if (CidStore* CidStore = m_CidStore)
 	{
+		Stopwatch Timer;
+		const auto Guard =
+			MakeGuard([this, &Timer] { ZEN_INFO("clean up deleted content ids in {}", NiceTimeSpanMs(Timer.GetElapsedTimeMs())); });
 		CidStore->RemoveCids(GcCtx.DeletedCas());
 	}
 }
@@ -379,6 +529,15 @@ GcScheduler::Initialize(const GcSchedulerConfig& Config)
 
 	std::filesystem::create_directories(Config.RootDirectory);
 
+	std::error_code Ec = CreateGCReserve(m_Config.RootDirectory / "reserve.gc", m_Config.DiskReserveSize);
+	if (Ec)
+	{
+		ZEN_WARN("unable to create GC reserve at '{}' with size {}, reason '{}'",
+				 m_Config.RootDirectory / "reserve.gc",
+				 NiceBytes(m_Config.DiskReserveSize),
+				 Ec.message());
+	}
+
 	m_LastGcTime = GcClock::Now();
 
 	if (CbObject SchedulerState = LoadCompactBinaryObject(Config.RootDirectory / "gc_state"))
@@ -475,7 +634,7 @@ GcScheduler::SchedulerThread()
 
 		if (Ec)
 		{
-			ZEN_WARN("get disk space info FAILED, reason '{}'", Ec.message());
+			ZEN_WARN("get disk space info FAILED, reason: '{}'", Ec.message());
 		}
 
 		ZEN_INFO("{} in use, {} of total {} free disk space, {}",
@@ -506,6 +665,7 @@ GcScheduler::SchedulerThread()
 		GcCtx.SetDeletionMode(true);
 		GcCtx.CollectSmallObjects(m_Config.CollectSmallObjects);
 		GcCtx.MaxCacheDuration(m_Config.MaxCacheDuration);
+		GcCtx.DiskReservePath(m_Config.RootDirectory / "reserve.gc");
 
 		if (m_TriggerParams)
 		{
@@ -519,27 +679,37 @@ GcScheduler::SchedulerThread()
 			}
 		}
 
-		Stopwatch Timer;
-
 		ZEN_INFO("garbage collection STARTING, small objects gc {}, max cache duration {}",
 				 GcCtx.CollectSmallObjects() ? "ENABLED"sv : "DISABLED"sv,
 				 NiceTimeSpanMs(uint64_t(std::chrono::duration_cast<std::chrono::milliseconds>(GcCtx.MaxCacheDuration()).count())));
+		{
+			Stopwatch Timer;
+			const auto __ =
+				MakeGuard([this, &Timer] { ZEN_INFO("garbage collection DONE after {}", NiceTimeSpanMs(Timer.GetElapsedTimeMs())); });
 
-		m_CasGc.CollectGarbage(GcCtx);
+			m_CasGc.CollectGarbage(GcCtx);
 
-		m_LastGcTime = GcClock::Now();
-		m_NextGcTime = NextGcTime(m_LastGcTime);
-		WaitTime = m_Config.MonitorInterval;
+			m_LastGcTime = GcClock::Now();
+			m_NextGcTime = NextGcTime(m_LastGcTime);
+			WaitTime = m_Config.MonitorInterval;
 
-		{
-			const fs::path Path = m_Config.RootDirectory / "gc_state";
-			ZEN_DEBUG("saving scheduler state to '{}'", Path);
-			CbObjectWriter SchedulderState;
-			SchedulderState << "LastGcTime"sv << static_cast<int64_t>(m_LastGcTime.time_since_epoch().count());
-			SaveCompactBinaryObject(Path, SchedulderState.Save());
-		}
+			{
+				const fs::path Path = m_Config.RootDirectory / "gc_state";
+				ZEN_DEBUG("saving scheduler state to '{}'", Path);
+				CbObjectWriter SchedulderState;
+				SchedulderState << "LastGcTime"sv << static_cast<int64_t>(m_LastGcTime.time_since_epoch().count());
+				SaveCompactBinaryObject(Path, SchedulderState.Save());
+			}
 
-		ZEN_INFO("garbage collection DONE after {}", NiceTimeSpanMs(Timer.GetElapsedTimeMs()));
+			std::error_code Ec = CreateGCReserve(m_Config.RootDirectory / "reserve.gc", m_Config.DiskReserveSize);
+			if (Ec)
+			{
+				ZEN_WARN("unable to create GC reserve at '{}' with size {}, reason: '{}'",
+						 m_Config.RootDirectory / "reserve.gc",
+						 NiceBytes(m_Config.DiskReserveSize),
+						 Ec.message());
+			}
+		}
 
 		uint32_t RunningState = static_cast<uint32_t>(GcSchedulerStatus::kRunning);
 		if (!m_Status.compare_exchange_strong(RunningState, static_cast<uint32_t>(GcSchedulerStatus::kIdle)))
@@ -573,16 +743,15 @@ namespace {
 		static std::random_device rd;
 		static std::mt19937 g(rd());
 
-		const size_t Count = static_cast<size_t>(Size / sizeof(uint32_t));
-		std::vector<uint32_t> Values;
-		Values.resize(Count);
-		for (size_t Idx = 0; Idx < Count; ++Idx)
+		std::vector<uint8_t> Values;
+		Values.resize(Size);
+		for (size_t Idx = 0; Idx < Size; ++Idx)
 		{
-			Values[Idx] = static_cast<uint32_t>(Idx);
+			Values[Idx] = static_cast<uint8_t>(Idx);
 		}
 		std::shuffle(Values.begin(), Values.end(), g);
 
-		return IoBufferBuilder::MakeCloneFromMemory(Values.data(), Values.size() * sizeof(uint32_t));
+		return IoBufferBuilder::MakeCloneFromMemory(Values.data(), Values.size());
 	}
 
 	static CompressedBuffer Compress(IoBuffer Buffer)
@@ -613,11 +782,209 @@ TEST_CASE("gc.basic")
 	GcContext GcCtx;
 	GcCtx.CollectSmallObjects(true);
 
+	CasStore->Flush();
 	Gc.CollectGarbage(GcCtx);
 
 	CHECK(!CidStore.ContainsChunk(InsertResult.DecompressedId));
 }
 
+TEST_CASE("gc.full")
+{
+	ScopedTemporaryDirectory TempDir;
+
+	CasStoreConfiguration CasConfig;
+	CasConfig.RootDirectory = TempDir.Path() / "cas";
+
+	CasGc Gc;
+	std::unique_ptr<CasStore> CasStore = CreateCasStore(Gc);
+
+	CasStore->Initialize(CasConfig);
+
+	uint64_t ChunkSizes[9] = {128, 541, 1023, 781, 218, 37, 4, 997, 5};
+	IoBuffer Chunks[9] = {CreateChunk(ChunkSizes[0]),
+						  CreateChunk(ChunkSizes[1]),
+						  CreateChunk(ChunkSizes[2]),
+						  CreateChunk(ChunkSizes[3]),
+						  CreateChunk(ChunkSizes[4]),
+						  CreateChunk(ChunkSizes[5]),
+						  CreateChunk(ChunkSizes[6]),
+						  CreateChunk(ChunkSizes[7]),
+						  CreateChunk(ChunkSizes[8])};
+	IoHash ChunkHashes[9] = {
+		IoHash::HashBuffer(Chunks[0].Data(), Chunks[0].Size()),
+		IoHash::HashBuffer(Chunks[1].Data(), Chunks[1].Size()),
+		IoHash::HashBuffer(Chunks[2].Data(), Chunks[2].Size()),
+		IoHash::HashBuffer(Chunks[3].Data(), Chunks[3].Size()),
+		IoHash::HashBuffer(Chunks[4].Data(), Chunks[4].Size()),
+		IoHash::HashBuffer(Chunks[5].Data(), Chunks[5].Size()),
+		IoHash::HashBuffer(Chunks[6].Data(), Chunks[6].Size()),
+		IoHash::HashBuffer(Chunks[7].Data(), Chunks[7].Size()),
+		IoHash::HashBuffer(Chunks[8].Data(), Chunks[8].Size()),
+	};
+
+	CasStore->InsertChunk(Chunks[0], ChunkHashes[0]);
+	CasStore->InsertChunk(Chunks[1], ChunkHashes[1]);
+	CasStore->InsertChunk(Chunks[2], ChunkHashes[2]);
+	CasStore->InsertChunk(Chunks[3], ChunkHashes[3]);
+	CasStore->InsertChunk(Chunks[4], ChunkHashes[4]);
+	CasStore->InsertChunk(Chunks[5], ChunkHashes[5]);
+	CasStore->InsertChunk(Chunks[6], ChunkHashes[6]);
+	CasStore->InsertChunk(Chunks[7], ChunkHashes[7]);
+	CasStore->InsertChunk(Chunks[8], ChunkHashes[8]);
+
+	CasStoreSize InitialSize = CasStore->TotalSize();
+
+	// Keep first and last
+	{
+		GcContext GcCtx;
+		GcCtx.CollectSmallObjects(true);
+
+		std::vector<IoHash> KeepChunks;
+		KeepChunks.push_back(ChunkHashes[0]);
+		KeepChunks.push_back(ChunkHashes[8]);
+		GcCtx.ContributeCas(KeepChunks);
+
+		CasStore->Flush();
+		Gc.CollectGarbage(GcCtx);
+
+		CHECK(CasStore->ContainsChunk(ChunkHashes[0]));
+		CHECK(!CasStore->ContainsChunk(ChunkHashes[1]));
+		CHECK(!CasStore->ContainsChunk(ChunkHashes[2]));
+		CHECK(!CasStore->ContainsChunk(ChunkHashes[3]));
+		CHECK(!CasStore->ContainsChunk(ChunkHashes[4]));
+		CHECK(!CasStore->ContainsChunk(ChunkHashes[5]));
+		CHECK(!CasStore->ContainsChunk(ChunkHashes[6]));
+		CHECK(!CasStore->ContainsChunk(ChunkHashes[7]));
+		CHECK(CasStore->ContainsChunk(ChunkHashes[8]));
+
+		CHECK(ChunkHashes[0] == IoHash::HashBuffer(CasStore->FindChunk(ChunkHashes[0])));
+		CHECK(ChunkHashes[8] == IoHash::HashBuffer(CasStore->FindChunk(ChunkHashes[8])));
+	}
+
+	CasStore->InsertChunk(Chunks[1], ChunkHashes[1]);
+	CasStore->InsertChunk(Chunks[2], ChunkHashes[2]);
+	CasStore->InsertChunk(Chunks[3], ChunkHashes[3]);
+	CasStore->InsertChunk(Chunks[4], ChunkHashes[4]);
+	CasStore->InsertChunk(Chunks[5], ChunkHashes[5]);
+	CasStore->InsertChunk(Chunks[6], ChunkHashes[6]);
+	CasStore->InsertChunk(Chunks[7], ChunkHashes[7]);
+
+	// Keep last
+	{
+		GcContext GcCtx;
+		GcCtx.CollectSmallObjects(true);
+		std::vector<IoHash> KeepChunks;
+		KeepChunks.push_back(ChunkHashes[8]);
+		GcCtx.ContributeCas(KeepChunks);
+
+		CasStore->Flush();
+		Gc.CollectGarbage(GcCtx);
+
+		CHECK(!CasStore->ContainsChunk(ChunkHashes[0]));
+		CHECK(!CasStore->ContainsChunk(ChunkHashes[1]));
+		CHECK(!CasStore->ContainsChunk(ChunkHashes[2]));
+		CHECK(!CasStore->ContainsChunk(ChunkHashes[3]));
+		CHECK(!CasStore->ContainsChunk(ChunkHashes[4]));
+		CHECK(!CasStore->ContainsChunk(ChunkHashes[5]));
+		CHECK(!CasStore->ContainsChunk(ChunkHashes[6]));
+		CHECK(!CasStore->ContainsChunk(ChunkHashes[7]));
+		CHECK(CasStore->ContainsChunk(ChunkHashes[8]));
+
+		CHECK(ChunkHashes[8] == IoHash::HashBuffer(CasStore->FindChunk(ChunkHashes[8])));
+
+		CasStore->InsertChunk(Chunks[1], ChunkHashes[1]);
+		CasStore->InsertChunk(Chunks[2], ChunkHashes[2]);
+		CasStore->InsertChunk(Chunks[3], ChunkHashes[3]);
+		CasStore->InsertChunk(Chunks[4], ChunkHashes[4]);
+		CasStore->InsertChunk(Chunks[5], ChunkHashes[5]);
+		CasStore->InsertChunk(Chunks[6], ChunkHashes[6]);
+		CasStore->InsertChunk(Chunks[7], ChunkHashes[7]);
+	}
+
+	// Keep mixed
+	{
+		GcContext GcCtx;
+		GcCtx.CollectSmallObjects(true);
+		std::vector<IoHash> KeepChunks;
+		KeepChunks.push_back(ChunkHashes[1]);
+		KeepChunks.push_back(ChunkHashes[4]);
+		KeepChunks.push_back(ChunkHashes[7]);
+		GcCtx.ContributeCas(KeepChunks);
+
+		CasStore->Flush();
+		Gc.CollectGarbage(GcCtx);
+
+		CHECK(!CasStore->ContainsChunk(ChunkHashes[0]));
+		CHECK(CasStore->ContainsChunk(ChunkHashes[1]));
+		CHECK(!CasStore->ContainsChunk(ChunkHashes[2]));
+		CHECK(!CasStore->ContainsChunk(ChunkHashes[3]));
+		CHECK(CasStore->ContainsChunk(ChunkHashes[4]));
+		CHECK(!CasStore->ContainsChunk(ChunkHashes[5]));
+		CHECK(!CasStore->ContainsChunk(ChunkHashes[6]));
+		CHECK(CasStore->ContainsChunk(ChunkHashes[7]));
+		CHECK(!CasStore->ContainsChunk(ChunkHashes[8]));
+
+		CHECK(ChunkHashes[1] == IoHash::HashBuffer(CasStore->FindChunk(ChunkHashes[1])));
+		CHECK(ChunkHashes[4] == IoHash::HashBuffer(CasStore->FindChunk(ChunkHashes[4])));
+		CHECK(ChunkHashes[7] == IoHash::HashBuffer(CasStore->FindChunk(ChunkHashes[7])));
+
+		CasStore->InsertChunk(Chunks[0], ChunkHashes[0]);
+		CasStore->InsertChunk(Chunks[2], ChunkHashes[2]);
+		CasStore->InsertChunk(Chunks[3], ChunkHashes[3]);
+		CasStore->InsertChunk(Chunks[5], ChunkHashes[5]);
+		CasStore->InsertChunk(Chunks[6], ChunkHashes[6]);
+		CasStore->InsertChunk(Chunks[8], ChunkHashes[8]);
+	}
+
+	// Keep multiple at end
+	{
+		GcContext GcCtx;
+		GcCtx.CollectSmallObjects(true);
+		std::vector<IoHash> KeepChunks;
+		KeepChunks.push_back(ChunkHashes[6]);
+		KeepChunks.push_back(ChunkHashes[7]);
+		KeepChunks.push_back(ChunkHashes[8]);
+		GcCtx.ContributeCas(KeepChunks);
+
+		CasStore->Flush();
+		Gc.CollectGarbage(GcCtx);
+
+		CHECK(!CasStore->ContainsChunk(ChunkHashes[0]));
+		CHECK(!CasStore->ContainsChunk(ChunkHashes[1]));
+		CHECK(!CasStore->ContainsChunk(ChunkHashes[2]));
+		CHECK(!CasStore->ContainsChunk(ChunkHashes[3]));
+		CHECK(!CasStore->ContainsChunk(ChunkHashes[4]));
+		CHECK(!CasStore->ContainsChunk(ChunkHashes[5]));
+		CHECK(CasStore->ContainsChunk(ChunkHashes[6]));
+		CHECK(CasStore->ContainsChunk(ChunkHashes[7]));
+		CHECK(CasStore->ContainsChunk(ChunkHashes[8]));
+
+		CHECK(ChunkHashes[6] == IoHash::HashBuffer(CasStore->FindChunk(ChunkHashes[6])));
+		CHECK(ChunkHashes[7] == IoHash::HashBuffer(CasStore->FindChunk(ChunkHashes[7])));
+		CHECK(ChunkHashes[8] == IoHash::HashBuffer(CasStore->FindChunk(ChunkHashes[8])));
+
+		CasStore->InsertChunk(Chunks[0], ChunkHashes[0]);
+		CasStore->InsertChunk(Chunks[1], ChunkHashes[1]);
+		CasStore->InsertChunk(Chunks[2], ChunkHashes[2]);
+		CasStore->InsertChunk(Chunks[3], ChunkHashes[3]);
+		CasStore->InsertChunk(Chunks[4], ChunkHashes[4]);
+		CasStore->InsertChunk(Chunks[5], ChunkHashes[5]);
+	}
+
+	// Verify that we nicely appended blocks even after all GC operations
+	CHECK(ChunkHashes[0] == IoHash::HashBuffer(CasStore->FindChunk(ChunkHashes[0])));
+	CHECK(ChunkHashes[1] == IoHash::HashBuffer(CasStore->FindChunk(ChunkHashes[1])));
+	CHECK(ChunkHashes[2] == IoHash::HashBuffer(CasStore->FindChunk(ChunkHashes[2])));
+	CHECK(ChunkHashes[3] == IoHash::HashBuffer(CasStore->FindChunk(ChunkHashes[3])));
+	CHECK(ChunkHashes[4] == IoHash::HashBuffer(CasStore->FindChunk(ChunkHashes[4])));
+	CHECK(ChunkHashes[5] == IoHash::HashBuffer(CasStore->FindChunk(ChunkHashes[5])));
+	CHECK(ChunkHashes[6] == IoHash::HashBuffer(CasStore->FindChunk(ChunkHashes[6])));
+	CHECK(ChunkHashes[7] == IoHash::HashBuffer(CasStore->FindChunk(ChunkHashes[7])));
+	CHECK(ChunkHashes[8] == IoHash::HashBuffer(CasStore->FindChunk(ChunkHashes[8])));
+
+	auto FinalSize = CasStore->TotalSize();
+	CHECK(InitialSize.TinySize == FinalSize.TinySize);
+}
 #endif
 
 void