From b8460468bcdb9f331d06afb2b3b9967bdf915aab Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Wed, 8 Nov 2023 15:25:56 +0100 Subject: disk layer gc and error/warnings cleanup (#515) - Improvement: When disk space is low, use the GC reserve (if available) when writing the index/manifest for a disk cache bucket - Improvement: Demote errors to warnings for issues that are not critical and that we handle gracefully - Improvement: Treat more out-of-memory errors from Windows as Out Of Memory errors Fixed wrong sizeof() statement for compactcas index (luckily the two structs are of the same size) --- src/zenserver/cache/cachedisklayer.cpp | 115 +++++++++++++++++++++++---------- 1 file changed, 81 insertions(+), 34 deletions(-) (limited to 'src/zenserver/cache/cachedisklayer.cpp') diff --git a/src/zenserver/cache/cachedisklayer.cpp b/src/zenserver/cache/cachedisklayer.cpp index a9ac46cab..d66430f15 100644 --- a/src/zenserver/cache/cachedisklayer.cpp +++ b/src/zenserver/cache/cachedisklayer.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -157,12 +158,6 @@ LoadCompactBinaryObject(const fs::path& Path) return CbObject(); } -static void -SaveCompactBinaryObject(const fs::path& Path, const CbObject& Object) -{ - WriteFile(Path, Object.GetBuffer().AsIoBuffer()); -} - ////////////////////////////////////////////////////////////////////////// ZenCacheDiskLayer::CacheBucket::CacheBucket(GcManager& Gc, @@ -231,7 +226,7 @@ ZenCacheDiskLayer::CacheBucket::OpenOrCreate(std::filesystem::path BucketDir, bo Writer << "BucketId"sv << m_BucketId; Writer << "Version"sv << CurrentDiskBucketVersion; Manifest = Writer.Save(); - SaveCompactBinaryObject(ManifestPath, Manifest); + WriteFile(m_BucketDir / "zen_manifest", Manifest.GetBuffer().AsIoBuffer()); IsNew = true; } else @@ -348,7 +343,7 @@ ZenCacheDiskLayer::CacheBucket::OpenOrCreate(std::filesystem::path BucketDir, bo } void -ZenCacheDiskLayer::CacheBucket::MakeIndexSnapshot() 
+ZenCacheDiskLayer::CacheBucket::MakeIndexSnapshot(const std::function& ClaimDiskReserveFunc) { ZEN_TRACE_CPU("Z$::Disk::Bucket::MakeIndexSnapshot"); @@ -405,6 +400,26 @@ ZenCacheDiskLayer::CacheBucket::MakeIndexSnapshot() } } + uint64_t IndexSize = sizeof(CacheBucketIndexHeader) + Entries.size() * sizeof(DiskIndexEntry); + std::error_code Error; + DiskSpace Space = DiskSpaceInfo(m_BucketDir, Error); + if (Error) + { + throw std::system_error(Error, fmt::format("get disk space in '{}' FAILED", m_BucketDir)); + } + + bool EnoughSpace = Space.Free >= IndexSize + 1024 * 512; + if (!EnoughSpace) + { + uint64_t ReclaimedSpace = ClaimDiskReserveFunc(); + EnoughSpace = (Space.Free + ReclaimedSpace) >= IndexSize + 1024 * 512; + } + if (!EnoughSpace) + { + throw std::runtime_error( + fmt::format("not enough free disk space in '{}' to save index of size {}", m_BucketDir, NiceBytes(IndexSize))); + } + BasicFile ObjectIndexFile; ObjectIndexFile.Open(IndexPath, BasicFile::Mode::kTruncate); CacheBucketIndexHeader Header = {.EntryCount = Entries.size(), @@ -412,7 +427,6 @@ ZenCacheDiskLayer::CacheBucket::MakeIndexSnapshot() .PayloadAlignment = gsl::narrow(m_Configuration.PayloadAlignment)}; Header.Checksum = CacheBucketIndexHeader::ComputeChecksum(Header); - ObjectIndexFile.Write(&Header, sizeof(CacheBucketIndexHeader), 0); ObjectIndexFile.Write(Entries.data(), Entries.size() * sizeof(DiskIndexEntry), sizeof(CacheBucketIndexHeader)); ObjectIndexFile.Flush(); @@ -951,34 +965,61 @@ ZenCacheDiskLayer::CacheBucket::Flush() ZEN_INFO("Flushing bucket {}", m_BucketDir); - m_BlockStore.Flush(/*ForceNewBlock*/ false); - m_SlogFile.Flush(); + try + { + m_BlockStore.Flush(/*ForceNewBlock*/ false); + m_SlogFile.Flush(); - std::vector AccessTimes; - std::vector Payloads; - IndexMap Index; + std::vector AccessTimes; + std::vector Payloads; + IndexMap Index; + { + RwLock::SharedLockScope IndexLock(m_IndexLock); + MakeIndexSnapshot(); + Index = m_Index; + Payloads = m_Payloads; + AccessTimes = 
m_AccessTimes; + } + SaveManifest(MakeManifest(std::move(Index), std::move(AccessTimes), Payloads)); + } + catch (std::exception& Ex) { - RwLock::SharedLockScope IndexLock(m_IndexLock); - MakeIndexSnapshot(); - Index = m_Index; - Payloads = m_Payloads; - AccessTimes = m_AccessTimes; + ZEN_WARN("Failed to flush bucket in '{}'. Reason: '{}'", m_BucketDir, Ex.what()); } - SaveManifest(MakeManifest(std::move(Index), std::move(AccessTimes), Payloads)); } void -ZenCacheDiskLayer::CacheBucket::SaveManifest(CbObject&& Manifest) +ZenCacheDiskLayer::CacheBucket::SaveManifest(CbObject&& Manifest, const std::function& ClaimDiskReserveFunc) { ZEN_TRACE_CPU("Z$::Disk::Bucket::SaveManifest"); try { - SaveCompactBinaryObject(m_BucketDir / "zen_manifest", Manifest); + IoBuffer Buffer = Manifest.GetBuffer().AsIoBuffer(); + + std::error_code Error; + DiskSpace Space = DiskSpaceInfo(m_BucketDir, Error); + if (Error) + { + ZEN_WARN("get disk space in '{}' FAILED, reason: '{}'", m_BucketDir, Error.message()); + return; + } + bool EnoughSpace = Space.Free >= Buffer.GetSize() + 1024 * 512; + if (!EnoughSpace) + { + uint64_t ReclaimedSpace = ClaimDiskReserveFunc(); + EnoughSpace = (Space.Free + ReclaimedSpace) >= Buffer.GetSize() + 1024 * 512; + } + if (!EnoughSpace) + { + ZEN_WARN("not enough free disk space in '{}'. 
FAILED to save manifest of size {}", m_BucketDir, NiceBytes(Buffer.GetSize())); + return; + } + WriteFile(m_BucketDir / "zen_manifest", Buffer); } catch (std::exception& Err) { - ZEN_WARN("writing manifest FAILED, reason: '{}'", Err.what()); + ZEN_WARN("writing manifest in '{}' FAILED, reason: '{}'", m_BucketDir, Err.what()); } } @@ -1626,17 +1667,24 @@ ZenCacheDiskLayer::CacheBucket::CollectGarbage(GcContext& GcCtx) } auto FlushingGuard = MakeGuard([&] { m_IsFlushing.store(false); }); - std::vector AccessTimes; - std::vector Payloads; - IndexMap Index; + try { - RwLock::SharedLockScope IndexLock(m_IndexLock); - MakeIndexSnapshot(); - Index = m_Index; - Payloads = m_Payloads; - AccessTimes = m_AccessTimes; + std::vector AccessTimes; + std::vector Payloads; + IndexMap Index; + { + RwLock::SharedLockScope IndexLock(m_IndexLock); + MakeIndexSnapshot([&]() { return GcCtx.ClaimGCReserve(); }); + Index = m_Index; + Payloads = m_Payloads; + AccessTimes = m_AccessTimes; + } + SaveManifest(MakeManifest(std::move(Index), std::move(AccessTimes), Payloads), [&]() { return GcCtx.ClaimGCReserve(); }); + } + catch (std::exception& Ex) + { + ZEN_WARN("Failed to write index and manifest after GC in '{}'. Reason: '{}'", m_BucketDir, Ex.what()); } - SaveManifest(MakeManifest(std::move(Index), std::move(AccessTimes), Payloads)); }); m_SlogFile.Flush(); @@ -1727,7 +1775,6 @@ ZenCacheDiskLayer::CacheBucket::CollectGarbage(GcContext& GcCtx) m_SlogFile.Append(ExpiredStandaloneEntries); } } - SaveManifest(MakeManifest(std::move(Index), std::move(AccessTimes), Payloads)); } if (GcCtx.IsDeletionMode()) @@ -3186,7 +3233,7 @@ ZenCacheDiskLayer::DiscoverBuckets() } catch (const std::exception& Err) { - ZEN_ERROR("creating bucket '{}' in '{}' FAILED, reason: '{}'", BucketName, BucketPath, Err.what()); + ZEN_ERROR("Opening bucket '{}' in '{}' FAILED, reason: '{}'", BucketName, BucketPath, Err.what()); return; } }); -- cgit v1.2.3