diff options
| author | Dan Engelbrecht <[email protected]> | 2023-10-02 12:00:00 +0200 |
|---|---|---|
| committer | GitHub <[email protected]> | 2023-10-02 12:00:00 +0200 |
| commit | 0abf7994e8913c19360a0f0b8527495c0f99de87 (patch) | |
| tree | a9a0338d69a95a6f20d9634a2a0e9f5b1595b639 /src | |
| parent | Limit size of memory cache layer (#423) (diff) | |
| download | zen-0abf7994e8913c19360a0f0b8527495c0f99de87.tar.xz zen-0abf7994e8913c19360a0f0b8527495c0f99de87.zip | |
Handle OOM and OOD more gracefully to not spam Sentry with error reports (#434)
- Improvement: Catch Out Of Memory and Out Of Disk exceptions and report back to reqeuster without reporting an error to Sentry
- Improvement: If creating bucket fails when storing and item in the structured cache, log a warning and propagate error to requester without reporting an error to Sentry
- Improvement: Make an explicit flush of the active block written to in blockstore flush
- Improvement: Make sure cache and cas MakeIndexSnapshot does not throw exception on failure which would cause and abnormal termniation at exit
Diffstat (limited to 'src')
| -rw-r--r-- | src/zencore/except.cpp | 58 | ||||
| -rw-r--r-- | src/zencore/include/zencore/except.h | 3 | ||||
| -rw-r--r-- | src/zencore/iobuffer.cpp | 26 | ||||
| -rw-r--r-- | src/zenhttp/httpasio.cpp | 26 | ||||
| -rw-r--r-- | src/zenhttp/httpsys.cpp | 17 | ||||
| -rw-r--r-- | src/zenserver/cache/cachedisklayer.cpp | 35 | ||||
| -rw-r--r-- | src/zenstore/blockstore.cpp | 42 | ||||
| -rw-r--r-- | src/zenstore/compactcas.cpp | 31 | ||||
| -rw-r--r-- | src/zenstore/filecas.cpp | 34 | ||||
| -rw-r--r-- | src/zenstore/gc.cpp | 40 |
10 files changed, 259 insertions, 53 deletions
diff --git a/src/zencore/except.cpp b/src/zencore/except.cpp index 65f5ebc62..f98743ea9 100644 --- a/src/zencore/except.cpp +++ b/src/zencore/except.cpp @@ -110,4 +110,62 @@ ThrowOutOfMemory(std::string_view Message) } #endif +#if ZEN_PLATFORM_WINDOWS +bool +IsOOM(const std::system_error& SystemError) +{ + switch (SystemError.code().value()) + { + case ERROR_NOT_ENOUGH_MEMORY: + case ERROR_OUTOFMEMORY: + case ERROR_PAGEFILE_QUOTA_EXCEEDED: + case ERROR_NONPAGED_SYSTEM_RESOURCES: + case ERROR_PAGED_SYSTEM_RESOURCES: + case ERROR_PAGEFILE_QUOTA: + case ERROR_COMMITMENT_LIMIT: + return true; + default: + return false; + } +} +bool +IsOOD(const std::system_error& SystemError) +{ + switch (SystemError.code().value()) + { + case ERROR_HANDLE_DISK_FULL: + case ERROR_DISK_FULL: + case ERROR_DISK_RESOURCES_EXHAUSTED: + case ERROR_DISK_QUOTA_EXCEEDED: + return true; + default: + return false; + } +} +#else +bool +IsOOM(const std::system_error& SystemError) +{ + switch (SystemError.code().value()) + { + case ENOMEM: + return true; + default: + return false; + } +} +bool +IsOOD(const std::system_error& SystemError) +{ + switch (SystemError.code().value()) + { + case ENOSPC: + case EDQUOT: + return true; + default: + return false; + } +} +#endif + } // namespace zen diff --git a/src/zencore/include/zencore/except.h b/src/zencore/include/zencore/except.h index 464852f88..6810e6ea9 100644 --- a/src/zencore/include/zencore/except.h +++ b/src/zencore/include/zencore/except.h @@ -66,4 +66,7 @@ public: inline explicit OptionParseException(const std::string& Message) : std::runtime_error(Message) {} }; +bool IsOOM(const std::system_error& SystemError); +bool IsOOD(const std::system_error& SystemError); + } // namespace zen diff --git a/src/zencore/iobuffer.cpp b/src/zencore/iobuffer.cpp index 09cd0a000..74fec4c51 100644 --- a/src/zencore/iobuffer.cpp +++ b/src/zencore/iobuffer.cpp @@ -339,12 +339,12 @@ IoBufferExtendedCore::Materialize() const if (Error || (BytesRead != m_DataBytes)) { std::error_code DummyEc; - ZEN_ERROR("ReadFile/pread failed (offset {:#x}, size {:#x}) file: '{}' (size {:#x}), {}", - m_FileOffset, - m_DataBytes, - zen::PathFromHandle(m_FileHandle, DummyEc), - zen::FileSizeFromHandle(m_FileHandle), - GetSystemErrorAsString(Error)); + ZEN_WARN("ReadFile/pread failed (offset {:#x}, size {:#x}) file: '{}' (size {:#x}), {}", + m_FileOffset, + m_DataBytes, + zen::PathFromHandle(m_FileHandle, DummyEc), + zen::FileSizeFromHandle(m_FileHandle), + GetSystemErrorAsString(Error)); throw std::system_error(std::error_code(Error, std::system_category()), fmt::format("ReadFile/pread failed (offset {:#x}, size {:#x}) file: '{}' (size {:#x})", m_FileOffset, @@ -379,7 +379,7 @@ IoBufferExtendedCore::Materialize() const { int32_t Error = zen::GetLastError(); std::error_code DummyEc; - ZEN_ERROR("CreateFileMapping failed on file '{}', {}", zen::PathFromHandle(m_FileHandle, DummyEc), GetSystemErrorAsString(Error)); + ZEN_WARN("CreateFileMapping failed on file '{}', {}", zen::PathFromHandle(m_FileHandle, DummyEc), GetSystemErrorAsString(Error)); throw std::system_error(std::error_code(Error, std::system_category()), fmt::format("CreateFileMapping failed on file '{}'", zen::PathFromHandle(m_FileHandle, DummyEc))); } @@ -412,12 +412,12 @@ IoBufferExtendedCore::Materialize() const #endif // ZEN_PLATFORM_WINDOWS std::error_code DummyEc; - ZEN_ERROR("MapViewOfFile/mmap failed (offset {:#x}, size {:#x}) file: '{}' (size {:#x}), {}", - MapOffset, - MapSize, - zen::PathFromHandle(m_FileHandle, DummyEc), - zen::FileSizeFromHandle(m_FileHandle), - GetSystemErrorAsString(Error)); + ZEN_WARN("MapViewOfFile/mmap failed (offset {:#x}, size {:#x}) file: '{}' (size {:#x}), {}", + MapOffset, + MapSize, + zen::PathFromHandle(m_FileHandle, DummyEc), + zen::FileSizeFromHandle(m_FileHandle), + GetSystemErrorAsString(Error)); throw std::system_error(std::error_code(Error, std::system_category()), fmt::format("MapViewOfFile failed (offset {:#x}, size {:#x}) file: '{}' (size {:#x})", MapOffset, diff --git a/src/zenhttp/httpasio.cpp b/src/zenhttp/httpasio.cpp index affe328e3..f29b3132e 100644 --- a/src/zenhttp/httpasio.cpp +++ b/src/zenhttp/httpasio.cpp @@ -2,6 +2,7 @@ #include "httpasio.h" +#include <zencore/except.h> #include <zencore/logging.h> #include <zencore/trace.h> #include <zenhttp/httpserver.h> @@ -544,13 +545,34 @@ HttpServerConnection::HandleRequest() { Service->HandleRequest(Request); } - catch (std::exception& ex) + catch (std::system_error& SystemError) { - ZEN_ERROR("Caught exception while handling request: {}", ex.what()); + // Drop any partially formatted response + Request.m_Response.reset(); + if (IsOOM(SystemError.code()) || IsOOD(SystemError.code())) + { + Request.WriteResponse(HttpResponseCode::InsufficientStorage, HttpContentType::kText, SystemError.what()); + } + else + { + ZEN_ERROR("Caught system error exception while handling request: {}", SystemError.what()); + Request.WriteResponse(HttpResponseCode::InternalServerError, HttpContentType::kText, SystemError.what()); + } + } + catch (std::bad_alloc& BadAlloc) + { + // Drop any partially formatted response + Request.m_Response.reset(); + + Request.WriteResponse(HttpResponseCode::InsufficientStorage, HttpContentType::kText, BadAlloc.what()); + } + catch (std::exception& ex) + { // Drop any partially formatted response Request.m_Response.reset(); + ZEN_ERROR("Caught exception while handling request: {}", ex.what()); Request.WriteResponse(HttpResponseCode::InternalServerError, HttpContentType::kText, ex.what()); } } diff --git a/src/zenhttp/httpsys.cpp b/src/zenhttp/httpsys.cpp index 8c1f68ee8..60358d0b0 100644 --- a/src/zenhttp/httpsys.cpp +++ b/src/zenhttp/httpsys.cpp @@ -1812,11 +1812,24 @@ InitialRequestHandler::HandleCompletion(ULONG IoResult, ULONG_PTR NumberOfBytesT // Unable to route return new HttpMessageResponseRequest(Transaction(), 404, "No suitable route found"sv); } + catch (std::system_error& SystemError) + { + if (IsOOM(SystemError.code()) || IsOOD(SystemError.code())) + { + return new HttpMessageResponseRequest(Transaction(), (uint16_t)HttpResponseCode::InsufficientStorage, SystemError.what()); + } + + ZEN_ERROR("Caught system error exception while handling request: {}", SystemError.what()); + return new HttpMessageResponseRequest(Transaction(), (uint16_t)HttpResponseCode::InternalServerError, SystemError.what()); + } + catch (std::bad_alloc& BadAlloc) + { + return new HttpMessageResponseRequest(Transaction(), (uint16_t)HttpResponseCode::InsufficientStorage, BadAlloc.what()); + } catch (std::exception& ex) { ZEN_ERROR("Caught exception while handling request: '{}'", ex.what()); - - return new HttpMessageResponseRequest(Transaction(), 500, ex.what()); + return new HttpMessageResponseRequest(Transaction(), (uint16_t)HttpResponseCode::InternalServerError, ex.what()); } } diff --git a/src/zenserver/cache/cachedisklayer.cpp b/src/zenserver/cache/cachedisklayer.cpp index d53d3f3f4..98a24116f 100644 --- a/src/zenserver/cache/cachedisklayer.cpp +++ b/src/zenserver/cache/cachedisklayer.cpp @@ -305,15 +305,21 @@ ZenCacheDiskLayer::CacheBucket::MakeIndexSnapshot() // Move index away, we keep it if something goes wrong if (fs::is_regular_file(STmpIndexPath)) { - fs::remove(STmpIndexPath); - } - if (fs::is_regular_file(IndexPath)) - { - fs::rename(IndexPath, STmpIndexPath); + std::error_code Ec; + if (!fs::remove(STmpIndexPath, Ec) || Ec) + { + ZEN_WARN("snapshot failed to clean up temp snapshot at {}, reason: '{}'", STmpIndexPath, Ec.message()); + return; + } } try { + if (fs::is_regular_file(IndexPath)) + { + fs::rename(IndexPath, STmpIndexPath); + } + // Write the current state of the location map to a new index state std::vector<DiskIndexEntry> Entries; Entries.resize(m_Index.size()); @@ -351,13 +357,22 @@ ZenCacheDiskLayer::CacheBucket::MakeIndexSnapshot() if (fs::is_regular_file(STmpIndexPath)) { - fs::remove(IndexPath); - fs::rename(STmpIndexPath, IndexPath); + std::error_code Ec; + fs::remove(IndexPath, Ec); // We don't care if this fails, we try to move the old temp file regardless + fs::rename(STmpIndexPath, IndexPath, Ec); + if (Ec) + { + ZEN_WARN("snapshot failed to restore old snapshot from {}, reason: '{}'", STmpIndexPath, Ec.message()); + } } } if (fs::is_regular_file(STmpIndexPath)) { - fs::remove(STmpIndexPath); + std::error_code Ec; + if (!fs::remove(STmpIndexPath, Ec) || Ec) + { + ZEN_WARN("snapshot failed to remove temporary file {}, reason: '{}'", STmpIndexPath, Ec.message()); + } } } @@ -1977,8 +1992,8 @@ ZenCacheDiskLayer::Put(std::string_view InBucket, const IoHash& HashKey, const Z } catch (const std::exception& Err) { - ZEN_ERROR("creating bucket '{}' in '{}' FAILED, reason: '{}'", BucketName, BucketPath, Err.what()); - return; + ZEN_WARN("creating bucket '{}' in '{}' FAILED, reason: '{}'", BucketName, BucketPath, Err.what()); + throw; } } } diff --git a/src/zenstore/blockstore.cpp b/src/zenstore/blockstore.cpp index 520227474..b5ed17fc6 100644 --- a/src/zenstore/blockstore.cpp +++ b/src/zenstore/blockstore.cpp @@ -2,6 +2,7 @@ #include <zenstore/blockstore.h> +#include <zencore/except.h> #include <zencore/fmtutils.h> #include <zencore/logging.h> #include <zencore/scopeguard.h> @@ -362,7 +363,11 @@ BlockStore::Flush() { uint32_t WriteBlockIndex = m_WriteBlockIndex.load(std::memory_order_acquire); WriteBlockIndex = (WriteBlockIndex + 1) & (m_MaxBlockCount - 1); - m_WriteBlock = nullptr; + if (m_WriteBlock) + { + m_WriteBlock->Flush(); + } + m_WriteBlock = nullptr; m_WriteBlockIndex.store(WriteBlockIndex, std::memory_order_release); m_CurrentInsertOffset = 0; } @@ -502,10 +507,18 @@ BlockStore::ReclaimSpace(const ReclaimSnapshotState& Snapshot, return; } - Ref<BlockStoreFile> NewBlockFile; try { ZEN_TRACE_CPU("BlockStore::ReclaimSpace::Compact"); + Ref<BlockStoreFile> NewBlockFile; + auto NewBlockFileGuard = MakeGuard([&]() { + if (NewBlockFile) + { + ZEN_DEBUG("dropping incomplete cas block store file '{}'", NewBlockFile->GetPath()); + m_TotalSize.fetch_sub(NewBlockFile->FileSize(), std::memory_order::relaxed); + NewBlockFile->MarkAsDeleteOnClose(); + } + }); uint64_t WriteOffset = 0; uint32_t NewBlockIndex = 0; @@ -703,15 +716,28 @@ BlockStore::ReclaimSpace(const ReclaimSnapshotState& Snapshot, NewBlockFile = nullptr; } } - catch (std::exception& ex) + catch (std::system_error& SystemError) { - ZEN_ERROR("reclaiming space for '{}' failed with: '{}'", m_BlocksBasePath, ex.what()); - if (NewBlockFile) + if (IsOOM(SystemError.code())) + { + ZEN_WARN("reclaiming space for '{}' ran out of memory: '{}'", m_BlocksBasePath, SystemError.what()); + } + else if (IsOOD(SystemError.code())) { - ZEN_DEBUG("dropping incomplete cas block store file '{}'", NewBlockFile->GetPath()); - m_TotalSize.fetch_sub(NewBlockFile->FileSize(), std::memory_order::relaxed); - NewBlockFile->MarkAsDeleteOnClose(); + ZEN_WARN("reclaiming space for '{}' ran out of disk space: '{}'", m_BlocksBasePath, SystemError.what()); } + else + { + ZEN_ERROR("reclaiming space for '{}' failed with system error exception: '{}'", m_BlocksBasePath, SystemError.what()); + } + } + catch (std::bad_alloc& BadAlloc) + { + ZEN_WARN("reclaiming space for '{}' ran out of memory: '{}'", m_BlocksBasePath, BadAlloc.what()); + } + catch (std::exception& ex) + { + ZEN_ERROR("reclaiming space for '{}' failed with: '{}'", m_BlocksBasePath, ex.what()); } } diff --git a/src/zenstore/compactcas.cpp b/src/zenstore/compactcas.cpp index a138e43e9..1d1797597 100644 --- a/src/zenstore/compactcas.cpp +++ b/src/zenstore/compactcas.cpp @@ -565,15 +565,21 @@ CasContainerStrategy::MakeIndexSnapshot() // Move index away, we keep it if something goes wrong if (fs::is_regular_file(TempIndexPath)) { - fs::remove(TempIndexPath); - } - if (fs::is_regular_file(IndexPath)) - { - fs::rename(IndexPath, TempIndexPath); + std::error_code Ec; + if (!fs::remove(TempIndexPath, Ec) || Ec) + { + ZEN_WARN("snapshot failed to clean up temp snapshot at {}, reason: '{}'", TempIndexPath, Ec.message()); + return; + } } try { + if (fs::is_regular_file(IndexPath)) + { + fs::rename(IndexPath, TempIndexPath); + } + // Write the current state of the location map to a new index state std::vector<CasDiskIndexEntry> Entries; @@ -613,13 +619,22 @@ CasContainerStrategy::MakeIndexSnapshot() if (fs::is_regular_file(TempIndexPath)) { - fs::remove(IndexPath); - fs::rename(TempIndexPath, IndexPath); + std::error_code Ec; + fs::remove(IndexPath, Ec); // We don't care if this fails, we try to move the old temp file regardless + fs::rename(TempIndexPath, IndexPath, Ec); + if (Ec) + { + ZEN_WARN("snapshot failed to restore old snapshot from {}, reason: '{}'", TempIndexPath, Ec.message()); + } } } if (fs::is_regular_file(TempIndexPath)) { - fs::remove(TempIndexPath); + std::error_code Ec; + if (!fs::remove(TempIndexPath, Ec) || Ec) + { + ZEN_WARN("snapshot failed to remove temporary file {}, reason: '{}'", TempIndexPath, Ec.message()); + } } } diff --git a/src/zenstore/filecas.cpp b/src/zenstore/filecas.cpp index c3dce2b7b..0d742d7e1 100644 --- a/src/zenstore/filecas.cpp +++ b/src/zenstore/filecas.cpp @@ -1031,6 +1031,7 @@ FileCasStrategy::MakeIndexSnapshot() { return; } + ZEN_DEBUG("write store snapshot for '{}'", m_RootDirectory); uint64_t EntryCount = 0; Stopwatch Timer; @@ -1049,15 +1050,21 @@ FileCasStrategy::MakeIndexSnapshot() // Move index away, we keep it if something goes wrong if (fs::is_regular_file(STmpIndexPath)) { - fs::remove(STmpIndexPath); - } - if (fs::is_regular_file(IndexPath)) - { - fs::rename(IndexPath, STmpIndexPath); + std::error_code Ec; + if (!fs::remove(STmpIndexPath, Ec) || Ec) + { + ZEN_WARN("snapshot failed to clean up temp snapshot at {}, reason: '{}'", STmpIndexPath, Ec.message()); + return; + } } try { + if (fs::is_regular_file(IndexPath)) + { + fs::rename(IndexPath, STmpIndexPath); + } + // Write the current state of the location map to a new index state std::vector<FileCasIndexEntry> Entries; @@ -1088,19 +1095,28 @@ FileCasStrategy::MakeIndexSnapshot() } catch (std::exception& Err) { - ZEN_ERROR("snapshot FAILED, reason: '{}'", Err.what()); + ZEN_WARN("snapshot FAILED, reason: '{}'", Err.what()); // Restore any previous snapshot if (fs::is_regular_file(STmpIndexPath)) { - fs::remove(IndexPath); - fs::rename(STmpIndexPath, IndexPath); + std::error_code Ec; + fs::remove(IndexPath, Ec); // We don't care if this fails, we try to move the old temp file regardless + fs::rename(STmpIndexPath, IndexPath, Ec); + if (Ec) + { + ZEN_WARN("snapshot failed to restore old snapshot from {}, reason: '{}'", STmpIndexPath, Ec.message()); + } } } if (fs::is_regular_file(STmpIndexPath)) { - fs::remove(STmpIndexPath); + std::error_code Ec; + if (!fs::remove(STmpIndexPath, Ec) || Ec) + { + ZEN_WARN("snapshot failed to remove temporary file {}, reason: '{}'", STmpIndexPath, Ec.message()); + } } } uint64_t diff --git a/src/zenstore/gc.cpp b/src/zenstore/gc.cpp index f8ef5de82..b63be04bb 100644 --- a/src/zenstore/gc.cpp +++ b/src/zenstore/gc.cpp @@ -1032,6 +1032,25 @@ GcScheduler::SchedulerThread() WaitTime = std::chrono::seconds(0); } + catch (std::system_error& SystemError) + { + if (IsOOM(SystemError.code())) + { + ZEN_WARN("scheduling garbage collection ran out of memory: '{}'", SystemError.what()); + } + else if (IsOOD(SystemError.code())) + { + ZEN_WARN("scheduling garbage collection ran out of disk space: '{}'", SystemError.what()); + } + else + { + ZEN_ERROR("scheduling garbage collection failed with system error exception: '{}'", SystemError.what()); + } + } + catch (std::bad_alloc& BadAlloc) + { + ZEN_WARN("scheduling garbage collection ran out of memory: '{}'", BadAlloc.what()); + } catch (std::exception& Ex) { ZEN_ERROR("scheduling garbage collection failed with: '{}'", Ex.what()); @@ -1151,9 +1170,28 @@ GcScheduler::CollectGarbage(const GcClock::TimePoint& CacheExpireTime, SchedulerState << "LastGcExpireTime"sv << static_cast<int64_t>(m_LastGcExpireTime.time_since_epoch().count()); SaveCompactBinaryObject(Path, SchedulerState.Save()); } + catch (std::system_error& SystemError) + { + if (IsOOM(SystemError.code())) + { + ZEN_WARN("writing gc scheduler state ran out of memory: '{}'", SystemError.what()); + } + else if (IsOOD(SystemError.code())) + { + ZEN_WARN("writing gc scheduler state ran out of disk space: '{}'", SystemError.what()); + } + else + { + ZEN_ERROR("writing gc scheduler state failed with system error exception: '{}'", SystemError.what()); + } + } + catch (std::bad_alloc& BadAlloc) + { + ZEN_WARN("writing gc scheduler state ran out of memory: '{}'", BadAlloc.what()); + } catch (std::exception& Ex) { - ZEN_WARN("writing gc scheduler state failed with: '{}'", Ex.what()); + ZEN_ERROR("writing gc scheduler state failed with: '{}'", Ex.what()); } } } |