aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorDan Engelbrecht <[email protected]>2023-10-02 12:00:00 +0200
committerGitHub <[email protected]>2023-10-02 12:00:00 +0200
commit0abf7994e8913c19360a0f0b8527495c0f99de87 (patch)
treea9a0338d69a95a6f20d9634a2a0e9f5b1595b639 /src
parentLimit size of memory cache layer (#423) (diff)
downloadzen-0abf7994e8913c19360a0f0b8527495c0f99de87.tar.xz
zen-0abf7994e8913c19360a0f0b8527495c0f99de87.zip
Handle OOM and OOD more gracefully to not spam Sentry with error reports (#434)
- Improvement: Catch Out Of Memory and Out Of Disk exceptions and report back to reqeuster without reporting an error to Sentry - Improvement: If creating bucket fails when storing and item in the structured cache, log a warning and propagate error to requester without reporting an error to Sentry - Improvement: Make an explicit flush of the active block written to in blockstore flush - Improvement: Make sure cache and cas MakeIndexSnapshot does not throw exception on failure which would cause and abnormal termniation at exit
Diffstat (limited to 'src')
-rw-r--r--src/zencore/except.cpp58
-rw-r--r--src/zencore/include/zencore/except.h3
-rw-r--r--src/zencore/iobuffer.cpp26
-rw-r--r--src/zenhttp/httpasio.cpp26
-rw-r--r--src/zenhttp/httpsys.cpp17
-rw-r--r--src/zenserver/cache/cachedisklayer.cpp35
-rw-r--r--src/zenstore/blockstore.cpp42
-rw-r--r--src/zenstore/compactcas.cpp31
-rw-r--r--src/zenstore/filecas.cpp34
-rw-r--r--src/zenstore/gc.cpp40
10 files changed, 259 insertions, 53 deletions
diff --git a/src/zencore/except.cpp b/src/zencore/except.cpp
index 65f5ebc62..f98743ea9 100644
--- a/src/zencore/except.cpp
+++ b/src/zencore/except.cpp
@@ -110,4 +110,62 @@ ThrowOutOfMemory(std::string_view Message)
}
#endif
+#if ZEN_PLATFORM_WINDOWS
+bool
+IsOOM(const std::system_error& SystemError)
+{
+ switch (SystemError.code().value())
+ {
+ case ERROR_NOT_ENOUGH_MEMORY:
+ case ERROR_OUTOFMEMORY:
+ case ERROR_PAGEFILE_QUOTA_EXCEEDED:
+ case ERROR_NONPAGED_SYSTEM_RESOURCES:
+ case ERROR_PAGED_SYSTEM_RESOURCES:
+ case ERROR_PAGEFILE_QUOTA:
+ case ERROR_COMMITMENT_LIMIT:
+ return true;
+ default:
+ return false;
+ }
+}
+bool
+IsOOD(const std::system_error& SystemError)
+{
+ switch (SystemError.code().value())
+ {
+ case ERROR_HANDLE_DISK_FULL:
+ case ERROR_DISK_FULL:
+ case ERROR_DISK_RESOURCES_EXHAUSTED:
+ case ERROR_DISK_QUOTA_EXCEEDED:
+ return true;
+ default:
+ return false;
+ }
+}
+#else
+bool
+IsOOM(const std::system_error& SystemError)
+{
+ switch (SystemError.code().value())
+ {
+ case ENOMEM:
+ return true;
+ default:
+ return false;
+ }
+}
+bool
+IsOOD(const std::system_error& SystemError)
+{
+ switch (SystemError.code().value())
+ {
+ case ENOSPC:
+ case EDQUOT:
+ return true;
+ default:
+ return false;
+ }
+}
+#endif
+
} // namespace zen
diff --git a/src/zencore/include/zencore/except.h b/src/zencore/include/zencore/except.h
index 464852f88..6810e6ea9 100644
--- a/src/zencore/include/zencore/except.h
+++ b/src/zencore/include/zencore/except.h
@@ -66,4 +66,7 @@ public:
inline explicit OptionParseException(const std::string& Message) : std::runtime_error(Message) {}
};
+bool IsOOM(const std::system_error& SystemError);
+bool IsOOD(const std::system_error& SystemError);
+
} // namespace zen
diff --git a/src/zencore/iobuffer.cpp b/src/zencore/iobuffer.cpp
index 09cd0a000..74fec4c51 100644
--- a/src/zencore/iobuffer.cpp
+++ b/src/zencore/iobuffer.cpp
@@ -339,12 +339,12 @@ IoBufferExtendedCore::Materialize() const
if (Error || (BytesRead != m_DataBytes))
{
std::error_code DummyEc;
- ZEN_ERROR("ReadFile/pread failed (offset {:#x}, size {:#x}) file: '{}' (size {:#x}), {}",
- m_FileOffset,
- m_DataBytes,
- zen::PathFromHandle(m_FileHandle, DummyEc),
- zen::FileSizeFromHandle(m_FileHandle),
- GetSystemErrorAsString(Error));
+ ZEN_WARN("ReadFile/pread failed (offset {:#x}, size {:#x}) file: '{}' (size {:#x}), {}",
+ m_FileOffset,
+ m_DataBytes,
+ zen::PathFromHandle(m_FileHandle, DummyEc),
+ zen::FileSizeFromHandle(m_FileHandle),
+ GetSystemErrorAsString(Error));
throw std::system_error(std::error_code(Error, std::system_category()),
fmt::format("ReadFile/pread failed (offset {:#x}, size {:#x}) file: '{}' (size {:#x})",
m_FileOffset,
@@ -379,7 +379,7 @@ IoBufferExtendedCore::Materialize() const
{
int32_t Error = zen::GetLastError();
std::error_code DummyEc;
- ZEN_ERROR("CreateFileMapping failed on file '{}', {}", zen::PathFromHandle(m_FileHandle, DummyEc), GetSystemErrorAsString(Error));
+ ZEN_WARN("CreateFileMapping failed on file '{}', {}", zen::PathFromHandle(m_FileHandle, DummyEc), GetSystemErrorAsString(Error));
throw std::system_error(std::error_code(Error, std::system_category()),
fmt::format("CreateFileMapping failed on file '{}'", zen::PathFromHandle(m_FileHandle, DummyEc)));
}
@@ -412,12 +412,12 @@ IoBufferExtendedCore::Materialize() const
#endif // ZEN_PLATFORM_WINDOWS
std::error_code DummyEc;
- ZEN_ERROR("MapViewOfFile/mmap failed (offset {:#x}, size {:#x}) file: '{}' (size {:#x}), {}",
- MapOffset,
- MapSize,
- zen::PathFromHandle(m_FileHandle, DummyEc),
- zen::FileSizeFromHandle(m_FileHandle),
- GetSystemErrorAsString(Error));
+ ZEN_WARN("MapViewOfFile/mmap failed (offset {:#x}, size {:#x}) file: '{}' (size {:#x}), {}",
+ MapOffset,
+ MapSize,
+ zen::PathFromHandle(m_FileHandle, DummyEc),
+ zen::FileSizeFromHandle(m_FileHandle),
+ GetSystemErrorAsString(Error));
throw std::system_error(std::error_code(Error, std::system_category()),
fmt::format("MapViewOfFile failed (offset {:#x}, size {:#x}) file: '{}' (size {:#x})",
MapOffset,
diff --git a/src/zenhttp/httpasio.cpp b/src/zenhttp/httpasio.cpp
index affe328e3..f29b3132e 100644
--- a/src/zenhttp/httpasio.cpp
+++ b/src/zenhttp/httpasio.cpp
@@ -2,6 +2,7 @@
#include "httpasio.h"
+#include <zencore/except.h>
#include <zencore/logging.h>
#include <zencore/trace.h>
#include <zenhttp/httpserver.h>
@@ -544,13 +545,34 @@ HttpServerConnection::HandleRequest()
{
Service->HandleRequest(Request);
}
- catch (std::exception& ex)
+ catch (std::system_error& SystemError)
{
- ZEN_ERROR("Caught exception while handling request: {}", ex.what());
+ // Drop any partially formatted response
+ Request.m_Response.reset();
+ if (IsOOM(SystemError.code()) || IsOOD(SystemError.code()))
+ {
+ Request.WriteResponse(HttpResponseCode::InsufficientStorage, HttpContentType::kText, SystemError.what());
+ }
+ else
+ {
+ ZEN_ERROR("Caught system error exception while handling request: {}", SystemError.what());
+ Request.WriteResponse(HttpResponseCode::InternalServerError, HttpContentType::kText, SystemError.what());
+ }
+ }
+ catch (std::bad_alloc& BadAlloc)
+ {
+ // Drop any partially formatted response
+ Request.m_Response.reset();
+
+ Request.WriteResponse(HttpResponseCode::InsufficientStorage, HttpContentType::kText, BadAlloc.what());
+ }
+ catch (std::exception& ex)
+ {
// Drop any partially formatted response
Request.m_Response.reset();
+ ZEN_ERROR("Caught exception while handling request: {}", ex.what());
Request.WriteResponse(HttpResponseCode::InternalServerError, HttpContentType::kText, ex.what());
}
}
diff --git a/src/zenhttp/httpsys.cpp b/src/zenhttp/httpsys.cpp
index 8c1f68ee8..60358d0b0 100644
--- a/src/zenhttp/httpsys.cpp
+++ b/src/zenhttp/httpsys.cpp
@@ -1812,11 +1812,24 @@ InitialRequestHandler::HandleCompletion(ULONG IoResult, ULONG_PTR NumberOfBytesT
// Unable to route
return new HttpMessageResponseRequest(Transaction(), 404, "No suitable route found"sv);
}
+ catch (std::system_error& SystemError)
+ {
+ if (IsOOM(SystemError.code()) || IsOOD(SystemError.code()))
+ {
+ return new HttpMessageResponseRequest(Transaction(), (uint16_t)HttpResponseCode::InsufficientStorage, SystemError.what());
+ }
+
+ ZEN_ERROR("Caught system error exception while handling request: {}", SystemError.what());
+ return new HttpMessageResponseRequest(Transaction(), (uint16_t)HttpResponseCode::InternalServerError, SystemError.what());
+ }
+ catch (std::bad_alloc& BadAlloc)
+ {
+ return new HttpMessageResponseRequest(Transaction(), (uint16_t)HttpResponseCode::InsufficientStorage, BadAlloc.what());
+ }
catch (std::exception& ex)
{
ZEN_ERROR("Caught exception while handling request: '{}'", ex.what());
-
- return new HttpMessageResponseRequest(Transaction(), 500, ex.what());
+ return new HttpMessageResponseRequest(Transaction(), (uint16_t)HttpResponseCode::InternalServerError, ex.what());
}
}
diff --git a/src/zenserver/cache/cachedisklayer.cpp b/src/zenserver/cache/cachedisklayer.cpp
index d53d3f3f4..98a24116f 100644
--- a/src/zenserver/cache/cachedisklayer.cpp
+++ b/src/zenserver/cache/cachedisklayer.cpp
@@ -305,15 +305,21 @@ ZenCacheDiskLayer::CacheBucket::MakeIndexSnapshot()
// Move index away, we keep it if something goes wrong
if (fs::is_regular_file(STmpIndexPath))
{
- fs::remove(STmpIndexPath);
- }
- if (fs::is_regular_file(IndexPath))
- {
- fs::rename(IndexPath, STmpIndexPath);
+ std::error_code Ec;
+ if (!fs::remove(STmpIndexPath, Ec) || Ec)
+ {
+ ZEN_WARN("snapshot failed to clean up temp snapshot at {}, reason: '{}'", STmpIndexPath, Ec.message());
+ return;
+ }
}
try
{
+ if (fs::is_regular_file(IndexPath))
+ {
+ fs::rename(IndexPath, STmpIndexPath);
+ }
+
// Write the current state of the location map to a new index state
std::vector<DiskIndexEntry> Entries;
Entries.resize(m_Index.size());
@@ -351,13 +357,22 @@ ZenCacheDiskLayer::CacheBucket::MakeIndexSnapshot()
if (fs::is_regular_file(STmpIndexPath))
{
- fs::remove(IndexPath);
- fs::rename(STmpIndexPath, IndexPath);
+ std::error_code Ec;
+ fs::remove(IndexPath, Ec); // We don't care if this fails, we try to move the old temp file regardless
+ fs::rename(STmpIndexPath, IndexPath, Ec);
+ if (Ec)
+ {
+ ZEN_WARN("snapshot failed to restore old snapshot from {}, reason: '{}'", STmpIndexPath, Ec.message());
+ }
}
}
if (fs::is_regular_file(STmpIndexPath))
{
- fs::remove(STmpIndexPath);
+ std::error_code Ec;
+ if (!fs::remove(STmpIndexPath, Ec) || Ec)
+ {
+ ZEN_WARN("snapshot failed to remove temporary file {}, reason: '{}'", STmpIndexPath, Ec.message());
+ }
}
}
@@ -1977,8 +1992,8 @@ ZenCacheDiskLayer::Put(std::string_view InBucket, const IoHash& HashKey, const Z
}
catch (const std::exception& Err)
{
- ZEN_ERROR("creating bucket '{}' in '{}' FAILED, reason: '{}'", BucketName, BucketPath, Err.what());
- return;
+ ZEN_WARN("creating bucket '{}' in '{}' FAILED, reason: '{}'", BucketName, BucketPath, Err.what());
+ throw;
}
}
}
diff --git a/src/zenstore/blockstore.cpp b/src/zenstore/blockstore.cpp
index 520227474..b5ed17fc6 100644
--- a/src/zenstore/blockstore.cpp
+++ b/src/zenstore/blockstore.cpp
@@ -2,6 +2,7 @@
#include <zenstore/blockstore.h>
+#include <zencore/except.h>
#include <zencore/fmtutils.h>
#include <zencore/logging.h>
#include <zencore/scopeguard.h>
@@ -362,7 +363,11 @@ BlockStore::Flush()
{
uint32_t WriteBlockIndex = m_WriteBlockIndex.load(std::memory_order_acquire);
WriteBlockIndex = (WriteBlockIndex + 1) & (m_MaxBlockCount - 1);
- m_WriteBlock = nullptr;
+ if (m_WriteBlock)
+ {
+ m_WriteBlock->Flush();
+ }
+ m_WriteBlock = nullptr;
m_WriteBlockIndex.store(WriteBlockIndex, std::memory_order_release);
m_CurrentInsertOffset = 0;
}
@@ -502,10 +507,18 @@ BlockStore::ReclaimSpace(const ReclaimSnapshotState& Snapshot,
return;
}
- Ref<BlockStoreFile> NewBlockFile;
try
{
ZEN_TRACE_CPU("BlockStore::ReclaimSpace::Compact");
+ Ref<BlockStoreFile> NewBlockFile;
+ auto NewBlockFileGuard = MakeGuard([&]() {
+ if (NewBlockFile)
+ {
+ ZEN_DEBUG("dropping incomplete cas block store file '{}'", NewBlockFile->GetPath());
+ m_TotalSize.fetch_sub(NewBlockFile->FileSize(), std::memory_order::relaxed);
+ NewBlockFile->MarkAsDeleteOnClose();
+ }
+ });
uint64_t WriteOffset = 0;
uint32_t NewBlockIndex = 0;
@@ -703,15 +716,28 @@ BlockStore::ReclaimSpace(const ReclaimSnapshotState& Snapshot,
NewBlockFile = nullptr;
}
}
- catch (std::exception& ex)
+ catch (std::system_error& SystemError)
{
- ZEN_ERROR("reclaiming space for '{}' failed with: '{}'", m_BlocksBasePath, ex.what());
- if (NewBlockFile)
+ if (IsOOM(SystemError.code()))
+ {
+ ZEN_WARN("reclaiming space for '{}' ran out of memory: '{}'", m_BlocksBasePath, SystemError.what());
+ }
+ else if (IsOOD(SystemError.code()))
{
- ZEN_DEBUG("dropping incomplete cas block store file '{}'", NewBlockFile->GetPath());
- m_TotalSize.fetch_sub(NewBlockFile->FileSize(), std::memory_order::relaxed);
- NewBlockFile->MarkAsDeleteOnClose();
+ ZEN_WARN("reclaiming space for '{}' ran out of disk space: '{}'", m_BlocksBasePath, SystemError.what());
}
+ else
+ {
+ ZEN_ERROR("reclaiming space for '{}' failed with system error exception: '{}'", m_BlocksBasePath, SystemError.what());
+ }
+ }
+ catch (std::bad_alloc& BadAlloc)
+ {
+ ZEN_WARN("reclaiming space for '{}' ran out of memory: '{}'", m_BlocksBasePath, BadAlloc.what());
+ }
+ catch (std::exception& ex)
+ {
+ ZEN_ERROR("reclaiming space for '{}' failed with: '{}'", m_BlocksBasePath, ex.what());
}
}
diff --git a/src/zenstore/compactcas.cpp b/src/zenstore/compactcas.cpp
index a138e43e9..1d1797597 100644
--- a/src/zenstore/compactcas.cpp
+++ b/src/zenstore/compactcas.cpp
@@ -565,15 +565,21 @@ CasContainerStrategy::MakeIndexSnapshot()
// Move index away, we keep it if something goes wrong
if (fs::is_regular_file(TempIndexPath))
{
- fs::remove(TempIndexPath);
- }
- if (fs::is_regular_file(IndexPath))
- {
- fs::rename(IndexPath, TempIndexPath);
+ std::error_code Ec;
+ if (!fs::remove(TempIndexPath, Ec) || Ec)
+ {
+ ZEN_WARN("snapshot failed to clean up temp snapshot at {}, reason: '{}'", TempIndexPath, Ec.message());
+ return;
+ }
}
try
{
+ if (fs::is_regular_file(IndexPath))
+ {
+ fs::rename(IndexPath, TempIndexPath);
+ }
+
// Write the current state of the location map to a new index state
std::vector<CasDiskIndexEntry> Entries;
@@ -613,13 +619,22 @@ CasContainerStrategy::MakeIndexSnapshot()
if (fs::is_regular_file(TempIndexPath))
{
- fs::remove(IndexPath);
- fs::rename(TempIndexPath, IndexPath);
+ std::error_code Ec;
+ fs::remove(IndexPath, Ec); // We don't care if this fails, we try to move the old temp file regardless
+ fs::rename(TempIndexPath, IndexPath, Ec);
+ if (Ec)
+ {
+ ZEN_WARN("snapshot failed to restore old snapshot from {}, reason: '{}'", TempIndexPath, Ec.message());
+ }
}
}
if (fs::is_regular_file(TempIndexPath))
{
- fs::remove(TempIndexPath);
+ std::error_code Ec;
+ if (!fs::remove(TempIndexPath, Ec) || Ec)
+ {
+ ZEN_WARN("snapshot failed to remove temporary file {}, reason: '{}'", TempIndexPath, Ec.message());
+ }
}
}
diff --git a/src/zenstore/filecas.cpp b/src/zenstore/filecas.cpp
index c3dce2b7b..0d742d7e1 100644
--- a/src/zenstore/filecas.cpp
+++ b/src/zenstore/filecas.cpp
@@ -1031,6 +1031,7 @@ FileCasStrategy::MakeIndexSnapshot()
{
return;
}
+
ZEN_DEBUG("write store snapshot for '{}'", m_RootDirectory);
uint64_t EntryCount = 0;
Stopwatch Timer;
@@ -1049,15 +1050,21 @@ FileCasStrategy::MakeIndexSnapshot()
// Move index away, we keep it if something goes wrong
if (fs::is_regular_file(STmpIndexPath))
{
- fs::remove(STmpIndexPath);
- }
- if (fs::is_regular_file(IndexPath))
- {
- fs::rename(IndexPath, STmpIndexPath);
+ std::error_code Ec;
+ if (!fs::remove(STmpIndexPath, Ec) || Ec)
+ {
+ ZEN_WARN("snapshot failed to clean up temp snapshot at {}, reason: '{}'", STmpIndexPath, Ec.message());
+ return;
+ }
}
try
{
+ if (fs::is_regular_file(IndexPath))
+ {
+ fs::rename(IndexPath, STmpIndexPath);
+ }
+
// Write the current state of the location map to a new index state
std::vector<FileCasIndexEntry> Entries;
@@ -1088,19 +1095,28 @@ FileCasStrategy::MakeIndexSnapshot()
}
catch (std::exception& Err)
{
- ZEN_ERROR("snapshot FAILED, reason: '{}'", Err.what());
+ ZEN_WARN("snapshot FAILED, reason: '{}'", Err.what());
// Restore any previous snapshot
if (fs::is_regular_file(STmpIndexPath))
{
- fs::remove(IndexPath);
- fs::rename(STmpIndexPath, IndexPath);
+ std::error_code Ec;
+ fs::remove(IndexPath, Ec); // We don't care if this fails, we try to move the old temp file regardless
+ fs::rename(STmpIndexPath, IndexPath, Ec);
+ if (Ec)
+ {
+ ZEN_WARN("snapshot failed to restore old snapshot from {}, reason: '{}'", STmpIndexPath, Ec.message());
+ }
}
}
if (fs::is_regular_file(STmpIndexPath))
{
- fs::remove(STmpIndexPath);
+ std::error_code Ec;
+ if (!fs::remove(STmpIndexPath, Ec) || Ec)
+ {
+ ZEN_WARN("snapshot failed to remove temporary file {}, reason: '{}'", STmpIndexPath, Ec.message());
+ }
}
}
uint64_t
diff --git a/src/zenstore/gc.cpp b/src/zenstore/gc.cpp
index f8ef5de82..b63be04bb 100644
--- a/src/zenstore/gc.cpp
+++ b/src/zenstore/gc.cpp
@@ -1032,6 +1032,25 @@ GcScheduler::SchedulerThread()
WaitTime = std::chrono::seconds(0);
}
+ catch (std::system_error& SystemError)
+ {
+ if (IsOOM(SystemError.code()))
+ {
+ ZEN_WARN("scheduling garbage collection ran out of memory: '{}'", SystemError.what());
+ }
+ else if (IsOOD(SystemError.code()))
+ {
+ ZEN_WARN("scheduling garbage collection ran out of disk space: '{}'", SystemError.what());
+ }
+ else
+ {
+ ZEN_ERROR("scheduling garbage collection failed with system error exception: '{}'", SystemError.what());
+ }
+ }
+ catch (std::bad_alloc& BadAlloc)
+ {
+ ZEN_WARN("scheduling garbage collection ran out of memory: '{}'", BadAlloc.what());
+ }
catch (std::exception& Ex)
{
ZEN_ERROR("scheduling garbage collection failed with: '{}'", Ex.what());
@@ -1151,9 +1170,28 @@ GcScheduler::CollectGarbage(const GcClock::TimePoint& CacheExpireTime,
SchedulerState << "LastGcExpireTime"sv << static_cast<int64_t>(m_LastGcExpireTime.time_since_epoch().count());
SaveCompactBinaryObject(Path, SchedulerState.Save());
}
+ catch (std::system_error& SystemError)
+ {
+ if (IsOOM(SystemError.code()))
+ {
+ ZEN_WARN("writing gc scheduler state ran out of memory: '{}'", SystemError.what());
+ }
+ else if (IsOOD(SystemError.code()))
+ {
+ ZEN_WARN("writing gc scheduler state ran out of disk space: '{}'", SystemError.what());
+ }
+ else
+ {
+ ZEN_ERROR("writing gc scheduler state failed with system error exception: '{}'", SystemError.what());
+ }
+ }
+ catch (std::bad_alloc& BadAlloc)
+ {
+ ZEN_WARN("writing gc scheduler state ran out of memory: '{}'", BadAlloc.what());
+ }
catch (std::exception& Ex)
{
- ZEN_WARN("writing gc scheduler state failed with: '{}'", Ex.what());
+ ZEN_ERROR("writing gc scheduler state failed with: '{}'", Ex.what());
}
}
}