aboutsummaryrefslogtreecommitdiff
path: root/src/zenstore
diff options
context:
space:
mode:
authorDan Engelbrecht <[email protected]>2023-10-02 12:00:00 +0200
committerGitHub <[email protected]>2023-10-02 12:00:00 +0200
commit0abf7994e8913c19360a0f0b8527495c0f99de87 (patch)
treea9a0338d69a95a6f20d9634a2a0e9f5b1595b639 /src/zenstore
parentLimit size of memory cache layer (#423) (diff)
downloadzen-0abf7994e8913c19360a0f0b8527495c0f99de87.tar.xz
zen-0abf7994e8913c19360a0f0b8527495c0f99de87.zip
Handle OOM and OOD more gracefully to not spam Sentry with error reports (#434)
- Improvement: Catch Out Of Memory and Out Of Disk exceptions and report back to requester without reporting an error to Sentry - Improvement: If creating bucket fails when storing an item in the structured cache, log a warning and propagate error to requester without reporting an error to Sentry - Improvement: Make an explicit flush of the active block written to in blockstore flush - Improvement: Make sure cache and cas MakeIndexSnapshot does not throw exception on failure which would cause an abnormal termination at exit
Diffstat (limited to 'src/zenstore')
-rw-r--r--src/zenstore/blockstore.cpp42
-rw-r--r--src/zenstore/compactcas.cpp31
-rw-r--r--src/zenstore/filecas.cpp34
-rw-r--r--src/zenstore/gc.cpp40
4 files changed, 121 insertions, 26 deletions
diff --git a/src/zenstore/blockstore.cpp b/src/zenstore/blockstore.cpp
index 520227474..b5ed17fc6 100644
--- a/src/zenstore/blockstore.cpp
+++ b/src/zenstore/blockstore.cpp
@@ -2,6 +2,7 @@
#include <zenstore/blockstore.h>
+#include <zencore/except.h>
#include <zencore/fmtutils.h>
#include <zencore/logging.h>
#include <zencore/scopeguard.h>
@@ -362,7 +363,11 @@ BlockStore::Flush()
{
uint32_t WriteBlockIndex = m_WriteBlockIndex.load(std::memory_order_acquire);
WriteBlockIndex = (WriteBlockIndex + 1) & (m_MaxBlockCount - 1);
- m_WriteBlock = nullptr;
+ if (m_WriteBlock)
+ {
+ m_WriteBlock->Flush();
+ }
+ m_WriteBlock = nullptr;
m_WriteBlockIndex.store(WriteBlockIndex, std::memory_order_release);
m_CurrentInsertOffset = 0;
}
@@ -502,10 +507,18 @@ BlockStore::ReclaimSpace(const ReclaimSnapshotState& Snapshot,
return;
}
- Ref<BlockStoreFile> NewBlockFile;
try
{
ZEN_TRACE_CPU("BlockStore::ReclaimSpace::Compact");
+ Ref<BlockStoreFile> NewBlockFile;
+ auto NewBlockFileGuard = MakeGuard([&]() {
+ if (NewBlockFile)
+ {
+ ZEN_DEBUG("dropping incomplete cas block store file '{}'", NewBlockFile->GetPath());
+ m_TotalSize.fetch_sub(NewBlockFile->FileSize(), std::memory_order::relaxed);
+ NewBlockFile->MarkAsDeleteOnClose();
+ }
+ });
uint64_t WriteOffset = 0;
uint32_t NewBlockIndex = 0;
@@ -703,15 +716,28 @@ BlockStore::ReclaimSpace(const ReclaimSnapshotState& Snapshot,
NewBlockFile = nullptr;
}
}
- catch (std::exception& ex)
+ catch (std::system_error& SystemError)
{
- ZEN_ERROR("reclaiming space for '{}' failed with: '{}'", m_BlocksBasePath, ex.what());
- if (NewBlockFile)
+ if (IsOOM(SystemError.code()))
+ {
+ ZEN_WARN("reclaiming space for '{}' ran out of memory: '{}'", m_BlocksBasePath, SystemError.what());
+ }
+ else if (IsOOD(SystemError.code()))
{
- ZEN_DEBUG("dropping incomplete cas block store file '{}'", NewBlockFile->GetPath());
- m_TotalSize.fetch_sub(NewBlockFile->FileSize(), std::memory_order::relaxed);
- NewBlockFile->MarkAsDeleteOnClose();
+ ZEN_WARN("reclaiming space for '{}' ran out of disk space: '{}'", m_BlocksBasePath, SystemError.what());
}
+ else
+ {
+ ZEN_ERROR("reclaiming space for '{}' failed with system error exception: '{}'", m_BlocksBasePath, SystemError.what());
+ }
+ }
+ catch (std::bad_alloc& BadAlloc)
+ {
+ ZEN_WARN("reclaiming space for '{}' ran out of memory: '{}'", m_BlocksBasePath, BadAlloc.what());
+ }
+ catch (std::exception& ex)
+ {
+ ZEN_ERROR("reclaiming space for '{}' failed with: '{}'", m_BlocksBasePath, ex.what());
}
}
diff --git a/src/zenstore/compactcas.cpp b/src/zenstore/compactcas.cpp
index a138e43e9..1d1797597 100644
--- a/src/zenstore/compactcas.cpp
+++ b/src/zenstore/compactcas.cpp
@@ -565,15 +565,21 @@ CasContainerStrategy::MakeIndexSnapshot()
// Move index away, we keep it if something goes wrong
if (fs::is_regular_file(TempIndexPath))
{
- fs::remove(TempIndexPath);
- }
- if (fs::is_regular_file(IndexPath))
- {
- fs::rename(IndexPath, TempIndexPath);
+ std::error_code Ec;
+ if (!fs::remove(TempIndexPath, Ec) || Ec)
+ {
+ ZEN_WARN("snapshot failed to clean up temp snapshot at {}, reason: '{}'", TempIndexPath, Ec.message());
+ return;
+ }
}
try
{
+ if (fs::is_regular_file(IndexPath))
+ {
+ fs::rename(IndexPath, TempIndexPath);
+ }
+
// Write the current state of the location map to a new index state
std::vector<CasDiskIndexEntry> Entries;
@@ -613,13 +619,22 @@ CasContainerStrategy::MakeIndexSnapshot()
if (fs::is_regular_file(TempIndexPath))
{
- fs::remove(IndexPath);
- fs::rename(TempIndexPath, IndexPath);
+ std::error_code Ec;
+ fs::remove(IndexPath, Ec); // We don't care if this fails, we try to move the old temp file regardless
+ fs::rename(TempIndexPath, IndexPath, Ec);
+ if (Ec)
+ {
+ ZEN_WARN("snapshot failed to restore old snapshot from {}, reason: '{}'", TempIndexPath, Ec.message());
+ }
}
}
if (fs::is_regular_file(TempIndexPath))
{
- fs::remove(TempIndexPath);
+ std::error_code Ec;
+ if (!fs::remove(TempIndexPath, Ec) || Ec)
+ {
+ ZEN_WARN("snapshot failed to remove temporary file {}, reason: '{}'", TempIndexPath, Ec.message());
+ }
}
}
diff --git a/src/zenstore/filecas.cpp b/src/zenstore/filecas.cpp
index c3dce2b7b..0d742d7e1 100644
--- a/src/zenstore/filecas.cpp
+++ b/src/zenstore/filecas.cpp
@@ -1031,6 +1031,7 @@ FileCasStrategy::MakeIndexSnapshot()
{
return;
}
+
ZEN_DEBUG("write store snapshot for '{}'", m_RootDirectory);
uint64_t EntryCount = 0;
Stopwatch Timer;
@@ -1049,15 +1050,21 @@ FileCasStrategy::MakeIndexSnapshot()
// Move index away, we keep it if something goes wrong
if (fs::is_regular_file(STmpIndexPath))
{
- fs::remove(STmpIndexPath);
- }
- if (fs::is_regular_file(IndexPath))
- {
- fs::rename(IndexPath, STmpIndexPath);
+ std::error_code Ec;
+ if (!fs::remove(STmpIndexPath, Ec) || Ec)
+ {
+ ZEN_WARN("snapshot failed to clean up temp snapshot at {}, reason: '{}'", STmpIndexPath, Ec.message());
+ return;
+ }
}
try
{
+ if (fs::is_regular_file(IndexPath))
+ {
+ fs::rename(IndexPath, STmpIndexPath);
+ }
+
// Write the current state of the location map to a new index state
std::vector<FileCasIndexEntry> Entries;
@@ -1088,19 +1095,28 @@ FileCasStrategy::MakeIndexSnapshot()
}
catch (std::exception& Err)
{
- ZEN_ERROR("snapshot FAILED, reason: '{}'", Err.what());
+ ZEN_WARN("snapshot FAILED, reason: '{}'", Err.what());
// Restore any previous snapshot
if (fs::is_regular_file(STmpIndexPath))
{
- fs::remove(IndexPath);
- fs::rename(STmpIndexPath, IndexPath);
+ std::error_code Ec;
+ fs::remove(IndexPath, Ec); // We don't care if this fails, we try to move the old temp file regardless
+ fs::rename(STmpIndexPath, IndexPath, Ec);
+ if (Ec)
+ {
+ ZEN_WARN("snapshot failed to restore old snapshot from {}, reason: '{}'", STmpIndexPath, Ec.message());
+ }
}
}
if (fs::is_regular_file(STmpIndexPath))
{
- fs::remove(STmpIndexPath);
+ std::error_code Ec;
+ if (!fs::remove(STmpIndexPath, Ec) || Ec)
+ {
+ ZEN_WARN("snapshot failed to remove temporary file {}, reason: '{}'", STmpIndexPath, Ec.message());
+ }
}
}
uint64_t
diff --git a/src/zenstore/gc.cpp b/src/zenstore/gc.cpp
index f8ef5de82..b63be04bb 100644
--- a/src/zenstore/gc.cpp
+++ b/src/zenstore/gc.cpp
@@ -1032,6 +1032,25 @@ GcScheduler::SchedulerThread()
WaitTime = std::chrono::seconds(0);
}
+ catch (std::system_error& SystemError)
+ {
+ if (IsOOM(SystemError.code()))
+ {
+ ZEN_WARN("scheduling garbage collection ran out of memory: '{}'", SystemError.what());
+ }
+ else if (IsOOD(SystemError.code()))
+ {
+ ZEN_WARN("scheduling garbage collection ran out of disk space: '{}'", SystemError.what());
+ }
+ else
+ {
+ ZEN_ERROR("scheduling garbage collection failed with system error exception: '{}'", SystemError.what());
+ }
+ }
+ catch (std::bad_alloc& BadAlloc)
+ {
+ ZEN_WARN("scheduling garbage collection ran out of memory: '{}'", BadAlloc.what());
+ }
catch (std::exception& Ex)
{
ZEN_ERROR("scheduling garbage collection failed with: '{}'", Ex.what());
@@ -1151,9 +1170,28 @@ GcScheduler::CollectGarbage(const GcClock::TimePoint& CacheExpireTime,
SchedulerState << "LastGcExpireTime"sv << static_cast<int64_t>(m_LastGcExpireTime.time_since_epoch().count());
SaveCompactBinaryObject(Path, SchedulerState.Save());
}
+ catch (std::system_error& SystemError)
+ {
+ if (IsOOM(SystemError.code()))
+ {
+ ZEN_WARN("writing gc scheduler state ran out of memory: '{}'", SystemError.what());
+ }
+ else if (IsOOD(SystemError.code()))
+ {
+ ZEN_WARN("writing gc scheduler state ran out of disk space: '{}'", SystemError.what());
+ }
+ else
+ {
+ ZEN_ERROR("writing gc scheduler state failed with system error exception: '{}'", SystemError.what());
+ }
+ }
+ catch (std::bad_alloc& BadAlloc)
+ {
+ ZEN_WARN("writing gc scheduler state ran out of memory: '{}'", BadAlloc.what());
+ }
catch (std::exception& Ex)
{
- ZEN_WARN("writing gc scheduler state failed with: '{}'", Ex.what());
+ ZEN_ERROR("writing gc scheduler state failed with: '{}'", Ex.what());
}
}
}