aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDan Engelbrecht <[email protected]>2024-03-21 13:03:41 +0100
committerGitHub Enterprise <[email protected]>2024-03-21 13:03:41 +0100
commitf60aec8607aa4ef70b4653d201c854b00a538951 (patch)
treea10a9b168fbc4f1f9a64c52f3126faecf845b795
parent5.4.2-pre6 (diff)
downloadzen-f60aec8607aa4ef70b4653d201c854b00a538951.tar.xz
zen-f60aec8607aa4ef70b4653d201c854b00a538951.zip
harden attach sponsor process (#14)
- Improvement: Delay exiting due to no sponsor processes by one second to handle race conditions - Improvement: Safer IsProcessRunning check - Improvement: make sure we can RequestApplicationExit safely from any thread
-rw-r--r--CHANGELOG.md5
-rw-r--r--src/zencore/include/zencore/zencore.h2
-rw-r--r--src/zencore/process.cpp24
-rw-r--r--src/zencore/zencore.cpp11
-rw-r--r--src/zenserver/main.cpp12
-rw-r--r--src/zenserver/zenserver.cpp45
-rw-r--r--src/zenserver/zenserver.h5
-rw-r--r--src/zenutil/zenserverprocess.cpp17
8 files changed, 74 insertions, 47 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2b106469a..5bd4fc89d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,7 +4,7 @@
- Bugfix: Make sure exception do not leak out of async (worker thread pool) work and make sure we always wait for completion of all work
- Bugfix: Limit number of headers parsed to 127 as that is the maximum supported by Zen
- Bugfix: Don't capture for loop variables by reference when executing async code
-- Bugfix Make sure WriteFile() does not leave incomplete files
+- Bugfix: Make sure WriteFile() does not leave incomplete files
- Bugfix: Use TemporaryFile and MoveTemporaryIntoPlace to avoid leaving partial files on error
- Bugfix: Install Ctrl+C handler earlier when doing `zen oplog-export` and `zen oplog-export` to properly cancel jobs
- Feature: Added option `--access-token-path` to `zen oplog-export` and `zen-oplog-import` enabling it to read a cloud access token from a json file
@@ -25,6 +25,9 @@
- Improvement: Make sure zenserver reacts and exist on SIGTERM signal
- Improvement: Retry to create the .lock file at startup to avoid failing launch due to race condition with UE
- Improvement: Add CompressedBuffer::GetRange that references source data rather than make a memory copy
+- Improvement: Delay exiting due to no sponsor processes by one second to handle race conditions
+- Improvement: Safer IsProcessRunning check
+- Improvement: Make sure we can RequestApplicationExit safely from any thread
- Removed: `--cache-reference-cache-enabled` option has been removed along with the implementation for reference caching in disk cache
## 5.4.1
diff --git a/src/zencore/include/zencore/zencore.h b/src/zencore/include/zencore/zencore.h
index e8c734ba9..1a9060e29 100644
--- a/src/zencore/include/zencore/zencore.h
+++ b/src/zencore/include/zencore/zencore.h
@@ -84,7 +84,7 @@ protected:
namespace zen {
ZENCORE_API bool IsApplicationExitRequested();
-ZENCORE_API void RequestApplicationExit(int ExitCode);
+ZENCORE_API bool RequestApplicationExit(int ExitCode);
ZENCORE_API int ApplicationExitCode();
ZENCORE_API bool IsDebuggerPresent();
ZENCORE_API void SetIsInteractiveSession(bool Value);
diff --git a/src/zencore/process.cpp b/src/zencore/process.cpp
index e0a395494..d8be0d343 100644
--- a/src/zencore/process.cpp
+++ b/src/zencore/process.cpp
@@ -661,14 +661,13 @@ IsProcessRunning(int pid)
if (!hProc)
{
DWORD Error = zen::GetLastError();
-
if (Error == ERROR_INVALID_PARAMETER)
{
return false;
}
-
ThrowSystemError(Error, fmt::format("failed to open process with pid {}", pid));
}
+ auto _ = MakeGuard([hProc]() { CloseHandle(hProc); });
bool bStillActive = true;
DWORD ExitCode = 0;
@@ -678,14 +677,25 @@ IsProcessRunning(int pid)
}
else
{
- ZEN_WARN("Unable to get exit code from handle for process '{}', treating the process as active", pid);
+ DWORD Error = GetLastError();
+ ThrowSystemError(Error, fmt::format("failed to get process exit code for pid {}", pid));
}
-
- CloseHandle(hProc);
-
return bStillActive;
#elif ZEN_PLATFORM_LINUX || ZEN_PLATFORM_MAC
- return (kill(pid_t(pid), 0) == 0);
+ int Res = kill(pid_t(pid), 0);
+ if (Res == 0)
+ {
+ return true;
+ }
+ int Error = errno;
+ if (Error == ESRCH) // No such process
+ {
+ return false;
+ }
+ else
+ {
+ ThrowSystemError(Error, fmt::format("Failed to signal running process %d: %d", pid, Error));
+ }
#endif
}
diff --git a/src/zencore/zencore.cpp b/src/zencore/zencore.cpp
index d0acac608..b80b1d280 100644
--- a/src/zencore/zencore.cpp
+++ b/src/zencore/zencore.cpp
@@ -115,11 +115,16 @@ IsApplicationExitRequested()
return s_ApplicationExitRequested;
}
-void
+bool
RequestApplicationExit(int ExitCode)
{
- s_ApplicationExitCode = ExitCode;
- s_ApplicationExitRequested = true;
+ bool Expected = false;
+ if (s_ApplicationExitRequested.compare_exchange_weak(Expected, true))
+ {
+ s_ApplicationExitCode = ExitCode;
+ return true;
+ }
+ return false;
}
int
diff --git a/src/zenserver/main.cpp b/src/zenserver/main.cpp
index b4cb2464b..a9246ed8f 100644
--- a/src/zenserver/main.cpp
+++ b/src/zenserver/main.cpp
@@ -210,11 +210,8 @@ ZenEntryPoint::Run()
if (ShutdownEvent->Wait())
{
- if (!IsApplicationExitRequested())
- {
- ZEN_INFO("shutdown signal for pid {} received", zen::GetCurrentProcessId());
- Server.RequestExit(0);
- }
+ ZEN_INFO("shutdown signal for pid {} received", zen::GetCurrentProcessId());
+ Server.RequestExit(0);
}
else
{
@@ -253,10 +250,7 @@ ZenEntryPoint::Run()
catch (std::exception& e)
{
ZEN_CRITICAL("Caught exception in main for process {}: {}", zen::GetCurrentProcessId(), e.what());
- if (!IsApplicationExitRequested())
- {
- RequestApplicationExit(1);
- }
+ RequestApplicationExit(1);
}
ShutdownServerLogging();
diff --git a/src/zenserver/zenserver.cpp b/src/zenserver/zenserver.cpp
index daec743a0..d1faeb8b6 100644
--- a/src/zenserver/zenserver.cpp
+++ b/src/zenserver/zenserver.cpp
@@ -693,7 +693,6 @@ ZenServer::Run()
if (m_IsPowerCycle)
{
ZEN_INFO("Power cycle mode enabled -- shutting down");
-
RequestExit(0);
}
@@ -707,10 +706,12 @@ ZenServer::Run()
void
ZenServer::RequestExit(int ExitCode)
{
- RequestApplicationExit(ExitCode);
- if (m_Http)
+ if (RequestApplicationExit(ExitCode))
{
- m_Http->RequestExit();
+ if (m_Http)
+ {
+ m_Http->RequestExit();
+ }
}
}
@@ -779,7 +780,7 @@ ZenServer::EnsureIoRunner()
}
void
-ZenServer::EnqueueTimer()
+ZenServer::EnqueueProcessMonitorTimer()
{
m_PidCheckTimer.expires_after(std::chrono::seconds(1));
m_PidCheckTimer.async_wait([this](const asio::error_code&) { CheckOwnerPid(); });
@@ -857,11 +858,10 @@ ZenServer::CheckSigInt()
EnqueueSigIntTimer();
}
-void
-ZenServer::CheckOwnerPid()
+bool
+ZenServer::UpdateProcessMonitor()
{
// Pick up any new "owner" processes
-
std::set<uint32_t> AddedPids;
for (auto& PidEntry : m_ServerEntry->SponsorPids)
@@ -874,21 +874,38 @@ ZenServer::CheckOwnerPid()
{
m_ProcessMonitor.AddPid(ThisPid);
- ZEN_INFO("added process with pid #{} as a sponsor process", ThisPid);
+ ZEN_INFO("added process with pid {} as a sponsor process", ThisPid);
}
}
}
}
+ return m_ProcessMonitor.IsRunning();
+}
- if (m_ProcessMonitor.IsRunning())
+void
+ZenServer::CheckOwnerPid()
+{
+ bool IsRunning = UpdateProcessMonitor();
+
+ if (IsRunning)
{
- EnqueueTimer();
+ m_FoundNoActiveSponsors = false;
+ EnqueueProcessMonitorTimer();
}
else
{
- ZEN_INFO(ZEN_APP_NAME " exiting since sponsor processes are all gone");
-
- RequestExit(0);
+ // Delay exit one iteration to avoid race conditions where one process detaches
+ // and another attaches
+ if (m_FoundNoActiveSponsors)
+ {
+ ZEN_INFO(ZEN_APP_NAME " exiting since sponsor processes are all gone");
+ RequestExit(0);
+ }
+ else
+ {
+ m_FoundNoActiveSponsors = true;
+ EnqueueProcessMonitorTimer();
+ }
}
}
diff --git a/src/zenserver/zenserver.h b/src/zenserver/zenserver.h
index cdd7c17a7..dd259f855 100644
--- a/src/zenserver/zenserver.h
+++ b/src/zenserver/zenserver.h
@@ -71,14 +71,14 @@ public:
void OnReady();
void EnsureIoRunner();
- void EnqueueTimer();
+ void EnqueueProcessMonitorTimer();
void EnqueueStateMarkerTimer();
void EnqueueSigIntTimer();
void EnqueueStatsReportingTimer();
void CheckStateMarker();
void CheckSigInt();
void CheckOwnerPid();
-
+ bool UpdateProcessMonitor();
void ScrubStorage();
void Flush();
@@ -100,6 +100,7 @@ private:
asio::steady_timer m_StatsReportingTimer{m_IoContext};
ProcessMonitor m_ProcessMonitor;
NamedMutex m_ServerMutex;
+ bool m_FoundNoActiveSponsors = false;
enum ServerState
{
diff --git a/src/zenutil/zenserverprocess.cpp b/src/zenutil/zenserverprocess.cpp
index 3667ad2c6..7725d0af6 100644
--- a/src/zenutil/zenserverprocess.cpp
+++ b/src/zenutil/zenserverprocess.cpp
@@ -308,7 +308,7 @@ ZenServerState::Sweep()
if (Entry.DesiredListenPort)
{
- if (IsProcessRunning(Entry.Pid) == false)
+ if (Entry.Pid != 0 && IsProcessRunning(Entry.Pid) == false)
{
ZEN_DEBUG("Sweep - pid {} not running, reclaiming entry (port {})", Entry.Pid.load(), Entry.DesiredListenPort.load());
@@ -363,18 +363,15 @@ ZenServerState::ZenServerEntry::AddSponsorProcess(uint32_t PidToAdd)
{
for (std::atomic<uint32_t>& PidEntry : SponsorPids)
{
- if (PidEntry.load(std::memory_order_relaxed) == 0)
+ if (PidEntry.load(std::memory_order_relaxed) == PidToAdd)
{
- uint32_t Expected = 0;
- if (PidEntry.compare_exchange_strong(Expected, PidToAdd))
- {
- // Success!
- return true;
- }
+ // Success, the because pid is already in the list
+ return true;
}
- else if (PidEntry.load(std::memory_order_relaxed) == PidToAdd)
+ uint32_t Expected = 0;
+ if (PidEntry.compare_exchange_strong(Expected, PidToAdd))
{
- // Success, the because pid is already in the list
+ // Success!
return true;
}
}