diff options
| author | Dan Engelbrecht <[email protected]> | 2024-03-21 13:03:41 +0100 |
|---|---|---|
| committer | GitHub Enterprise <[email protected]> | 2024-03-21 13:03:41 +0100 |
| commit | f60aec8607aa4ef70b4653d201c854b00a538951 (patch) | |
| tree | a10a9b168fbc4f1f9a64c52f3126faecf845b795 | |
| parent | 5.4.2-pre6 (diff) | |
| download | zen-f60aec8607aa4ef70b4653d201c854b00a538951.tar.xz zen-f60aec8607aa4ef70b4653d201c854b00a538951.zip | |
harden attach sponsor process (#14)
- Improvement: Delay exiting due to no sponsor processes by one second to handle race conditions
- Improvement: Safer IsProcessRunning check
- Improvement: make sure we can RequestApplicationExit safely from any thread
| -rw-r--r-- | CHANGELOG.md | 5 | ||||
| -rw-r--r-- | src/zencore/include/zencore/zencore.h | 2 | ||||
| -rw-r--r-- | src/zencore/process.cpp | 24 | ||||
| -rw-r--r-- | src/zencore/zencore.cpp | 11 | ||||
| -rw-r--r-- | src/zenserver/main.cpp | 12 | ||||
| -rw-r--r-- | src/zenserver/zenserver.cpp | 45 | ||||
| -rw-r--r-- | src/zenserver/zenserver.h | 5 | ||||
| -rw-r--r-- | src/zenutil/zenserverprocess.cpp | 17 |
8 files changed, 74 insertions, 47 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md index 2b106469a..5bd4fc89d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,7 +4,7 @@ - Bugfix: Make sure exception do not leak out of async (worker thread pool) work and make sure we always wait for completion of all work - Bugfix: Limit number of headers parsed to 127 as that is the maximum supported by Zen - Bugfix: Don't capture for loop variables by reference when executing async code -- Bugfix Make sure WriteFile() does not leave incomplete files +- Bugfix: Make sure WriteFile() does not leave incomplete files - Bugfix: Use TemporaryFile and MoveTemporaryIntoPlace to avoid leaving partial files on error - Bugfix: Install Ctrl+C handler earlier when doing `zen oplog-export` and `zen oplog-export` to properly cancel jobs - Feature: Added option `--access-token-path` to `zen oplog-export` and `zen-oplog-import` enabling it to read a cloud access token from a json file @@ -25,6 +25,9 @@ - Improvement: Make sure zenserver reacts and exist on SIGTERM signal - Improvement: Retry to create the .lock file at startup to avoid failing launch due to race condition with UE - Improvement: Add CompressedBuffer::GetRange that references source data rather than make a memory copy +- Improvement: Delay exiting due to no sponsor processes by one second to handle race conditions +- Improvement: Safer IsProcessRunning check +- Improvement: Make sure we can RequestApplicationExit safely from any thread - Removed: `--cache-reference-cache-enabled` option has been removed along with the implementation for reference caching in disk cache ## 5.4.1 diff --git a/src/zencore/include/zencore/zencore.h b/src/zencore/include/zencore/zencore.h index e8c734ba9..1a9060e29 100644 --- a/src/zencore/include/zencore/zencore.h +++ b/src/zencore/include/zencore/zencore.h @@ -84,7 +84,7 @@ protected: namespace zen { ZENCORE_API bool IsApplicationExitRequested(); -ZENCORE_API void RequestApplicationExit(int ExitCode); +ZENCORE_API bool RequestApplicationExit(int ExitCode); ZENCORE_API int ApplicationExitCode(); ZENCORE_API bool IsDebuggerPresent(); ZENCORE_API void SetIsInteractiveSession(bool Value); diff --git a/src/zencore/process.cpp b/src/zencore/process.cpp index e0a395494..d8be0d343 100644 --- a/src/zencore/process.cpp +++ b/src/zencore/process.cpp @@ -661,14 +661,13 @@ IsProcessRunning(int pid) if (!hProc) { DWORD Error = zen::GetLastError(); - if (Error == ERROR_INVALID_PARAMETER) { return false; } - ThrowSystemError(Error, fmt::format("failed to open process with pid {}", pid)); } + auto _ = MakeGuard([hProc]() { CloseHandle(hProc); }); bool bStillActive = true; DWORD ExitCode = 0; @@ -678,14 +677,25 @@ IsProcessRunning(int pid) } else { - ZEN_WARN("Unable to get exit code from handle for process '{}', treating the process as active", pid); + DWORD Error = GetLastError(); + ThrowSystemError(Error, fmt::format("failed to get process exit code for pid {}", pid)); } - - CloseHandle(hProc); - return bStillActive; #elif ZEN_PLATFORM_LINUX || ZEN_PLATFORM_MAC - return (kill(pid_t(pid), 0) == 0); + int Res = kill(pid_t(pid), 0); + if (Res == 0) + { + return true; + } + int Error = errno; + if (Error == ESRCH) // No such process + { + return false; + } + else + { + ThrowSystemError(Error, fmt::format("Failed to signal running process %d: %d", pid, Error)); + } #endif } diff --git a/src/zencore/zencore.cpp b/src/zencore/zencore.cpp index d0acac608..b80b1d280 100644 --- a/src/zencore/zencore.cpp +++ b/src/zencore/zencore.cpp @@ -115,11 +115,16 @@ IsApplicationExitRequested() return s_ApplicationExitRequested; } -void +bool RequestApplicationExit(int ExitCode) { - s_ApplicationExitCode = ExitCode; - s_ApplicationExitRequested = true; + bool Expected = false; + if (s_ApplicationExitRequested.compare_exchange_weak(Expected, true)) + { + s_ApplicationExitCode = ExitCode; + return true; + } + return false; } int diff --git a/src/zenserver/main.cpp b/src/zenserver/main.cpp index b4cb2464b..a9246ed8f 100644 --- a/src/zenserver/main.cpp +++ b/src/zenserver/main.cpp @@ -210,11 +210,8 @@ ZenEntryPoint::Run() if (ShutdownEvent->Wait()) { - if (!IsApplicationExitRequested()) - { - ZEN_INFO("shutdown signal for pid {} received", zen::GetCurrentProcessId()); - Server.RequestExit(0); - } + ZEN_INFO("shutdown signal for pid {} received", zen::GetCurrentProcessId()); + Server.RequestExit(0); } else { @@ -253,10 +250,7 @@ ZenEntryPoint::Run() catch (std::exception& e) { ZEN_CRITICAL("Caught exception in main for process {}: {}", zen::GetCurrentProcessId(), e.what()); - if (!IsApplicationExitRequested()) - { - RequestApplicationExit(1); - } + RequestApplicationExit(1); } ShutdownServerLogging(); diff --git a/src/zenserver/zenserver.cpp b/src/zenserver/zenserver.cpp index daec743a0..d1faeb8b6 100644 --- a/src/zenserver/zenserver.cpp +++ b/src/zenserver/zenserver.cpp @@ -693,7 +693,6 @@ ZenServer::Run() if (m_IsPowerCycle) { ZEN_INFO("Power cycle mode enabled -- shutting down"); - RequestExit(0); } @@ -707,10 +706,12 @@ ZenServer::Run() void ZenServer::RequestExit(int ExitCode) { - RequestApplicationExit(ExitCode); - if (m_Http) + if (RequestApplicationExit(ExitCode)) { - m_Http->RequestExit(); + if (m_Http) + { + m_Http->RequestExit(); + } } } @@ -779,7 +780,7 @@ ZenServer::EnsureIoRunner() } void -ZenServer::EnqueueTimer() +ZenServer::EnqueueProcessMonitorTimer() { m_PidCheckTimer.expires_after(std::chrono::seconds(1)); m_PidCheckTimer.async_wait([this](const asio::error_code&) { CheckOwnerPid(); }); @@ -857,11 +858,10 @@ ZenServer::CheckSigInt() EnqueueSigIntTimer(); } -void -ZenServer::CheckOwnerPid() +bool +ZenServer::UpdateProcessMonitor() { // Pick up any new "owner" processes - std::set<uint32_t> AddedPids; for (auto& PidEntry : m_ServerEntry->SponsorPids) @@ -874,21 +874,38 @@ ZenServer::CheckOwnerPid() { m_ProcessMonitor.AddPid(ThisPid); - ZEN_INFO("added process with pid #{} as a sponsor process", ThisPid); + ZEN_INFO("added process with pid {} as a sponsor process", ThisPid); } } } } + return m_ProcessMonitor.IsRunning(); +} - if (m_ProcessMonitor.IsRunning()) +void +ZenServer::CheckOwnerPid() +{ + bool IsRunning = UpdateProcessMonitor(); + + if (IsRunning) { - EnqueueTimer(); + m_FoundNoActiveSponsors = false; + EnqueueProcessMonitorTimer(); } else { - ZEN_INFO(ZEN_APP_NAME " exiting since sponsor processes are all gone"); - - RequestExit(0); + // Delay exit one iteration to avoid race conditions where one process detaches + // and another attaches + if (m_FoundNoActiveSponsors) + { + ZEN_INFO(ZEN_APP_NAME " exiting since sponsor processes are all gone"); + RequestExit(0); + } + else + { + m_FoundNoActiveSponsors = true; + EnqueueProcessMonitorTimer(); + } } } diff --git a/src/zenserver/zenserver.h b/src/zenserver/zenserver.h index cdd7c17a7..dd259f855 100644 --- a/src/zenserver/zenserver.h +++ b/src/zenserver/zenserver.h @@ -71,14 +71,14 @@ public: void OnReady(); void EnsureIoRunner(); - void EnqueueTimer(); + void EnqueueProcessMonitorTimer(); void EnqueueStateMarkerTimer(); void EnqueueSigIntTimer(); void EnqueueStatsReportingTimer(); void CheckStateMarker(); void CheckSigInt(); void CheckOwnerPid(); - + bool UpdateProcessMonitor(); void ScrubStorage(); void Flush(); @@ -100,6 +100,7 @@ private: asio::steady_timer m_StatsReportingTimer{m_IoContext}; ProcessMonitor m_ProcessMonitor; NamedMutex m_ServerMutex; + bool m_FoundNoActiveSponsors = false; enum ServerState { diff --git a/src/zenutil/zenserverprocess.cpp b/src/zenutil/zenserverprocess.cpp index 3667ad2c6..7725d0af6 100644 --- a/src/zenutil/zenserverprocess.cpp +++ b/src/zenutil/zenserverprocess.cpp @@ -308,7 +308,7 @@ ZenServerState::Sweep() if (Entry.DesiredListenPort) { - if (IsProcessRunning(Entry.Pid) == false) + if (Entry.Pid != 0 && IsProcessRunning(Entry.Pid) == false) { ZEN_DEBUG("Sweep - pid {} not running, reclaiming entry (port {})", Entry.Pid.load(), Entry.DesiredListenPort.load()); @@ -363,18 +363,15 @@ ZenServerState::ZenServerEntry::AddSponsorProcess(uint32_t PidToAdd) { for (std::atomic<uint32_t>& PidEntry : SponsorPids) { - if (PidEntry.load(std::memory_order_relaxed) == 0) + if (PidEntry.load(std::memory_order_relaxed) == PidToAdd) { - uint32_t Expected = 0; - if (PidEntry.compare_exchange_strong(Expected, PidToAdd)) - { - // Success! - return true; - } + // Success, the because pid is already in the list + return true; } - else if (PidEntry.load(std::memory_order_relaxed) == PidToAdd) + uint32_t Expected = 0; + if (PidEntry.compare_exchange_strong(Expected, PidToAdd)) { - // Success, the because pid is already in the list + // Success! return true; } } |