From a15de3d263a5759ca4f07aa7a2b9e6d494551150 Mon Sep 17 00:00:00 2001
From: Stefan Boberg
Date: Mon, 23 Mar 2026 11:40:11 +0100
Subject: Process management improvements (#881)

This PR improves process lifecycle handling and resilience across several areas:

- **Reclaim stale shared-memory entries instead of exiting** (`zenserver.cpp`): When a zenserver instance fails to attach as a sponsor to an existing process (e.g. because the PID was reused by an unrelated process), the server now clears the stale shared-memory entry and proceeds with normal startup instead of calling `std::exit(1)`.
- **Wait for child process exit in `Kill()` and `Terminate()` on Unix** (`process.cpp`): After sending `SIGTERM` in `Kill()`, the code now waits up to 5s for graceful shutdown (escalating to `SIGKILL` on timeout), matching the Windows behavior. `Terminate()` also waits after `SIGKILL` so the child is properly reaped and doesn't linger as a zombie clogging up the process table.
- **Fix sysctl buffer race in macOS `FindProcess`** (`process.cpp`): The macOS process enumeration now retries the `sysctl` call (up to 3 attempts with 25% buffer padding) to handle the race where the process list changes between the sizing call and the data-fetching call. Also flattens the nesting and fixes the guard/free scoping.
- **Terminate stale processes before integration tests** (`zenserver-test.cpp`, `test.lua`): The integration test runner now accepts a `--kill-stale-processes` flag (passed automatically by `test.lua`) that scans for and terminates any leftover `zenserver`, `zenserver-test`, and `zentest-appstub` processes from previous test runs, logging the executable name and PID of each. This addresses flaky test failures caused by stale processes from prior runs holding ports or other resources.
--- src/zencore/process.cpp | 85 +++++++++++++++++++++++++++++++++---------------- 1 file changed, 57 insertions(+), 28 deletions(-) (limited to 'src/zencore/process.cpp') diff --git a/src/zencore/process.cpp b/src/zencore/process.cpp index 8a91ab287..47289a37b 100644 --- a/src/zencore/process.cpp +++ b/src/zencore/process.cpp @@ -397,6 +397,17 @@ ProcessHandle::Kill() return false; } } + + // Wait for the process to exit after SIGTERM, matching the Windows path + // which waits up to 5 seconds for graceful shutdown. Without this wait + // the child becomes a zombie and may hold resources (e.g. TCP ports). + std::error_code Ec; + if (!Wait(5000, Ec)) + { + // Graceful shutdown timed out — force-kill + kill(pid_t(m_Pid), SIGKILL); + Wait(1000, Ec); + } #endif Reset(); @@ -435,6 +446,11 @@ ProcessHandle::Terminate(int ExitCode) return false; } } + + // Wait for the process to be reaped after SIGKILL so it doesn't linger + // as a zombie holding resources (e.g. TCP ports). + std::error_code Ec; + Wait(5000, Ec); #endif Reset(); return true; @@ -1648,47 +1664,60 @@ FindProcess(const std::filesystem::path& ExecutableImage, ProcessHandle& OutHand return MakeErrorCodeFromLastError(); #endif // ZEN_PLATFORM_WINDOWS #if ZEN_PLATFORM_MAC - int Mib[4] = {CTL_KERN, KERN_PROC, KERN_PROC_ALL, 0}; - size_t BufferSize = 0; - - struct kinfo_proc* Processes = nullptr; - uint32_t ProcCount = 0; + int Mib[4] = {CTL_KERN, KERN_PROC, KERN_PROC_ALL, 0}; const pid_t ThisProcessId = getpid(); - if (sysctl(Mib, 4, NULL, &BufferSize, NULL, 0) != -1 && BufferSize > 0) + // The process list can change between the sizing sysctl call and the data sysctl call. + // Retry with padding to handle this race. 
+ struct kinfo_proc* Processes = nullptr; + size_t BufferSize = 0; + bool Fetched = false; + auto _ = MakeGuard([&]() { free(Processes); }); + + for (int Attempt = 0; Attempt < 3; Attempt++) { - struct kinfo_proc* Processes = (struct kinfo_proc*)malloc(BufferSize); - auto _ = MakeGuard([&]() { free(Processes); }); - if (sysctl(Mib, 4, Processes, &BufferSize, NULL, 0) != -1) + if (sysctl(Mib, 4, nullptr, &BufferSize, nullptr, 0) == -1 || BufferSize == 0) + { + break; + } + BufferSize += BufferSize / 4; + free(Processes); + Processes = (struct kinfo_proc*)malloc(BufferSize); + if (sysctl(Mib, 4, Processes, &BufferSize, nullptr, 0) != -1) { - ProcCount = (uint32_t)(BufferSize / sizeof(struct kinfo_proc)); - char Buffer[PROC_PIDPATHINFO_MAXSIZE]; - for (uint32_t ProcIndex = 0; ProcIndex < ProcCount; ProcIndex++) + Fetched = true; + break; + } + } + + if (!Fetched) + { + return MakeErrorCodeFromLastError(); + } + + uint32_t ProcCount = (uint32_t)(BufferSize / sizeof(struct kinfo_proc)); + for (uint32_t ProcIndex = 0; ProcIndex < ProcCount; ProcIndex++) + { + pid_t Pid = Processes[ProcIndex].kp_proc.p_pid; + if (IncludeSelf || (Pid != ThisProcessId)) + { + std::error_code Ec; + std::filesystem::path EntryPath = GetProcessExecutablePath(Pid, Ec); + if (!Ec) { - pid_t Pid = Processes[ProcIndex].kp_proc.p_pid; - if (IncludeSelf || (Pid != ThisProcessId)) + if (EntryPath == ExecutableImage) { - std::error_code Ec; - std::filesystem::path EntryPath = GetProcessExecutablePath(Pid, Ec); - if (!Ec) + if (Processes[ProcIndex].kp_proc.p_stat != SZOMB) { - if (EntryPath == ExecutableImage) - { - if (Processes[ProcIndex].kp_proc.p_stat != SZOMB) - { - OutHandle.Initialize(Pid, Ec); - return Ec; - } - } + OutHandle.Initialize(Pid, Ec); + return Ec; } - Ec.clear(); } } - return {}; } } - return MakeErrorCodeFromLastError(); + return {}; #endif // ZEN_PLATFORM_MAC #if ZEN_PLATFORM_LINUX const pid_t ThisProcessId = getpid(); -- cgit v1.2.3