diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/zencore/process.cpp | 85 | ||||
| -rw-r--r-- | src/zencore/thread.cpp | 12 | ||||
| -rw-r--r-- | src/zenserver-test/zenserver-test.cpp | 41 | ||||
| -rw-r--r-- | src/zenserver/zenserver.cpp | 32 | ||||
| -rw-r--r-- | src/zenutil/zenserverprocess.cpp | 35 |
5 files changed, 160 insertions, 45 deletions
diff --git a/src/zencore/process.cpp b/src/zencore/process.cpp index 8a91ab287..47289a37b 100644 --- a/src/zencore/process.cpp +++ b/src/zencore/process.cpp @@ -397,6 +397,17 @@ ProcessHandle::Kill() return false; } } + + // Wait for the process to exit after SIGTERM, matching the Windows path + // which waits up to 5 seconds for graceful shutdown. Without this wait + // the child becomes a zombie and may hold resources (e.g. TCP ports). + std::error_code Ec; + if (!Wait(5000, Ec)) + { + // Graceful shutdown timed out — force-kill + kill(pid_t(m_Pid), SIGKILL); + Wait(1000, Ec); + } #endif Reset(); @@ -435,6 +446,11 @@ ProcessHandle::Terminate(int ExitCode) return false; } } + + // Wait for the process to be reaped after SIGKILL so it doesn't linger + // as a zombie holding resources (e.g. TCP ports). + std::error_code Ec; + Wait(5000, Ec); #endif Reset(); return true; @@ -1648,47 +1664,60 @@ FindProcess(const std::filesystem::path& ExecutableImage, ProcessHandle& OutHand return MakeErrorCodeFromLastError(); #endif // ZEN_PLATFORM_WINDOWS #if ZEN_PLATFORM_MAC - int Mib[4] = {CTL_KERN, KERN_PROC, KERN_PROC_ALL, 0}; - size_t BufferSize = 0; - - struct kinfo_proc* Processes = nullptr; - uint32_t ProcCount = 0; + int Mib[4] = {CTL_KERN, KERN_PROC, KERN_PROC_ALL, 0}; const pid_t ThisProcessId = getpid(); - if (sysctl(Mib, 4, NULL, &BufferSize, NULL, 0) != -1 && BufferSize > 0) + // The process list can change between the sizing sysctl call and the data sysctl call. + // Retry with padding to handle this race. + struct kinfo_proc* Processes = nullptr; + size_t BufferSize = 0; + bool Fetched = false; + auto _ = MakeGuard([&]() { free(Processes); }); + + for (int Attempt = 0; Attempt < 3; Attempt++) { - struct kinfo_proc* Processes = (struct kinfo_proc*)malloc(BufferSize); - auto _ = MakeGuard([&]() { free(Processes); }); - if (sysctl(Mib, 4, Processes, &BufferSize, NULL, 0) != -1) + if (sysctl(Mib, 4, nullptr, &BufferSize, nullptr, 0) == -1 || BufferSize == 0) + { + break; + } + BufferSize += BufferSize / 4; + free(Processes); + Processes = (struct kinfo_proc*)malloc(BufferSize); + if (sysctl(Mib, 4, Processes, &BufferSize, nullptr, 0) != -1) { - ProcCount = (uint32_t)(BufferSize / sizeof(struct kinfo_proc)); - char Buffer[PROC_PIDPATHINFO_MAXSIZE]; - for (uint32_t ProcIndex = 0; ProcIndex < ProcCount; ProcIndex++) + Fetched = true; + break; + } + } + + if (!Fetched) + { + return MakeErrorCodeFromLastError(); + } + + uint32_t ProcCount = (uint32_t)(BufferSize / sizeof(struct kinfo_proc)); + for (uint32_t ProcIndex = 0; ProcIndex < ProcCount; ProcIndex++) + { + pid_t Pid = Processes[ProcIndex].kp_proc.p_pid; + if (IncludeSelf || (Pid != ThisProcessId)) + { + std::error_code Ec; + std::filesystem::path EntryPath = GetProcessExecutablePath(Pid, Ec); + if (!Ec) { - pid_t Pid = Processes[ProcIndex].kp_proc.p_pid; - if (IncludeSelf || (Pid != ThisProcessId)) + if (EntryPath == ExecutableImage) { - std::error_code Ec; - std::filesystem::path EntryPath = GetProcessExecutablePath(Pid, Ec); - if (!Ec) + if (Processes[ProcIndex].kp_proc.p_stat != SZOMB) { - if (EntryPath == ExecutableImage) - { - if (Processes[ProcIndex].kp_proc.p_stat != SZOMB) - { - OutHandle.Initialize(Pid, Ec); - return Ec; - } - } + OutHandle.Initialize(Pid, Ec); + return Ec; } - Ec.clear(); } } - return {}; } } - return MakeErrorCodeFromLastError(); + return {}; #endif // ZEN_PLATFORM_MAC #if ZEN_PLATFORM_LINUX const pid_t ThisProcessId = getpid(); diff --git a/src/zencore/thread.cpp b/src/zencore/thread.cpp index f74791333..067e66c0d 100644 --- a/src/zencore/thread.cpp +++ b/src/zencore/thread.cpp @@ -503,6 +503,18 @@ NamedMutex::~NamedMutex() if (m_MutexHandle) { int Inner = int(intptr_t(m_MutexHandle)); + + // Remove the backing file before releasing the lock so that new callers + // of Create()/Exists() won't find a stale entry. Other processes that + // already have the file open still hold valid fds (unlink only removes + // the directory entry; the inode lives until the last fd is closed). + std::error_code Ec; + std::filesystem::path Name = PathFromHandle((void*)(intptr_t(Inner)), Ec); + if (!Ec) + { + unlink(Name.c_str()); + } + flock(Inner, LOCK_UN); close(Inner); } diff --git a/src/zenserver-test/zenserver-test.cpp b/src/zenserver-test/zenserver-test.cpp index fff77957d..cf7ffe4e4 100644 --- a/src/zenserver-test/zenserver-test.cpp +++ b/src/zenserver-test/zenserver-test.cpp @@ -99,7 +99,8 @@ main(int argc, char** argv) // somehow in the future std::string ServerClass; - bool Verbose = false; + bool Verbose = false; + bool KillStale = false; for (int i = 1; i < argc; ++i) { @@ -119,6 +120,44 @@ main(int argc, char** argv) { Verbose = true; } + else if (argv[i] == "--kill-stale-processes"sv) + { + KillStale = true; + } + } + + // Since GHA runners can leave processes behind from a previous test run, we need + // to be able to clean up any stale server processes before starting the tests to + // avoid interference + + if (KillStale) + { + ZEN_INFO("Killing any stale processes from previous test runs..."); + + auto KillStaleProcesses = [](const std::filesystem::path& Executable) { + ZEN_INFO(" Looking for stale '{}' processes...", Executable.filename()); + + for (;;) + { + ProcessHandle StaleProcess; + std::error_code Ec = FindProcess(Executable, StaleProcess, /*IncludeSelf*/ false); + + if (Ec || !StaleProcess.IsValid()) + { + break; + } + + ZEN_WARN("====> Found stale '{}' process (pid {}) from a previous test run - terminating it", + Executable.filename(), + StaleProcess.Pid()); + + StaleProcess.Terminate(0); + } + }; + + KillStaleProcesses(ProgramBaseDir / "zenserver" ZEN_EXE_SUFFIX_LITERAL); + KillStaleProcesses(ProgramBaseDir / "zenserver-test" ZEN_EXE_SUFFIX_LITERAL); + KillStaleProcesses(ProgramBaseDir / "zentest-appstub" ZEN_EXE_SUFFIX_LITERAL); } zen::tests::TestEnv.InitializeForTest(ProgramBaseDir, TestBaseDir, ServerClass); diff --git a/src/zenserver/zenserver.cpp b/src/zenserver/zenserver.cpp index 49cbbb9fc..ea86c5654 100644 --- a/src/zenserver/zenserver.cpp +++ b/src/zenserver/zenserver.cpp @@ -684,11 +684,33 @@ ZenServerMain::Run() } else { - ZEN_CONSOLE_WARN(ZEN_APP_NAME " exiting, failed to add sponsor owner pid {} to process listening to port {} (pid: {})", - m_ServerOptions.OwnerPid, - m_ServerOptions.BasePort, - Entry->Pid.load()); - std::exit(1); + // The entry's process failed to pick up our sponsor request after + // multiple attempts. Before reclaiming the entry, verify that the + // PID does not still belong to a zenserver process. If it does, the + // server is alive but unresponsive – fall back to the original error + // path. If the PID is gone or belongs to a different executable the + // entry is genuinely stale and safe to reclaim. + const int StalePid = Entry->Pid.load(); + std::error_code ExeEc; + std::filesystem::path PidExePath = GetProcessExecutablePath(StalePid, ExeEc); + const bool PidIsZenServer = !ExeEc && (PidExePath.filename() == GetRunningExecutablePath().filename()); + if (PidIsZenServer) + { + ZEN_CONSOLE_WARN(ZEN_APP_NAME + " exiting, failed to add sponsor to process on port {} " + "(pid {}); that pid is still a running zenserver instance", + m_ServerOptions.BasePort, + StalePid); + std::exit(1); + } + ZEN_CONSOLE_WARN( + "Failed to add sponsor to process on port {} (pid {}); " + "pid belongs to '{}' – assuming stale entry and reclaiming", + m_ServerOptions.BasePort, + StalePid, + ExeEc ? "<unknown>" : PidExePath.filename().string()); + Entry->Reset(); + Entry = nullptr; } } else diff --git a/src/zenutil/zenserverprocess.cpp b/src/zenutil/zenserverprocess.cpp index a2ab4c291..3993d6a32 100644 --- a/src/zenutil/zenserverprocess.cpp +++ b/src/zenutil/zenserverprocess.cpp @@ -1375,18 +1375,31 @@ ZenServerInstance::OnServerReady() const ZenServerState::ZenServerEntry* Entry = nullptr; - if (m_BasePort) + // The child process signals its ready event after writing its state entry, but under + // heavy instrumentation (e.g. sanitizers) the shared memory writes may not be immediately + // visible to this process. Retry briefly before giving up. + for (int Attempt = 0; Attempt < 10; ++Attempt) { - Entry = State.Lookup(m_BasePort); - } - else - { - State.Snapshot([&](const ZenServerState::ZenServerEntry& InEntry) { - if (InEntry.Pid == (uint32_t)m_Process.Pid()) - { - Entry = &InEntry; - } - }); + if (m_BasePort) + { + Entry = State.Lookup(m_BasePort); + } + else + { + State.Snapshot([&](const ZenServerState::ZenServerEntry& InEntry) { + if (InEntry.Pid == (uint32_t)m_Process.Pid()) + { + Entry = &InEntry; + } + }); + } + + if (Entry) + { + break; + } + + Sleep(100); } if (!Entry) |