aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/zencore/process.cpp85
-rw-r--r--src/zencore/thread.cpp12
-rw-r--r--src/zenserver-test/zenserver-test.cpp41
-rw-r--r--src/zenserver/zenserver.cpp32
-rw-r--r--src/zenutil/zenserverprocess.cpp35
5 files changed, 160 insertions, 45 deletions
diff --git a/src/zencore/process.cpp b/src/zencore/process.cpp
index 8a91ab287..47289a37b 100644
--- a/src/zencore/process.cpp
+++ b/src/zencore/process.cpp
@@ -397,6 +397,17 @@ ProcessHandle::Kill()
return false;
}
}
+
+ // Wait for the process to exit after SIGTERM, matching the Windows path
+ // which waits up to 5 seconds for graceful shutdown. Without this wait
+ // the child becomes a zombie and may hold resources (e.g. TCP ports).
+ std::error_code Ec;
+ if (!Wait(5000, Ec))
+ {
+ // Graceful shutdown timed out — force-kill
+ kill(pid_t(m_Pid), SIGKILL);
+ Wait(1000, Ec);
+ }
#endif
Reset();
@@ -435,6 +446,11 @@ ProcessHandle::Terminate(int ExitCode)
return false;
}
}
+
+ // Wait for the process to be reaped after SIGKILL so it doesn't linger
+ // as a zombie holding resources (e.g. TCP ports).
+ std::error_code Ec;
+ Wait(5000, Ec);
#endif
Reset();
return true;
@@ -1648,47 +1664,60 @@ FindProcess(const std::filesystem::path& ExecutableImage, ProcessHandle& OutHand
return MakeErrorCodeFromLastError();
#endif // ZEN_PLATFORM_WINDOWS
#if ZEN_PLATFORM_MAC
- int Mib[4] = {CTL_KERN, KERN_PROC, KERN_PROC_ALL, 0};
- size_t BufferSize = 0;
-
- struct kinfo_proc* Processes = nullptr;
- uint32_t ProcCount = 0;
+ int Mib[4] = {CTL_KERN, KERN_PROC, KERN_PROC_ALL, 0};
const pid_t ThisProcessId = getpid();
- if (sysctl(Mib, 4, NULL, &BufferSize, NULL, 0) != -1 && BufferSize > 0)
+ // The process list can change between the sizing sysctl call and the data sysctl call.
+ // Retry with padding to handle this race.
+ struct kinfo_proc* Processes = nullptr;
+ size_t BufferSize = 0;
+ bool Fetched = false;
+ auto _ = MakeGuard([&]() { free(Processes); });
+
+ for (int Attempt = 0; Attempt < 3; Attempt++)
{
- struct kinfo_proc* Processes = (struct kinfo_proc*)malloc(BufferSize);
- auto _ = MakeGuard([&]() { free(Processes); });
- if (sysctl(Mib, 4, Processes, &BufferSize, NULL, 0) != -1)
+ if (sysctl(Mib, 4, nullptr, &BufferSize, nullptr, 0) == -1 || BufferSize == 0)
+ {
+ break;
+ }
+ BufferSize += BufferSize / 4;
+ free(Processes);
+ Processes = (struct kinfo_proc*)malloc(BufferSize);
+ if (sysctl(Mib, 4, Processes, &BufferSize, nullptr, 0) != -1)
{
- ProcCount = (uint32_t)(BufferSize / sizeof(struct kinfo_proc));
- char Buffer[PROC_PIDPATHINFO_MAXSIZE];
- for (uint32_t ProcIndex = 0; ProcIndex < ProcCount; ProcIndex++)
+ Fetched = true;
+ break;
+ }
+ }
+
+ if (!Fetched)
+ {
+ return MakeErrorCodeFromLastError();
+ }
+
+ uint32_t ProcCount = (uint32_t)(BufferSize / sizeof(struct kinfo_proc));
+ for (uint32_t ProcIndex = 0; ProcIndex < ProcCount; ProcIndex++)
+ {
+ pid_t Pid = Processes[ProcIndex].kp_proc.p_pid;
+ if (IncludeSelf || (Pid != ThisProcessId))
+ {
+ std::error_code Ec;
+ std::filesystem::path EntryPath = GetProcessExecutablePath(Pid, Ec);
+ if (!Ec)
{
- pid_t Pid = Processes[ProcIndex].kp_proc.p_pid;
- if (IncludeSelf || (Pid != ThisProcessId))
+ if (EntryPath == ExecutableImage)
{
- std::error_code Ec;
- std::filesystem::path EntryPath = GetProcessExecutablePath(Pid, Ec);
- if (!Ec)
+ if (Processes[ProcIndex].kp_proc.p_stat != SZOMB)
{
- if (EntryPath == ExecutableImage)
- {
- if (Processes[ProcIndex].kp_proc.p_stat != SZOMB)
- {
- OutHandle.Initialize(Pid, Ec);
- return Ec;
- }
- }
+ OutHandle.Initialize(Pid, Ec);
+ return Ec;
}
- Ec.clear();
}
}
- return {};
}
}
- return MakeErrorCodeFromLastError();
+ return {};
#endif // ZEN_PLATFORM_MAC
#if ZEN_PLATFORM_LINUX
const pid_t ThisProcessId = getpid();
diff --git a/src/zencore/thread.cpp b/src/zencore/thread.cpp
index f74791333..067e66c0d 100644
--- a/src/zencore/thread.cpp
+++ b/src/zencore/thread.cpp
@@ -503,6 +503,18 @@ NamedMutex::~NamedMutex()
if (m_MutexHandle)
{
int Inner = int(intptr_t(m_MutexHandle));
+
+ // Remove the backing file before releasing the lock so that new callers
+ // of Create()/Exists() won't find a stale entry. Other processes that
+ // already have the file open still hold valid fds (unlink only removes
+ // the directory entry; the inode lives until the last fd is closed).
+ std::error_code Ec;
+ std::filesystem::path Name = PathFromHandle((void*)(intptr_t(Inner)), Ec);
+ if (!Ec)
+ {
+ unlink(Name.c_str());
+ }
+
flock(Inner, LOCK_UN);
close(Inner);
}
diff --git a/src/zenserver-test/zenserver-test.cpp b/src/zenserver-test/zenserver-test.cpp
index fff77957d..cf7ffe4e4 100644
--- a/src/zenserver-test/zenserver-test.cpp
+++ b/src/zenserver-test/zenserver-test.cpp
@@ -99,7 +99,8 @@ main(int argc, char** argv)
// somehow in the future
std::string ServerClass;
- bool Verbose = false;
+ bool Verbose = false;
+ bool KillStale = false;
for (int i = 1; i < argc; ++i)
{
@@ -119,6 +120,44 @@ main(int argc, char** argv)
{
Verbose = true;
}
+ else if (argv[i] == "--kill-stale-processes"sv)
+ {
+ KillStale = true;
+ }
+ }
+
+ // Since GHA runners can leave processes behind from a previous test run, we need
+ // to be able to clean up any stale server processes before starting the tests to
+ // avoid interference
+
+ if (KillStale)
+ {
+ ZEN_INFO("Killing any stale processes from previous test runs...");
+
+ auto KillStaleProcesses = [](const std::filesystem::path& Executable) {
+ ZEN_INFO(" Looking for stale '{}' processes...", Executable.filename());
+
+ for (;;)
+ {
+ ProcessHandle StaleProcess;
+ std::error_code Ec = FindProcess(Executable, StaleProcess, /*IncludeSelf*/ false);
+
+ if (Ec || !StaleProcess.IsValid())
+ {
+ break;
+ }
+
+ ZEN_WARN("====> Found stale '{}' process (pid {}) from a previous test run - terminating it",
+ Executable.filename(),
+ StaleProcess.Pid());
+
+ StaleProcess.Terminate(0);
+ }
+ };
+
+ KillStaleProcesses(ProgramBaseDir / "zenserver" ZEN_EXE_SUFFIX_LITERAL);
+ KillStaleProcesses(ProgramBaseDir / "zenserver-test" ZEN_EXE_SUFFIX_LITERAL);
+ KillStaleProcesses(ProgramBaseDir / "zentest-appstub" ZEN_EXE_SUFFIX_LITERAL);
}
zen::tests::TestEnv.InitializeForTest(ProgramBaseDir, TestBaseDir, ServerClass);
diff --git a/src/zenserver/zenserver.cpp b/src/zenserver/zenserver.cpp
index 49cbbb9fc..ea86c5654 100644
--- a/src/zenserver/zenserver.cpp
+++ b/src/zenserver/zenserver.cpp
@@ -684,11 +684,33 @@ ZenServerMain::Run()
}
else
{
- ZEN_CONSOLE_WARN(ZEN_APP_NAME " exiting, failed to add sponsor owner pid {} to process listening to port {} (pid: {})",
- m_ServerOptions.OwnerPid,
- m_ServerOptions.BasePort,
- Entry->Pid.load());
- std::exit(1);
+ // The entry's process failed to pick up our sponsor request after
+ // multiple attempts. Before reclaiming the entry, verify that the
+ // PID does not still belong to a zenserver process. If it does, the
+ // server is alive but unresponsive – fall back to the original error
+ // path. If the PID is gone or belongs to a different executable the
+ // entry is genuinely stale and safe to reclaim.
+ const int StalePid = Entry->Pid.load();
+ std::error_code ExeEc;
+ std::filesystem::path PidExePath = GetProcessExecutablePath(StalePid, ExeEc);
+ const bool PidIsZenServer = !ExeEc && (PidExePath.filename() == GetRunningExecutablePath().filename());
+ if (PidIsZenServer)
+ {
+ ZEN_CONSOLE_WARN(ZEN_APP_NAME
+ " exiting, failed to add sponsor to process on port {} "
+ "(pid {}); that pid is still a running zenserver instance",
+ m_ServerOptions.BasePort,
+ StalePid);
+ std::exit(1);
+ }
+ ZEN_CONSOLE_WARN(
+ "Failed to add sponsor to process on port {} (pid {}); "
+ "pid belongs to '{}' – assuming stale entry and reclaiming",
+ m_ServerOptions.BasePort,
+ StalePid,
+ ExeEc ? "<unknown>" : PidExePath.filename().string());
+ Entry->Reset();
+ Entry = nullptr;
}
}
else
diff --git a/src/zenutil/zenserverprocess.cpp b/src/zenutil/zenserverprocess.cpp
index a2ab4c291..3993d6a32 100644
--- a/src/zenutil/zenserverprocess.cpp
+++ b/src/zenutil/zenserverprocess.cpp
@@ -1375,18 +1375,31 @@ ZenServerInstance::OnServerReady()
const ZenServerState::ZenServerEntry* Entry = nullptr;
- if (m_BasePort)
+ // The child process signals its ready event after writing its state entry, but under
+ // heavy instrumentation (e.g. sanitizers) the shared memory writes may not be immediately
+ // visible to this process. Retry briefly before giving up.
+ for (int Attempt = 0; Attempt < 10; ++Attempt)
{
- Entry = State.Lookup(m_BasePort);
- }
- else
- {
- State.Snapshot([&](const ZenServerState::ZenServerEntry& InEntry) {
- if (InEntry.Pid == (uint32_t)m_Process.Pid())
- {
- Entry = &InEntry;
- }
- });
+ if (m_BasePort)
+ {
+ Entry = State.Lookup(m_BasePort);
+ }
+ else
+ {
+ State.Snapshot([&](const ZenServerState::ZenServerEntry& InEntry) {
+ if (InEntry.Pid == (uint32_t)m_Process.Pid())
+ {
+ Entry = &InEntry;
+ }
+ });
+ }
+
+ if (Entry)
+ {
+ break;
+ }
+
+ Sleep(100);
}
if (!Entry)