From 5bcf03ec7fcc412c2259f341f6c7f3f86631466b Mon Sep 17 00:00:00 2001 From: Stefan Boberg Date: Thu, 9 Apr 2026 13:26:08 +0200 Subject: Fix ZenServerState stale entry detection on PID reuse (k8s) (#932) - Detect stale shared-memory entries whose PID matches the current process but predate our registration (m_OurEntry == nullptr) - Sweep() now reclaims such entries instead of skipping them - Lookup() and LookupByEffectivePort() skip stale same-PID entries - Fixes startup failure on k8s where PID 1 is always reused after an unclean shutdown --- src/zenutil/zenserverprocess.cpp | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'src/zenutil/zenserverprocess.cpp') diff --git a/src/zenutil/zenserverprocess.cpp b/src/zenutil/zenserverprocess.cpp index 2b27b2d8b..9a282a848 100644 --- a/src/zenutil/zenserverprocess.cpp +++ b/src/zenutil/zenserverprocess.cpp @@ -267,6 +267,8 @@ ZenServerState::InitializeReadOnly() ZenServerState::ZenServerEntry* ZenServerState::Lookup(int DesiredListenPort) const { + const uint32_t OurPid = GetCurrentProcessId(); + for (int i = 0; i < m_MaxEntryCount; ++i) { uint16_t EntryPort = m_Data[i].DesiredListenPort; @@ -274,6 +276,14 @@ ZenServerState::Lookup(int DesiredListenPort) const { if (DesiredListenPort == 0 || (EntryPort == DesiredListenPort)) { + // If the entry's PID matches our own but we haven't registered yet, + // this is a stale entry from a previous process incarnation (e.g. PID 1 + // reuse after unclean shutdown in k8s). Skip it. + if (m_Data[i].Pid == OurPid && m_OurEntry == nullptr) + { + continue; + } + std::error_code _; if (IsProcessRunning(m_Data[i].Pid, _)) { @@ -289,6 +299,8 @@ ZenServerState::Lookup(int DesiredListenPort) const ZenServerState::ZenServerEntry* ZenServerState::LookupByEffectivePort(int Port) const { + const uint32_t OurPid = GetCurrentProcessId(); + for (int i = 0; i < m_MaxEntryCount; ++i) { uint16_t EntryPort = m_Data[i].EffectiveListenPort; @@ -296,6 +308,11 @@ ZenServerState::LookupByEffectivePort(int Port) const { if (EntryPort == Port) { + if (m_Data[i].Pid == OurPid && m_OurEntry == nullptr) + { + continue; + } + std::error_code _; if (IsProcessRunning(m_Data[i].Pid, _)) { @@ -358,12 +375,26 @@ ZenServerState::Sweep() ZEN_ASSERT(m_IsReadOnly == false); + const uint32_t OurPid = GetCurrentProcessId(); + for (int i = 0; i < m_MaxEntryCount; ++i) { ZenServerEntry& Entry = m_Data[i]; if (Entry.DesiredListenPort) { + // If the entry's PID matches our own but we haven't registered yet, + // this is a stale entry from a previous process incarnation (e.g. PID 1 + // reuse after unclean shutdown in k8s). Reclaim it. + if (Entry.Pid == OurPid && m_OurEntry == nullptr) + { + ZEN_CONSOLE_DEBUG("Sweep - pid {} matches current process but no registration yet, reclaiming stale entry (port {})", + Entry.Pid.load(), + Entry.DesiredListenPort.load()); + Entry.Reset(); + continue; + } + std::error_code ErrorCode; if (Entry.Pid != 0 && IsProcessRunning(Entry.Pid, ErrorCode) == false) { -- cgit v1.2.3