diff options
| author | Fuwn <[email protected]> | 2026-01-20 17:16:22 -0800 |
|---|---|---|
| committer | Fuwn <[email protected]> | 2026-01-20 17:16:22 -0800 |
| commit | 2371b28128213fbcc8d1c062dccc3074e6b0fa98 (patch) | |
| tree | 84452dbf5f2b1821d1fc5cf8ecdb0a5ad2b74f56 | |
| parent | fix: Use wildcard path for badge endpoint to support .svg extension (diff) | |
| download | kaze-2371b28128213fbcc8d1c062dccc3074e6b0fa98.tar.xz kaze-2371b28128213fbcc8d1c062dccc3074e6b0fa98.zip | |
feat: Use composite group/name key for monitor identification
Previously, monitors were identified by name alone, so monitors with the
same name in different groups shared data in the database.
Changes:
- Add ID() method to MonitorConfig returning 'group/name' format
- Add Group field to MonitorConfig (set at runtime)
- Update Monitor interface with ID() and Group() methods
- Update all monitor implementations (http, tcp, dns, icmp, gemini,
graphql, database) to use composite ID
- Update Scheduler to use monitor ID instead of name
- Update server handlers to use composite ID for stats lookups
- Change API routes to use {group}/{name} pattern:
  - /api/monitor/{group}/{name}
  - /api/history/{group}/{name}
  - /api/uptime/{group}/{name}
  - /api/badge/{group}/{name}.svg
- URL-encode group and name components to handle special characters
(e.g., slashes in names become %2F)
- Update config.UpdateResetFlag to accept group and name separately
BREAKING: API endpoints now require the group in the path. Existing database
data keyed by the bare monitor name will not be associated with the new
composite keys.
| -rw-r--r-- | internal/config/config.go | 71 | ||||
| -rw-r--r-- | internal/monitor/database.go | 16 | ||||
| -rw-r--r-- | internal/monitor/dns.go | 64 | ||||
| -rw-r--r-- | internal/monitor/gemini.go | 94 | ||||
| -rw-r--r-- | internal/monitor/graphql.go | 17 | ||||
| -rw-r--r-- | internal/monitor/http.go | 16 | ||||
| -rw-r--r-- | internal/monitor/icmp.go | 56 | ||||
| -rw-r--r-- | internal/monitor/monitor.go | 6 | ||||
| -rw-r--r-- | internal/monitor/scheduler.go | 41 | ||||
| -rw-r--r-- | internal/monitor/tcp.go | 23 | ||||
| -rw-r--r-- | internal/server/server.go | 100 |
11 files changed, 316 insertions, 188 deletions
diff --git a/internal/config/config.go b/internal/config/config.go index 0c8b430..7542d3e 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -2,6 +2,7 @@ package config import ( "fmt" + "net/url" "os" "strings" "time" @@ -117,6 +118,7 @@ type MonitorDefaults struct { // MonitorConfig represents a single monitor type MonitorConfig struct { Name string `yaml:"name"` + Group string `yaml:"-"` // Set at runtime, not from YAML - the group this monitor belongs to Type string `yaml:"type"` // http, https, tcp, gemini Target string `yaml:"target"` Link string `yaml:"link,omitempty"` // Custom URL for clicking the monitor name (e.g., docs page) @@ -151,6 +153,45 @@ type MonitorConfig struct { DBType string `yaml:"db_type,omitempty"` // Database type: postgres, mysql, redis, memcached, mongodb } +// ID returns the unique identifier for this monitor (group/name format) +// Both group and name are URL-encoded to handle special characters like '/' +func (m *MonitorConfig) ID() string { + if m.Group == "" { + return url.PathEscape(m.Name) + } + return url.PathEscape(m.Group) + "/" + url.PathEscape(m.Name) +} + +// ParseMonitorID splits a monitor ID back into group and name components +// Returns (group, name, ok) where ok is false if the ID format is invalid +func ParseMonitorID(id string) (group, name string, ok bool) { + // Find the separator (first unescaped '/') + idx := strings.Index(id, "/") + if idx == -1 { + // No group, just a name + decoded, err := url.PathUnescape(id) + if err != nil { + return "", "", false + } + return "", decoded, true + } + + groupPart := id[:idx] + namePart := id[idx+1:] + + decodedGroup, err := url.PathUnescape(groupPart) + if err != nil { + return "", "", false + } + + decodedName, err := url.PathUnescape(namePart) + if err != nil { + return "", "", false + } + + return decodedGroup, decodedName, true +} + // IncidentConfig represents an incident or maintenance type IncidentConfig struct { Title string `yaml:"title"` @@ 
-285,6 +326,9 @@ func (c *Config) applyDefaults() { for j := range c.Groups[i].Monitors { m := &c.Groups[i].Monitors[j] + // Set the group name on the monitor + m.Group = grp.Name + // Apply group-level defaults first, then monitor-level overrides if m.Interval.Duration == 0 { if grp.Defaults != nil && grp.Defaults.Interval != nil { @@ -534,7 +578,8 @@ type MonitorWithGroup struct { // UpdateResetFlag updates the reset_on_next_check flag for a specific monitor in the config file // Uses a line-based approach to preserve original formatting and omitted fields -func UpdateResetFlag(configPath string, monitorName string, value bool) error { +// groupName and monitorName are used to find the correct monitor in the YAML structure +func UpdateResetFlag(configPath string, groupName string, monitorName string, value bool) error { // Read the config file data, err := os.ReadFile(configPath) if err != nil { @@ -542,6 +587,7 @@ func UpdateResetFlag(configPath string, monitorName string, value bool) error { } lines := strings.Split(string(data), "\n") + inTargetGroup := false inMonitor := false monitorIndent := "" foundMonitor := false @@ -550,8 +596,25 @@ func UpdateResetFlag(configPath string, monitorName string, value bool) error { line := lines[i] trimmed := strings.TrimSpace(line) - // Check if this is the start of our target monitor - if strings.HasPrefix(trimmed, "- name:") || strings.HasPrefix(trimmed, "name:") { + // Check if this is a group name line + if strings.HasPrefix(trimmed, "- name:") && !inMonitor { + // Could be a group name - check if it matches our target group + namePart := strings.TrimPrefix(trimmed, "- name:") + namePart = strings.TrimSpace(namePart) + namePart = strings.Trim(namePart, "\"'") + + // Check if this is our target group + if namePart == groupName { + inTargetGroup = true + } else if inTargetGroup { + // We've moved to a different group, stop looking + break + } + continue + } + + // Check if this is the start of our target monitor (within 
the target group) + if inTargetGroup && (strings.HasPrefix(trimmed, "- name:") || strings.HasPrefix(trimmed, "name:")) { // Extract monitor name from line namePart := strings.TrimPrefix(trimmed, "- name:") namePart = strings.TrimPrefix(namePart, "name:") @@ -597,7 +660,7 @@ func UpdateResetFlag(configPath string, monitorName string, value bool) error { } if !foundMonitor { - return fmt.Errorf("monitor %q not found in config", monitorName) + return fmt.Errorf("monitor %q in group %q not found in config", monitorName, groupName) } // Write back to file diff --git a/internal/monitor/database.go b/internal/monitor/database.go index 0b4d2a9..3301632 100644 --- a/internal/monitor/database.go +++ b/internal/monitor/database.go @@ -14,7 +14,9 @@ import ( // DatabaseMonitor monitors database connections type DatabaseMonitor struct { + id string name string + group string target string // Connection string or host:port dbType string // postgres, mysql, redis, mongodb, sqlite interval time.Duration @@ -48,7 +50,9 @@ func NewDatabaseMonitor(cfg config.MonitorConfig) (*DatabaseMonitor, error) { } return &DatabaseMonitor{ + id: cfg.ID(), name: cfg.Name, + group: cfg.Group, target: cfg.Target, dbType: dbType, interval: cfg.Interval.Duration, @@ -59,11 +63,21 @@ func NewDatabaseMonitor(cfg config.MonitorConfig) (*DatabaseMonitor, error) { }, nil } +// ID returns the unique identifier for this monitor +func (m *DatabaseMonitor) ID() string { + return m.id +} + // Name returns the monitor's name func (m *DatabaseMonitor) Name() string { return m.name } +// Group returns the group this monitor belongs to +func (m *DatabaseMonitor) Group() string { + return m.group +} + // Type returns the monitor type func (m *DatabaseMonitor) Type() string { return "database" @@ -102,7 +116,7 @@ func (m *DatabaseMonitor) RoundUptime() bool { // Check performs the database connection check func (m *DatabaseMonitor) Check(ctx context.Context) *Result { result := &Result{ - MonitorName: m.name, + 
MonitorName: m.id, Timestamp: time.Now(), } diff --git a/internal/monitor/dns.go b/internal/monitor/dns.go index 4f4f099..a962b5d 100644 --- a/internal/monitor/dns.go +++ b/internal/monitor/dns.go @@ -12,7 +12,9 @@ import ( // DNSMonitor monitors DNS resolution type DNSMonitor struct { + id string name string + group string target string // Domain to resolve interval time.Duration timeout time.Duration @@ -34,7 +36,9 @@ func NewDNSMonitor(cfg config.MonitorConfig) (*DNSMonitor, error) { } return &DNSMonitor{ + id: cfg.ID(), name: cfg.Name, + group: cfg.Group, target: cfg.Target, interval: cfg.Interval.Duration, timeout: cfg.Timeout.Duration, @@ -48,11 +52,21 @@ func NewDNSMonitor(cfg config.MonitorConfig) (*DNSMonitor, error) { }, nil } +// ID returns the unique identifier for this monitor +func (m *DNSMonitor) ID() string { + return m.id +} + // Name returns the monitor's name func (m *DNSMonitor) Name() string { return m.name } +// Group returns the group this monitor belongs to +func (m *DNSMonitor) Group() string { + return m.group +} + // Type returns the monitor type func (m *DNSMonitor) Type() string { return "dns" @@ -91,7 +105,7 @@ func (m *DNSMonitor) RoundUptime() bool { // Check performs the DNS resolution check func (m *DNSMonitor) Check(ctx context.Context) *Result { result := &Result{ - MonitorName: m.name, + MonitorName: m.id, Timestamp: time.Now(), } @@ -127,16 +141,16 @@ func (m *DNSMonitor) Check(ctx context.Context) *Result { if len(ips) == 0 { result.Status = StatusDown - result.Error = fmt.Errorf("no IP addresses returned") + result.Error = fmt.Errorf("no %s records found", m.recordType) return result } - // Check if expected IPs match + // If expected IPs are specified, verify them if len(m.expectedIPs) > 0 { found := false - for _, expectedIP := range m.expectedIPs { - for _, ip := range ips { - if ip.String() == expectedIP { + for _, ip := range ips { + for _, expected := range m.expectedIPs { + if ip.String() == expected { found = true 
break } @@ -147,7 +161,7 @@ func (m *DNSMonitor) Check(ctx context.Context) *Result { } if !found { result.Status = StatusDegraded - result.Error = fmt.Errorf("resolved IPs don't match expected: got %v, expected %v", ips, m.expectedIPs) + result.Error = fmt.Errorf("expected IPs not found in response") return result } } @@ -164,7 +178,7 @@ func (m *DNSMonitor) Check(ctx context.Context) *Result { return result } - // Check if expected CNAME matches + // If expected CNAME is specified, verify it if m.expectedCNAME != "" && cname != m.expectedCNAME { result.Status = StatusDegraded result.Error = fmt.Errorf("CNAME mismatch: got %s, expected %s", cname, m.expectedCNAME) @@ -174,7 +188,7 @@ func (m *DNSMonitor) Check(ctx context.Context) *Result { result.Status = StatusUp case "MX": - mxRecords, err := resolver.LookupMX(timeoutCtx, m.target) + mxs, err := resolver.LookupMX(timeoutCtx, m.target) result.ResponseTime = time.Since(start) if err != nil { @@ -183,7 +197,7 @@ func (m *DNSMonitor) Check(ctx context.Context) *Result { return result } - if len(mxRecords) == 0 { + if len(mxs) == 0 { result.Status = StatusDown result.Error = fmt.Errorf("no MX records found") return result @@ -192,7 +206,7 @@ func (m *DNSMonitor) Check(ctx context.Context) *Result { result.Status = StatusUp case "TXT": - txtRecords, err := resolver.LookupTXT(timeoutCtx, m.target) + txts, err := resolver.LookupTXT(timeoutCtx, m.target) result.ResponseTime = time.Since(start) if err != nil { @@ -201,7 +215,7 @@ func (m *DNSMonitor) Check(ctx context.Context) *Result { return result } - if len(txtRecords) == 0 { + if len(txts) == 0 { result.Status = StatusDown result.Error = fmt.Errorf("no TXT records found") return result @@ -210,31 +224,9 @@ func (m *DNSMonitor) Check(ctx context.Context) *Result { result.Status = StatusUp default: - // Fallback to generic IP lookup - ips, err := resolver.LookupIP(timeoutCtx, "ip", m.target) result.ResponseTime = time.Since(start) - - if err != nil { - result.Status = 
StatusDown - result.Error = fmt.Errorf("DNS lookup failed: %w", err) - return result - } - - if len(ips) == 0 { - result.Status = StatusDown - result.Error = fmt.Errorf("no IP addresses returned") - return result - } - - result.Status = StatusUp - } - - // Check for slow DNS resolution (degraded if > 1 second) - if result.Status == StatusUp && result.ResponseTime > 1*time.Second { - result.Status = StatusDegraded - if result.Error == nil { - result.Error = fmt.Errorf("slow DNS resolution: %v", result.ResponseTime) - } + result.Status = StatusDown + result.Error = fmt.Errorf("unsupported record type: %s", m.recordType) } return result diff --git a/internal/monitor/gemini.go b/internal/monitor/gemini.go index ed04e9d..65076e5 100644 --- a/internal/monitor/gemini.go +++ b/internal/monitor/gemini.go @@ -14,7 +14,9 @@ import ( // GeminiMonitor monitors Gemini protocol endpoints type GeminiMonitor struct { + id string name string + group string target string interval time.Duration timeout time.Duration @@ -53,7 +55,9 @@ func NewGeminiMonitor(cfg config.MonitorConfig) (*GeminiMonitor, error) { } return &GeminiMonitor{ + id: cfg.ID(), name: cfg.Name, + group: cfg.Group, target: target, interval: cfg.Interval.Duration, timeout: cfg.Timeout.Duration, @@ -65,11 +69,21 @@ func NewGeminiMonitor(cfg config.MonitorConfig) (*GeminiMonitor, error) { }, nil } +// ID returns the unique identifier for this monitor +func (m *GeminiMonitor) ID() string { + return m.id +} + // Name returns the monitor's name func (m *GeminiMonitor) Name() string { return m.name } +// Group returns the group this monitor belongs to +func (m *GeminiMonitor) Group() string { + return m.group +} + // Type returns the monitor type func (m *GeminiMonitor) Type() string { return "gemini" @@ -108,7 +122,7 @@ func (m *GeminiMonitor) RoundUptime() bool { // Check performs the Gemini protocol check func (m *GeminiMonitor) Check(ctx context.Context) *Result { result := &Result{ - MonitorName: m.name, + MonitorName: 
m.id, Timestamp: time.Now(), } @@ -133,41 +147,33 @@ func (m *GeminiMonitor) Check(ctx context.Context) *Result { // Connect with TLS conn, err := tls.DialWithDialer(dialer, "tcp", m.target, tlsConfig) if err != nil { - result.Status = StatusDown result.ResponseTime = time.Since(start) - result.Error = fmt.Errorf("connection failed: %w", err) + result.Status = StatusDown + result.Error = fmt.Errorf("TLS connection failed: %w", err) return result } defer conn.Close() - // Check SSL certificate (always track, even if not verifying) - connState := conn.ConnectionState() - if len(connState.PeerCertificates) > 0 { - cert := connState.PeerCertificates[0] + // Get SSL certificate info + if len(conn.ConnectionState().PeerCertificates) > 0 { + cert := conn.ConnectionState().PeerCertificates[0] result.SSLExpiry = &cert.NotAfter result.SSLDaysLeft = int(time.Until(cert.NotAfter).Hours() / 24) } - // Set deadline for the entire operation - deadline, ok := ctx.Deadline() - if ok { - conn.SetDeadline(deadline) - } else { - conn.SetDeadline(time.Now().Add(m.timeout)) - } - - // Send Gemini request + // Send Gemini request (just the URL followed by CRLF) // Format: gemini://host/path\r\n geminiURL := fmt.Sprintf("gemini://%s/\r\n", host) - _, err = conn.Write([]byte(geminiURL)) - if err != nil { - result.Status = StatusDown + conn.SetDeadline(time.Now().Add(m.timeout)) + + if _, err := conn.Write([]byte(geminiURL)); err != nil { result.ResponseTime = time.Since(start) + result.Status = StatusDown result.Error = fmt.Errorf("failed to send request: %w", err) return result } - // Read response header + // Read response header (status code and meta) reader := bufio.NewReader(conn) responseLine, err := reader.ReadString('\n') result.ResponseTime = time.Since(start) @@ -178,28 +184,17 @@ func (m *GeminiMonitor) Check(ctx context.Context) *Result { return result } - // Parse Gemini response - // Format: <STATUS><SPACE><META><CR><LF> - responseLine = strings.TrimSpace(responseLine) - 
parts := strings.SplitN(responseLine, " ", 2) - - if len(parts) < 1 { - result.Status = StatusDown - result.Error = fmt.Errorf("invalid response format") - return result - } - - // Parse status code (first 2 digits) - if len(parts[0]) < 2 { + // Parse status code (first two characters) + if len(responseLine) < 2 { result.Status = StatusDown - result.Error = fmt.Errorf("invalid status code: %s", parts[0]) + result.Error = fmt.Errorf("invalid Gemini response: too short") return result } - statusCode := parts[0][:2] + statusCode := responseLine[0:2] // Gemini status codes: - // 1x = INPUT (need user input) + // 1x = INPUT // 2x = SUCCESS // 3x = REDIRECT // 4x = TEMPORARY FAILURE @@ -207,29 +202,16 @@ func (m *GeminiMonitor) Check(ctx context.Context) *Result { // 6x = CLIENT CERTIFICATE REQUIRED switch statusCode[0] { - case '2': // Success (20 = success) - result.Status = StatusUp - case '3': // Redirect - consider as working - result.Status = StatusUp - case '1': // Input required - server is up but needs input - result.Status = StatusUp - case '4': // Temporary failure - result.Status = StatusDegraded - result.Error = fmt.Errorf("temporary failure: %s", responseLine) - case '5': // Permanent failure - result.Status = StatusDown - result.Error = fmt.Errorf("permanent failure: %s", responseLine) - case '6': // Client cert required - server is up + case '2': result.Status = StatusUp + case '3': + result.Status = StatusUp // Redirects are ok + case '1', '6': + result.Status = StatusDegraded // Input or cert required + result.Error = fmt.Errorf("status %s: %s", statusCode, strings.TrimSpace(responseLine[3:])) default: result.Status = StatusDown - result.Error = fmt.Errorf("unknown status code: %s", statusCode) - } - - // Check for slow response (degraded if > 2 seconds) - if result.Status == StatusUp && result.ResponseTime > 2*time.Second { - result.Status = StatusDegraded - result.Error = fmt.Errorf("slow response: %v", result.ResponseTime) + result.Error = 
fmt.Errorf("status %s: %s", statusCode, strings.TrimSpace(responseLine[3:])) } return result diff --git a/internal/monitor/graphql.go b/internal/monitor/graphql.go index 5b1fc91..333cf88 100644 --- a/internal/monitor/graphql.go +++ b/internal/monitor/graphql.go @@ -16,7 +16,9 @@ import ( // GraphQLMonitor monitors GraphQL endpoints type GraphQLMonitor struct { + id string name string + group string target string interval time.Duration timeout time.Duration @@ -85,7 +87,9 @@ func NewGraphQLMonitor(cfg config.MonitorConfig) (*GraphQLMonitor, error) { } return &GraphQLMonitor{ + id: cfg.ID(), name: cfg.Name, + group: cfg.Group, target: cfg.Target, interval: cfg.Interval.Duration, timeout: cfg.Timeout.Duration, @@ -104,11 +108,21 @@ func NewGraphQLMonitor(cfg config.MonitorConfig) (*GraphQLMonitor, error) { }, nil } +// ID returns the unique identifier for this monitor +func (m *GraphQLMonitor) ID() string { + return m.id +} + // Name returns the monitor's name func (m *GraphQLMonitor) Name() string { return m.name } +// Group returns the group this monitor belongs to +func (m *GraphQLMonitor) Group() string { + return m.group +} + // Type returns the monitor type func (m *GraphQLMonitor) Type() string { return "graphql" @@ -147,7 +161,7 @@ func (m *GraphQLMonitor) RoundUptime() bool { // Check performs the GraphQL endpoint check func (m *GraphQLMonitor) Check(ctx context.Context) *Result { result := &Result{ - MonitorName: m.name, + MonitorName: m.id, Timestamp: time.Now(), } @@ -248,6 +262,5 @@ func (m *GraphQLMonitor) Check(ctx context.Context) *Result { } result.Status = StatusUp - return result } diff --git a/internal/monitor/http.go b/internal/monitor/http.go index ddf8641..cdd7226 100644 --- a/internal/monitor/http.go +++ b/internal/monitor/http.go @@ -15,7 +15,9 @@ import ( // HTTPMonitor monitors HTTP and HTTPS endpoints type HTTPMonitor struct { + id string // unique identifier (group/name) name string + group string monitorType string target string interval 
time.Duration @@ -82,7 +84,9 @@ func NewHTTPMonitor(cfg config.MonitorConfig) (*HTTPMonitor, error) { } return &HTTPMonitor{ + id: cfg.ID(), name: cfg.Name, + group: cfg.Group, monitorType: cfg.Type, target: target, interval: cfg.Interval.Duration, @@ -102,11 +106,21 @@ func NewHTTPMonitor(cfg config.MonitorConfig) (*HTTPMonitor, error) { }, nil } +// ID returns the unique identifier for this monitor +func (m *HTTPMonitor) ID() string { + return m.id +} + // Name returns the monitor's name func (m *HTTPMonitor) Name() string { return m.name } +// Group returns the group this monitor belongs to +func (m *HTTPMonitor) Group() string { + return m.group +} + // Type returns the monitor type func (m *HTTPMonitor) Type() string { return m.monitorType @@ -145,7 +159,7 @@ func (m *HTTPMonitor) RoundUptime() bool { // Check performs the HTTP/HTTPS check func (m *HTTPMonitor) Check(ctx context.Context) *Result { result := &Result{ - MonitorName: m.name, + MonitorName: m.id, Timestamp: time.Now(), } diff --git a/internal/monitor/icmp.go b/internal/monitor/icmp.go index 8b1385c..9313cca 100644 --- a/internal/monitor/icmp.go +++ b/internal/monitor/icmp.go @@ -11,7 +11,9 @@ import ( // ICMPMonitor monitors hosts using ICMP ping type ICMPMonitor struct { + id string name string + group string target string interval time.Duration timeout time.Duration @@ -30,7 +32,9 @@ func NewICMPMonitor(cfg config.MonitorConfig) (*ICMPMonitor, error) { } return &ICMPMonitor{ + id: cfg.ID(), name: cfg.Name, + group: cfg.Group, target: cfg.Target, interval: cfg.Interval.Duration, timeout: cfg.Timeout.Duration, @@ -41,11 +45,21 @@ func NewICMPMonitor(cfg config.MonitorConfig) (*ICMPMonitor, error) { }, nil } +// ID returns the unique identifier for this monitor +func (m *ICMPMonitor) ID() string { + return m.id +} + // Name returns the monitor's name func (m *ICMPMonitor) Name() string { return m.name } +// Group returns the group this monitor belongs to +func (m *ICMPMonitor) Group() string { + 
return m.group +} + // Type returns the monitor type func (m *ICMPMonitor) Type() string { return "icmp" @@ -84,7 +98,7 @@ func (m *ICMPMonitor) RoundUptime() bool { // Check performs the ICMP ping check func (m *ICMPMonitor) Check(ctx context.Context) *Result { result := &Result{ - MonitorName: m.name, + MonitorName: m.id, Timestamp: time.Now(), } @@ -100,50 +114,34 @@ func (m *ICMPMonitor) Check(ctx context.Context) *Result { pinger.Timeout = m.timeout pinger.SetPrivileged(false) // Use unprivileged mode (UDP) by default - // Run with context cancellation support - done := make(chan error, 1) - go func() { - done <- pinger.Run() - }() - - select { - case <-ctx.Done(): - pinger.Stop() + // Run ping + err = pinger.Run() + if err != nil { result.Status = StatusDown - result.Error = fmt.Errorf("ping cancelled: %w", ctx.Err()) + result.Error = fmt.Errorf("ping failed: %w", err) return result - case err := <-done: - if err != nil { - result.Status = StatusDown - result.Error = fmt.Errorf("ping failed: %w", err) - return result - } } stats := pinger.Statistics() - // If no packets were received, mark as down + // Check if any packets were received if stats.PacketsRecv == 0 { result.Status = StatusDown - result.Error = fmt.Errorf("no packets received (100%% packet loss)") - result.ResponseTime = m.timeout + result.Error = fmt.Errorf("no response: 0/%d packets received", stats.PacketsSent) return result } // Use average RTT as response time result.ResponseTime = stats.AvgRtt - // Determine status based on packet loss - packetLoss := float64(stats.PacketsSent-stats.PacketsRecv) / float64(stats.PacketsSent) * 100 - - if packetLoss == 0 { - result.Status = StatusUp - } else if packetLoss < 50 { + // Check packet loss + if stats.PacketLoss > 0 { + // Some packet loss - degraded result.Status = StatusDegraded - result.Error = fmt.Errorf("%.0f%% packet loss", packetLoss) + result.Error = fmt.Errorf("%.1f%% packet loss (%d/%d)", stats.PacketLoss, stats.PacketsRecv, 
stats.PacketsSent) } else { - result.Status = StatusDown - result.Error = fmt.Errorf("%.0f%% packet loss", packetLoss) + // All packets received - up + result.Status = StatusUp } return result diff --git a/internal/monitor/monitor.go b/internal/monitor/monitor.go index 9a1ec15..d530e06 100644 --- a/internal/monitor/monitor.go +++ b/internal/monitor/monitor.go @@ -31,9 +31,15 @@ const ( // Monitor is the interface that all monitor types must implement type Monitor interface { + // ID returns the unique identifier for this monitor (group/name format) + ID() string + // Name returns the monitor's name Name() string + // Group returns the group this monitor belongs to + Group() string + // Type returns the monitor type (http, https, tcp, gemini, icmp, dns, graphql, database) Type() string diff --git a/internal/monitor/scheduler.go b/internal/monitor/scheduler.go index 1478732..ad1b7f6 100644 --- a/internal/monitor/scheduler.go +++ b/internal/monitor/scheduler.go @@ -13,7 +13,7 @@ import ( // Scheduler manages and runs all monitors type Scheduler struct { monitors []Monitor - monitorCfg map[string]config.MonitorConfig // Monitor configs by name for reset flag checks + monitorCfg map[string]config.MonitorConfig // Monitor configs by ID (group/name) for reset flag checks configPath string storage *storage.Storage logger *slog.Logger @@ -44,8 +44,10 @@ func NewScheduler(cfg *config.Config, store *storage.Storage, logger *slog.Logge return nil, err } s.monitors = append(s.monitors, mon) - s.monitorCfg[monCfg.Name] = monCfg // Store config for reset flag checks + s.monitorCfg[mon.ID()] = monCfg // Store config by ID (group/name) for reset flag checks logger.Info("registered monitor", + "id", mon.ID(), + "group", mon.Group(), "name", mon.Name(), "type", mon.Type(), "target", mon.Target(), @@ -91,7 +93,7 @@ func (s *Scheduler) runMonitor(mon Monitor) { for { select { case <-s.ctx.Done(): - s.logger.Info("monitor stopped", "name", mon.Name()) + s.logger.Info("monitor stopped", 
"id", mon.ID()) return case <-ticker.C: s.executeCheck(mon) @@ -102,27 +104,28 @@ func (s *Scheduler) runMonitor(mon Monitor) { // executeCheck performs a single check and saves the result func (s *Scheduler) executeCheck(mon Monitor) { // Check if reset flag is set for this monitor - if monCfg, exists := s.monitorCfg[mon.Name()]; exists && monCfg.ResetOnNextCheck { - s.logger.Info("resetting monitor data", "name", mon.Name()) + if monCfg, exists := s.monitorCfg[mon.ID()]; exists && monCfg.ResetOnNextCheck { + s.logger.Info("resetting monitor data", "id", mon.ID()) - // Delete all historical data for this monitor - if err := s.storage.ResetMonitorData(s.ctx, mon.Name()); err != nil { + // Delete all historical data for this monitor (using ID as the key) + if err := s.storage.ResetMonitorData(s.ctx, mon.ID()); err != nil { s.logger.Error("failed to reset monitor data", - "name", mon.Name(), + "id", mon.ID(), "error", err) } else { - s.logger.Info("monitor data reset complete", "name", mon.Name()) + s.logger.Info("monitor data reset complete", "id", mon.ID()) // Flip the reset flag to false in the config file - if err := config.UpdateResetFlag(s.configPath, mon.Name(), false); err != nil { + // Note: UpdateResetFlag uses monitor name for YAML lookup, group for context + if err := config.UpdateResetFlag(s.configPath, monCfg.Group, monCfg.Name, false); err != nil { s.logger.Error("failed to update reset flag in config", - "name", mon.Name(), + "id", mon.ID(), "error", err) } else { // Update in-memory config monCfg.ResetOnNextCheck = false - s.monitorCfg[mon.Name()] = monCfg - s.logger.Info("reset flag cleared in config", "name", mon.Name()) + s.monitorCfg[mon.ID()] = monCfg + s.logger.Info("reset flag cleared in config", "id", mon.ID()) } } } @@ -164,7 +167,7 @@ func (s *Scheduler) executeCheck(mon Monitor) { // Log the result logAttrs := []any{ - "name", mon.Name(), + "id", mon.ID(), "status", result.Status, "response_time", result.ResponseTime, } @@ -187,7 +190,7 @@ 
func (s *Scheduler) executeCheck(mon Monitor) { // Save to storage if err := s.storage.SaveCheckResult(s.ctx, result.ToCheckResult()); err != nil { s.logger.Error("failed to save check result", - "name", mon.Name(), + "id", mon.ID(), "error", err) } } @@ -220,10 +223,10 @@ func (s *Scheduler) GetMonitors() []Monitor { return s.monitors } -// RunCheck manually triggers a check for a specific monitor -func (s *Scheduler) RunCheck(name string) *Result { +// RunCheck manually triggers a check for a specific monitor by ID (group/name format) +func (s *Scheduler) RunCheck(id string) *Result { for _, mon := range s.monitors { - if mon.Name() == name { + if mon.ID() == id { ctx, cancel := context.WithTimeout(context.Background(), mon.Interval()) defer cancel() result := mon.Check(ctx) @@ -231,7 +234,7 @@ func (s *Scheduler) RunCheck(name string) *Result { // Save the result if err := s.storage.SaveCheckResult(context.Background(), result.ToCheckResult()); err != nil { s.logger.Error("failed to save manual check result", - "name", mon.Name(), + "id", mon.ID(), "error", err) } diff --git a/internal/monitor/tcp.go b/internal/monitor/tcp.go index da0a822..50a8a4b 100644 --- a/internal/monitor/tcp.go +++ b/internal/monitor/tcp.go @@ -11,7 +11,9 @@ import ( // TCPMonitor monitors TCP endpoints type TCPMonitor struct { + id string name string + group string target string interval time.Duration timeout time.Duration @@ -29,7 +31,9 @@ func NewTCPMonitor(cfg config.MonitorConfig) (*TCPMonitor, error) { } return &TCPMonitor{ + id: cfg.ID(), name: cfg.Name, + group: cfg.Group, target: cfg.Target, interval: cfg.Interval.Duration, timeout: cfg.Timeout.Duration, @@ -39,11 +43,21 @@ func NewTCPMonitor(cfg config.MonitorConfig) (*TCPMonitor, error) { }, nil } +// ID returns the unique identifier for this monitor +func (m *TCPMonitor) ID() string { + return m.id +} + // Name returns the monitor's name func (m *TCPMonitor) Name() string { return m.name } +// Group returns the group this 
monitor belongs to +func (m *TCPMonitor) Group() string { + return m.group +} + // Type returns the monitor type func (m *TCPMonitor) Type() string { return "tcp" @@ -82,7 +96,7 @@ func (m *TCPMonitor) RoundUptime() bool { // Check performs the TCP connection check func (m *TCPMonitor) Check(ctx context.Context) *Result { result := &Result{ - MonitorName: m.name, + MonitorName: m.id, Timestamp: time.Now(), } @@ -104,12 +118,5 @@ func (m *TCPMonitor) Check(ctx context.Context) *Result { defer conn.Close() result.Status = StatusUp - - // Check for slow response (degraded if > 1 second for TCP) - if result.ResponseTime > 1*time.Second { - result.Status = StatusDegraded - result.Error = fmt.Errorf("slow connection: %v", result.ResponseTime) - } - return result } diff --git a/internal/server/server.go b/internal/server/server.go index ace36f1..b47bf79 100644 --- a/internal/server/server.go +++ b/internal/server/server.go @@ -10,6 +10,7 @@ import ( "io/fs" "log/slog" "net/http" + "net/url" "sort" "strconv" "strings" @@ -68,18 +69,18 @@ func New(cfg *config.Config, store *storage.Storage, sched *monitor.Scheduler, l // API endpoints (protected by API access control) mux.HandleFunc("GET /api/status", s.withAPIAuth(s.handleAPIStatus)) - mux.HandleFunc("GET /api/monitor/{name}", s.withAPIAuth(s.handleAPIMonitor)) - mux.HandleFunc("GET /api/history/{name}", s.withAPIAuth(s.handleAPIHistory)) + mux.HandleFunc("GET /api/monitor/{group}/{name}", s.withAPIAuth(s.handleAPIMonitor)) + mux.HandleFunc("GET /api/history/{group}/{name}", s.withAPIAuth(s.handleAPIHistory)) mux.HandleFunc("GET /api/summary", s.withAPIAuth(s.handleAPISummary)) - mux.HandleFunc("GET /api/uptime/{name}", s.withAPIAuth(s.handleAPIUptime)) + mux.HandleFunc("GET /api/uptime/{group}/{name}", s.withAPIAuth(s.handleAPIUptime)) mux.HandleFunc("GET /api/incidents", s.withAPIAuth(s.handleAPIIncidents)) // Health check - always public (for load balancers, monitoring) mux.HandleFunc("GET /api/health", 
s.handleAPIHealth) // Badge endpoint - always public (for embedding in READMEs, docs) - // Note: {name...} captures the rest of the path including .svg extension - mux.HandleFunc("GET /api/badge/{name...}", s.handleAPIBadge) + // Note: {path...} captures the rest of the path (group/name.svg) + mux.HandleFunc("GET /api/badge/{path...}", s.handleAPIBadge) // Full page data endpoint - public if refresh_mode=api, otherwise follows api.access if cfg.Display.RefreshMode == "api" { @@ -326,7 +327,9 @@ func (s *Server) handleIndex(w http.ResponseWriter, r *http.Request) { DisableUptimeTooltip: monCfg.DisableUptimeTooltip, } - if stat, ok := stats[monCfg.Name]; ok { + // Use composite ID (group/name) to look up stats + monitorID := monCfg.ID() + if stat, ok := stats[monitorID]; ok { md.Status = stat.CurrentStatus md.ResponseTime = stat.LastResponseTime md.UptimePercent = stat.UptimePercent @@ -353,13 +356,13 @@ func (s *Server) handleIndex(w http.ResponseWriter, r *http.Request) { // Get aggregated history for display ticks, err := s.storage.GetAggregatedHistory( ctx, - monCfg.Name, + monitorID, s.config.Display.TickCount, s.config.Display.TickMode, s.config.Display.PingFixedSlots, ) if err != nil { - s.logger.Error("failed to get tick history", "monitor", monCfg.Name, "error", err) + s.logger.Error("failed to get tick history", "monitor", monitorID, "error", err) } else { md.Ticks = ticks } @@ -500,13 +503,18 @@ func (s *Server) handleAPIStatus(w http.ResponseWriter, r *http.Request) { // handleAPIMonitor returns JSON status for a specific monitor func (s *Server) handleAPIMonitor(w http.ResponseWriter, r *http.Request) { + group := r.PathValue("group") name := r.PathValue("name") - if name == "" { - s.jsonError(w, "Monitor name required", http.StatusBadRequest) + if group == "" || name == "" { + s.jsonError(w, "Group and monitor name required", http.StatusBadRequest) return } - stats, err := s.storage.GetMonitorStats(r.Context(), name) + // Construct composite ID (path 
values are already URL-decoded by net/http, + // but we need to re-encode to match the internal ID format) + monitorID := url.PathEscape(group) + "/" + url.PathEscape(name) + + stats, err := s.storage.GetMonitorStats(r.Context(), monitorID) if err != nil { s.jsonError(w, "Failed to get monitor stats", http.StatusInternalServerError) return @@ -517,12 +525,16 @@ func (s *Server) handleAPIMonitor(w http.ResponseWriter, r *http.Request) { // handleAPIHistory returns aggregated history for a monitor func (s *Server) handleAPIHistory(w http.ResponseWriter, r *http.Request) { + group := r.PathValue("group") name := r.PathValue("name") - if name == "" { - s.jsonError(w, "Monitor name required", http.StatusBadRequest) + if group == "" || name == "" { + s.jsonError(w, "Group and monitor name required", http.StatusBadRequest) return } + // Construct composite ID (re-encode to match internal format) + monitorID := url.PathEscape(group) + "/" + url.PathEscape(name) + // Allow optional parameters, default to config values mode := s.config.Display.TickMode if modeParam := r.URL.Query().Get("mode"); modeParam != "" { @@ -539,14 +551,14 @@ func (s *Server) handleAPIHistory(w http.ResponseWriter, r *http.Request) { } } - ticks, err := s.storage.GetAggregatedHistory(r.Context(), name, count, mode, s.config.Display.PingFixedSlots) + ticks, err := s.storage.GetAggregatedHistory(r.Context(), monitorID, count, mode, s.config.Display.PingFixedSlots) if err != nil { s.jsonError(w, "Failed to get history", http.StatusInternalServerError) return } s.jsonResponse(w, map[string]interface{}{ - "monitor": name, + "monitor": monitorID, "mode": mode, "count": count, "ticks": ticks, @@ -593,7 +605,9 @@ func (s *Server) handleAPIPage(w http.ResponseWriter, r *http.Request) { // Build monitor data with history for _, group := range s.config.Groups { for _, monCfg := range group.Monitors { - stat, ok := stats[monCfg.Name] + // Use composite ID (group/name) to look up stats + monitorID := monCfg.ID() 
+ stat, ok := stats[monitorID] if !ok { continue } @@ -601,17 +615,17 @@ func (s *Server) handleAPIPage(w http.ResponseWriter, r *http.Request) { // Get history ticks ticks, err := s.storage.GetAggregatedHistory( ctx, - monCfg.Name, + monitorID, s.config.Display.TickCount, s.config.Display.TickMode, s.config.Display.PingFixedSlots, ) if err != nil { - s.logger.Error("failed to get tick history", "monitor", monCfg.Name, "error", err) + s.logger.Error("failed to get tick history", "monitor", monitorID, "error", err) ticks = nil } - response.Monitors[monCfg.Name] = APIMonitorData{ + response.Monitors[monitorID] = APIMonitorData{ Status: stat.CurrentStatus, ResponseTime: stat.LastResponseTime, Uptime: stat.UptimePercent, @@ -654,21 +668,37 @@ func (s *Server) handleAPIHealth(w http.ResponseWriter, r *http.Request) { // handleAPIBadge returns an SVG status badge for a monitor (shields.io style) func (s *Server) handleAPIBadge(w http.ResponseWriter, r *http.Request) { - name := r.PathValue("name") - if name == "" { - http.Error(w, "Monitor name required", http.StatusBadRequest) + path := r.PathValue("path") + if path == "" { + http.Error(w, "Monitor path required (group/name.svg)", http.StatusBadRequest) return } // Strip .svg extension if present - name = strings.TrimSuffix(name, ".svg") + path = strings.TrimSuffix(path, ".svg") + + // The path should be in format "group/name" (URL-encoded components) + // Split into group and name, then re-encode to match internal ID format + idx := strings.Index(path, "/") + var monitorID, displayName string + if idx >= 0 { + group := path[:idx] + name := path[idx+1:] + // Re-encode to ensure consistent internal format + monitorID = url.PathEscape(group) + "/" + url.PathEscape(name) + displayName = name + } else { + // No group, just name + monitorID = url.PathEscape(path) + displayName = path + } // Get monitor stats - stats, err := s.storage.GetMonitorStats(r.Context(), name) + stats, err := s.storage.GetMonitorStats(r.Context(), 
monitorID) if err != nil { - s.logger.Error("failed to get monitor stats for badge", "monitor", name, "error", err) + s.logger.Error("failed to get monitor stats for badge", "monitor", monitorID, "error", err) // Return a gray "unknown" badge on error - s.serveBadge(w, r, name, "unknown", "#9ca3af") + s.serveBadge(w, r, displayName, "unknown", "#9ca3af") return } @@ -692,7 +722,7 @@ func (s *Server) handleAPIBadge(w http.ResponseWriter, r *http.Request) { // Check for custom label label := r.URL.Query().Get("label") if label == "" { - label = name + label = displayName } // Check for style (flat or plastic, default: flat) @@ -809,7 +839,9 @@ func (s *Server) handleAPISummary(w http.ResponseWriter, r *http.Request) { for _, group := range s.config.Groups { for _, monCfg := range group.Monitors { - stat, ok := stats[monCfg.Name] + // Use composite ID (group/name) to look up stats + monitorID := monCfg.ID() + stat, ok := stats[monitorID] if !ok { continue } @@ -851,12 +883,16 @@ type APIUptimeResponse struct { // handleAPIUptime returns historical uptime for a specific period func (s *Server) handleAPIUptime(w http.ResponseWriter, r *http.Request) { + group := r.PathValue("group") name := r.PathValue("name") - if name == "" { - s.jsonError(w, "Monitor name required", http.StatusBadRequest) + if group == "" || name == "" { + s.jsonError(w, "Group and monitor name required", http.StatusBadRequest) return } + // Construct composite ID (re-encode to match internal format) + monitorID := url.PathEscape(group) + "/" + url.PathEscape(name) + // Parse period (default: 24h, options: 1h, 24h, 7d, 30d, 90d) period := r.URL.Query().Get("period") if period == "" { @@ -880,14 +916,14 @@ func (s *Server) handleAPIUptime(w http.ResponseWriter, r *http.Request) { return } - stats, err := s.storage.GetUptimeStats(r.Context(), name, duration) + stats, err := s.storage.GetUptimeStats(r.Context(), monitorID, duration) if err != nil { s.jsonError(w, "Failed to get uptime stats", 
http.StatusInternalServerError) return } s.jsonResponse(w, APIUptimeResponse{ - Monitor: name, + Monitor: monitorID, Period: period, UptimePercent: stats.UptimePercent, TotalChecks: stats.TotalChecks, |