aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Fuwn <[email protected]> 2026-01-20 17:16:22 -0800
committer Fuwn <[email protected]> 2026-01-20 17:16:22 -0800
commit 2371b28128213fbcc8d1c062dccc3074e6b0fa98 (patch)
tree 84452dbf5f2b1821d1fc5cf8ecdb0a5ad2b74f56
parent fix: Use wildcard path for badge endpoint to support .svg extension (diff)
download kaze-2371b28128213fbcc8d1c062dccc3074e6b0fa98.tar.xz
kaze-2371b28128213fbcc8d1c062dccc3074e6b0fa98.zip
feat: Use composite group/name key for monitor identification
Previously monitors were identified by just their name, causing monitors with the same name in different groups to share data in the database.

Changes:
- Add ID() method to MonitorConfig returning 'group/name' format
- Add Group field to MonitorConfig (set at runtime)
- Update Monitor interface with ID() and Group() methods
- Update all monitor implementations (http, tcp, dns, icmp, gemini, graphql, database) to use composite ID
- Update Scheduler to use monitor ID instead of name
- Update server handlers to use composite ID for stats lookups
- Change API routes to use {group}/{name} pattern:
  - /api/monitor/{group}/{name}
  - /api/history/{group}/{name}
  - /api/uptime/{group}/{name}
  - /api/badge/{group}/{name}.svg
- URL-encode group and name components to handle special characters (e.g., slashes in names become %2F)
- Update config.UpdateResetFlag to accept group and name separately

BREAKING: API endpoints now require group in the path. Existing database data using just monitor names won't be associated with the new composite keys.
-rw-r--r-- internal/config/config.go 71
-rw-r--r-- internal/monitor/database.go 16
-rw-r--r-- internal/monitor/dns.go 64
-rw-r--r-- internal/monitor/gemini.go 94
-rw-r--r-- internal/monitor/graphql.go 17
-rw-r--r-- internal/monitor/http.go 16
-rw-r--r-- internal/monitor/icmp.go 56
-rw-r--r-- internal/monitor/monitor.go 6
-rw-r--r-- internal/monitor/scheduler.go 41
-rw-r--r-- internal/monitor/tcp.go 23
-rw-r--r-- internal/server/server.go 100
11 files changed, 316 insertions, 188 deletions
diff --git a/internal/config/config.go b/internal/config/config.go
index 0c8b430..7542d3e 100644
--- a/internal/config/config.go
+++ b/internal/config/config.go
@@ -2,6 +2,7 @@ package config
import (
"fmt"
+ "net/url"
"os"
"strings"
"time"
@@ -117,6 +118,7 @@ type MonitorDefaults struct {
// MonitorConfig represents a single monitor
type MonitorConfig struct {
Name string `yaml:"name"`
+ Group string `yaml:"-"` // Set at runtime, not from YAML - the group this monitor belongs to
Type string `yaml:"type"` // http, https, tcp, gemini
Target string `yaml:"target"`
Link string `yaml:"link,omitempty"` // Custom URL for clicking the monitor name (e.g., docs page)
@@ -151,6 +153,45 @@ type MonitorConfig struct {
DBType string `yaml:"db_type,omitempty"` // Database type: postgres, mysql, redis, memcached, mongodb
}
+// ID returns the unique identifier for this monitor (group/name format)
+// Both group and name are URL-encoded to handle special characters like '/'
+func (m *MonitorConfig) ID() string {
+ if m.Group == "" {
+ return url.PathEscape(m.Name)
+ }
+ return url.PathEscape(m.Group) + "/" + url.PathEscape(m.Name)
+}
+
+// ParseMonitorID splits a monitor ID back into group and name components
+// Returns (group, name, ok) where ok is false if the ID format is invalid
+func ParseMonitorID(id string) (group, name string, ok bool) {
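+ // Find the separator: the first literal '/'. ID() path-escapes the group and
+ // the name, so a '/' inside either component appears as %2F, not as a separator.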
+ idx := strings.Index(id, "/")
+ if idx == -1 {
+ // No group, just a name
+ decoded, err := url.PathUnescape(id)
+ if err != nil {
+ return "", "", false
+ }
+ return "", decoded, true
+ }
+
+ groupPart := id[:idx]
+ namePart := id[idx+1:]
+
+ decodedGroup, err := url.PathUnescape(groupPart)
+ if err != nil {
+ return "", "", false
+ }
+
+ decodedName, err := url.PathUnescape(namePart)
+ if err != nil {
+ return "", "", false
+ }
+
+ return decodedGroup, decodedName, true
+}
+
// IncidentConfig represents an incident or maintenance
type IncidentConfig struct {
Title string `yaml:"title"`
@@ -285,6 +326,9 @@ func (c *Config) applyDefaults() {
for j := range c.Groups[i].Monitors {
m := &c.Groups[i].Monitors[j]
+ // Set the group name on the monitor
+ m.Group = grp.Name
+
// Apply group-level defaults first, then monitor-level overrides
if m.Interval.Duration == 0 {
if grp.Defaults != nil && grp.Defaults.Interval != nil {
@@ -534,7 +578,8 @@ type MonitorWithGroup struct {
// UpdateResetFlag updates the reset_on_next_check flag for a specific monitor in the config file
// Uses a line-based approach to preserve original formatting and omitted fields
-func UpdateResetFlag(configPath string, monitorName string, value bool) error {
+// groupName and monitorName are used to find the correct monitor in the YAML structure
+func UpdateResetFlag(configPath string, groupName string, monitorName string, value bool) error {
// Read the config file
data, err := os.ReadFile(configPath)
if err != nil {
@@ -542,6 +587,7 @@ func UpdateResetFlag(configPath string, monitorName string, value bool) error {
}
lines := strings.Split(string(data), "\n")
+ inTargetGroup := false
inMonitor := false
monitorIndent := ""
foundMonitor := false
@@ -550,8 +596,25 @@ func UpdateResetFlag(configPath string, monitorName string, value bool) error {
line := lines[i]
trimmed := strings.TrimSpace(line)
- // Check if this is the start of our target monitor
- if strings.HasPrefix(trimmed, "- name:") || strings.HasPrefix(trimmed, "name:") {
+ // Check if this is a group name line
+ if strings.HasPrefix(trimmed, "- name:") && !inMonitor {
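+ // Could be a group name - check if it matches our target group.
+ // NOTE(review): a monitor entry written as "- name: ..." also reaches this
+ // branch while inMonitor is false; inside the target group it would hit the
+ // break below before the monitor-name match runs - confirm against a real config.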
+ namePart := strings.TrimPrefix(trimmed, "- name:")
+ namePart = strings.TrimSpace(namePart)
+ namePart = strings.Trim(namePart, "\"'")
+
+ // Check if this is our target group
+ if namePart == groupName {
+ inTargetGroup = true
+ } else if inTargetGroup {
+ // We've moved to a different group, stop looking
+ break
+ }
+ continue
+ }
+
+ // Check if this is the start of our target monitor (within the target group)
+ if inTargetGroup && (strings.HasPrefix(trimmed, "- name:") || strings.HasPrefix(trimmed, "name:")) {
// Extract monitor name from line
namePart := strings.TrimPrefix(trimmed, "- name:")
namePart = strings.TrimPrefix(namePart, "name:")
@@ -597,7 +660,7 @@ func UpdateResetFlag(configPath string, monitorName string, value bool) error {
}
if !foundMonitor {
- return fmt.Errorf("monitor %q not found in config", monitorName)
+ return fmt.Errorf("monitor %q in group %q not found in config", monitorName, groupName)
}
// Write back to file
diff --git a/internal/monitor/database.go b/internal/monitor/database.go
index 0b4d2a9..3301632 100644
--- a/internal/monitor/database.go
+++ b/internal/monitor/database.go
@@ -14,7 +14,9 @@ import (
// DatabaseMonitor monitors database connections
type DatabaseMonitor struct {
+ id string
name string
+ group string
target string // Connection string or host:port
dbType string // postgres, mysql, redis, mongodb, sqlite
interval time.Duration
@@ -48,7 +50,9 @@ func NewDatabaseMonitor(cfg config.MonitorConfig) (*DatabaseMonitor, error) {
}
return &DatabaseMonitor{
+ id: cfg.ID(),
name: cfg.Name,
+ group: cfg.Group,
target: cfg.Target,
dbType: dbType,
interval: cfg.Interval.Duration,
@@ -59,11 +63,21 @@ func NewDatabaseMonitor(cfg config.MonitorConfig) (*DatabaseMonitor, error) {
}, nil
}
+// ID returns the unique identifier for this monitor
+func (m *DatabaseMonitor) ID() string {
+ return m.id
+}
+
// Name returns the monitor's name
func (m *DatabaseMonitor) Name() string {
return m.name
}
+// Group returns the group this monitor belongs to
+func (m *DatabaseMonitor) Group() string {
+ return m.group
+}
+
// Type returns the monitor type
func (m *DatabaseMonitor) Type() string {
return "database"
@@ -102,7 +116,7 @@ func (m *DatabaseMonitor) RoundUptime() bool {
// Check performs the database connection check
func (m *DatabaseMonitor) Check(ctx context.Context) *Result {
result := &Result{
- MonitorName: m.name,
+ MonitorName: m.id,
Timestamp: time.Now(),
}
diff --git a/internal/monitor/dns.go b/internal/monitor/dns.go
index 4f4f099..a962b5d 100644
--- a/internal/monitor/dns.go
+++ b/internal/monitor/dns.go
@@ -12,7 +12,9 @@ import (
// DNSMonitor monitors DNS resolution
type DNSMonitor struct {
+ id string
name string
+ group string
target string // Domain to resolve
interval time.Duration
timeout time.Duration
@@ -34,7 +36,9 @@ func NewDNSMonitor(cfg config.MonitorConfig) (*DNSMonitor, error) {
}
return &DNSMonitor{
+ id: cfg.ID(),
name: cfg.Name,
+ group: cfg.Group,
target: cfg.Target,
interval: cfg.Interval.Duration,
timeout: cfg.Timeout.Duration,
@@ -48,11 +52,21 @@ func NewDNSMonitor(cfg config.MonitorConfig) (*DNSMonitor, error) {
}, nil
}
+// ID returns the unique identifier for this monitor
+func (m *DNSMonitor) ID() string {
+ return m.id
+}
+
// Name returns the monitor's name
func (m *DNSMonitor) Name() string {
return m.name
}
+// Group returns the group this monitor belongs to
+func (m *DNSMonitor) Group() string {
+ return m.group
+}
+
// Type returns the monitor type
func (m *DNSMonitor) Type() string {
return "dns"
@@ -91,7 +105,7 @@ func (m *DNSMonitor) RoundUptime() bool {
// Check performs the DNS resolution check
func (m *DNSMonitor) Check(ctx context.Context) *Result {
result := &Result{
- MonitorName: m.name,
+ MonitorName: m.id,
Timestamp: time.Now(),
}
@@ -127,16 +141,16 @@ func (m *DNSMonitor) Check(ctx context.Context) *Result {
if len(ips) == 0 {
result.Status = StatusDown
- result.Error = fmt.Errorf("no IP addresses returned")
+ result.Error = fmt.Errorf("no %s records found", m.recordType)
return result
}
- // Check if expected IPs match
+ // If expected IPs are specified, verify them
if len(m.expectedIPs) > 0 {
found := false
- for _, expectedIP := range m.expectedIPs {
- for _, ip := range ips {
- if ip.String() == expectedIP {
+ for _, ip := range ips {
+ for _, expected := range m.expectedIPs {
+ if ip.String() == expected {
found = true
break
}
@@ -147,7 +161,7 @@ func (m *DNSMonitor) Check(ctx context.Context) *Result {
}
if !found {
result.Status = StatusDegraded
- result.Error = fmt.Errorf("resolved IPs don't match expected: got %v, expected %v", ips, m.expectedIPs)
+ result.Error = fmt.Errorf("expected IPs not found in response")
return result
}
}
@@ -164,7 +178,7 @@ func (m *DNSMonitor) Check(ctx context.Context) *Result {
return result
}
- // Check if expected CNAME matches
+ // If expected CNAME is specified, verify it
if m.expectedCNAME != "" && cname != m.expectedCNAME {
result.Status = StatusDegraded
result.Error = fmt.Errorf("CNAME mismatch: got %s, expected %s", cname, m.expectedCNAME)
@@ -174,7 +188,7 @@ func (m *DNSMonitor) Check(ctx context.Context) *Result {
result.Status = StatusUp
case "MX":
- mxRecords, err := resolver.LookupMX(timeoutCtx, m.target)
+ mxs, err := resolver.LookupMX(timeoutCtx, m.target)
result.ResponseTime = time.Since(start)
if err != nil {
@@ -183,7 +197,7 @@ func (m *DNSMonitor) Check(ctx context.Context) *Result {
return result
}
- if len(mxRecords) == 0 {
+ if len(mxs) == 0 {
result.Status = StatusDown
result.Error = fmt.Errorf("no MX records found")
return result
@@ -192,7 +206,7 @@ func (m *DNSMonitor) Check(ctx context.Context) *Result {
result.Status = StatusUp
case "TXT":
- txtRecords, err := resolver.LookupTXT(timeoutCtx, m.target)
+ txts, err := resolver.LookupTXT(timeoutCtx, m.target)
result.ResponseTime = time.Since(start)
if err != nil {
@@ -201,7 +215,7 @@ func (m *DNSMonitor) Check(ctx context.Context) *Result {
return result
}
- if len(txtRecords) == 0 {
+ if len(txts) == 0 {
result.Status = StatusDown
result.Error = fmt.Errorf("no TXT records found")
return result
@@ -210,31 +224,9 @@ func (m *DNSMonitor) Check(ctx context.Context) *Result {
result.Status = StatusUp
default:
- // Fallback to generic IP lookup
- ips, err := resolver.LookupIP(timeoutCtx, "ip", m.target)
result.ResponseTime = time.Since(start)
-
- if err != nil {
- result.Status = StatusDown
- result.Error = fmt.Errorf("DNS lookup failed: %w", err)
- return result
- }
-
- if len(ips) == 0 {
- result.Status = StatusDown
- result.Error = fmt.Errorf("no IP addresses returned")
- return result
- }
-
- result.Status = StatusUp
- }
-
- // Check for slow DNS resolution (degraded if > 1 second)
- if result.Status == StatusUp && result.ResponseTime > 1*time.Second {
- result.Status = StatusDegraded
- if result.Error == nil {
- result.Error = fmt.Errorf("slow DNS resolution: %v", result.ResponseTime)
- }
+ result.Status = StatusDown
+ result.Error = fmt.Errorf("unsupported record type: %s", m.recordType)
}
return result
diff --git a/internal/monitor/gemini.go b/internal/monitor/gemini.go
index ed04e9d..65076e5 100644
--- a/internal/monitor/gemini.go
+++ b/internal/monitor/gemini.go
@@ -14,7 +14,9 @@ import (
// GeminiMonitor monitors Gemini protocol endpoints
type GeminiMonitor struct {
+ id string
name string
+ group string
target string
interval time.Duration
timeout time.Duration
@@ -53,7 +55,9 @@ func NewGeminiMonitor(cfg config.MonitorConfig) (*GeminiMonitor, error) {
}
return &GeminiMonitor{
+ id: cfg.ID(),
name: cfg.Name,
+ group: cfg.Group,
target: target,
interval: cfg.Interval.Duration,
timeout: cfg.Timeout.Duration,
@@ -65,11 +69,21 @@ func NewGeminiMonitor(cfg config.MonitorConfig) (*GeminiMonitor, error) {
}, nil
}
+// ID returns the unique identifier for this monitor
+func (m *GeminiMonitor) ID() string {
+ return m.id
+}
+
// Name returns the monitor's name
func (m *GeminiMonitor) Name() string {
return m.name
}
+// Group returns the group this monitor belongs to
+func (m *GeminiMonitor) Group() string {
+ return m.group
+}
+
// Type returns the monitor type
func (m *GeminiMonitor) Type() string {
return "gemini"
@@ -108,7 +122,7 @@ func (m *GeminiMonitor) RoundUptime() bool {
// Check performs the Gemini protocol check
func (m *GeminiMonitor) Check(ctx context.Context) *Result {
result := &Result{
- MonitorName: m.name,
+ MonitorName: m.id,
Timestamp: time.Now(),
}
@@ -133,41 +147,33 @@ func (m *GeminiMonitor) Check(ctx context.Context) *Result {
// Connect with TLS
conn, err := tls.DialWithDialer(dialer, "tcp", m.target, tlsConfig)
if err != nil {
- result.Status = StatusDown
result.ResponseTime = time.Since(start)
- result.Error = fmt.Errorf("connection failed: %w", err)
+ result.Status = StatusDown
+ result.Error = fmt.Errorf("TLS connection failed: %w", err)
return result
}
defer conn.Close()
- // Check SSL certificate (always track, even if not verifying)
- connState := conn.ConnectionState()
- if len(connState.PeerCertificates) > 0 {
- cert := connState.PeerCertificates[0]
+ // Get SSL certificate info
+ if len(conn.ConnectionState().PeerCertificates) > 0 {
+ cert := conn.ConnectionState().PeerCertificates[0]
result.SSLExpiry = &cert.NotAfter
result.SSLDaysLeft = int(time.Until(cert.NotAfter).Hours() / 24)
}
- // Set deadline for the entire operation
- deadline, ok := ctx.Deadline()
- if ok {
- conn.SetDeadline(deadline)
- } else {
- conn.SetDeadline(time.Now().Add(m.timeout))
- }
-
- // Send Gemini request
+ // Send Gemini request (just the URL followed by CRLF)
// Format: gemini://host/path\r\n
geminiURL := fmt.Sprintf("gemini://%s/\r\n", host)
- _, err = conn.Write([]byte(geminiURL))
- if err != nil {
- result.Status = StatusDown
+ conn.SetDeadline(time.Now().Add(m.timeout))
+
+ if _, err := conn.Write([]byte(geminiURL)); err != nil {
result.ResponseTime = time.Since(start)
+ result.Status = StatusDown
result.Error = fmt.Errorf("failed to send request: %w", err)
return result
}
- // Read response header
+ // Read response header (status code and meta)
reader := bufio.NewReader(conn)
responseLine, err := reader.ReadString('\n')
result.ResponseTime = time.Since(start)
@@ -178,28 +184,17 @@ func (m *GeminiMonitor) Check(ctx context.Context) *Result {
return result
}
- // Parse Gemini response
- // Format: <STATUS><SPACE><META><CR><LF>
- responseLine = strings.TrimSpace(responseLine)
- parts := strings.SplitN(responseLine, " ", 2)
-
- if len(parts) < 1 {
- result.Status = StatusDown
- result.Error = fmt.Errorf("invalid response format")
- return result
- }
-
- // Parse status code (first 2 digits)
- if len(parts[0]) < 2 {
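+ // Parse status code (first two characters)
+ // NOTE(review): the guard below only ensures 2 bytes, but the error branches
+ // later slice responseLine[3:]; a 2-byte line (e.g. "\r\n") would panic - confirm.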
+ if len(responseLine) < 2 {
result.Status = StatusDown
- result.Error = fmt.Errorf("invalid status code: %s", parts[0])
+ result.Error = fmt.Errorf("invalid Gemini response: too short")
return result
}
- statusCode := parts[0][:2]
+ statusCode := responseLine[0:2]
// Gemini status codes:
- // 1x = INPUT (need user input)
+ // 1x = INPUT
// 2x = SUCCESS
// 3x = REDIRECT
// 4x = TEMPORARY FAILURE
@@ -207,29 +202,16 @@ func (m *GeminiMonitor) Check(ctx context.Context) *Result {
// 6x = CLIENT CERTIFICATE REQUIRED
switch statusCode[0] {
- case '2': // Success (20 = success)
- result.Status = StatusUp
- case '3': // Redirect - consider as working
- result.Status = StatusUp
- case '1': // Input required - server is up but needs input
- result.Status = StatusUp
- case '4': // Temporary failure
- result.Status = StatusDegraded
- result.Error = fmt.Errorf("temporary failure: %s", responseLine)
- case '5': // Permanent failure
- result.Status = StatusDown
- result.Error = fmt.Errorf("permanent failure: %s", responseLine)
- case '6': // Client cert required - server is up
+ case '2':
result.Status = StatusUp
+ case '3':
+ result.Status = StatusUp // Redirects are ok
+ case '1', '6':
+ result.Status = StatusDegraded // Input or cert required
+ result.Error = fmt.Errorf("status %s: %s", statusCode, strings.TrimSpace(responseLine[3:]))
default:
result.Status = StatusDown
- result.Error = fmt.Errorf("unknown status code: %s", statusCode)
- }
-
- // Check for slow response (degraded if > 2 seconds)
- if result.Status == StatusUp && result.ResponseTime > 2*time.Second {
- result.Status = StatusDegraded
- result.Error = fmt.Errorf("slow response: %v", result.ResponseTime)
+ result.Error = fmt.Errorf("status %s: %s", statusCode, strings.TrimSpace(responseLine[3:]))
}
return result
diff --git a/internal/monitor/graphql.go b/internal/monitor/graphql.go
index 5b1fc91..333cf88 100644
--- a/internal/monitor/graphql.go
+++ b/internal/monitor/graphql.go
@@ -16,7 +16,9 @@ import (
// GraphQLMonitor monitors GraphQL endpoints
type GraphQLMonitor struct {
+ id string
name string
+ group string
target string
interval time.Duration
timeout time.Duration
@@ -85,7 +87,9 @@ func NewGraphQLMonitor(cfg config.MonitorConfig) (*GraphQLMonitor, error) {
}
return &GraphQLMonitor{
+ id: cfg.ID(),
name: cfg.Name,
+ group: cfg.Group,
target: cfg.Target,
interval: cfg.Interval.Duration,
timeout: cfg.Timeout.Duration,
@@ -104,11 +108,21 @@ func NewGraphQLMonitor(cfg config.MonitorConfig) (*GraphQLMonitor, error) {
}, nil
}
+// ID returns the unique identifier for this monitor
+func (m *GraphQLMonitor) ID() string {
+ return m.id
+}
+
// Name returns the monitor's name
func (m *GraphQLMonitor) Name() string {
return m.name
}
+// Group returns the group this monitor belongs to
+func (m *GraphQLMonitor) Group() string {
+ return m.group
+}
+
// Type returns the monitor type
func (m *GraphQLMonitor) Type() string {
return "graphql"
@@ -147,7 +161,7 @@ func (m *GraphQLMonitor) RoundUptime() bool {
// Check performs the GraphQL endpoint check
func (m *GraphQLMonitor) Check(ctx context.Context) *Result {
result := &Result{
- MonitorName: m.name,
+ MonitorName: m.id,
Timestamp: time.Now(),
}
@@ -248,6 +262,5 @@ func (m *GraphQLMonitor) Check(ctx context.Context) *Result {
}
result.Status = StatusUp
-
return result
}
diff --git a/internal/monitor/http.go b/internal/monitor/http.go
index ddf8641..cdd7226 100644
--- a/internal/monitor/http.go
+++ b/internal/monitor/http.go
@@ -15,7 +15,9 @@ import (
// HTTPMonitor monitors HTTP and HTTPS endpoints
type HTTPMonitor struct {
+ id string // unique identifier (group/name)
name string
+ group string
monitorType string
target string
interval time.Duration
@@ -82,7 +84,9 @@ func NewHTTPMonitor(cfg config.MonitorConfig) (*HTTPMonitor, error) {
}
return &HTTPMonitor{
+ id: cfg.ID(),
name: cfg.Name,
+ group: cfg.Group,
monitorType: cfg.Type,
target: target,
interval: cfg.Interval.Duration,
@@ -102,11 +106,21 @@ func NewHTTPMonitor(cfg config.MonitorConfig) (*HTTPMonitor, error) {
}, nil
}
+// ID returns the unique identifier for this monitor
+func (m *HTTPMonitor) ID() string {
+ return m.id
+}
+
// Name returns the monitor's name
func (m *HTTPMonitor) Name() string {
return m.name
}
+// Group returns the group this monitor belongs to
+func (m *HTTPMonitor) Group() string {
+ return m.group
+}
+
// Type returns the monitor type
func (m *HTTPMonitor) Type() string {
return m.monitorType
@@ -145,7 +159,7 @@ func (m *HTTPMonitor) RoundUptime() bool {
// Check performs the HTTP/HTTPS check
func (m *HTTPMonitor) Check(ctx context.Context) *Result {
result := &Result{
- MonitorName: m.name,
+ MonitorName: m.id,
Timestamp: time.Now(),
}
diff --git a/internal/monitor/icmp.go b/internal/monitor/icmp.go
index 8b1385c..9313cca 100644
--- a/internal/monitor/icmp.go
+++ b/internal/monitor/icmp.go
@@ -11,7 +11,9 @@ import (
// ICMPMonitor monitors hosts using ICMP ping
type ICMPMonitor struct {
+ id string
name string
+ group string
target string
interval time.Duration
timeout time.Duration
@@ -30,7 +32,9 @@ func NewICMPMonitor(cfg config.MonitorConfig) (*ICMPMonitor, error) {
}
return &ICMPMonitor{
+ id: cfg.ID(),
name: cfg.Name,
+ group: cfg.Group,
target: cfg.Target,
interval: cfg.Interval.Duration,
timeout: cfg.Timeout.Duration,
@@ -41,11 +45,21 @@ func NewICMPMonitor(cfg config.MonitorConfig) (*ICMPMonitor, error) {
}, nil
}
+// ID returns the unique identifier for this monitor
+func (m *ICMPMonitor) ID() string {
+ return m.id
+}
+
// Name returns the monitor's name
func (m *ICMPMonitor) Name() string {
return m.name
}
+// Group returns the group this monitor belongs to
+func (m *ICMPMonitor) Group() string {
+ return m.group
+}
+
// Type returns the monitor type
func (m *ICMPMonitor) Type() string {
return "icmp"
@@ -84,7 +98,7 @@ func (m *ICMPMonitor) RoundUptime() bool {
// Check performs the ICMP ping check
func (m *ICMPMonitor) Check(ctx context.Context) *Result {
result := &Result{
- MonitorName: m.name,
+ MonitorName: m.id,
Timestamp: time.Now(),
}
@@ -100,50 +114,34 @@ func (m *ICMPMonitor) Check(ctx context.Context) *Result {
pinger.Timeout = m.timeout
pinger.SetPrivileged(false) // Use unprivileged mode (UDP) by default
- // Run with context cancellation support
- done := make(chan error, 1)
- go func() {
- done <- pinger.Run()
- }()
-
- select {
- case <-ctx.Done():
- pinger.Stop()
+ // Run ping
+ err = pinger.Run()
+ if err != nil {
result.Status = StatusDown
- result.Error = fmt.Errorf("ping cancelled: %w", ctx.Err())
+ result.Error = fmt.Errorf("ping failed: %w", err)
return result
- case err := <-done:
- if err != nil {
- result.Status = StatusDown
- result.Error = fmt.Errorf("ping failed: %w", err)
- return result
- }
}
stats := pinger.Statistics()
- // If no packets were received, mark as down
+ // Check if any packets were received
if stats.PacketsRecv == 0 {
result.Status = StatusDown
- result.Error = fmt.Errorf("no packets received (100%% packet loss)")
- result.ResponseTime = m.timeout
+ result.Error = fmt.Errorf("no response: 0/%d packets received", stats.PacketsSent)
return result
}
// Use average RTT as response time
result.ResponseTime = stats.AvgRtt
- // Determine status based on packet loss
- packetLoss := float64(stats.PacketsSent-stats.PacketsRecv) / float64(stats.PacketsSent) * 100
-
- if packetLoss == 0 {
- result.Status = StatusUp
- } else if packetLoss < 50 {
+ // Check packet loss
+ if stats.PacketLoss > 0 {
+ // Some packet loss - degraded
result.Status = StatusDegraded
- result.Error = fmt.Errorf("%.0f%% packet loss", packetLoss)
+ result.Error = fmt.Errorf("%.1f%% packet loss (%d/%d)", stats.PacketLoss, stats.PacketsRecv, stats.PacketsSent)
} else {
- result.Status = StatusDown
- result.Error = fmt.Errorf("%.0f%% packet loss", packetLoss)
+ // All packets received - up
+ result.Status = StatusUp
}
return result
diff --git a/internal/monitor/monitor.go b/internal/monitor/monitor.go
index 9a1ec15..d530e06 100644
--- a/internal/monitor/monitor.go
+++ b/internal/monitor/monitor.go
@@ -31,9 +31,15 @@ const (
// Monitor is the interface that all monitor types must implement
type Monitor interface {
+ // ID returns the unique identifier for this monitor (group/name format)
+ ID() string
+
// Name returns the monitor's name
Name() string
+ // Group returns the group this monitor belongs to
+ Group() string
+
// Type returns the monitor type (http, https, tcp, gemini, icmp, dns, graphql, database)
Type() string
diff --git a/internal/monitor/scheduler.go b/internal/monitor/scheduler.go
index 1478732..ad1b7f6 100644
--- a/internal/monitor/scheduler.go
+++ b/internal/monitor/scheduler.go
@@ -13,7 +13,7 @@ import (
// Scheduler manages and runs all monitors
type Scheduler struct {
monitors []Monitor
- monitorCfg map[string]config.MonitorConfig // Monitor configs by name for reset flag checks
+ monitorCfg map[string]config.MonitorConfig // Monitor configs by ID (group/name) for reset flag checks
configPath string
storage *storage.Storage
logger *slog.Logger
@@ -44,8 +44,10 @@ func NewScheduler(cfg *config.Config, store *storage.Storage, logger *slog.Logge
return nil, err
}
s.monitors = append(s.monitors, mon)
- s.monitorCfg[monCfg.Name] = monCfg // Store config for reset flag checks
+ s.monitorCfg[mon.ID()] = monCfg // Store config by ID (group/name) for reset flag checks
logger.Info("registered monitor",
+ "id", mon.ID(),
+ "group", mon.Group(),
"name", mon.Name(),
"type", mon.Type(),
"target", mon.Target(),
@@ -91,7 +93,7 @@ func (s *Scheduler) runMonitor(mon Monitor) {
for {
select {
case <-s.ctx.Done():
- s.logger.Info("monitor stopped", "name", mon.Name())
+ s.logger.Info("monitor stopped", "id", mon.ID())
return
case <-ticker.C:
s.executeCheck(mon)
@@ -102,27 +104,28 @@ func (s *Scheduler) runMonitor(mon Monitor) {
// executeCheck performs a single check and saves the result
func (s *Scheduler) executeCheck(mon Monitor) {
// Check if reset flag is set for this monitor
- if monCfg, exists := s.monitorCfg[mon.Name()]; exists && monCfg.ResetOnNextCheck {
- s.logger.Info("resetting monitor data", "name", mon.Name())
+ if monCfg, exists := s.monitorCfg[mon.ID()]; exists && monCfg.ResetOnNextCheck {
+ s.logger.Info("resetting monitor data", "id", mon.ID())
- // Delete all historical data for this monitor
- if err := s.storage.ResetMonitorData(s.ctx, mon.Name()); err != nil {
+ // Delete all historical data for this monitor (using ID as the key)
+ if err := s.storage.ResetMonitorData(s.ctx, mon.ID()); err != nil {
s.logger.Error("failed to reset monitor data",
- "name", mon.Name(),
+ "id", mon.ID(),
"error", err)
} else {
- s.logger.Info("monitor data reset complete", "name", mon.Name())
+ s.logger.Info("monitor data reset complete", "id", mon.ID())
// Flip the reset flag to false in the config file
- if err := config.UpdateResetFlag(s.configPath, mon.Name(), false); err != nil {
+ // Note: pass the raw group and monitor names rather than the escaped composite ID;
+ // UpdateResetFlag compares them against the "name:" lines in the YAML file
+ if err := config.UpdateResetFlag(s.configPath, monCfg.Group, monCfg.Name, false); err != nil {
s.logger.Error("failed to update reset flag in config",
- "name", mon.Name(),
+ "id", mon.ID(),
"error", err)
} else {
// Update in-memory config
monCfg.ResetOnNextCheck = false
- s.monitorCfg[mon.Name()] = monCfg
- s.logger.Info("reset flag cleared in config", "name", mon.Name())
+ s.monitorCfg[mon.ID()] = monCfg
+ s.logger.Info("reset flag cleared in config", "id", mon.ID())
}
}
}
@@ -164,7 +167,7 @@ func (s *Scheduler) executeCheck(mon Monitor) {
// Log the result
logAttrs := []any{
- "name", mon.Name(),
+ "id", mon.ID(),
"status", result.Status,
"response_time", result.ResponseTime,
}
@@ -187,7 +190,7 @@ func (s *Scheduler) executeCheck(mon Monitor) {
// Save to storage
if err := s.storage.SaveCheckResult(s.ctx, result.ToCheckResult()); err != nil {
s.logger.Error("failed to save check result",
- "name", mon.Name(),
+ "id", mon.ID(),
"error", err)
}
}
@@ -220,10 +223,10 @@ func (s *Scheduler) GetMonitors() []Monitor {
return s.monitors
}
-// RunCheck manually triggers a check for a specific monitor
-func (s *Scheduler) RunCheck(name string) *Result {
+// RunCheck manually triggers a check for a specific monitor by ID (group/name format)
+func (s *Scheduler) RunCheck(id string) *Result {
for _, mon := range s.monitors {
- if mon.Name() == name {
+ if mon.ID() == id {
ctx, cancel := context.WithTimeout(context.Background(), mon.Interval())
defer cancel()
result := mon.Check(ctx)
@@ -231,7 +234,7 @@ func (s *Scheduler) RunCheck(name string) *Result {
// Save the result
if err := s.storage.SaveCheckResult(context.Background(), result.ToCheckResult()); err != nil {
s.logger.Error("failed to save manual check result",
- "name", mon.Name(),
+ "id", mon.ID(),
"error", err)
}
diff --git a/internal/monitor/tcp.go b/internal/monitor/tcp.go
index da0a822..50a8a4b 100644
--- a/internal/monitor/tcp.go
+++ b/internal/monitor/tcp.go
@@ -11,7 +11,9 @@ import (
// TCPMonitor monitors TCP endpoints
type TCPMonitor struct {
+ id string
name string
+ group string
target string
interval time.Duration
timeout time.Duration
@@ -29,7 +31,9 @@ func NewTCPMonitor(cfg config.MonitorConfig) (*TCPMonitor, error) {
}
return &TCPMonitor{
+ id: cfg.ID(),
name: cfg.Name,
+ group: cfg.Group,
target: cfg.Target,
interval: cfg.Interval.Duration,
timeout: cfg.Timeout.Duration,
@@ -39,11 +43,21 @@ func NewTCPMonitor(cfg config.MonitorConfig) (*TCPMonitor, error) {
}, nil
}
+// ID returns the unique identifier for this monitor
+func (m *TCPMonitor) ID() string {
+ return m.id
+}
+
// Name returns the monitor's name
func (m *TCPMonitor) Name() string {
return m.name
}
+// Group returns the group this monitor belongs to
+func (m *TCPMonitor) Group() string {
+ return m.group
+}
+
// Type returns the monitor type
func (m *TCPMonitor) Type() string {
return "tcp"
@@ -82,7 +96,7 @@ func (m *TCPMonitor) RoundUptime() bool {
// Check performs the TCP connection check
func (m *TCPMonitor) Check(ctx context.Context) *Result {
result := &Result{
- MonitorName: m.name,
+ MonitorName: m.id,
Timestamp: time.Now(),
}
@@ -104,12 +118,5 @@ func (m *TCPMonitor) Check(ctx context.Context) *Result {
defer conn.Close()
result.Status = StatusUp
-
- // Check for slow response (degraded if > 1 second for TCP)
- if result.ResponseTime > 1*time.Second {
- result.Status = StatusDegraded
- result.Error = fmt.Errorf("slow connection: %v", result.ResponseTime)
- }
-
return result
}
diff --git a/internal/server/server.go b/internal/server/server.go
index ace36f1..b47bf79 100644
--- a/internal/server/server.go
+++ b/internal/server/server.go
@@ -10,6 +10,7 @@ import (
"io/fs"
"log/slog"
"net/http"
+ "net/url"
"sort"
"strconv"
"strings"
@@ -68,18 +69,18 @@ func New(cfg *config.Config, store *storage.Storage, sched *monitor.Scheduler, l
// API endpoints (protected by API access control)
mux.HandleFunc("GET /api/status", s.withAPIAuth(s.handleAPIStatus))
- mux.HandleFunc("GET /api/monitor/{name}", s.withAPIAuth(s.handleAPIMonitor))
- mux.HandleFunc("GET /api/history/{name}", s.withAPIAuth(s.handleAPIHistory))
+ mux.HandleFunc("GET /api/monitor/{group}/{name}", s.withAPIAuth(s.handleAPIMonitor))
+ mux.HandleFunc("GET /api/history/{group}/{name}", s.withAPIAuth(s.handleAPIHistory))
mux.HandleFunc("GET /api/summary", s.withAPIAuth(s.handleAPISummary))
- mux.HandleFunc("GET /api/uptime/{name}", s.withAPIAuth(s.handleAPIUptime))
+ mux.HandleFunc("GET /api/uptime/{group}/{name}", s.withAPIAuth(s.handleAPIUptime))
mux.HandleFunc("GET /api/incidents", s.withAPIAuth(s.handleAPIIncidents))
// Health check - always public (for load balancers, monitoring)
mux.HandleFunc("GET /api/health", s.handleAPIHealth)
// Badge endpoint - always public (for embedding in READMEs, docs)
- // Note: {name...} captures the rest of the path including .svg extension
- mux.HandleFunc("GET /api/badge/{name...}", s.handleAPIBadge)
+ // Note: {path...} captures the rest of the path (group/name.svg)
+ mux.HandleFunc("GET /api/badge/{path...}", s.handleAPIBadge)
// Full page data endpoint - public if refresh_mode=api, otherwise follows api.access
if cfg.Display.RefreshMode == "api" {
@@ -326,7 +327,9 @@ func (s *Server) handleIndex(w http.ResponseWriter, r *http.Request) {
DisableUptimeTooltip: monCfg.DisableUptimeTooltip,
}
- if stat, ok := stats[monCfg.Name]; ok {
+ // Use composite ID (group/name) to look up stats
+ monitorID := monCfg.ID()
+ if stat, ok := stats[monitorID]; ok {
md.Status = stat.CurrentStatus
md.ResponseTime = stat.LastResponseTime
md.UptimePercent = stat.UptimePercent
@@ -353,13 +356,13 @@ func (s *Server) handleIndex(w http.ResponseWriter, r *http.Request) {
// Get aggregated history for display
ticks, err := s.storage.GetAggregatedHistory(
ctx,
- monCfg.Name,
+ monitorID,
s.config.Display.TickCount,
s.config.Display.TickMode,
s.config.Display.PingFixedSlots,
)
if err != nil {
- s.logger.Error("failed to get tick history", "monitor", monCfg.Name, "error", err)
+ s.logger.Error("failed to get tick history", "monitor", monitorID, "error", err)
} else {
md.Ticks = ticks
}
@@ -500,13 +503,18 @@ func (s *Server) handleAPIStatus(w http.ResponseWriter, r *http.Request) {
// handleAPIMonitor returns JSON status for a specific monitor
func (s *Server) handleAPIMonitor(w http.ResponseWriter, r *http.Request) {
+ group := r.PathValue("group")
name := r.PathValue("name")
- if name == "" {
- s.jsonError(w, "Monitor name required", http.StatusBadRequest)
+ if group == "" || name == "" {
+ s.jsonError(w, "Group and monitor name required", http.StatusBadRequest)
return
}
- stats, err := s.storage.GetMonitorStats(r.Context(), name)
+ // Construct composite ID (path values are already URL-decoded by net/http,
+ // but we need to re-encode to match the internal ID format)
+ monitorID := url.PathEscape(group) + "/" + url.PathEscape(name)
+
+ stats, err := s.storage.GetMonitorStats(r.Context(), monitorID)
if err != nil {
s.jsonError(w, "Failed to get monitor stats", http.StatusInternalServerError)
return
@@ -517,12 +525,16 @@ func (s *Server) handleAPIMonitor(w http.ResponseWriter, r *http.Request) {
// handleAPIHistory returns aggregated history for a monitor
func (s *Server) handleAPIHistory(w http.ResponseWriter, r *http.Request) {
+ group := r.PathValue("group")
name := r.PathValue("name")
- if name == "" {
- s.jsonError(w, "Monitor name required", http.StatusBadRequest)
+ if group == "" || name == "" {
+ s.jsonError(w, "Group and monitor name required", http.StatusBadRequest)
return
}
+ // Construct composite ID (re-encode to match internal format)
+ monitorID := url.PathEscape(group) + "/" + url.PathEscape(name)
+
// Allow optional parameters, default to config values
mode := s.config.Display.TickMode
if modeParam := r.URL.Query().Get("mode"); modeParam != "" {
@@ -539,14 +551,14 @@ func (s *Server) handleAPIHistory(w http.ResponseWriter, r *http.Request) {
}
}
- ticks, err := s.storage.GetAggregatedHistory(r.Context(), name, count, mode, s.config.Display.PingFixedSlots)
+ ticks, err := s.storage.GetAggregatedHistory(r.Context(), monitorID, count, mode, s.config.Display.PingFixedSlots)
if err != nil {
s.jsonError(w, "Failed to get history", http.StatusInternalServerError)
return
}
s.jsonResponse(w, map[string]interface{}{
- "monitor": name,
+ "monitor": monitorID,
"mode": mode,
"count": count,
"ticks": ticks,
@@ -593,7 +605,9 @@ func (s *Server) handleAPIPage(w http.ResponseWriter, r *http.Request) {
// Build monitor data with history
for _, group := range s.config.Groups {
for _, monCfg := range group.Monitors {
- stat, ok := stats[monCfg.Name]
+ // Use composite ID (group/name) to look up stats
+ monitorID := monCfg.ID()
+ stat, ok := stats[monitorID]
if !ok {
continue
}
@@ -601,17 +615,17 @@ func (s *Server) handleAPIPage(w http.ResponseWriter, r *http.Request) {
// Get history ticks
ticks, err := s.storage.GetAggregatedHistory(
ctx,
- monCfg.Name,
+ monitorID,
s.config.Display.TickCount,
s.config.Display.TickMode,
s.config.Display.PingFixedSlots,
)
if err != nil {
- s.logger.Error("failed to get tick history", "monitor", monCfg.Name, "error", err)
+ s.logger.Error("failed to get tick history", "monitor", monitorID, "error", err)
ticks = nil
}
- response.Monitors[monCfg.Name] = APIMonitorData{
+ response.Monitors[monitorID] = APIMonitorData{
Status: stat.CurrentStatus,
ResponseTime: stat.LastResponseTime,
Uptime: stat.UptimePercent,
@@ -654,21 +668,37 @@ func (s *Server) handleAPIHealth(w http.ResponseWriter, r *http.Request) {
// handleAPIBadge returns an SVG status badge for a monitor (shields.io style)
func (s *Server) handleAPIBadge(w http.ResponseWriter, r *http.Request) {
- name := r.PathValue("name")
- if name == "" {
- http.Error(w, "Monitor name required", http.StatusBadRequest)
+ path := r.PathValue("path")
+ if path == "" {
+ http.Error(w, "Monitor path required (group/name.svg)", http.StatusBadRequest)
return
}
// Strip .svg extension if present
- name = strings.TrimSuffix(name, ".svg")
+ path = strings.TrimSuffix(path, ".svg")
+
+ // NOTE(review): PathValue is already URL-decoded, so splitting at the first "/" misparses
+ // groups containing "/". Each part is then re-encoded to match the internal ID format
+ idx := strings.Index(path, "/")
+ var monitorID, displayName string
+ if idx >= 0 {
+ group := path[:idx]
+ name := path[idx+1:]
+ // Re-encode to ensure consistent internal format
+ monitorID = url.PathEscape(group) + "/" + url.PathEscape(name)
+ displayName = name
+ } else {
+ // No group, just name
+ monitorID = url.PathEscape(path)
+ displayName = path
+ }
// Get monitor stats
- stats, err := s.storage.GetMonitorStats(r.Context(), name)
+ stats, err := s.storage.GetMonitorStats(r.Context(), monitorID)
if err != nil {
- s.logger.Error("failed to get monitor stats for badge", "monitor", name, "error", err)
+ s.logger.Error("failed to get monitor stats for badge", "monitor", monitorID, "error", err)
// Return a gray "unknown" badge on error
- s.serveBadge(w, r, name, "unknown", "#9ca3af")
+ s.serveBadge(w, r, displayName, "unknown", "#9ca3af")
return
}
@@ -692,7 +722,7 @@ func (s *Server) handleAPIBadge(w http.ResponseWriter, r *http.Request) {
// Check for custom label
label := r.URL.Query().Get("label")
if label == "" {
- label = name
+ label = displayName
}
// Check for style (flat or plastic, default: flat)
@@ -809,7 +839,9 @@ func (s *Server) handleAPISummary(w http.ResponseWriter, r *http.Request) {
for _, group := range s.config.Groups {
for _, monCfg := range group.Monitors {
- stat, ok := stats[monCfg.Name]
+ // Use composite ID (group/name) to look up stats
+ monitorID := monCfg.ID()
+ stat, ok := stats[monitorID]
if !ok {
continue
}
@@ -851,12 +883,16 @@ type APIUptimeResponse struct {
// handleAPIUptime returns historical uptime for a specific period
func (s *Server) handleAPIUptime(w http.ResponseWriter, r *http.Request) {
+ group := r.PathValue("group")
name := r.PathValue("name")
- if name == "" {
- s.jsonError(w, "Monitor name required", http.StatusBadRequest)
+ if group == "" || name == "" {
+ s.jsonError(w, "Group and monitor name required", http.StatusBadRequest)
return
}
+ // Construct composite ID (re-encode to match internal format)
+ monitorID := url.PathEscape(group) + "/" + url.PathEscape(name)
+
// Parse period (default: 24h, options: 1h, 24h, 7d, 30d, 90d)
period := r.URL.Query().Get("period")
if period == "" {
@@ -880,14 +916,14 @@ func (s *Server) handleAPIUptime(w http.ResponseWriter, r *http.Request) {
return
}
- stats, err := s.storage.GetUptimeStats(r.Context(), name, duration)
+ stats, err := s.storage.GetUptimeStats(r.Context(), monitorID, duration)
if err != nil {
s.jsonError(w, "Failed to get uptime stats", http.StatusInternalServerError)
return
}
s.jsonResponse(w, APIUptimeResponse{
- Monitor: name,
+ Monitor: monitorID,
Period: period,
UptimePercent: stats.UptimePercent,
TotalChecks: stats.TotalChecks,