package storage import ( "context" "database/sql" "fmt" "strings" "time" "github.com/Fuwn/kaze/internal/config" _ "modernc.org/sqlite" ) // Storage handles all database operations type Storage struct { db *sql.DB dbPath string historyDays int maintenance *MaintenanceState } // CheckResult represents a single monitor check result type CheckResult struct { ID int64 MonitorName string Timestamp time.Time Status string // up, down, degraded ResponseTime int64 // milliseconds StatusCode int // HTTP status code (0 for non-HTTP) Error string SSLExpiry *time.Time SSLDaysLeft int } // DailyStatus represents aggregated daily status for a monitor type DailyStatus struct { Date time.Time MonitorName string SuccessCount int FailureCount int TotalChecks int AvgResponse float64 UptimePercent float64 } // MonitorStats represents overall statistics for a monitor type MonitorStats struct { MonitorName string CurrentStatus string LastCheck time.Time LastResponseTime int64 LastError string UptimePercent float64 AvgResponseTime float64 SSLExpiry *time.Time SSLDaysLeft int TotalChecks int64 LastFailure *time.Time // Time of last failure LastFailureError string // Error message from last failure } // TickData represents aggregated data for one tick in the history bar type TickData struct { Timestamp time.Time TotalChecks int SuccessCount int FailureCount int AvgResponse float64 UptimePercent float64 // For ping mode only Status string // up, down, degraded (only for ping mode) ResponseTime int64 // milliseconds (only for ping mode) } // New creates a new storage instance func New(dbPath string, historyDays int) (*Storage, error) { return NewWithMaintenance(dbPath, historyDays, config.MaintenanceConfig{}) } // NewWithMaintenance creates a new storage instance with maintenance configuration func NewWithMaintenance(dbPath string, historyDays int, maintenanceCfg config.MaintenanceConfig) (*Storage, error) { db, err := openDB(dbPath) if err != nil { return nil, err } s := &Storage{ db: db, dbPath: dbPath, historyDays: historyDays, } if maintenanceCfg.Mode != "" && maintenanceCfg.Mode != "never" { m, err := NewMaintenanceState(dbPath, maintenanceCfg) if err != nil { db.Close() return nil, fmt.Errorf("failed to initialize maintenance: %w", err) } s.maintenance = m } if err := s.migrate(); err != nil { db.Close() return nil, fmt.Errorf("failed to run migrations: %w", err) } return s, nil } func openDB(dbPath string) (*sql.DB, error) { dsn := fmt.Sprintf("%s?_txlock=immediate&_busy_timeout=5000&_journal_mode=WAL&_synchronous=NORMAL", dbPath) db, err := sql.Open("sqlite", dsn) if err != nil { return nil, fmt.Errorf("failed to open database: %w", err) } db.SetMaxOpenConns(1) db.SetMaxIdleConns(1) db.SetConnMaxLifetime(0) var journalMode string if err := db.QueryRow("PRAGMA journal_mode").Scan(&journalMode); err != nil { db.Close() return nil, fmt.Errorf("failed to check journal mode: %w", err) } if _, err := db.Exec("PRAGMA foreign_keys=ON"); err != nil { db.Close() return nil, fmt.Errorf("failed to enable foreign keys: %w", err) } return db, nil } // migrate creates the database schema func (s *Storage) migrate() error { schema := ` CREATE TABLE IF NOT EXISTS check_results ( id INTEGER PRIMARY KEY AUTOINCREMENT, monitor_name TEXT NOT NULL, timestamp DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP, status TEXT NOT NULL CHECK(status IN ('up', 'down', 'degraded')), response_time_ms INTEGER NOT NULL DEFAULT 0, status_code INTEGER DEFAULT 0, error_message TEXT, ssl_expiry DATETIME, ssl_days_left INTEGER DEFAULT 0 ); CREATE INDEX IF NOT EXISTS idx_check_results_monitor_time ON check_results(monitor_name, timestamp DESC); CREATE INDEX IF NOT EXISTS idx_check_results_timestamp ON check_results(timestamp); CREATE TABLE IF NOT EXISTS daily_stats ( id INTEGER PRIMARY KEY AUTOINCREMENT, monitor_name TEXT NOT NULL, date DATE NOT NULL, success_count INTEGER NOT NULL DEFAULT 0, failure_count INTEGER NOT NULL DEFAULT 0, total_checks INTEGER NOT NULL DEFAULT 0, avg_response_ms REAL DEFAULT 0, uptime_percent REAL DEFAULT 0, UNIQUE(monitor_name, date) ); CREATE INDEX IF NOT EXISTS idx_daily_stats_monitor_date ON daily_stats(monitor_name, date DESC); CREATE TABLE IF NOT EXISTS monitor_state ( monitor_name TEXT PRIMARY KEY, current_status TEXT NOT NULL DEFAULT 'unknown', last_check DATETIME, last_response_time_ms INTEGER DEFAULT 0, last_error TEXT, ssl_expiry DATETIME, ssl_days_left INTEGER DEFAULT 0 ); ` _, err := s.db.Exec(schema) return err } // SaveCheckResult saves a check result and updates monitor state func (s *Storage) SaveCheckResult(ctx context.Context, result *CheckResult) error { // Retry logic for transient lock issues var lastErr error for attempt := 0; attempt < 3; attempt++ { if attempt > 0 { // Wait before retry with exponential backoff select { case <-ctx.Done(): return ctx.Err() case <-time.After(time.Duration(attempt*100) * time.Millisecond): } } lastErr = s.saveCheckResultOnce(ctx, result) if lastErr == nil { return nil } // Only retry on lock errors if !isLockError(lastErr) { return lastErr } } return fmt.Errorf("failed after 3 attempts: %w", lastErr) } // isLockError checks if the error is a database lock error func isLockError(err error) bool { if err == nil { return false } errStr := err.Error() return strings.Contains(errStr, "database is locked") || strings.Contains(errStr, "SQLITE_BUSY") || strings.Contains(errStr, "database table is locked") } // saveCheckResultOnce performs a single attempt to save the check result func (s *Storage) saveCheckResultOnce(ctx context.Context, result *CheckResult) error { tx, err := s.db.BeginTx(ctx, nil) if err != nil { return fmt.Errorf("failed to begin transaction: %w", err) } defer tx.Rollback() // Insert check result _, err = tx.ExecContext(ctx, ` INSERT INTO check_results ( monitor_name, timestamp, status, response_time_ms, status_code, error_message, ssl_expiry, ssl_days_left ) VALUES (?, ?, ?, ?, ?, ?, ?, ?) `, result.MonitorName, result.Timestamp, result.Status, result.ResponseTime, result.StatusCode, result.Error, result.SSLExpiry, result.SSLDaysLeft) if err != nil { return fmt.Errorf("failed to insert check result: %w", err) } // Update or insert monitor state _, err = tx.ExecContext(ctx, ` INSERT INTO monitor_state ( monitor_name, current_status, last_check, last_response_time_ms, last_error, ssl_expiry, ssl_days_left ) VALUES (?, ?, ?, ?, ?, ?, ?) ON CONFLICT(monitor_name) DO UPDATE SET current_status = excluded.current_status, last_check = excluded.last_check, last_response_time_ms = excluded.last_response_time_ms, last_error = excluded.last_error, ssl_expiry = excluded.ssl_expiry, ssl_days_left = excluded.ssl_days_left `, result.MonitorName, result.Status, result.Timestamp, result.ResponseTime, result.Error, result.SSLExpiry, result.SSLDaysLeft) if err != nil { return fmt.Errorf("failed to update monitor state: %w", err) } // Update daily stats date := result.Timestamp.Format("2006-01-02") successIncr := 0 failureIncr := 0 if result.Status == "up" { successIncr = 1 } else { failureIncr = 1 } _, err = tx.ExecContext(ctx, ` INSERT INTO daily_stats ( monitor_name, date, success_count, failure_count, total_checks, avg_response_ms, uptime_percent ) VALUES (?, ?, ?, ?, 1, ?, ?) ON CONFLICT(monitor_name, date) DO UPDATE SET success_count = daily_stats.success_count + ?, failure_count = daily_stats.failure_count + ?, total_checks = daily_stats.total_checks + 1, avg_response_ms = (daily_stats.avg_response_ms * daily_stats.total_checks + ?) / (daily_stats.total_checks + 1), uptime_percent = CAST((daily_stats.success_count + ?) AS REAL) / (daily_stats.total_checks + 1) * 100 `, result.MonitorName, date, successIncr, failureIncr, float64(result.ResponseTime), float64(successIncr)*100, successIncr, failureIncr, float64(result.ResponseTime), successIncr) if err != nil { return fmt.Errorf("failed to update daily stats: %w", err) } return tx.Commit() } // GetMonitorStats returns statistics for a specific monitor func (s *Storage) GetMonitorStats(ctx context.Context, monitorName string) (*MonitorStats, error) { stats := &MonitorStats{MonitorName: monitorName} // Get current state err := s.db.QueryRowContext(ctx, ` SELECT current_status, last_check, last_response_time_ms, last_error, ssl_expiry, ssl_days_left FROM monitor_state WHERE monitor_name = ? `, monitorName).Scan(&stats.CurrentStatus, &stats.LastCheck, &stats.LastResponseTime, &stats.LastError, &stats.SSLExpiry, &stats.SSLDaysLeft) if err == sql.ErrNoRows { stats.CurrentStatus = "unknown" return stats, nil } if err != nil { return nil, fmt.Errorf("failed to get monitor state: %w", err) } // Get aggregate stats from check results within history window cutoff := time.Now().AddDate(0, 0, -s.historyDays) err = s.db.QueryRowContext(ctx, ` SELECT COUNT(*) as total, AVG(response_time_ms) as avg_response, CAST(SUM(CASE WHEN status = 'up' THEN 1 ELSE 0 END) AS REAL) / COUNT(*) * 100 as uptime FROM check_results WHERE monitor_name = ? AND timestamp >= ? `, monitorName, cutoff).Scan(&stats.TotalChecks, &stats.AvgResponseTime, &stats.UptimePercent) if err != nil && err != sql.ErrNoRows { return nil, fmt.Errorf("failed to get aggregate stats: %w", err) } return stats, nil } // GetAllMonitorStats returns statistics for all monitors func (s *Storage) GetAllMonitorStats(ctx context.Context) (map[string]*MonitorStats, error) { stats := make(map[string]*MonitorStats) cutoff := time.Now().AddDate(0, 0, -s.historyDays) // Get all monitor states rows, err := s.db.QueryContext(ctx, ` SELECT monitor_name, current_status, last_check, last_response_time_ms, last_error, ssl_expiry, ssl_days_left FROM monitor_state `) if err != nil { return nil, fmt.Errorf("failed to query monitor states: %w", err) } defer rows.Close() for rows.Next() { var ms MonitorStats var lastError sql.NullString var sslExpiry sql.NullTime err := rows.Scan(&ms.MonitorName, &ms.CurrentStatus, &ms.LastCheck, &ms.LastResponseTime, &lastError, &sslExpiry, &ms.SSLDaysLeft) if err != nil { return nil, fmt.Errorf("failed to scan monitor state: %w", err) } if lastError.Valid { ms.LastError = lastError.String } if sslExpiry.Valid { ms.SSLExpiry = &sslExpiry.Time } stats[ms.MonitorName] = &ms } // Get aggregate stats for all monitors rows, err = s.db.QueryContext(ctx, ` SELECT monitor_name, COUNT(*) as total, AVG(response_time_ms) as avg_response, CAST(SUM(CASE WHEN status = 'up' THEN 1 ELSE 0 END) AS REAL) / COUNT(*) * 100 as uptime FROM check_results WHERE timestamp >= ? GROUP BY monitor_name `, cutoff) if err != nil { return nil, fmt.Errorf("failed to query aggregate stats: %w", err) } defer rows.Close() for rows.Next() { var name string var total int64 var avgResponse, uptime float64 if err := rows.Scan(&name, &total, &avgResponse, &uptime); err != nil { return nil, fmt.Errorf("failed to scan aggregate stats: %w", err) } if ms, ok := stats[name]; ok { ms.TotalChecks = total ms.AvgResponseTime = avgResponse ms.UptimePercent = uptime } } // Get last failure for each monitor rows, err = s.db.QueryContext(ctx, ` SELECT monitor_name, timestamp, error_message FROM check_results WHERE status != 'up' AND (monitor_name, timestamp) IN ( SELECT monitor_name, MAX(timestamp) FROM check_results WHERE status != 'up' GROUP BY monitor_name ) `) if err != nil { return nil, fmt.Errorf("failed to query last failures: %w", err) } defer rows.Close() for rows.Next() { var name string var timestamp time.Time var errorMsg sql.NullString if err := rows.Scan(&name, ×tamp, &errorMsg); err != nil { return nil, fmt.Errorf("failed to scan last failure: %w", err) } if ms, ok := stats[name]; ok { ms.LastFailure = ×tamp if errorMsg.Valid { ms.LastFailureError = errorMsg.String } } } return stats, nil } // GetDailyStats returns daily statistics for a monitor func (s *Storage) GetDailyStats(ctx context.Context, monitorName string, days int) ([]DailyStatus, error) { cutoff := time.Now().AddDate(0, 0, -days) rows, err := s.db.QueryContext(ctx, ` SELECT date, success_count, failure_count, total_checks, avg_response_ms, uptime_percent FROM daily_stats WHERE monitor_name = ? AND date >= ? ORDER BY date ASC `, monitorName, cutoff.Format("2006-01-02")) if err != nil { return nil, fmt.Errorf("failed to query daily stats: %w", err) } defer rows.Close() var results []DailyStatus for rows.Next() { var ds DailyStatus var dateStr string ds.MonitorName = monitorName if err := rows.Scan(&dateStr, &ds.SuccessCount, &ds.FailureCount, &ds.TotalChecks, &ds.AvgResponse, &ds.UptimePercent); err != nil { return nil, fmt.Errorf("failed to scan daily stats: %w", err) } ds.Date, _ = time.Parse("2006-01-02", dateStr) results = append(results, ds) } return results, nil } // GetUptimeHistory returns uptime data for the history visualization // Returns a slice of daily uptime percentages, one per day for the specified period func (s *Storage) GetUptimeHistory(ctx context.Context, monitorName string, days int) ([]float64, error) { // Create a map of date to uptime uptimeMap := make(map[string]float64) // Use local time for date range calculation (matches how we store dates) cutoffDate := time.Now().AddDate(0, 0, -days).Format("2006-01-02") rows, err := s.db.QueryContext(ctx, ` SELECT date, uptime_percent FROM daily_stats WHERE monitor_name = ? AND date >= ? ORDER BY date ASC `, monitorName, cutoffDate) if err != nil { return nil, fmt.Errorf("failed to query uptime history: %w", err) } defer rows.Close() for rows.Next() { var dateStr string var uptime float64 if err := rows.Scan(&dateStr, &uptime); err != nil { return nil, fmt.Errorf("failed to scan uptime: %w", err) } // Normalize date format (strip any time component if present) if len(dateStr) > 10 { dateStr = dateStr[:10] } uptimeMap[dateStr] = uptime } // Build result array with -1 for days with no data result := make([]float64, days) today := time.Now().Format("2006-01-02") for i := 0; i < days; i++ { date := time.Now().AddDate(0, 0, -days+i+1).Format("2006-01-02") if uptime, ok := uptimeMap[date]; ok { result[i] = uptime } else if date == today { // For today, if we have monitor data but no daily stats yet, // check if there's recent data and show it result[i] = -1 } else { result[i] = -1 // No data for this day } } return result, nil } // UptimeStats contains uptime statistics for a period type UptimeStats struct { UptimePercent float64 TotalChecks int64 SuccessChecks int64 FailedChecks int64 } // GetUptimeStats returns uptime statistics for a monitor over a given duration func (s *Storage) GetUptimeStats(ctx context.Context, monitorName string, duration time.Duration) (*UptimeStats, error) { cutoff := time.Now().Add(-duration) var totalChecks, successChecks int64 err := s.db.QueryRowContext(ctx, ` SELECT COUNT(*) as total, SUM(CASE WHEN status = 'up' THEN 1 ELSE 0 END) as success FROM check_results WHERE monitor_name = ? AND timestamp >= ? `, monitorName, cutoff).Scan(&totalChecks, &successChecks) if err != nil { return nil, fmt.Errorf("failed to query uptime stats: %w", err) } stats := &UptimeStats{ TotalChecks: totalChecks, SuccessChecks: successChecks, FailedChecks: totalChecks - successChecks, } if totalChecks > 0 { stats.UptimePercent = float64(successChecks) / float64(totalChecks) * 100 } return stats, nil } // PingResult represents a single ping for the history display type PingResult struct { Status string // up, down, degraded ResponseTime int64 // milliseconds Timestamp time.Time } // GetRecentPings returns the last N check results for a monitor // Results are ordered from oldest to newest (left to right on display) func (s *Storage) GetRecentPings(ctx context.Context, monitorName string, limit int) ([]PingResult, error) { // Query in descending order then reverse, so we get the most recent N results // but in chronological order for display rows, err := s.db.QueryContext(ctx, ` SELECT status, response_time_ms, timestamp FROM check_results WHERE monitor_name = ? ORDER BY timestamp DESC LIMIT ? `, monitorName, limit) if err != nil { return nil, fmt.Errorf("failed to query recent pings: %w", err) } defer rows.Close() var results []PingResult for rows.Next() { var p PingResult if err := rows.Scan(&p.Status, &p.ResponseTime, &p.Timestamp); err != nil { return nil, fmt.Errorf("failed to scan ping: %w", err) } results = append(results, p) } // Reverse to get chronological order (oldest first) for i, j := 0, len(results)-1; i < j; i, j = i+1, j-1 { results[i], results[j] = results[j], results[i] } return results, nil } // GetAggregatedHistory returns tick data for the history visualization // tickMode: "ping", "minute", "hour", "day" // fixedSlots: when true, returns exactly tickCount elements with nil for missing periods // Returns a slice where nil entries indicate no data for that period func (s *Storage) GetAggregatedHistory(ctx context.Context, monitorName string, tickCount int, tickMode string, fixedSlots bool) ([]*TickData, error) { switch tickMode { case "ping": return s.getAggregatedPings(ctx, monitorName, tickCount, fixedSlots) case "minute": return s.getAggregatedByTime(ctx, monitorName, tickCount, time.Minute, "%Y-%m-%d %H:%M") case "hour": return s.getAggregatedByTime(ctx, monitorName, tickCount, time.Hour, "%Y-%m-%d %H") case "day": return s.getAggregatedByTime(ctx, monitorName, tickCount, 24*time.Hour, "%Y-%m-%d") default: return nil, fmt.Errorf("invalid tick mode: %s", tickMode) } } // getAggregatedPings returns individual pings as tick data func (s *Storage) getAggregatedPings(ctx context.Context, monitorName string, tickCount int, fixedSlots bool) ([]*TickData, error) { pings, err := s.GetRecentPings(ctx, monitorName, tickCount) if err != nil { return nil, err } result := make([]*TickData, 0, tickCount) // If fixedSlots is true, pad the beginning with nils if fixedSlots && len(pings) < tickCount { for i := 0; i < tickCount-len(pings); i++ { result = append(result, nil) } } // Convert pings to TickData for _, p := range pings { uptime := 0.0 if p.Status == "up" { uptime = 100.0 } else if p.Status == "degraded" { uptime = 50.0 } result = append(result, &TickData{ Timestamp: p.Timestamp, TotalChecks: 1, SuccessCount: boolToInt(p.Status == "up"), FailureCount: boolToInt(p.Status == "down"), AvgResponse: float64(p.ResponseTime), UptimePercent: uptime, Status: p.Status, ResponseTime: p.ResponseTime, }) } return result, nil } // getAggregatedByTime returns aggregated tick data by time bucket func (s *Storage) getAggregatedByTime(ctx context.Context, monitorName string, tickCount int, bucketDuration time.Duration, sqlFormat string) ([]*TickData, error) { // Calculate the start time for our window now := time.Now() startTime := now.Add(-time.Duration(tickCount) * bucketDuration) // Query aggregated data grouped by time bucket // First extract just the datetime part (first 19 chars: "2006-01-02 15:04:05") // then apply strftime to that query := ` SELECT strftime('` + sqlFormat + `', substr(timestamp, 1, 19)) as bucket, COUNT(*) as total_checks, SUM(CASE WHEN status = 'up' THEN 1 ELSE 0 END) as success_count, SUM(CASE WHEN status = 'down' THEN 1 ELSE 0 END) as failure_count, AVG(response_time_ms) as avg_response FROM check_results WHERE monitor_name = ? AND timestamp >= ? GROUP BY bucket HAVING bucket IS NOT NULL ORDER BY bucket ASC ` rows, err := s.db.QueryContext(ctx, query, monitorName, startTime) if err != nil { return nil, fmt.Errorf("failed to query aggregated history: %w", err) } defer rows.Close() // Build a map of bucket -> data dataMap := make(map[string]*TickData) for rows.Next() { var bucket sql.NullString var td TickData if err := rows.Scan(&bucket, &td.TotalChecks, &td.SuccessCount, &td.FailureCount, &td.AvgResponse); err != nil { return nil, fmt.Errorf("failed to scan aggregated data: %w", err) } if !bucket.Valid || bucket.String == "" { continue // Skip null buckets } if td.TotalChecks > 0 { td.UptimePercent = float64(td.SuccessCount) / float64(td.TotalChecks) * 100 } dataMap[bucket.String] = &td } // Generate all time slots and fill in data result := make([]*TickData, tickCount) for i := 0; i < tickCount; i++ { slotTime := startTime.Add(time.Duration(i+1) * bucketDuration) bucket := formatTimeBucket(slotTime, bucketDuration) if td, ok := dataMap[bucket]; ok { td.Timestamp = slotTime result[i] = td } else { result[i] = nil // No data for this slot } } return result, nil } // formatTimeBucket formats a time into the bucket key format func formatTimeBucket(t time.Time, duration time.Duration) string { switch { case duration >= 24*time.Hour: return t.Format("2006-01-02") case duration >= time.Hour: return t.Format("2006-01-02 15") case duration >= time.Minute: return t.Format("2006-01-02 15:04") default: return t.Format("2006-01-02 15:04:05") } } // boolToInt converts a boolean to an int (1 for true, 0 for false) func boolToInt(b bool) int { if b { return 1 } return 0 } // Cleanup removes old data beyond the history retention period func (s *Storage) Cleanup(ctx context.Context) error { cutoff := time.Now().AddDate(0, 0, -s.historyDays) _, err := s.db.ExecContext(ctx, ` DELETE FROM check_results WHERE timestamp < ? `, cutoff) if err != nil { return fmt.Errorf("failed to cleanup check_results: %w", err) } _, err = s.db.ExecContext(ctx, ` DELETE FROM daily_stats WHERE date < ? `, cutoff.Format("2006-01-02")) if err != nil { return fmt.Errorf("failed to cleanup daily_stats: %w", err) } // Vacuum to reclaim space _, err = s.db.ExecContext(ctx, "VACUUM") if err != nil { return fmt.Errorf("failed to vacuum database: %w", err) } return nil } // ResetMonitorData deletes all historical data for a specific monitor func (s *Storage) ResetMonitorData(ctx context.Context, monitorName string) error { // Delete from check_results _, err := s.db.ExecContext(ctx, ` DELETE FROM check_results WHERE monitor_name = ? `, monitorName) if err != nil { return fmt.Errorf("failed to delete check_results for monitor %q: %w", monitorName, err) } // Delete from daily_stats _, err = s.db.ExecContext(ctx, ` DELETE FROM daily_stats WHERE monitor_name = ? `, monitorName) if err != nil { return fmt.Errorf("failed to delete daily_stats for monitor %q: %w", monitorName, err) } return nil } // Close closes the database connection func (s *Storage) Close() error { return s.db.Close() } func (s *Storage) RecordCheck() { if s.maintenance != nil { s.maintenance.IncrementChecks() } } func (s *Storage) CheckMaintenance() (bool, error) { if s.maintenance == nil { return false, nil } if !s.maintenance.ShouldRun() { return false, nil } if err := s.db.Close(); err != nil { return false, fmt.Errorf("failed to close database before maintenance: %w", err) } if err := s.maintenance.Execute(); err != nil { db, openErr := openDB(s.dbPath) if openErr == nil { s.db = db } return false, fmt.Errorf("maintenance failed: %w", err) } db, err := openDB(s.dbPath) if err != nil { return true, fmt.Errorf("failed to reopen database after maintenance: %w", err) } s.db = db if err := s.migrate(); err != nil { return true, fmt.Errorf("failed to migrate after maintenance: %w", err) } return true, nil } func (s *Storage) GetMaintenanceMode() string { if s.maintenance == nil { return "never" } return s.maintenance.cfg.Mode }