diff options
| author | Fuwn <[email protected]> | 2026-01-17 23:17:49 -0800 |
|---|---|---|
| committer | Fuwn <[email protected]> | 2026-01-17 23:17:49 -0800 |
| commit | 4bc6165258cd7b5b76ccb01aa75c7cefdc35d143 (patch) | |
| tree | e7c3bb335a1efd48f82d365169e8b4a66b7abe1d /internal/storage/sqlite.go | |
| download | kaze-4bc6165258cd7b5b76ccb01aa75c7cefdc35d143.tar.xz kaze-4bc6165258cd7b5b76ccb01aa75c7cefdc35d143.zip | |
feat: Initial commit
Diffstat (limited to 'internal/storage/sqlite.go')
| -rw-r--r-- | internal/storage/sqlite.go | 679 |
1 files changed, 679 insertions, 0 deletions
diff --git a/internal/storage/sqlite.go b/internal/storage/sqlite.go new file mode 100644 index 0000000..e08e4ee --- /dev/null +++ b/internal/storage/sqlite.go @@ -0,0 +1,679 @@ +package storage + +import ( + "context" + "database/sql" + "fmt" + "strings" + "time" + + _ "modernc.org/sqlite" +) + +// Storage handles all database operations +type Storage struct { + db *sql.DB + historyDays int +} + +// CheckResult represents a single monitor check result +type CheckResult struct { + ID int64 + MonitorName string + Timestamp time.Time + Status string // up, down, degraded + ResponseTime int64 // milliseconds + StatusCode int // HTTP status code (0 for non-HTTP) + Error string + SSLExpiry *time.Time + SSLDaysLeft int +} + +// DailyStatus represents aggregated daily status for a monitor +type DailyStatus struct { + Date time.Time + MonitorName string + SuccessCount int + FailureCount int + TotalChecks int + AvgResponse float64 + UptimePercent float64 +} + +// MonitorStats represents overall statistics for a monitor +type MonitorStats struct { + MonitorName string + CurrentStatus string + LastCheck time.Time + LastResponseTime int64 + LastError string + UptimePercent float64 + AvgResponseTime float64 + SSLExpiry *time.Time + SSLDaysLeft int + TotalChecks int64 +} + +// TickData represents aggregated data for one tick in the history bar +type TickData struct { + Timestamp time.Time + TotalChecks int + SuccessCount int + FailureCount int + AvgResponse float64 + UptimePercent float64 + // For ping mode only + Status string // up, down, degraded (only for ping mode) + ResponseTime int64 // milliseconds (only for ping mode) +} + +// New creates a new storage instance +func New(dbPath string, historyDays int) (*Storage, error) { + // Add connection parameters for better concurrency handling + // _txlock=immediate ensures transactions acquire locks immediately + // _busy_timeout=5000 waits up to 5 seconds for locks + // _journal_mode=WAL enables write-ahead logging for better concurrency + // _synchronous=NORMAL balances safety and performance + dsn := fmt.Sprintf("%s?_txlock=immediate&_busy_timeout=5000&_journal_mode=WAL&_synchronous=NORMAL", dbPath) + + db, err := sql.Open("sqlite", dsn) + if err != nil { + return nil, fmt.Errorf("failed to open database: %w", err) + } + + // CRITICAL: Set max open connections to 1 for SQLite + // SQLite only supports one writer at a time, so we serialize all writes + db.SetMaxOpenConns(1) + db.SetMaxIdleConns(1) + db.SetConnMaxLifetime(0) // Don't close idle connections + + // Verify WAL mode is enabled + var journalMode string + if err := db.QueryRow("PRAGMA journal_mode").Scan(&journalMode); err != nil { + db.Close() + return nil, fmt.Errorf("failed to check journal mode: %w", err) + } + + // Enable foreign keys (must be done per-connection, but with 1 conn it's fine) + if _, err := db.Exec("PRAGMA foreign_keys=ON"); err != nil { + db.Close() + return nil, fmt.Errorf("failed to enable foreign keys: %w", err) + } + + s := &Storage{ + db: db, + historyDays: historyDays, + } + + if err := s.migrate(); err != nil { + db.Close() + return nil, fmt.Errorf("failed to run migrations: %w", err) + } + + return s, nil +} + +// migrate creates the database schema +func (s *Storage) migrate() error { + schema := ` + CREATE TABLE IF NOT EXISTS check_results ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + monitor_name TEXT NOT NULL, + timestamp DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP, + status TEXT NOT NULL CHECK(status IN ('up', 'down', 'degraded')), + response_time_ms INTEGER NOT NULL DEFAULT 0, + status_code INTEGER DEFAULT 0, + error_message TEXT, + ssl_expiry DATETIME, + ssl_days_left INTEGER DEFAULT 0 + ); + + CREATE INDEX IF NOT EXISTS idx_check_results_monitor_time + ON check_results(monitor_name, timestamp DESC); + + CREATE INDEX IF NOT EXISTS idx_check_results_timestamp + ON check_results(timestamp); + + CREATE TABLE IF NOT EXISTS daily_stats ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + monitor_name TEXT NOT NULL, + date DATE NOT NULL, + success_count INTEGER NOT NULL DEFAULT 0, + failure_count INTEGER NOT NULL DEFAULT 0, + total_checks INTEGER NOT NULL DEFAULT 0, + avg_response_ms REAL DEFAULT 0, + uptime_percent REAL DEFAULT 0, + UNIQUE(monitor_name, date) + ); + + CREATE INDEX IF NOT EXISTS idx_daily_stats_monitor_date + ON daily_stats(monitor_name, date DESC); + + CREATE TABLE IF NOT EXISTS monitor_state ( + monitor_name TEXT PRIMARY KEY, + current_status TEXT NOT NULL DEFAULT 'unknown', + last_check DATETIME, + last_response_time_ms INTEGER DEFAULT 0, + last_error TEXT, + ssl_expiry DATETIME, + ssl_days_left INTEGER DEFAULT 0 + ); + ` + + _, err := s.db.Exec(schema) + return err +} + +// SaveCheckResult saves a check result and updates monitor state +func (s *Storage) SaveCheckResult(ctx context.Context, result *CheckResult) error { + // Retry logic for transient lock issues + var lastErr error + for attempt := 0; attempt < 3; attempt++ { + if attempt > 0 { + // Wait before retry with exponential backoff + select { + case <-ctx.Done(): + return ctx.Err() + case <-time.After(time.Duration(attempt*100) * time.Millisecond): + } + } + + lastErr = s.saveCheckResultOnce(ctx, result) + if lastErr == nil { + return nil + } + + // Only retry on lock errors + if !isLockError(lastErr) { + return lastErr + } + } + return fmt.Errorf("failed after 3 attempts: %w", lastErr) +} + +// isLockError checks if the error is a database lock error +func isLockError(err error) bool { + if err == nil { + return false + } + errStr := err.Error() + return strings.Contains(errStr, "database is locked") || + strings.Contains(errStr, "SQLITE_BUSY") || + strings.Contains(errStr, "database table is locked") +} + +// saveCheckResultOnce performs a single attempt to save the check result +func (s *Storage) saveCheckResultOnce(ctx context.Context, result *CheckResult) error { + tx, err := s.db.BeginTx(ctx, nil) + if err != nil { + return fmt.Errorf("failed to begin transaction: %w", err) + } + defer tx.Rollback() + + // Insert check result + _, err = tx.ExecContext(ctx, ` + INSERT INTO check_results ( + monitor_name, timestamp, status, response_time_ms, + status_code, error_message, ssl_expiry, ssl_days_left + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?) + `, result.MonitorName, result.Timestamp, result.Status, + result.ResponseTime, result.StatusCode, result.Error, + result.SSLExpiry, result.SSLDaysLeft) + if err != nil { + return fmt.Errorf("failed to insert check result: %w", err) + } + + // Update or insert monitor state + _, err = tx.ExecContext(ctx, ` + INSERT INTO monitor_state ( + monitor_name, current_status, last_check, + last_response_time_ms, last_error, ssl_expiry, ssl_days_left + ) VALUES (?, ?, ?, ?, ?, ?, ?) + ON CONFLICT(monitor_name) DO UPDATE SET + current_status = excluded.current_status, + last_check = excluded.last_check, + last_response_time_ms = excluded.last_response_time_ms, + last_error = excluded.last_error, + ssl_expiry = excluded.ssl_expiry, + ssl_days_left = excluded.ssl_days_left + `, result.MonitorName, result.Status, result.Timestamp, + result.ResponseTime, result.Error, result.SSLExpiry, result.SSLDaysLeft) + if err != nil { + return fmt.Errorf("failed to update monitor state: %w", err) + } + + // Update daily stats + date := result.Timestamp.Format("2006-01-02") + successIncr := 0 + failureIncr := 0 + if result.Status == "up" { + successIncr = 1 + } else { + failureIncr = 1 + } + + _, err = tx.ExecContext(ctx, ` + INSERT INTO daily_stats ( + monitor_name, date, success_count, failure_count, + total_checks, avg_response_ms, uptime_percent + ) VALUES (?, ?, ?, ?, 1, ?, ?) + ON CONFLICT(monitor_name, date) DO UPDATE SET + success_count = daily_stats.success_count + ?, + failure_count = daily_stats.failure_count + ?, + total_checks = daily_stats.total_checks + 1, + avg_response_ms = (daily_stats.avg_response_ms * daily_stats.total_checks + ?) / (daily_stats.total_checks + 1), + uptime_percent = CAST((daily_stats.success_count + ?) AS REAL) / (daily_stats.total_checks + 1) * 100 + `, result.MonitorName, date, successIncr, failureIncr, + float64(result.ResponseTime), float64(successIncr)*100, + successIncr, failureIncr, float64(result.ResponseTime), successIncr) + if err != nil { + return fmt.Errorf("failed to update daily stats: %w", err) + } + + return tx.Commit() +} + +// GetMonitorStats returns statistics for a specific monitor +func (s *Storage) GetMonitorStats(ctx context.Context, monitorName string) (*MonitorStats, error) { + stats := &MonitorStats{MonitorName: monitorName} + + // Get current state + err := s.db.QueryRowContext(ctx, ` + SELECT current_status, last_check, last_response_time_ms, + last_error, ssl_expiry, ssl_days_left + FROM monitor_state + WHERE monitor_name = ? + `, monitorName).Scan(&stats.CurrentStatus, &stats.LastCheck, + &stats.LastResponseTime, &stats.LastError, + &stats.SSLExpiry, &stats.SSLDaysLeft) + if err == sql.ErrNoRows { + stats.CurrentStatus = "unknown" + return stats, nil + } + if err != nil { + return nil, fmt.Errorf("failed to get monitor state: %w", err) + } + + // Get aggregate stats from check results within history window + cutoff := time.Now().AddDate(0, 0, -s.historyDays) + err = s.db.QueryRowContext(ctx, ` + SELECT + COUNT(*) as total, + AVG(response_time_ms) as avg_response, + CAST(SUM(CASE WHEN status = 'up' THEN 1 ELSE 0 END) AS REAL) / COUNT(*) * 100 as uptime + FROM check_results + WHERE monitor_name = ? AND timestamp >= ? + `, monitorName, cutoff).Scan(&stats.TotalChecks, &stats.AvgResponseTime, &stats.UptimePercent) + if err != nil && err != sql.ErrNoRows { + return nil, fmt.Errorf("failed to get aggregate stats: %w", err) + } + + return stats, nil +} + +// GetAllMonitorStats returns statistics for all monitors +func (s *Storage) GetAllMonitorStats(ctx context.Context) (map[string]*MonitorStats, error) { + stats := make(map[string]*MonitorStats) + cutoff := time.Now().AddDate(0, 0, -s.historyDays) + + // Get all monitor states + rows, err := s.db.QueryContext(ctx, ` + SELECT monitor_name, current_status, last_check, + last_response_time_ms, last_error, ssl_expiry, ssl_days_left + FROM monitor_state + `) + if err != nil { + return nil, fmt.Errorf("failed to query monitor states: %w", err) + } + defer rows.Close() + + for rows.Next() { + var ms MonitorStats + var lastError sql.NullString + var sslExpiry sql.NullTime + err := rows.Scan(&ms.MonitorName, &ms.CurrentStatus, &ms.LastCheck, + &ms.LastResponseTime, &lastError, &sslExpiry, &ms.SSLDaysLeft) + if err != nil { + return nil, fmt.Errorf("failed to scan monitor state: %w", err) + } + if lastError.Valid { + ms.LastError = lastError.String + } + if sslExpiry.Valid { + ms.SSLExpiry = &sslExpiry.Time + } + stats[ms.MonitorName] = &ms + } + + // Get aggregate stats for all monitors + rows, err = s.db.QueryContext(ctx, ` + SELECT + monitor_name, + COUNT(*) as total, + AVG(response_time_ms) as avg_response, + CAST(SUM(CASE WHEN status = 'up' THEN 1 ELSE 0 END) AS REAL) / COUNT(*) * 100 as uptime + FROM check_results + WHERE timestamp >= ? + GROUP BY monitor_name + `, cutoff) + if err != nil { + return nil, fmt.Errorf("failed to query aggregate stats: %w", err) + } + defer rows.Close() + + for rows.Next() { + var name string + var total int64 + var avgResponse, uptime float64 + if err := rows.Scan(&name, &total, &avgResponse, &uptime); err != nil { + return nil, fmt.Errorf("failed to scan aggregate stats: %w", err) + } + if ms, ok := stats[name]; ok { + ms.TotalChecks = total + ms.AvgResponseTime = avgResponse + ms.UptimePercent = uptime + } + } + + return stats, nil +} + +// GetDailyStats returns daily statistics for a monitor +func (s *Storage) GetDailyStats(ctx context.Context, monitorName string, days int) ([]DailyStatus, error) { + cutoff := time.Now().AddDate(0, 0, -days) + + rows, err := s.db.QueryContext(ctx, ` + SELECT date, success_count, failure_count, total_checks, + avg_response_ms, uptime_percent + FROM daily_stats + WHERE monitor_name = ? AND date >= ? + ORDER BY date ASC + `, monitorName, cutoff.Format("2006-01-02")) + if err != nil { + return nil, fmt.Errorf("failed to query daily stats: %w", err) + } + defer rows.Close() + + var results []DailyStatus + for rows.Next() { + var ds DailyStatus + var dateStr string + ds.MonitorName = monitorName + if err := rows.Scan(&dateStr, &ds.SuccessCount, &ds.FailureCount, + &ds.TotalChecks, &ds.AvgResponse, &ds.UptimePercent); err != nil { + return nil, fmt.Errorf("failed to scan daily stats: %w", err) + } + ds.Date, _ = time.Parse("2006-01-02", dateStr) + results = append(results, ds) + } + + return results, nil +} + +// GetUptimeHistory returns uptime data for the history visualization +// Returns a slice of daily uptime percentages, one per day for the specified period +func (s *Storage) GetUptimeHistory(ctx context.Context, monitorName string, days int) ([]float64, error) { + // Create a map of date to uptime + uptimeMap := make(map[string]float64) + + // Use local time for date range calculation (matches how we store dates) + cutoffDate := time.Now().AddDate(0, 0, -days).Format("2006-01-02") + + rows, err := s.db.QueryContext(ctx, ` + SELECT date, uptime_percent + FROM daily_stats + WHERE monitor_name = ? AND date >= ? + ORDER BY date ASC + `, monitorName, cutoffDate) + if err != nil { + return nil, fmt.Errorf("failed to query uptime history: %w", err) + } + defer rows.Close() + + for rows.Next() { + var dateStr string + var uptime float64 + if err := rows.Scan(&dateStr, &uptime); err != nil { + return nil, fmt.Errorf("failed to scan uptime: %w", err) + } + // Normalize date format (strip any time component if present) + if len(dateStr) > 10 { + dateStr = dateStr[:10] + } + uptimeMap[dateStr] = uptime + } + + // Build result array with -1 for days with no data + result := make([]float64, days) + today := time.Now().Format("2006-01-02") + for i := 0; i < days; i++ { + date := time.Now().AddDate(0, 0, -days+i+1).Format("2006-01-02") + if uptime, ok := uptimeMap[date]; ok { + result[i] = uptime + } else if date == today { + // For today, if we have monitor data but no daily stats yet, + // check if there's recent data and show it + result[i] = -1 + } else { + result[i] = -1 // No data for this day + } + } + + return result, nil +} + +// PingResult represents a single ping for the history display +type PingResult struct { + Status string // up, down, degraded + ResponseTime int64 // milliseconds + Timestamp time.Time +} + +// GetRecentPings returns the last N check results for a monitor +// Results are ordered from oldest to newest (left to right on display) +func (s *Storage) GetRecentPings(ctx context.Context, monitorName string, limit int) ([]PingResult, error) { + // Query in descending order then reverse, so we get the most recent N results + // but in chronological order for display + rows, err := s.db.QueryContext(ctx, ` + SELECT status, response_time_ms, timestamp + FROM check_results + WHERE monitor_name = ? + ORDER BY timestamp DESC + LIMIT ? + `, monitorName, limit) + if err != nil { + return nil, fmt.Errorf("failed to query recent pings: %w", err) + } + defer rows.Close() + + var results []PingResult + for rows.Next() { + var p PingResult + if err := rows.Scan(&p.Status, &p.ResponseTime, &p.Timestamp); err != nil { + return nil, fmt.Errorf("failed to scan ping: %w", err) + } + results = append(results, p) + } + + // Reverse to get chronological order (oldest first) + for i, j := 0, len(results)-1; i < j; i, j = i+1, j-1 { + results[i], results[j] = results[j], results[i] + } + + return results, nil +} + +// GetAggregatedHistory returns tick data for the history visualization +// tickMode: "ping", "minute", "hour", "day" +// fixedSlots: when true, returns exactly tickCount elements with nil for missing periods +// Returns a slice where nil entries indicate no data for that period +func (s *Storage) GetAggregatedHistory(ctx context.Context, monitorName string, tickCount int, tickMode string, fixedSlots bool) ([]*TickData, error) { + switch tickMode { + case "ping": + return s.getAggregatedPings(ctx, monitorName, tickCount, fixedSlots) + case "minute": + return s.getAggregatedByTime(ctx, monitorName, tickCount, time.Minute, "%Y-%m-%d %H:%M") + case "hour": + return s.getAggregatedByTime(ctx, monitorName, tickCount, time.Hour, "%Y-%m-%d %H") + case "day": + return s.getAggregatedByTime(ctx, monitorName, tickCount, 24*time.Hour, "%Y-%m-%d") + default: + return nil, fmt.Errorf("invalid tick mode: %s", tickMode) + } +} + +// getAggregatedPings returns individual pings as tick data +func (s *Storage) getAggregatedPings(ctx context.Context, monitorName string, tickCount int, fixedSlots bool) ([]*TickData, error) { + pings, err := s.GetRecentPings(ctx, monitorName, tickCount) + if err != nil { + return nil, err + } + + result := make([]*TickData, 0, tickCount) + + // If fixedSlots is true, pad the beginning with nils + if fixedSlots && len(pings) < tickCount { + for i := 0; i < tickCount-len(pings); i++ { + result = append(result, nil) + } + } + + // Convert pings to TickData + for _, p := range pings { + uptime := 0.0 + if p.Status == "up" { + uptime = 100.0 + } else if p.Status == "degraded" { + uptime = 50.0 + } + result = append(result, &TickData{ + Timestamp: p.Timestamp, + TotalChecks: 1, + SuccessCount: boolToInt(p.Status == "up"), + FailureCount: boolToInt(p.Status == "down"), + AvgResponse: float64(p.ResponseTime), + UptimePercent: uptime, + Status: p.Status, + ResponseTime: p.ResponseTime, + }) + } + + return result, nil +} + +// getAggregatedByTime returns aggregated tick data by time bucket +func (s *Storage) getAggregatedByTime(ctx context.Context, monitorName string, tickCount int, bucketDuration time.Duration, sqlFormat string) ([]*TickData, error) { + // Calculate the start time for our window + now := time.Now() + startTime := now.Add(-time.Duration(tickCount) * bucketDuration) + + // Query aggregated data grouped by time bucket + // First extract just the datetime part (first 19 chars: "2006-01-02 15:04:05") + // then apply strftime to that + query := ` + SELECT + strftime('` + sqlFormat + `', substr(timestamp, 1, 19)) as bucket, + COUNT(*) as total_checks, + SUM(CASE WHEN status = 'up' THEN 1 ELSE 0 END) as success_count, + SUM(CASE WHEN status = 'down' THEN 1 ELSE 0 END) as failure_count, + AVG(response_time_ms) as avg_response + FROM check_results + WHERE monitor_name = ? AND timestamp >= ? + GROUP BY bucket + HAVING bucket IS NOT NULL + ORDER BY bucket ASC + ` + + rows, err := s.db.QueryContext(ctx, query, monitorName, startTime) + if err != nil { + return nil, fmt.Errorf("failed to query aggregated history: %w", err) + } + defer rows.Close() + + // Build a map of bucket -> data + dataMap := make(map[string]*TickData) + for rows.Next() { + var bucket sql.NullString + var td TickData + if err := rows.Scan(&bucket, &td.TotalChecks, &td.SuccessCount, &td.FailureCount, &td.AvgResponse); err != nil { + return nil, fmt.Errorf("failed to scan aggregated data: %w", err) + } + if !bucket.Valid || bucket.String == "" { + continue // Skip null buckets + } + if td.TotalChecks > 0 { + td.UptimePercent = float64(td.SuccessCount) / float64(td.TotalChecks) * 100 + } + dataMap[bucket.String] = &td + } + + // Generate all time slots and fill in data + result := make([]*TickData, tickCount) + for i := 0; i < tickCount; i++ { + slotTime := startTime.Add(time.Duration(i+1) * bucketDuration) + bucket := formatTimeBucket(slotTime, bucketDuration) + + if td, ok := dataMap[bucket]; ok { + td.Timestamp = slotTime + result[i] = td + } else { + result[i] = nil // No data for this slot + } + } + + return result, nil +} + +// formatTimeBucket formats a time into the bucket key format +func formatTimeBucket(t time.Time, duration time.Duration) string { + switch { + case duration >= 24*time.Hour: + return t.Format("2006-01-02") + case duration >= time.Hour: + return t.Format("2006-01-02 15") + case duration >= time.Minute: + return t.Format("2006-01-02 15:04") + default: + return t.Format("2006-01-02 15:04:05") + } +} + +// boolToInt converts a boolean to an int (1 for true, 0 for false) +func boolToInt(b bool) int { + if b { + return 1 + } + return 0 +} + +// Cleanup removes old data beyond the history retention period +func (s *Storage) Cleanup(ctx context.Context) error { + cutoff := time.Now().AddDate(0, 0, -s.historyDays) + + _, err := s.db.ExecContext(ctx, ` + DELETE FROM check_results WHERE timestamp < ? + `, cutoff) + if err != nil { + return fmt.Errorf("failed to cleanup check_results: %w", err) + } + + _, err = s.db.ExecContext(ctx, ` + DELETE FROM daily_stats WHERE date < ? + `, cutoff.Format("2006-01-02")) + if err != nil { + return fmt.Errorf("failed to cleanup daily_stats: %w", err) + } + + // Vacuum to reclaim space + _, err = s.db.ExecContext(ctx, "VACUUM") + if err != nil { + return fmt.Errorf("failed to vacuum database: %w", err) + } + + return nil +} + +// Close closes the database connection +func (s *Storage) Close() error { + return s.db.Close() +} |