package monitor

import (
	"context"
	"log/slog"
	"sync"
	"time"

	"github.com/Fuwn/kaze/internal/config"
	"github.com/Fuwn/kaze/internal/storage"
)

// OnCheckCallback is invoked by the scheduler after each scheduled check has
// been processed (logged, saved and recorded).
type OnCheckCallback func()

// Scheduler owns the configured monitors and runs each of them, plus the
// storage cleanup and maintenance loops, in its own goroutine until Stop is
// called.
type Scheduler struct {
	monitors    []Monitor
	configPath  string
	storage     *storage.Storage
	logger      *slog.Logger
	wg          sync.WaitGroup
	ctx         context.Context
	cancel      context.CancelFunc
	onCheckDone OnCheckCallback

	// monitorCfg maps a monitor ID (group/name) to its configuration so the
	// reset flag can be inspected on every check. Every monitor goroutine reads
	// it and may write it after a reset, so all access goes through
	// monitorCfgMu.
	monitorCfgMu sync.Mutex
	monitorCfg   map[string]config.MonitorConfig
}

// NewScheduler creates a new monitor scheduler.
//
// One monitor is built for every entry of every group in cfg. If any monitor
// fails to build, the scheduler's context is cancelled and the error is
// returned as-is. configPath is kept so the reset flag can later be written
// back to the configuration file.
func NewScheduler(cfg *config.Config, store *storage.Storage, logger *slog.Logger, configPath string) (*Scheduler, error) {
	ctx, cancel := context.WithCancel(context.Background())

	s := &Scheduler{
		monitorCfg: make(map[string]config.MonitorConfig),
		configPath: configPath,
		storage:    store,
		logger:     logger,
		ctx:        ctx,
		cancel:     cancel,
	}

	// Create monitors from configuration
	for _, group := range cfg.Groups {
		for _, monCfg := range group.Monitors {
			mon, err := New(monCfg)
			if err != nil {
				cancel()
				return nil, err
			}

			s.monitors = append(s.monitors, mon)
			// Store config by ID (group/name) for reset flag checks
			s.monitorCfg[mon.ID()] = monCfg

			logger.Info("registered monitor",
				"id", mon.ID(),
				"group", mon.Group(),
				"name", mon.Name(),
				"type", mon.Type(),
				"target", mon.Target(),
				"interval", mon.Interval())
		}
	}

	return s, nil
}

// Start begins running all monitors. It launches one goroutine per monitor
// plus one each for periodic cleanup and maintenance, and returns immediately.
func (s *Scheduler) Start() {
	s.logger.Info("starting scheduler", "monitors", len(s.monitors))

	for _, mon := range s.monitors {
		s.wg.Add(1)
		go s.runMonitor(mon)
	}

	s.wg.Add(1)
	go s.runCleanup()

	s.wg.Add(1)
	go s.runMaintenance()
}

// Stop gracefully stops all monitors: it cancels the scheduler context and
// blocks until every goroutine launched by Start has returned.
func (s *Scheduler) Stop() {
	s.logger.Info("stopping scheduler")
	s.cancel()
	s.wg.Wait()
	s.logger.Info("scheduler stopped")
}

// runMonitor runs a single monitor in a loop: one check immediately, then one
// per tick of the monitor's interval until the scheduler context is cancelled.
func (s *Scheduler) runMonitor(mon Monitor) {
	defer s.wg.Done()

	// Run immediately on start
	s.executeCheck(mon)

	ticker := time.NewTicker(mon.Interval())
	defer ticker.Stop()

	for {
		select {
		case <-s.ctx.Done():
			s.logger.Info("monitor stopped", "id", mon.ID())
			return
		case <-ticker.C:
			s.executeCheck(mon)
		}
	}
}

// executeCheck performs a single check (with retries) and saves the result.
//
// Before checking, it honours a pending reset flag for the monitor. After the
// check it logs the outcome, saves it to storage, records the check and calls
// the on-check callback if one is set.
func (s *Scheduler) executeCheck(mon Monitor) {
	s.resetIfRequested(mon)

	// The timeout covers the whole check, retries included.
	checkCtx, cancel := context.WithTimeout(s.ctx, mon.Interval())
	defer cancel()

	result := s.checkWithRetries(checkCtx, mon)

	// Log the result
	logAttrs := []any{
		"id", mon.ID(),
		"status", result.Status,
		"response_time", result.ResponseTime,
	}
	if result.StatusCode > 0 {
		logAttrs = append(logAttrs, "status_code", result.StatusCode)
	}
	if result.SSLDaysLeft > 0 {
		logAttrs = append(logAttrs, "ssl_days_left", result.SSLDaysLeft)
	}
	if result.Error != nil {
		logAttrs = append(logAttrs, "error", result.Error)
	}

	if result.Status == StatusUp {
		s.logger.Debug("check completed", logAttrs...)
	} else {
		s.logger.Warn("check completed", logAttrs...)
	}

	// Save to storage
	if err := s.storage.SaveCheckResult(s.ctx, result.ToCheckResult()); err != nil {
		s.logger.Error("failed to save check result", "id", mon.ID(), "error", err)
	}

	s.storage.RecordCheck()

	if s.onCheckDone != nil {
		s.onCheckDone()
	}
}

// resetIfRequested deletes the monitor's stored data when its configuration
// has ResetOnNextCheck set, then clears the flag in the config file and in
// memory.
func (s *Scheduler) resetIfRequested(mon Monitor) {
	id := mon.ID()

	s.monitorCfgMu.Lock()
	monCfg, exists := s.monitorCfg[id]
	s.monitorCfgMu.Unlock()

	if !exists || !monCfg.ResetOnNextCheck {
		return
	}

	s.logger.Info("resetting monitor data", "id", id)

	// Delete all historical data for this monitor (using ID as the key)
	if err := s.storage.ResetMonitorData(s.ctx, id); err != nil {
		s.logger.Error("failed to reset monitor data", "id", id, "error", err)
		return
	}
	s.logger.Info("monitor data reset complete", "id", id)

	// Flip the reset flag to false in the config file.
	// Note: UpdateResetFlag uses monitor name for YAML lookup, group for context.
	// On failure the in-memory flag stays set, so the reset runs again on the
	// next check.
	if err := config.UpdateResetFlag(s.configPath, monCfg.Group, monCfg.Name, false); err != nil {
		s.logger.Error("failed to update reset flag in config", "id", id, "error", err)
		return
	}

	// Update in-memory config
	monCfg.ResetOnNextCheck = false
	s.monitorCfgMu.Lock()
	s.monitorCfg[id] = monCfg
	s.monitorCfgMu.Unlock()

	s.logger.Info("reset flag cleared in config", "id", id)
}

// checkWithRetries runs mon.Check until it reports up or degraded, the
// configured number of retries is used up, or ctx is done. It returns the
// result of the last attempt made.
func (s *Scheduler) checkWithRetries(ctx context.Context, mon Monitor) *Result {
	retries := mon.Retries()
	if retries < 0 {
		// Always make at least one attempt so the caller gets a non-nil result.
		retries = 0
	}

	var result *Result
	for attempt := 0; attempt <= retries; attempt++ {
		result = mon.Check(ctx)

		// If check succeeded (up or degraded), no need to retry
		if result.Status == StatusUp || result.Status == StatusDegraded {
			return result
		}

		// Last attempt: nothing left to retry
		if attempt == retries {
			break
		}

		s.logger.Debug("check failed, retrying",
			"name", mon.Name(),
			"attempt", attempt+1,
			"max_retries", retries,
			"error", result.Error)

		// Small delay before retry (500ms)
		select {
		case <-ctx.Done():
			// Context cancelled or timed out: abort retries. A bare break here
			// would only leave the select, not the loop, so return instead.
			return result
		case <-time.After(500 * time.Millisecond):
			// Continue to next retry
		}
	}

	return result
}

// runCleanup periodically cleans up old data: every 24 hours it calls the
// storage Cleanup until the scheduler context is cancelled.
func (s *Scheduler) runCleanup() {
	defer s.wg.Done()

	ticker := time.NewTicker(24 * time.Hour)
	defer ticker.Stop()

	for {
		select {
		case <-s.ctx.Done():
			return
		case <-ticker.C:
			s.logger.Info("running database cleanup")
			if err := s.storage.Cleanup(s.ctx); err != nil {
				s.logger.Error("cleanup failed", "error", err)
			} else {
				s.logger.Info("cleanup completed")
			}
		}
	}
}

// runMaintenance asks storage once a minute whether maintenance is due, until
// the scheduler context is cancelled. It exits immediately when the storage
// maintenance mode is "never".
func (s *Scheduler) runMaintenance() {
	defer s.wg.Done()

	if s.storage.GetMaintenanceMode() == "never" {
		return
	}

	ticker := time.NewTicker(1 * time.Minute)
	defer ticker.Stop()

	for {
		select {
		case <-s.ctx.Done():
			return
		case <-ticker.C:
			ran, err := s.storage.CheckMaintenance()
			if err != nil {
				s.logger.Error("maintenance check failed", "error", err)
			} else if ran {
				s.logger.Info("database maintenance completed", "mode", s.storage.GetMaintenanceMode())
			}
		}
	}
}

// GetMonitors returns the scheduler's monitors. The returned slice is the
// scheduler's own slice, not a copy.
func (s *Scheduler) GetMonitors() []Monitor {
	return s.monitors
}

// SetOnCheckCallback sets the callback invoked after every scheduled check.
// The monitor goroutines read the field without synchronization, so set it
// before calling Start.
func (s *Scheduler) SetOnCheckCallback(cb OnCheckCallback) {
	s.onCheckDone = cb
}

// RunCheck manually triggers a check for a specific monitor by ID (group/name format).
//
// The check runs once, without retries, under a timeout equal to the monitor's
// interval, and its result is saved to storage. It returns nil when no monitor
// has the given ID.
func (s *Scheduler) RunCheck(id string) *Result {
	for _, mon := range s.monitors {
		if mon.ID() == id {
			ctx, cancel := context.WithTimeout(context.Background(), mon.Interval())
			defer cancel()

			result := mon.Check(ctx)

			// Save the result
			if err := s.storage.SaveCheckResult(context.Background(), result.ToCheckResult()); err != nil {
				s.logger.Error("failed to save manual check result", "id", mon.ID(), "error", err)
			}

			return result
		}
	}

	return nil
}