"""
Visualization module for code metrics analysis results.
"""
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
from pathlib import Path
from typing import Dict, List, Optional
import os


class Visualizer:
    """Creates visualizations for code metrics analysis.

    Each ``plot_*`` method validates its input first and returns ``None``
    without touching matplotlib when there is nothing meaningful to draw.
    Otherwise it renders a figure, saves it as a PNG inside ``output_dir``
    and closes that figure again, even if plotting raised.
    """

    def __init__(self, output_dir: str = 'figures'):
        """Prepare the output directory and the global plotting style.

        Args:
            output_dir: Directory that receives the generated images. It is
                created (including missing parent directories) if needed.

        Note:
            The seaborn style and the default figure size are set globally,
            so they also affect plots made outside this class.
        """
        self.output_dir = Path(output_dir)
        # parents=True so nested targets such as "results/figures" work.
        self.output_dir.mkdir(parents=True, exist_ok=True)
        sns.set_style("whitegrid")
        plt.rcParams['figure.figsize'] = (12, 8)

    @staticmethod
    def _pretty(name: str) -> str:
        """Turn a column name such as ``cyclomatic_complexity`` into a label."""
        return name.replace('_', ' ').title()

    def _save(self, fig, filename: str) -> None:
        """Tighten the layout of *fig* and write it into the output directory."""
        fig.tight_layout()
        fig.savefig(self.output_dir / filename, dpi=300, bbox_inches='tight')

    def plot_correlation_heatmap(self, df: pd.DataFrame,
                                 complexity_cols: List[str],
                                 issue_cols: List[str],
                                 filename: str = 'correlation_heatmap.png') -> None:
        """Create correlation heatmap between complexity and issues.

        Args:
            df: Data holding one column per metric.
            complexity_cols: Candidate row metrics; names missing from *df*
                are ignored.
            issue_cols: Candidate column metrics; names missing from *df*
                are ignored.
            filename: Name of the image written into the output directory.

        Pearson correlation is computed per pair over the rows where both
        values are present, and only when at least three such rows exist.
        Cells without a valid correlation are masked. Nothing is drawn when
        no requested column exists or no cell has a valid correlation.
        """
        # Filter to existing columns
        comp_cols = [col for col in complexity_cols if col in df.columns]
        iss_cols = [col for col in issue_cols if col in df.columns]

        if not comp_cols or not iss_cols:
            return

        # Calculate correlation matrix; cells stay NaN unless a value is found.
        corr_matrix = np.full((len(comp_cols), len(iss_cols)), np.nan)

        for i, comp_col in enumerate(comp_cols):
            for j, iss_col in enumerate(iss_cols):
                mask = df[[comp_col, iss_col]].notna().all(axis=1)
                if mask.sum() >= 3:
                    # corr() yields NaN for constant input, which simply
                    # leaves the cell masked in the heatmap.
                    corr_matrix[i, j] = df.loc[mask, comp_col].corr(
                        df.loc[mask, iss_col], method='pearson')

        # Check if we have any valid correlations
        if np.isnan(corr_matrix).all():
            return

        # Create heatmap
        fig, ax = plt.subplots(figsize=(14, 10))
        try:
            sns.heatmap(corr_matrix, annot=True, fmt='.3f', cmap='coolwarm',
                        center=0, vmin=-1, vmax=1, ax=ax,
                        xticklabels=iss_cols, yticklabels=comp_cols,
                        mask=np.isnan(corr_matrix))
            ax.set_title('Correlation Heatmap: Complexity Metrics vs Issue Metrics',
                         fontsize=16, fontweight='bold')
            self._save(fig, filename)
        finally:
            plt.close(fig)

    def plot_complexity_vs_issues_scatter(self, df: pd.DataFrame,
                                          complexity_col: str,
                                          issue_col: str,
                                          filename: Optional[str] = None) -> None:
        """Create scatter plot of complexity vs issues.

        Args:
            df: Data holding both columns.
            complexity_col: Column plotted on the x axis.
            issue_col: Column plotted on the y axis.
            filename: Name of the image written into the output directory.
                When ``None`` the figure is rendered and discarded without
                being saved.

        Nothing is drawn when a column is missing, fewer than three rows have
        both values, or fewer than two rows remain after removing infinities.
        A dashed linear trend line is added when x is not constant and the
        fit succeeds.
        """
        if complexity_col not in df.columns or issue_col not in df.columns:
            return

        mask = df[[complexity_col, issue_col]].notna().all(axis=1)
        if mask.sum() < 3:
            return

        x = df.loc[mask, complexity_col]
        y = df.loc[mask, issue_col]

        # Remove any infinite values (NaN rows were dropped by the mask).
        valid_mask = np.isfinite(x) & np.isfinite(y)
        x = x[valid_mask]
        y = y[valid_mask]

        # Validate before creating the figure so an early return cannot
        # leave an open figure behind.
        if len(x) < 2:
            return

        fig, ax = plt.subplots(figsize=(10, 6))
        try:
            ax.scatter(x, y, alpha=0.5, s=50)

            # Add regression line (with error handling)
            try:
                # Check if x has variance (not constant)
                if x.std() > 1e-10:
                    z = np.polyfit(x, y, 1)
                    p = np.poly1d(z)
                    x_sorted = np.sort(x)
                    ax.plot(x_sorted, p(x_sorted), "r--", alpha=0.8, linewidth=2,
                            label=f'Trend line (slope={z[0]:.3f})')
            except (np.linalg.LinAlgError, ValueError, RuntimeError):
                # The trend line is best-effort decoration; the scatter plot
                # is still useful without it.
                pass

            x_label = self._pretty(complexity_col)
            y_label = self._pretty(issue_col)
            ax.set_xlabel(x_label, fontsize=12)
            ax.set_ylabel(y_label, fontsize=12)
            ax.set_title(f'{x_label} vs {y_label}',
                         fontsize=14, fontweight='bold')
            # Only the trend line carries a label; skip the legend when it
            # was not drawn to avoid an empty-legend warning.
            if ax.get_legend_handles_labels()[0]:
                ax.legend()
            ax.grid(True, alpha=0.3)

            fig.tight_layout()
            if filename:
                fig.savefig(self.output_dir / filename, dpi=300, bbox_inches='tight')
        finally:
            plt.close(fig)

    def plot_module_complexity_comparison(self, df: pd.DataFrame,
                                          complexity_col: str,
                                          filename: str = 'module_complexity_comparison.png') -> None:
        """Compare complexity across different modules.

        Args:
            df: Data with a ``module`` column and the metric column.
            complexity_col: Metric to compare.
            filename: Name of the image written into the output directory.

        Draws a box plot per module (in order of first appearance) above a
        bar chart of per-module means (sorted descending). Rows lacking
        either the metric or the module are ignored; nothing is drawn when a
        column is missing or no usable row remains.
        """
        if 'module' not in df.columns or complexity_col not in df.columns:
            return

        # A missing module cannot be matched with ==, so such rows would
        # only produce an empty box; drop them together with missing metrics.
        mask = df[complexity_col].notna() & df['module'].notna()
        if mask.sum() == 0:
            return
        valid = df.loc[mask]

        fig, axes = plt.subplots(2, 1, figsize=(14, 10))
        try:
            metric_label = self._pretty(complexity_col)

            # Box plot
            modules = valid['module'].unique()
            data_by_module = [valid.loc[valid['module'] == mod, complexity_col].values
                              for mod in modules]

            axes[0].boxplot(data_by_module)
            # Boxes sit at positions 1..n; label them explicitly instead of
            # relying on the boxplot keyword whose name changed across
            # matplotlib releases.
            axes[0].set_xticks(range(1, len(modules) + 1))
            axes[0].set_xticklabels(modules)
            axes[0].set_ylabel(metric_label, fontsize=12)
            axes[0].set_title(f'{metric_label} by Module',
                              fontsize=14, fontweight='bold')
            axes[0].tick_params(axis='x', rotation=45)
            axes[0].grid(True, alpha=0.3)

            # Bar plot of means
            module_means = (valid.groupby('module')[complexity_col]
                            .mean()
                            .sort_values(ascending=False))
            axes[1].bar(range(len(module_means)), module_means.values)
            axes[1].set_xticks(range(len(module_means)))
            axes[1].set_xticklabels(module_means.index, rotation=45, ha='right')
            axes[1].set_ylabel(f'Mean {metric_label}', fontsize=12)
            axes[1].set_title(f'Average {metric_label} by Module',
                              fontsize=14, fontweight='bold')
            axes[1].grid(True, alpha=0.3, axis='y')

            self._save(fig, filename)
        finally:
            plt.close(fig)

    def plot_distribution_analysis(self, df: pd.DataFrame,
                                   metric_col: str,
                                   filename: str = 'distribution_analysis.png') -> None:
        """Plot distribution analysis for a metric.

        Args:
            df: Data holding the metric column.
            metric_col: Metric to analyse.
            filename: Name of the image written into the output directory.

        Produces a 2x2 figure: histogram, normal Q-Q plot, box plot and
        empirical cumulative distribution. Missing and infinite values are
        ignored; nothing is drawn when the column is absent or no usable
        value remains.
        """
        if metric_col not in df.columns:
            return

        data = df[metric_col].dropna()
        # Infinite values survive dropna() but make the histogram range
        # non-finite, so remove them as well.
        data = data[~data.isin([np.inf, -np.inf])]
        if data.empty:
            return

        # Imported here so that scipy is only needed for this plot.
        from scipy import stats

        fig, axes = plt.subplots(2, 2, figsize=(14, 10))
        try:
            metric_label = self._pretty(metric_col)

            # Histogram
            axes[0, 0].hist(data, bins=30, edgecolor='black', alpha=0.7)
            axes[0, 0].set_xlabel(metric_label, fontsize=12)
            axes[0, 0].set_ylabel('Frequency', fontsize=12)
            axes[0, 0].set_title('Histogram', fontsize=12, fontweight='bold')
            axes[0, 0].grid(True, alpha=0.3)

            # Q-Q plot
            if len(data) >= 2:
                stats.probplot(data, dist="norm", plot=axes[0, 1])
            else:
                # A Q-Q reference line needs at least two points.
                axes[0, 1].text(0.5, 0.5, 'Not enough data for Q-Q plot',
                                ha='center', va='center',
                                transform=axes[0, 1].transAxes)
            axes[0, 1].set_title('Q-Q Plot (Normal Distribution)', fontsize=12, fontweight='bold')
            axes[0, 1].grid(True, alpha=0.3)

            # Box plot
            axes[1, 0].boxplot(data, vert=True)
            axes[1, 0].set_ylabel(metric_label, fontsize=12)
            axes[1, 0].set_title('Box Plot', fontsize=12, fontweight='bold')
            axes[1, 0].grid(True, alpha=0.3)

            # Cumulative distribution: the i-th smallest value maps to i / n.
            sorted_data = np.sort(data)
            cumulative = np.arange(1, len(sorted_data) + 1) / len(sorted_data)
            axes[1, 1].plot(sorted_data, cumulative, linewidth=2)
            axes[1, 1].set_xlabel(metric_label, fontsize=12)
            axes[1, 1].set_ylabel('Cumulative Probability', fontsize=12)
            axes[1, 1].set_title('Cumulative Distribution Function', fontsize=12, fontweight='bold')
            axes[1, 1].grid(True, alpha=0.3)

            fig.suptitle(f'Distribution Analysis: {metric_label}',
                         fontsize=16, fontweight='bold')
            self._save(fig, filename)
        finally:
            plt.close(fig)

    def plot_regression_results(self, regression_results: Dict,
                                filename: str = 'regression_results.png') -> None:
        """Visualize regression analysis results.

        Args:
            regression_results: Mapping read through these keys:
                ``coefficients`` (feature -> value, required),
                ``p_values`` (feature -> p-value, optional, missing
                features count as 1), ``confidence_intervals``
                (feature -> (lower, upper), optional) and ``r_squared``
                (optional, shown as 0 when absent).
            filename: Name of the image written into the output directory.

        Draws the coefficients with their confidence intervals next to the
        p-values; bars with p < 0.05 are red, all others gray. Nothing is
        drawn when there are no coefficients.
        """
        if 'coefficients' not in regression_results:
            return

        coefficients = regression_results['coefficients']
        # "or {}" also covers keys that are present but set to None.
        p_values = regression_results.get('p_values') or {}
        ci = regression_results.get('confidence_intervals') or {}

        if not coefficients:
            return

        features = list(coefficients.keys())
        coef_values = list(coefficients.values())
        p_vals = [p_values.get(f, 1) for f in features]
        colors = ['red' if p < 0.05 else 'gray' for p in p_vals]
        y_pos = np.arange(len(features))

        fig, axes = plt.subplots(1, 2, figsize=(14, 6))
        try:
            # Coefficient plot with confidence intervals
            axes[0].barh(y_pos, coef_values, color=colors, alpha=0.7)

            # Add confidence intervals
            for i, feature in enumerate(features):
                if feature in ci:
                    ci_lower, ci_upper = ci[feature]
                    axes[0].plot([ci_lower, ci_upper], [i, i], 'k-', linewidth=2)

            axes[0].set_yticks(y_pos)
            axes[0].set_yticklabels(features)
            axes[0].set_xlabel('Coefficient Value', fontsize=12)
            axes[0].set_title('Regression Coefficients with 95% CI', fontsize=14, fontweight='bold')
            axes[0].axvline(x=0, color='black', linestyle='--', linewidth=1)
            axes[0].grid(True, alpha=0.3)

            # P-values plot
            axes[1].barh(y_pos, p_vals, color=colors, alpha=0.7)
            axes[1].axvline(x=0.05, color='red', linestyle='--', linewidth=2,
                            label='Significance Level (0.05)')
            axes[1].set_yticks(y_pos)
            axes[1].set_yticklabels(features)
            axes[1].set_xlabel('P-value', fontsize=12)
            axes[1].set_title('P-values for Coefficients', fontsize=14, fontweight='bold')
            # Never let the axis end before the 0.05 marker: this keeps the
            # significance line visible and avoids a zero-width range when
            # every p-value is 0.
            axes[1].set_xlim([0, max(max(p_vals), 0.05) * 1.1])
            axes[1].legend()
            axes[1].grid(True, alpha=0.3)

            r_squared = regression_results.get("r_squared") or 0
            fig.suptitle(f'Regression Analysis Results (R² = {r_squared:.3f})',
                         fontsize=16, fontweight='bold')
            self._save(fig, filename)
        finally:
            plt.close(fig)