"""Configuration file for the code metrics analysis project.

Every setting is a plain module-level constant. Importing this module also
calls ``load_dotenv()`` so that values such as ``GITHUB_TOKEN`` can be
supplied through a local ``.env`` file instead of being hard-coded here.
"""

import os

from dotenv import load_dotenv

# Load the .env file into the process environment before any setting below
# is read from it.
load_dotenv()

# ---------------------------------------------------------------------------
# GitHub API Configuration
# ---------------------------------------------------------------------------
# Set your token in the .env file; falls back to an empty string when unset.
GITHUB_TOKEN = os.getenv('GITHUB_TOKEN', '')
GITHUB_API_BASE = 'https://api.github.com'

# ---------------------------------------------------------------------------
# Analysis Configuration
# ---------------------------------------------------------------------------
MAX_REPOSITORIES = 10  # Limit number of repos to analyze
MIN_STARS = 100  # Minimum stars for repository selection
PYTHON_FILE_EXTENSIONS = ['.py']
EXCLUDE_DIRS = [
    '__pycache__',
    '.git',
    'venv',
    'env',
    '.venv',
    'node_modules',
    'tests',
    'test',
]

# Curated list of popular Python projects that use semantic commits.
# Format: (owner, repo_name) -- both parts must match the GitHub repository
# path exactly (owner/repo_name), not the PyPI package name.
CURATED_REPOSITORIES = [
    # FastAPI - modern web framework
    # NOTE(review): this repository appears to have moved to the `fastapi`
    # organization; the old path is expected to redirect -- confirm.
    ('tiangolo', 'fastapi'),
    # Requests - HTTP library
    ('psf', 'requests'),
    # Django REST Framework - API framework
    # The GitHub slug is hyphenated; 'djangorestframework' is only the PyPI
    # package name and does not resolve as a repository.
    ('encode', 'django-rest-framework'),
    # Flask - web framework
    ('pallets', 'flask'),
    # Celery - distributed task queue
    ('celery', 'celery'),
    # Pydantic - data validation
    ('pydantic', 'pydantic'),
    # SQLAlchemy - SQL toolkit
    ('sqlalchemy', 'sqlalchemy'),
    # Pandas - data analysis
    ('pandas-dev', 'pandas'),
    # NumPy - numerical computing
    ('numpy', 'numpy'),
    # Scikit-learn - machine learning
    ('scikit-learn', 'scikit-learn'),
]

# ---------------------------------------------------------------------------
# Statistical Analysis Configuration
# ---------------------------------------------------------------------------
SIGNIFICANCE_LEVEL = 0.05
CONFIDENCE_LEVEL = 0.95

# ---------------------------------------------------------------------------
# Output Configuration
# ---------------------------------------------------------------------------
OUTPUT_DIR = 'results'
FIGURES_DIR = 'figures'

# ---------------------------------------------------------------------------
# Data Loading Configuration
# ---------------------------------------------------------------------------
# If True, load from the existing raw_metrics.csv instead of collecting new
# data. Set to False to recollect data with the fixed cognitive_complexity
# calculation.
USE_EXISTING_METRICS = False
RAW_METRICS_FILE = 'results/raw_metrics.csv'  # Path to existing raw metrics CSV file

# ---------------------------------------------------------------------------
# Analysis Mode Configuration
# ---------------------------------------------------------------------------
# If True, only perform regression analysis and hypothesis testing
# (t-tests, z-tests).
FOCUSED_MODE = True

# ---------------------------------------------------------------------------
# Parallelization Configuration
# ---------------------------------------------------------------------------
MAX_WORKERS = None  # None = use CPU count, or set to a specific number
PARALLEL_REPOS = True  # Process repositories in parallel
PARALLEL_FILES = True  # Analyze files in parallel