"""
Central config: env vars, language mappings, provider configs, version constants.
All tunables live here so Docker ENV overrides work cleanly.
"""
from __future__ import annotations

import os
import uuid
from dataclasses import dataclass, field
from pathlib import Path
from typing import Optional

from dotenv import load_dotenv

# Load variables from the `.env` file that sits one directory above this
# file's directory. This runs before any of the os.getenv() reads below, so
# values from that file are visible to the module-level settings.
# NOTE(review): python-dotenv's default (override=False) leaves variables that
# are already set in the process environment untouched — confirm that is the
# intended precedence for Docker ENV overrides.
load_dotenv(Path(__file__).resolve().parent.parent / ".env")

# === VERSION CONSTANTS (bump on schema/prompt/logic changes) ===
# One version string per component; all four currently start at "1.0.0".
# NOTE(review): presumably these are recorded alongside outputs so results can
# be traced to the prompt/schema/trimmer/validator revision — confirm at the
# call sites.
PROMPT_VERSION = "1.0.0"
SCHEMA_VERSION = "1.0.0"
TRIMMER_VERSION = "1.0.0"
VALIDATOR_VERSION = "1.0.0"

# === LANGUAGE MAPPINGS ===
# Prompt hint shared verbatim by every non-English entry in LANGUAGE_MAP.
_SANDHI_HINT = "Don't over-split words. Preserve Sandhi/combined forms as spoken."

# ISO 639-1 code -> (display name, script name, script-specific prompt hint)
LANGUAGE_MAP: dict[str, tuple[str, str, str]] = {
    "hi": ("Hindi", "Devanagari", _SANDHI_HINT),
    "mr": ("Marathi", "Devanagari", _SANDHI_HINT),
    "te": ("Telugu", "Telugu", _SANDHI_HINT),
    "ta": ("Tamil", "Tamil", _SANDHI_HINT),
    "kn": ("Kannada", "Kannada", _SANDHI_HINT),
    "ml": ("Malayalam", "Malayalam", _SANDHI_HINT),
    "gu": ("Gujarati", "Gujarati", _SANDHI_HINT),
    "pa": ("Punjabi", "Gurmukhi", _SANDHI_HINT),
    "bn": ("Bengali", "Bengali", _SANDHI_HINT),
    "as": ("Assamese", "Assamese", _SANDHI_HINT),
    "or": ("Odia", "Odia", _SANDHI_HINT),
    "en": ("English", "Latin", "Standard English transcription rules apply."),
}

# Language codes in LANGUAGE_MAP insertion order.
SUPPORTED_LANGUAGES = list(LANGUAGE_MAP)

# === AUDIO EVENT TAGS ===
# Fixed vocabulary of ten non-speech event labels.
# NOTE(review): presumably the set of tags the transcription prompt/validator
# accepts — confirm against the consumers of this list.
AUDIO_EVENT_TAGS = [
    "laugh", "cough", "sigh", "breath", "singing",
    "noise", "music", "applause", "snort", "cry",
]

# === AUDIO POLISHING THRESHOLDS ===
# Hard-coded here (not read from the environment).
# NOTE(review): the `_S` / `_MS` suffixes presumably denote seconds and
# milliseconds respectively — confirm at the call sites.
MIN_SEGMENT_DURATION_S = 2.0
MAX_SEGMENT_DURATION_S = 15.0
PREFERRED_MAX_DURATION_S = 10.0
SPLIT_SEARCH_START_S = 7.0
# NOTE(review): currently the same values as
# (PREFERRED_MAX_DURATION_S, MAX_SEGMENT_DURATION_S) — confirm whether these
# are meant to stay in sync if either constant changes.
FORCE_CUT_RANGE = (10.0, 15.0)
BOUNDARY_CHECK_MS = 50
SILENCE_PAD_MS = 150
# Stored as a fraction: 0.40 corresponds to 40%.
BOUNDARY_TRIM_MAX_PCT = 0.40  # max % of segment to trim from either end

# === PROVIDER SETTINGS ===
# The model identifier is fixed in code; TEMPERATURE and THINKING_LEVEL can be
# overridden through environment variables of the same name.
GEMINI_MODEL = "gemini-3-flash-preview"
TEMPERATURE = float(os.environ.get("TEMPERATURE", "0"))
THINKING_LEVEL = os.environ.get("THINKING_LEVEL", "low")

# === BATCH SETTINGS ===
# Overridable through environment variables of the same name (parsed as int).
WORKER_BATCH_SIZE = int(os.environ.get("WORKER_BATCH_SIZE", "1000"))
BATCH_INTERVAL_SECONDS = int(os.environ.get("BATCH_INTERVAL_SECONDS", "60"))
# 0 = unlimited (production). Set to N for test runs that stop after N videos.
MAX_VIDEOS = int(os.environ.get("MAX_VIDEOS", "0"))

# Fixed in code.
MAX_RETRIES_429 = 3
FLOOD_THRESHOLD_PCT = 0.10  # 10% 429s triggers provider switch
HEARTBEAT_INTERVAL_S = 30
METRICS_UPDATE_INTERVAL_S = 5

# === VALIDATION ===
# Enabled only when the variable equals "true" (case-insensitive); any other
# value, or no value at all, leaves it off.
ENABLE_GPU_VALIDATION = "true" == os.environ.get("ENABLE_GPU_VALIDATION", "false").lower()


def _env(key: str, default: str = "") -> str:
    """Return the environment variable *key*, or *default* if it is not set.

    A variable that is set but empty is returned as ``""``, not as *default*.
    """
    return os.getenv(key, default)


@dataclass
class EnvConfig:
    """Resolved environment configuration. Reads env vars at instantiation time.

    Any field left at its declared default is filled from the environment in
    ``__post_init__``; a value passed explicitly to the constructor wins.
    "Unset" is detected by comparing against the declared default, so passing
    a default value explicitly (e.g. ``gemini_key_index=0``) still lets the
    environment override it.
    """
    r2_endpoint_url: str = ""            # env: R2_ENDPOINT_URL
    r2_bucket: str = "1-cleaned-data"    # env: R2_BUCKET
    r2_output_bucket: str = "transcribed"  # env: R2_OUTPUT_BUCKET
    r2_access_key_id: str = ""           # env: R2_ACCESS_KEY_ID
    r2_secret_access_key: str = ""       # env: R2_SECRET_ACCESS_KEY
    r2_output_prefix: str = ""           # env: R2_OUTPUT_PREFIX
    gemini_key: str = ""                 # env: GEMINI_KEY; ends up equal to primary_gemini_key
    supabase_url: str = ""               # env: URL
    supabase_admin_key: str = ""         # env: SUPABASE_ADMIN
    database_url: str = ""               # env: DATABASE_URL
    worker_id: str = ""                  # env: WORKER_ID, else a generated 12-char id
    gpu_type: str = "unknown"            # env: GPU_TYPE
    mock_mode: bool = False              # env: MOCK_MODE ("true", case-insensitive)
    # Multi-key pool: all GCP project keys (identical quotas, 20K RPM each)
    # GEMINI_KEY_INDEX selects primary at deployment, rest become fallbacks
    gemini_keys: list[str] = field(default_factory=list)
    gemini_key_index: int = 0            # env: GEMINI_KEY_INDEX

    def __post_init__(self) -> None:
        """Fill unfilled fields from environment variables.

        Raises:
            ValueError: if GEMINI_KEY_INDEX is set to a non-integer value.
            IndexError: if the key index is negative beyond the pool size.
        """
        self.r2_endpoint_url = self.r2_endpoint_url or _env("R2_ENDPOINT_URL")
        if self.r2_bucket == "1-cleaned-data":
            self.r2_bucket = _env("R2_BUCKET", "1-cleaned-data")
        if self.r2_output_bucket == "transcribed":
            self.r2_output_bucket = _env("R2_OUTPUT_BUCKET", "transcribed")
        self.r2_access_key_id = self.r2_access_key_id or _env("R2_ACCESS_KEY_ID")
        self.r2_secret_access_key = self.r2_secret_access_key or _env("R2_SECRET_ACCESS_KEY")
        self.r2_output_prefix = self.r2_output_prefix or _env("R2_OUTPUT_PREFIX", "")
        self.gemini_key = self.gemini_key or _env("GEMINI_KEY")
        self.supabase_url = self.supabase_url or _env("URL")
        self.supabase_admin_key = self.supabase_admin_key or _env("SUPABASE_ADMIN")
        self.database_url = self.database_url or _env("DATABASE_URL")
        # `or`-chain (rather than a getenv default) so that WORKER_ID="" also
        # falls through to a generated id instead of yielding an empty one.
        self.worker_id = self.worker_id or _env("WORKER_ID") or str(uuid.uuid4())[:12]
        if self.gpu_type == "unknown":
            self.gpu_type = _env("GPU_TYPE", "unknown")
        if not self.mock_mode:
            self.mock_mode = _env("MOCK_MODE", "false").lower() == "true"

        # Build gemini key pool from env if not set explicitly: the primary
        # GEMINI_KEY first, then whichever extra project keys are non-empty.
        if not self.gemini_keys:
            extras = (
                _env(name)
                for name in ("GEMINI_PROJECT2", "GEMINI_PROJECT3", "GEMINI_PROJECT4")
            )
            self.gemini_keys = [key for key in (self.gemini_key, *extras) if key]

        # Only consult the environment when no index was passed explicitly;
        # a set-but-empty GEMINI_KEY_INDEX is treated as 0 rather than crashing.
        if self.gemini_key_index == 0:
            raw_index = _env("GEMINI_KEY_INDEX").strip()
            self.gemini_key_index = int(raw_index) if raw_index else 0

        # Keep `gemini_key` in agreement with `primary_gemini_key`, including
        # when the index points past the end of the pool.
        if self.gemini_keys:
            self.gemini_key = self.primary_gemini_key

    @property
    def primary_gemini_key(self) -> str:
        """Return the pool key selected by ``gemini_key_index``.

        An index past the end of the pool is clamped to the last key; negative
        indices follow normal Python sequence indexing. With an empty pool the
        plain ``gemini_key`` field is returned.
        """
        if self.gemini_keys:
            idx = min(self.gemini_key_index, len(self.gemini_keys) - 1)
            return self.gemini_keys[idx]
        return self.gemini_key

    @property
    def fallback_gemini_keys(self) -> list[str]:
        """All keys except the primary — used as 429 fallbacks.

        Filtering is by value, so any duplicates of the primary key in the
        pool are excluded as well.
        """
        primary = self.primary_gemini_key
        return [k for k in self.gemini_keys if k != primary]

    def validate(self) -> list[str]:
        """Return a list of human-readable configuration errors.

        The list is empty when the configuration is usable. No checks are
        performed in mock mode.
        """
        errors: list[str] = []
        if not self.mock_mode:
            if not self.gemini_keys:
                errors.append("At least one GEMINI_KEY required")
            if not self.supabase_url:
                errors.append("URL (Supabase) is required")
            if not self.r2_endpoint_url:
                errors.append("R2_ENDPOINT_URL is required")
        return errors


def get_config() -> EnvConfig:
    """Build and return a new EnvConfig resolved from the current environment."""
    config = EnvConfig()
    return config
