"""
Spark TTS Constants

Token IDs, special tokens, speakers, and emotions for Spark TTS model.
Migrated from Veena3/Orpheus to Spark TTS (BiCodec) architecture.
"""

import os

# Generation stop tokens.
# Spark TTS terminates decoding on the chat-style <|im_end|> marker.
TRAINING_STOP_TOKEN_IDS = ["<|im_end|>"]  # String-based stop token for Spark TTS

# Spark TTS speaker roster: 12 voices from training.
# Position in this tuple *is* the internal speaker ID (speaker_0 .. speaker_11),
# so ordering here is load-bearing — do not reorder.
# Model: BayAreaBoys/spark_tts_4speaker (HuggingFace) has 12 speaker tokens.
_SPEAKER_ORDER = (
    "lipakshi",   # speaker_0
    "vardan",     # speaker_1
    "reet",       # speaker_2
    "Nandini",    # speaker_3
    "krishna",    # speaker_4
    "anika",      # speaker_5
    "adarsh",     # speaker_6
    "Nilay",      # speaker_7
    "Aarvi",      # speaker_8
    "Asha",       # speaker_9
    "Bittu",      # speaker_10
    "Mira",       # speaker_11
)

# Maps user-facing speaker names to internal speaker IDs.
SPEAKER_MAP = {speaker: speaker_id for speaker_id, speaker in enumerate(_SPEAKER_ORDER)}

# Friendly speaker aliases (user-facing → internal).
# Callers may use these names in place of the internal training names.
_FRIENDLY_ALIASES = {
    "Mitra": "lipakshi",
    "Aaranya": "reet",
    "Taru": "Nandini",
    "Neer": "Nilay",
    "Dhruva": "vardan",
    "Ira": "anika",
    "Veda": "adarsh",
    "Aria": "krishna",
}

# The four newer speakers have no separate alias: their friendly name is the
# internal name itself, so they map to themselves.
FRIENDLY_SPEAKER_MAP = {
    **_FRIENDLY_ALIASES,
    **{speaker: speaker for speaker in ("Aarvi", "Asha", "Bittu", "Mira")},
}

# Valid speaker names (for API validation).
# Union of internal names and friendly names, de-duplicated while preserving
# first-seen order: "Aarvi", "Asha", "Bittu", and "Mira" appear in both
# SPEAKER_MAP and FRIENDLY_SPEAKER_MAP, and without the dedupe they were
# listed twice in the ValueError raised by resolve_speaker_name().
INDIC_SPEAKERS = list(SPEAKER_MAP.keys())
ALL_SPEAKER_NAMES = list(dict.fromkeys(INDIC_SPEAKERS + list(FRIENDLY_SPEAKER_MAP.keys())))

def resolve_speaker_name(name: str) -> str:
    """
    Resolve friendly speaker name to internal name.

    Args:
        name: Speaker name (friendly or internal)

    Returns:
        Internal speaker name

    Raises:
        ValueError: If *name* is neither a friendly nor an internal speaker name.

    Examples:
        resolve_speaker_name("Mitra") -> "lipakshi"
        resolve_speaker_name("lipakshi") -> "lipakshi"
    """
    # Friendly aliases win first; fall through on a miss (EAFP).
    try:
        return FRIENDLY_SPEAKER_MAP[name]
    except KeyError:
        pass

    # Already an internal speaker name — accept it unchanged.
    if name in INDIC_SPEAKERS:
        return name

    # Unknown speaker: report every accepted spelling to the caller.
    raise ValueError(f"Invalid speaker name: {name}. Valid names: {', '.join(ALL_SPEAKER_NAMES)}")

# Legacy → current emotion-tag mapping, used for backward compatibility.
# Earlier releases accepted angle-bracket tags (<emotion>); Spark TTS uses
# square-bracket tags ([emotion]).
LEGACY_EMOTION_MAP = {
    "<angry>": "[angry]",
    "<curious>": "[curious]",
    "<excited>": "[excited]",
    "<giggle>": "[giggle]",
    "<laugh_harder>": "[laughs harder]",
    "<laugh>": "[laughs]",
    "<scream>": "[screams]",
    "<sigh>": "[sighs]",
    "<sing>": "[sings]",
    "<whisper>": "[whispers]",
}

# Spark TTS emotion tags (bracket format: [emotion]), derived from the legacy
# map so the two can never drift apart.
# NOTE: API users must now use [emotion] instead of <emotion>.
INDIC_EMOTION_TAGS = list(LEGACY_EMOTION_MAP.values())

# All emotion tags (for validation) — alias of INDIC_EMOTION_TAGS.
ALL_EMOTION_TAGS = INDIC_EMOTION_TAGS

# Model configuration
DEFAULT_MODEL_PATH = "BayAreaBoys/spark_tts_4speaker"  # HuggingFace repo ID for the LM
DEFAULT_MAX_MODEL_LEN = 4096  # Lower than old model (was 8192); mirrored in VLLM_CONFIG["max_model_len"]

# BiCodec Audio Tokenizer
# Location: resolved from env var MODEL_PATH, then SPARK_TTS_MODEL_PATH,
# then a baked-in container default. Note the nested get(): an explicitly set
# but empty MODEL_PATH ("") is honored as-is, not skipped.
BICODEC_TOKENIZER_PATH = os.environ.get('MODEL_PATH', os.environ.get('SPARK_TTS_MODEL_PATH', '/models/spark_tts_4speaker'))

# Audio configuration (output PCM format)
AUDIO_SAMPLE_RATE = 16000  # Changed from 24kHz to 16kHz for BiCodec
AUDIO_CHANNELS = 1  # mono
AUDIO_BITS_PER_SAMPLE = 16  # 16-bit signed PCM

# Generation defaults for Spark TTS (sampling parameters passed to the LM)
DEFAULT_TEMPERATURE = 0.8  # Higher than old model (was 0.4)
DEFAULT_TOP_K = 50
DEFAULT_TOP_P = 1.0  # 1.0 = nucleus sampling effectively disabled
DEFAULT_MAX_TOKENS = 4096  # Increased to handle complex multilingual generation
DEFAULT_MIN_TOKENS = 28  # Minimum tokens for generation (legacy compatibility)
DEFAULT_REPETITION_PENALTY = 1.0  # 1.0 = no penalty (legacy compatibility)
DEFAULT_SEED = None  # None = random, set integer for reproducibility

# vLLM Configuration optimizations for Spark TTS
# Keys mirror vLLM engine arguments; passed through at engine construction.
# NOTE: gpu_memory_utilization adjusted based on available memory
# Production: 0.85, Limited memory: 0.3-0.5
#
# OPTIMIZATION Dec 2025:
# - enable_chunked_prefill: Prevents long prompts from blocking concurrent streams
# - async_scheduling: DISABLED - caused EngineCore issues in production
# OPTIMIZATION Feb 2026:
# Reduced gpu_memory_utilization from 0.85 to 0.25
# Model is only 0.5B (~1.3GB weights). At 0.85, vLLM pre-allocated 65GB for KV cache
# (enough for 1,399 concurrent seqs) when peak actual need is ~100 concurrent.
# At 0.25: ~18GB KV cache, supports ~380 concurrent seqs (4x peak), frees ~48GB VRAM.
# This enables fitting on L4 (24GB) or even T4 (16GB) GPUs.
VLLM_CONFIG = {
    "dtype": "bfloat16",
    "gpu_memory_utilization": 0.25,  # 0.5B model needs <2GB; 0.25 gives ~380 concurrent seqs
    "max_model_len": 4096,  # keep in sync with DEFAULT_MAX_MODEL_LEN above
    "max_num_batched_tokens": 4096,
    "enable_prefix_caching": True,  # Cache common prompts for faster TTFB
    "enable_chunked_prefill": True,  # Chunk long prefills to avoid blocking concurrent streams
    "enforce_eager": False,  # Enable CUDA graphs for low latency
    "disable_log_stats": False,  # keep vLLM stats logging on for production monitoring
    "trust_remote_code": True,  # model repo ships custom code; required to load it
    "tensor_parallel_size": 1,  # single-GPU deployment
}

# Streaming configuration (may need adjustment for BiCodec)
STREAM_BUFFER_SIZE = 50  # Buffer size for streaming (BiCodec-specific, TBD)
BICODEC_BATCH_SIZE = 64  # tokens per BiCodec decode batch — presumably tuned empirically; confirm
BICODEC_BATCH_TIMEOUT_MS = 15  # max wait before flushing a partial batch

# Backward compatibility: Old SNAC constants for files not yet migrated
# These are NOT used by Spark TTS but needed by legacy streaming code
# TODO: Remove once streaming_pipeline.py is fully migrated to BiCodec
CODE_START_TOKEN_ID = 128257  # Legacy SNAC constant
CODE_END_TOKEN_ID = 128258    # Legacy SNAC constant
SNAC_MIN_ID = 128266          # Legacy SNAC constant (lower bound of SNAC audio-token ID range)
SNAC_MAX_ID = 156937          # Legacy SNAC constant (upper bound of SNAC audio-token ID range)

