#!/usr/bin/env python3
"""
Transcription Validation Dashboard
==================================

Minimal Gradio dashboard to:
1. Listen to audio segments
2. View transcriptions (native, romanized)
3. See CTC alignment scores
4. Compare with ASR (Whisper) for validation

Run:
    python dashboard.py
    
Then open http://localhost:7860
"""
import os
import json
import gradio as gr
from pathlib import Path


def load_analysis_data():
    """Read the analysis and scoring JSON files from ``analysis_results/``.

    Both files are optional: a file that does not exist simply leaves its
    part of the result empty.

    Returns:
        A dict with two keys:
        ``"segments"`` - the ``"segments"`` list from ``final_analysis.json``
        (empty list when absent), and
        ``"scores"`` - a mapping from each result's ``"segment_id"`` to its
        ``"scoring"`` value, taken from the ``"results"`` list of
        ``scoring_results.json``.
    """
    result = {"segments": [], "scores": {}}

    segments_file = "analysis_results/final_analysis.json"
    if os.path.exists(segments_file):
        with open(segments_file, 'r', encoding='utf-8') as fh:
            result["segments"] = json.load(fh).get("segments", [])

    scores_file = "analysis_results/scoring_results.json"
    if os.path.exists(scores_file):
        with open(scores_file, 'r', encoding='utf-8') as fh:
            entries = json.load(fh).get("results", [])
        # Later entries overwrite earlier ones that share a segment_id.
        result["scores"].update(
            (entry["segment_id"], entry["scoring"]) for entry in entries
        )

    return result


def get_segment_list(data):
    """Return the ``"segment_id"`` of every entry in ``data["segments"]``.

    The IDs are returned in the same order as the segments appear.
    """
    segments = data["segments"]
    return [entry["segment_id"] for entry in segments]


def get_segment_details(segment_id, data):
    """Look up one segment by ID.

    Returns the first entry of ``data["segments"]`` whose ``"segment_id"``
    equals ``segment_id``, or ``None`` when no entry matches.
    """
    # Lazy generator: the scan stops at the first match, like an early return.
    matching = (
        entry for entry in data["segments"]
        if entry["segment_id"] == segment_id
    )
    return next(matching, None)


def format_word_scores(scores, limit=20):
    """Render per-word confidence entries as one marker-prefixed line each.

    Args:
        scores: Sequence of dicts read for the keys ``"word"`` and
            ``"confidence"``. ``None`` or an empty sequence yields a
            placeholder message.
        limit: Maximum number of leading entries to render. ``None`` renders
            every entry. Defaults to 20 to keep the display short.

    Returns:
        A newline-joined string with one ``"<marker> <word>: <conf>"`` line
        per rendered entry, where the marker is 🟢 for confidence >= 0.85,
        🟡 for >= 0.7 and 🔴 otherwise; or ``"No word scores available"``
        when there is nothing to show.
    """
    if not scores:
        return "No word scores available"

    lines = []
    for ws in scores[:limit]:
        # A missing key and an explicit JSON null both count as zero
        # confidence; a bare .get(..., 0) would let None through and fail on
        # the comparison and the float formatting below.
        conf = ws.get("confidence") or 0
        # Read "word" as tolerantly as "confidence" so one incomplete entry
        # does not abort the whole listing.
        word = ws.get("word", "?")
        color = "🟢" if conf >= 0.85 else "🟡" if conf >= 0.7 else "🔴"
        lines.append(f"{color} {word}: {conf:.3f}")

    return "\n".join(lines)


def create_dashboard():
    """Build the Gradio Blocks app for browsing transcription segments.

    The analysis and scoring data are loaded once, when this function is
    called, and captured by the event callback; the JSON files are not
    re-read when the selected segment changes.

    Returns:
        The assembled ``gr.Blocks`` app. Launching it is left to the caller.
    """

    # Load data
    data = load_analysis_data()
    segment_ids = get_segment_list(data)

    def update_display(segment_id):
        """Compute the values of all display components for one segment.

        Returns a 7-tuple in the order of the registered outputs: audio file
        path (or ``None``), native, punctuated, code-switch and romanized
        transcriptions, the Markdown score summary, and the word-score text.
        """
        seg = get_segment_details(segment_id, data)
        if not seg:
            return None, "No data", "No data", "No data", "No data", "No scores", "N/A"

        # Only hand the player a path that names an existing file. A missing
        # key, an empty string or a JSON null all mean "no audio"; passing
        # None to os.path would raise TypeError, and a directory path is not
        # playable either.
        audio_path = seg.get("audio_path")
        if not audio_path or not os.path.isfile(audio_path):
            audio_path = None

        # Get transcriptions (gemini-3-flash temp0_low). "or {}" also covers
        # keys that are present but null.
        model_data = (seg.get("models") or {}).get("gemini-3-flash-preview") or {}
        config_data = model_data.get("temp0_low") or {}

        native = config_data.get("native", "N/A")
        punctuated = config_data.get("punctuated", "N/A")
        code_switch = config_data.get("code_switch", "N/A")
        romanized = config_data.get("romanized", "N/A")

        # Get scoring. Null values are coerced to zero / empty so the
        # threshold comparisons and float formatting below cannot fail.
        scoring = data["scores"].get(segment_id) or {}
        alignment_score = scoring.get("alignment_score") or 0
        avg_confidence = scoring.get("average_confidence") or 0
        word_scores = scoring.get("word_scores") or []
        duration_sec = seg.get("duration_sec") or 0

        # Format score display
        if alignment_score >= 0.8:
            quality = "🟢 HIGH"
        elif alignment_score >= 0.7:
            quality = "🟡 MEDIUM"
        else:
            quality = "🔴 LOW"

        # NOTE(review): the three fields are separated by single newlines;
        # whether they render on separate lines depends on the Markdown
        # component's line-break handling - confirm in the running UI.
        score_text = f"""
**Alignment Score**: {alignment_score:.4f} {quality}
**Avg Confidence**: {avg_confidence:.4f}
**Duration**: {duration_sec:.2f}s
        """

        word_display = format_word_scores(word_scores)

        return (
            audio_path,
            native,
            punctuated,
            code_switch,
            romanized,
            score_text,
            word_display
        )

    # Build UI
    with gr.Blocks(title="Transcription Validation", theme=gr.themes.Soft()) as demo:
        gr.Markdown("""
        # 🎙️ Transcription Validation Dashboard
        
        Compare Gemini transcriptions with CTC alignment scores.
        
        **Model**: gemini-3-flash-preview (temp=0, thinking=low)
        """)

        with gr.Row():
            segment_dropdown = gr.Dropdown(
                choices=segment_ids,
                value=segment_ids[0] if segment_ids else None,
                label="Select Segment",
                interactive=True
            )

        with gr.Row():
            with gr.Column(scale=1):
                audio_player = gr.Audio(label="Audio", type="filepath")

                gr.Markdown("### 📊 Alignment Scores")
                score_display = gr.Markdown("Loading...")

            with gr.Column(scale=2):
                gr.Markdown("### 📝 Transcriptions")

                with gr.Tab("Native"):
                    native_text = gr.Textbox(label="Native Script", lines=3)

                with gr.Tab("Punctuated"):
                    punctuated_text = gr.Textbox(label="With Punctuation", lines=3)

                with gr.Tab("Code-Switch"):
                    codeswitch_text = gr.Textbox(label="Code-Switch", lines=3)

                with gr.Tab("Romanized"):
                    romanized_text = gr.Textbox(label="Romanized", lines=3)

        with gr.Row():
            gr.Markdown("### 📈 Word-Level Confidence")
            word_scores_display = gr.Textbox(
                label="Word Scores (🟢 ≥0.85, 🟡 ≥0.7, 🔴 <0.7)",
                lines=10
            )

        # Components refreshed by update_display, in the order of its return
        # tuple. Defined once so both event registrations stay in sync.
        display_outputs = [
            audio_player,
            native_text,
            punctuated_text,
            codeswitch_text,
            romanized_text,
            score_display,
            word_scores_display
        ]

        # Event handler
        segment_dropdown.change(
            fn=update_display,
            inputs=[segment_dropdown],
            outputs=display_outputs
        )

        # Initial load
        demo.load(
            fn=update_display,
            inputs=[segment_dropdown],
            outputs=display_outputs
        )

    return demo


if __name__ == "__main__":
    # Script entry point: announce the URL, build the app, then serve it.
    # server_name "0.0.0.0" with port 7860 matches the URL printed below for
    # local access; share=False keeps Gradio's public share link off.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
    }

    print("Starting Transcription Validation Dashboard...")
    print("Open http://localhost:7860 in your browser")

    demo = create_dashboard()
    demo.launch(**launch_options)