#!/usr/bin/env python3
"""
Transcription Review Dashboard
================================

Serves a web UI for reviewing audio segments alongside their transcriptions
and validation scores. Reads from consistency_test/ or transcriptions/ JSON.

Usage:
    cd /home/ubuntu/maya3_transcribe
    source venv/bin/activate
    python bin/dashboard.py [--port 8765] [--data consistency_test/v2_comparison.json] [--segments /path/to/segments]

Then open http://localhost:8765 in your browser.
"""
import os
import sys
import json
import argparse
import mimetypes
from pathlib import Path
from http.server import HTTPServer, SimpleHTTPRequestHandler
from urllib.parse import urlparse, parse_qs

# Directory two levels above this file (the usage notes run this script as
# bin/dashboard.py from the project directory).
PROJECT_ROOT = Path(__file__).parent.parent
# Data file loaded when --data is not given.
DEFAULT_DATA = PROJECT_ROOT / "consistency_test" / "v2_comparison.json"
# Audio segment directory used when --segments is not given.
# NOTE(review): hard-coded /tmp path for one specific recording id — confirm
# this is still the intended default.
DEFAULT_SEG_DIR = "/tmp/maya3_transcribe/pF_BQpHaIdU/extracted/pF_BQpHaIdU/segments"


def load_dashboard_data(data_path: str, seg_dir: str) -> dict:
    """Load transcription data from JSON and build the dashboard payload.

    Two input layouts are recognised:

    * Consistency-test format, ``{"runs": [{segment_name: {...}}, ...]}``.
      Each segment is summarised from the first run that contains it;
      ``consistent`` is true when every run that contains the segment has
      the same ``transcription`` value.
    * Pipeline output format,
      ``{"results": [{"segment_id": ..., "transcription": {...}, ...}]}``.

    Any other layout produces an empty segment list.

    Numeric scores and the status are normalised so that a JSON ``null``
    becomes ``0`` / ``"unknown"``: the page calls ``.toFixed()`` on the
    scores and would otherwise fail on ``null``.

    Args:
        data_path: Path of the JSON file to read (UTF-8).
        seg_dir: Directory in which each segment's audio file is looked up.

    Returns:
        ``{"segments": [...], "source": str(data_path)}`` where every segment
        dict carries the same keys in both input formats.

    Raises:
        OSError: If ``data_path`` cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    with open(data_path, "r", encoding="utf-8") as f:
        raw = json.load(f)

    def audio_fields(seg_name: str) -> dict:
        # isfile rather than exists: an empty name resolves to seg_dir itself,
        # which exists but is not playable audio.
        return {
            "id": seg_name,
            "audio_exists": os.path.isfile(os.path.join(seg_dir, seg_name)),
            "audio_url": f"/audio/{seg_name}",
        }

    segments = []

    if "runs" in raw:
        # Consistency test format: {config, runs: [{seg: {...}}, ...]}
        runs = raw["runs"]
        seg_names = sorted(set().union(*(run.keys() for run in runs)))

        for seg_name in seg_names:
            # Never empty: seg_name was collected from the keys of these runs.
            run_data = [run[seg_name] for run in runs if seg_name in run]
            first = run_data[0]
            reference = first.get("transcription")

            segments.append({
                **audio_fields(seg_name),
                "transcription": first.get("transcription", ""),
                "romanized": first.get("romanized", ""),
                "tagged": first.get("tagged", ""),
                "detected_language": first.get("detected_language", ""),
                "native_ctc": first.get("native_ctc") or 0,
                "roman_mms": first.get("roman_mms") or 0,
                "combined": first.get("combined") or 0,
                "status": first.get("status") or "unknown",
                "consistent": all(
                    rd.get("transcription") == reference for rd in run_data
                ),
                "num_runs": len(run_data),
                "runs": run_data,
            })

    elif "results" in raw:
        # Pipeline output format: {results: [{segment_id, transcription: {...}, ...}]}
        for result in raw["results"]:
            seg_name = result.get("segment_id") or ""
            tx = result.get("transcription")

            if isinstance(tx, dict):
                details = tx
                text = tx.get("transcription") or ""
            else:
                # A bare string is used as the text; null must not become "None".
                details = {}
                text = "" if tx is None else str(tx)

            segments.append({
                **audio_fields(seg_name),
                "transcription": text,
                "romanized": details.get("romanized", ""),
                "tagged": details.get("tagged", ""),
                "detected_language": details.get("detected_language", ""),
                "native_ctc": 0,
                "roman_mms": 0,
                "combined": result.get("validation_score", 0) or 0,
                "status": result.get("validation_status", "unknown") or "unknown",
                "consistent": True,
                "num_runs": 1,
                "runs": [],
            })

    return {"segments": segments, "source": str(data_path)}


class DashboardHandler(SimpleHTTPRequestHandler):
    """HTTP handler serving the dashboard page, the JSON API and audio files.

    GET routes:
        ``/`` and ``/index.html``  -> the embedded dashboard HTML
        ``/api/data``              -> ``data`` serialised as JSON
        ``/audio/<name>``          -> the file ``<name>`` inside ``seg_dir``

    Any other path is answered with 404.
    """

    # Payload returned by /api/data; main() assigns it on the class.
    data = {}
    # Directory audio files are served from; main() assigns it on the class.
    seg_dir = ""

    _AUDIO_PREFIX = "/audio/"

    def do_GET(self):
        """Dispatch a GET request to the matching route."""
        path = urlparse(self.path).path

        if path in ("/", "/index.html"):
            self._serve_html()
        elif path == "/api/data":
            self._serve_json(self.data)
        elif path.startswith(self._AUDIO_PREFIX):
            self._serve_audio(path[len(self._AUDIO_PREFIX):])
        else:
            self.send_error(404)

    def _serve_html(self):
        """Send the embedded dashboard page."""
        body = DASHBOARD_HTML.encode("utf-8")
        self.send_response(200)
        self.send_header("Content-Type", "text/html; charset=utf-8")
        self.send_header("Content-Length", str(len(body)))
        self.end_headers()
        self.wfile.write(body)

    def _serve_json(self, data):
        """Send ``data`` as a UTF-8 JSON document."""
        body = json.dumps(data, ensure_ascii=False).encode("utf-8")
        self.send_response(200)
        self.send_header("Content-Type", "application/json")
        self.send_header("Access-Control-Allow-Origin", "*")
        self.send_header("Content-Length", str(len(body)))
        self.end_headers()
        self.wfile.write(body)

    def _resolve_audio_path(self, raw_name):
        """Map a URL path remainder to a regular file inside ``seg_dir``.

        Returns the file path, or ``None`` when the name is empty, escapes
        ``seg_dir`` (``..`` components or an absolute path) or does not name
        a regular file.
        """
        # Imported locally because the module only imports urlparse/parse_qs.
        from urllib.parse import unquote

        # Browsers percent-encode spaces and non-ASCII characters in URLs.
        name = unquote(raw_name)
        if not name or "\x00" in name:
            return None

        # abspath normalises ".." without resolving symlinks, so symlinked
        # audio placed inside seg_dir keeps working.
        root = os.path.abspath(self.seg_dir)
        candidate = os.path.abspath(os.path.join(root, name))
        try:
            contained = os.path.commonpath([root, candidate]) == root
        except ValueError:
            # Raised e.g. for paths on different drives on Windows.
            contained = False

        if not contained or not os.path.isfile(candidate):
            return None
        return candidate

    def _serve_audio(self, filename):
        """Stream one audio file from ``seg_dir``.

        Args:
            filename: Still percent-encoded part of the URL path after
                ``/audio/``.
        """
        # The error text uses the raw (undecoded) name so that decoded control
        # characters can never reach the status line.
        not_found = f"Audio not found: {filename}"

        audio_path = self._resolve_audio_path(filename)
        if audio_path is None:
            self.send_error(404, not_found)
            return

        try:
            audio = open(audio_path, "rb")
        except OSError:
            self.send_error(404, not_found)
            return

        with audio:
            mime = mimetypes.guess_type(audio_path)[0] or "audio/flac"
            # Size of the file actually opened, not of a path re-checked later.
            size = os.fstat(audio.fileno()).st_size

            self.send_response(200)
            self.send_header("Content-Type", mime)
            self.send_header("Content-Length", str(size))
            # NOTE: Range request headers are not interpreted; the full file
            # is always sent.
            self.send_header("Accept-Ranges", "bytes")
            self.end_headers()

            try:
                # Chunked copy instead of loading the whole file into memory.
                self.copyfile(audio, self.wfile)
            except (BrokenPipeError, ConnectionResetError):
                # The client went away mid-transfer; nothing left to send.
                pass

    def log_message(self, format, *args):
        """Log as usual, but skip request lines for /api/ and /audio/."""
        first = str(args[0]) if args else ""
        if "/api/" not in first and "/audio/" not in first:
            super().log_message(format, *args)


# Single-page dashboard served at "/" and "/index.html". The embedded script
# fetches /api/data and renders one card per segment. Every value taken from
# the data file is HTML-escaped before it is inserted into the page.
DASHBOARD_HTML = r"""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Transcription Review Dashboard</title>
<style>
  :root {
    --bg: #0f1117; --surface: #1a1d27; --surface2: #242836;
    --border: #2e3348; --text: #e1e4ed; --text2: #8b90a5;
    --accept: #22c55e; --review: #f59e0b; --retry: #3b82f6; --reject: #ef4444;
    --accent: #818cf8;
  }
  * { margin: 0; padding: 0; box-sizing: border-box; }
  body { font-family: 'SF Mono', 'Fira Code', 'JetBrains Mono', monospace;
         background: var(--bg); color: var(--text); line-height: 1.5; }

  .header { padding: 20px 24px; border-bottom: 1px solid var(--border);
            display: flex; justify-content: space-between; align-items: center; }
  .header h1 { font-size: 16px; font-weight: 600; }
  .stats { display: flex; gap: 16px; font-size: 12px; color: var(--text2); }
  .stat-pill { padding: 4px 10px; border-radius: 12px; background: var(--surface2); }

  .filters { padding: 12px 24px; border-bottom: 1px solid var(--border);
             display: flex; gap: 8px; flex-wrap: wrap; }
  .filter-btn { padding: 5px 14px; border-radius: 6px; border: 1px solid var(--border);
                background: var(--surface); color: var(--text2); cursor: pointer;
                font-size: 12px; font-family: inherit; transition: all 0.15s; }
  .filter-btn:hover { border-color: var(--accent); color: var(--text); }
  .filter-btn.active { background: var(--accent); color: #fff; border-color: var(--accent); }

  .segments { padding: 16px 24px; display: flex; flex-direction: column; gap: 12px; }

  .segment { background: var(--surface); border: 1px solid var(--border);
             border-radius: 10px; overflow: hidden; transition: border-color 0.15s; }
  .segment:hover { border-color: var(--accent); }
  .segment.playing { border-color: var(--accent); box-shadow: 0 0 0 1px var(--accent); }

  .seg-header { padding: 12px 16px; display: flex; justify-content: space-between;
                align-items: center; border-bottom: 1px solid var(--border); }
  .seg-id { font-size: 13px; font-weight: 600; }
  .seg-badges { display: flex; gap: 6px; align-items: center; }

  .badge { padding: 2px 8px; border-radius: 4px; font-size: 11px; font-weight: 600;
           text-transform: uppercase; }
  .badge-accept { background: rgba(34,197,94,0.15); color: var(--accept); }
  .badge-review { background: rgba(245,158,11,0.15); color: var(--review); }
  .badge-retry { background: rgba(59,130,246,0.15); color: var(--retry); }
  .badge-reject { background: rgba(239,68,68,0.15); color: var(--reject); }
  .badge-unknown { background: var(--surface2); color: var(--text2); }

  .badge-consistent { background: rgba(34,197,94,0.1); color: var(--accept); font-weight: 400; }
  .badge-differs { background: rgba(239,68,68,0.1); color: var(--reject); font-weight: 400; }

  .seg-body { padding: 16px; display: grid; grid-template-columns: 1fr 1fr; gap: 16px; }
  @media (max-width: 900px) { .seg-body { grid-template-columns: 1fr; } }

  .audio-col { display: flex; flex-direction: column; gap: 10px; }
  .text-col { display: flex; flex-direction: column; gap: 8px; }

  audio { width: 100%; height: 40px; border-radius: 6px; }
  audio::-webkit-media-controls-panel { background: var(--surface2); }

  .scores { display: flex; gap: 12px; font-size: 12px; }
  .score { display: flex; flex-direction: column; align-items: center; }
  .score-val { font-size: 18px; font-weight: 700; }
  .score-label { color: var(--text2); font-size: 10px; text-transform: uppercase; }
  .score-bar { width: 60px; height: 4px; background: var(--surface2); border-radius: 2px;
               margin-top: 3px; overflow: hidden; }
  .score-fill { height: 100%; border-radius: 2px; transition: width 0.3s; }

  .tx-block { background: var(--surface2); padding: 10px 12px; border-radius: 6px;
              font-size: 14px; line-height: 1.7; }
  .tx-label { font-size: 10px; text-transform: uppercase; color: var(--text2);
              margin-bottom: 4px; letter-spacing: 0.5px; }
  .tx-native { font-family: 'Noto Sans Telugu', 'Noto Sans Devanagari', 'Noto Sans',
               sans-serif; font-size: 16px; }
  .tx-roman { color: var(--text2); font-size: 13px; }
  .tx-lang { font-size: 11px; color: var(--accent); }

  .runs-toggle { font-size: 11px; color: var(--accent); cursor: pointer;
                 border: none; background: none; font-family: inherit; padding: 4px 0; }
  .runs-detail { display: none; padding: 8px 12px; background: var(--bg);
                 border-radius: 6px; font-size: 12px; margin-top: 6px; }
  .runs-detail.open { display: block; }
  .run-item { padding: 4px 0; border-bottom: 1px solid var(--border); }
  .run-item:last-child { border-bottom: none; }

  .empty { text-align: center; padding: 60px 24px; color: var(--text2); }
  .keyboard-hint { font-size: 11px; color: var(--text2); }
  kbd { padding: 1px 6px; background: var(--surface2); border: 1px solid var(--border);
        border-radius: 3px; font-size: 10px; }
</style>
</head>
<body>

<div class="header">
  <h1>Transcription Review</h1>
  <div class="stats">
    <span class="stat-pill" id="stat-total">-- segments</span>
    <span class="stat-pill" id="stat-accept">-- accept</span>
    <span class="stat-pill" id="stat-avg">avg S: --</span>
    <span class="keyboard-hint"><kbd>J</kbd>/<kbd>K</kbd> navigate &middot; <kbd>Space</kbd> play/pause</span>
  </div>
</div>

<div class="filters" id="filters">
  <button class="filter-btn active" data-filter="all">All</button>
  <button class="filter-btn" data-filter="accept">Accept</button>
  <button class="filter-btn" data-filter="review">Review</button>
  <button class="filter-btn" data-filter="retry">Retry</button>
  <button class="filter-btn" data-filter="reject">Reject</button>
</div>

<div class="segments" id="segments"></div>

<script>
let DATA = null;
let currentFilter = 'all';
let currentIdx = -1;

// Statuses that have a matching .badge-* style; anything else uses badge-unknown.
const KNOWN_STATUSES = ['accept', 'review', 'retry', 'reject'];
const RUN_PREVIEW_CHARS = 100;

async function init() {
  try {
    const resp = await fetch('/api/data');
    if (!resp.ok) throw new Error(`HTTP ${resp.status}`);
    DATA = await resp.json();
  } catch (err) {
    document.getElementById('segments').innerHTML =
      `<div class="empty">Failed to load data: ${escHtml(String(err))}</div>`;
    return;
  }
  renderStats();
  renderSegments();
  setupKeyboard();
  setupFilters();
}

function renderStats() {
  const segs = DATA.segments;
  const statuses = {};
  let totalS = 0, countS = 0;
  segs.forEach(s => {
    statuses[s.status] = (statuses[s.status] || 0) + 1;
    if (s.combined > 0) { totalS += s.combined; countS++; }
  });
  document.getElementById('stat-total').textContent = `${segs.length} segments`;
  document.getElementById('stat-accept').textContent =
    `${statuses.accept||0} accept / ${statuses.review||0} review / ${statuses.retry||0} retry / ${statuses.reject||0} reject`;
  document.getElementById('stat-avg').textContent = `avg S: ${countS ? (totalS/countS).toFixed(3) : '--'}`;
}

function scoreColor(val) {
  if (val >= 0.80) return 'var(--accept)';
  if (val >= 0.65) return 'var(--review)';
  if (val >= 0.55) return 'var(--retry)';
  return 'var(--reject)';
}

// Coerce a possibly missing/null score to a finite number.
function num(v) {
  const n = Number(v);
  return Number.isFinite(n) ? n : 0;
}

// Markup for one score column (value, label, bar); label is a fixed literal.
function scoreHtml(label, value) {
  const v = num(value);
  const color = scoreColor(v);
  const width = Math.max(0, Math.min(100, v * 100));
  return `
            <div class="score">
              <div class="score-val" style="color:${color}">${v.toFixed(3)}</div>
              <div class="score-label">${label}</div>
              <div class="score-bar"><div class="score-fill" style="width:${width}%;background:${color}"></div></div>
            </div>`;
}

function renderSegments() {
  const container = document.getElementById('segments');
  const filtered = DATA.segments.filter(s =>
    currentFilter === 'all' || s.status === currentFilter
  );

  if (filtered.length === 0) {
    container.innerHTML = '<div class="empty">No segments match this filter</div>';
    return;
  }

  container.innerHTML = filtered.map((s, i) => {
    const shortId = String(s.id || '').replace('SPEAKER_00_','').replace('.flac','');
    const status = String(s.status || 'unknown');
    const badgeCls = KNOWN_STATUSES.includes(status) ? `badge-${status}` : 'badge-unknown';
    const consistCls = s.consistent ? 'badge-consistent' : 'badge-differs';
    const consistTxt = s.consistent ? `${s.num_runs}/${s.num_runs} identical` : 'DIFFERS';

    const runs = Array.isArray(s.runs) ? s.runs : [];
    const runsHtml = runs.length > 1 ? runs.map((r, ri) => {
      const text = String((r && r.transcription) || '');
      // Truncate first, then escape, so an entity is never cut in half.
      const preview = text.length > RUN_PREVIEW_CHARS
        ? text.substring(0, RUN_PREVIEW_CHARS) + '...'
        : text;
      return `<div class="run-item">Run ${ri+1}: ${escHtml(preview)}</div>`;
    }).join('') : '';

    return `
    <div class="segment" data-idx="${i}" data-status="${escHtml(status)}" id="seg-${i}">
      <div class="seg-header">
        <span class="seg-id">${escHtml(shortId)}</span>
        <div class="seg-badges">
          <span class="tx-lang">${escHtml(s.detected_language || '?')}</span>
          ${s.num_runs > 1 ? `<span class="badge ${consistCls}">${escHtml(consistTxt)}</span>` : ''}
          <span class="badge ${badgeCls}">${escHtml(status)}</span>
        </div>
      </div>
      <div class="seg-body">
        <div class="audio-col">
          ${s.audio_exists
            ? `<audio controls preload="none" src="${escHtml(s.audio_url)}" data-seg="${i}"></audio>`
            : `<div style="color:var(--text2);font-size:12px">Audio not found</div>`}
          <div class="scores">${scoreHtml('Combined', s.combined)}${scoreHtml('CTC', s.native_ctc)}${scoreHtml('MMS', s.roman_mms)}
          </div>
        </div>
        <div class="text-col">
          <div>
            <div class="tx-label">Transcription</div>
            <div class="tx-block tx-native">${escHtml(s.transcription)}</div>
          </div>
          ${s.romanized ? `<div>
            <div class="tx-label">Romanized (uroman)</div>
            <div class="tx-block tx-roman">${escHtml(s.romanized)}</div>
          </div>` : ''}
          ${s.tagged ? `<div>
            <div class="tx-label">Tagged</div>
            <div class="tx-block tx-roman">${escHtml(s.tagged)}</div>
          </div>` : ''}
          ${runsHtml ? `<div>
            <button class="runs-toggle" onclick="this.nextElementSibling.classList.toggle('open')">
              Show ${runs.length} runs
            </button>
            <div class="runs-detail">${runsHtml}</div>
          </div>` : ''}
        </div>
      </div>
    </div>`;
  }).join('');
}

// Escape a value for use in HTML text or inside a quoted attribute.
function escHtml(s) {
  if (s === null || s === undefined) return '';
  return String(s)
    .replace(/&/g,'&amp;').replace(/</g,'&lt;').replace(/>/g,'&gt;')
    .replace(/"/g,'&quot;').replace(/'/g,'&#39;');
}

function setupFilters() {
  document.querySelectorAll('.filter-btn').forEach(btn => {
    btn.addEventListener('click', () => {
      document.querySelectorAll('.filter-btn').forEach(b => b.classList.remove('active'));
      btn.classList.add('active');
      currentFilter = btn.dataset.filter;
      currentIdx = -1;
      renderSegments();
    });
  });
}

function setupKeyboard() {
  document.addEventListener('keydown', e => {
    if (e.target.tagName === 'INPUT') return;
    // Leave browser shortcuts such as Ctrl+K / Cmd+J alone.
    if (e.ctrlKey || e.metaKey || e.altKey) return;
    const segs = document.querySelectorAll('.segment');
    if (!segs.length) return;

    // The hint shows J/K, so accept them with Shift or CapsLock as well.
    const key = e.key.length === 1 ? e.key.toLowerCase() : e.key;

    if (key === 'j' || key === 'ArrowDown') {
      e.preventDefault();
      currentIdx = Math.min(currentIdx + 1, segs.length - 1);
      focusSeg(segs[currentIdx]);
    } else if (key === 'k' || key === 'ArrowUp') {
      e.preventDefault();
      currentIdx = Math.max(currentIdx - 1, 0);
      focusSeg(segs[currentIdx]);
    } else if (key === ' ') {
      e.preventDefault();
      if (currentIdx >= 0) {
        const audio = segs[currentIdx].querySelector('audio');
        if (audio) audio.paused ? audio.play() : audio.pause();
      }
    }
  });
}

function focusSeg(el) {
  document.querySelectorAll('.segment').forEach(s => s.classList.remove('playing'));
  el.classList.add('playing');
  el.scrollIntoView({ behavior: 'smooth', block: 'center' });
}

init();
</script>
</body>
</html>"""


def main():
    """Parse command-line options, load the data file and serve the dashboard.

    Exits with status 1 when the data file does not exist. Otherwise serves
    until interrupted with Ctrl+C; the listening socket is closed on every
    exit path.
    """
    parser = argparse.ArgumentParser(description="Transcription Review Dashboard")
    parser.add_argument("--port", "-p", type=int, default=8765)
    parser.add_argument("--host", default="0.0.0.0",
                        help="Interface to bind to (default: all interfaces)")
    parser.add_argument("--data", "-d", default=str(DEFAULT_DATA),
                        help="Path to transcription JSON")
    parser.add_argument("--segments", "-s", default=DEFAULT_SEG_DIR,
                        help="Path to audio segments directory")
    args = parser.parse_args()

    if not os.path.exists(args.data):
        print(f"Data file not found: {args.data}")
        print("Run a consistency test or pipeline first to generate data.")
        sys.exit(1)

    if not os.path.isdir(args.segments):
        # Not fatal: the page still shows transcriptions without audio.
        print(f"Warning: segments directory not found: {args.segments}")

    print(f"Loading data from {args.data}...")
    data = load_dashboard_data(args.data, args.segments)
    print(f"Loaded {len(data['segments'])} segments")

    # The handler is instantiated per request, so configuration lives on the class.
    DashboardHandler.data = data
    DashboardHandler.seg_dir = args.segments

    server = HTTPServer((args.host, args.port), DashboardHandler)
    # A wildcard bind is reachable via localhost; otherwise show the bound host.
    shown_host = "localhost" if args.host in ("", "0.0.0.0") else args.host
    print(f"\nDashboard running at http://{shown_host}:{args.port}")
    print("Press Ctrl+C to stop\n")

    try:
        server.serve_forever()
    except KeyboardInterrupt:
        print("\nShutting down...")
    finally:
        # Release the port even if serve_forever() fails for another reason.
        server.server_close()


if __name__ == "__main__":
    main()
