o
    h iJ                     @   s   d Z ddlZddlZddlZddlZddlZddlmZ ddlm	Z	m
Z
 ddlmZmZ eejjZed d ZdZd	ed
edefddZG dd de
ZdZdd Zedkr\e  dS dS )a  
Transcription Review Dashboard
================================

Serves a web UI for reviewing audio segments alongside their transcriptions
and validation scores. Reads from consistency_test/ or transcriptions/ JSON.

Usage:
    cd /home/ubuntu/maya3_transcribe
    source venv/bin/activate
    python bin/dashboard.py [--port 8765] [--data consistency_test/v2_comparison.json]

Then open http://localhost:8765 in your browser.
    N)Path)
HTTPServerSimpleHTTPRequestHandler)urlparseparse_qsconsistency_testzv2_comparison.jsonz@/tmp/maya3_transcribe/pF_BQpHaIdU/extracted/pF_BQpHaIdU/segments	data_pathseg_dirreturnc                    sB  t | ddd}t|}W d   n1 sw   Y  g }d|v r|d }t }|D ]	}||  q+t|D ]i}g }	t|D ]\}
}||v rP|	||  qA|	sTq9|	d  t	j
||}t fdd|	D }||t	j
|d	|  d
d dd dd dd dd dd dd|t|	|	d q9nvd|v r|d D ]l}|dd}t	j
||}|d
i }||t	j
|d	| t|tr|d
dnt|t|tr|ddndt|tr|ddndt|tr|ddnddd|ddp
d|ddpdddg d q|t| dS )z:Load transcription data from JSON and resolve audio paths.rutf-8)encodingNrunsr   c                 3   s$    | ]}| d   d kV  qdS )transcriptionN)get).0rdr0 ./home/ubuntu/maya3_transcribe/bin/dashboard.py	<genexpr>8   s
    
z&load_dashboard_data.<locals>.<genexpr>/audio/r    	romanizeddetected_language
native_ctc	roman_mmscombinedstatusunknown)idaudio_exists	audio_urlr   r   r   r   r   r   r   
consistentnum_runsr   results
segment_idtaggedvalidation_scorevalidation_statusT   )r!   r"   r#   r   r   r(   r   r   r   r   r   r$   r%   r   )segmentssource)openjsonloadsetupdatekeyssorted	enumerateappendospathjoinallexistsr   len
isinstancedictstr)r   r	   frawr,   r   all_segsrunseg_namerun_datari
audio_pathr$   r   txr   r   r   load_dashboard_data   sx   









 

rI   c                       sH   e Zd ZdZi ZdZdd Zdd Zdd Zd	d
 Z	 fddZ
  ZS )DashboardHandlerz9HTTP handler serving the dashboard, audio files, and API.r   c                 C   sr   t | j}|j}|dks|dkr|   d S |dkr"| | j d S |dr2| |dd   d S | d d S )N/z/index.htmlz	/api/datar        )r   r8   _serve_html_serve_jsondata
startswith_serve_audio
send_error)selfparsedr8   r   r   r   do_GETn   s   

zDashboardHandler.do_GETc                 C   s8   t }| d | dd |   | j|d d S )N   Content-Typeztext/html; charset=utf-8r   )DASHBOARD_HTMLsend_responsesend_headerend_headerswfilewriteencode)rT   htmlr   r   r   rN   {   s
   
zDashboardHandler._serve_htmlc                 C   sJ   |  d | dd | dd |   | jtj|ddd d S )	NrW   rX   zapplication/jsonzAccess-Control-Allow-Origin*F)ensure_asciir   )rZ   r[   r\   r]   r^   r/   dumpsr_   )rT   rP   r   r   r   rO      s
   
 zDashboardHandler._serve_jsonc                 C   s   t j| j|}t j|s| dd|  d S t|d p!d}t j|}| 	d | 
d| | 
dt| | 
dd	 |   t|d
}| j|  W d    d S 1 s^w   Y  d S )NrM   zAudio not found: r   z
audio/flacrW   rX   zContent-LengthzAccept-Rangesbytesrb)r7   r8   r9   r	   r;   rS   	mimetypes
guess_typegetsizerZ   r[   r?   r\   r.   r]   r^   read)rT   filenamerG   mimesizer@   r   r   r   rR      s   
"zDashboardHandler._serve_audioc                    s@   dt |d vrdt |d vrt j|g|R   d S d S d S )Nz/api/r   r   )r?   superlog_message)rT   formatargs	__class__r   r   rn      s    zDashboardHandler.log_message)__name__
__module____qualname____doc__rP   r	   rV   rN   rO   rR   rn   __classcell__r   r   rq   r   rJ   h   s    rJ   a8/  <!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Transcription Review Dashboard</title>
<style>
  :root {
    --bg: #0f1117; --surface: #1a1d27; --surface2: #242836;
    --border: #2e3348; --text: #e1e4ed; --text2: #8b90a5;
    --accept: #22c55e; --review: #f59e0b; --retry: #3b82f6; --reject: #ef4444;
    --accent: #818cf8;
  }
  * { margin: 0; padding: 0; box-sizing: border-box; }
  body { font-family: 'SF Mono', 'Fira Code', 'JetBrains Mono', monospace;
         background: var(--bg); color: var(--text); line-height: 1.5; }

  .header { padding: 20px 24px; border-bottom: 1px solid var(--border);
            display: flex; justify-content: space-between; align-items: center; }
  .header h1 { font-size: 16px; font-weight: 600; }
  .stats { display: flex; gap: 16px; font-size: 12px; color: var(--text2); }
  .stat-pill { padding: 4px 10px; border-radius: 12px; background: var(--surface2); }

  .filters { padding: 12px 24px; border-bottom: 1px solid var(--border);
             display: flex; gap: 8px; flex-wrap: wrap; }
  .filter-btn { padding: 5px 14px; border-radius: 6px; border: 1px solid var(--border);
                background: var(--surface); color: var(--text2); cursor: pointer;
                font-size: 12px; font-family: inherit; transition: all 0.15s; }
  .filter-btn:hover { border-color: var(--accent); color: var(--text); }
  .filter-btn.active { background: var(--accent); color: #fff; border-color: var(--accent); }

  .segments { padding: 16px 24px; display: flex; flex-direction: column; gap: 12px; }

  .segment { background: var(--surface); border: 1px solid var(--border);
             border-radius: 10px; overflow: hidden; transition: border-color 0.15s; }
  .segment:hover { border-color: var(--accent); }
  .segment.playing { border-color: var(--accent); box-shadow: 0 0 0 1px var(--accent); }

  .seg-header { padding: 12px 16px; display: flex; justify-content: space-between;
                align-items: center; border-bottom: 1px solid var(--border); }
  .seg-id { font-size: 13px; font-weight: 600; }
  .seg-badges { display: flex; gap: 6px; align-items: center; }

  .badge { padding: 2px 8px; border-radius: 4px; font-size: 11px; font-weight: 600;
           text-transform: uppercase; }
  .badge-accept { background: rgba(34,197,94,0.15); color: var(--accept); }
  .badge-review { background: rgba(245,158,11,0.15); color: var(--review); }
  .badge-retry { background: rgba(59,130,246,0.15); color: var(--retry); }
  .badge-reject { background: rgba(239,68,68,0.15); color: var(--reject); }
  .badge-unknown { background: var(--surface2); color: var(--text2); }

  .badge-consistent { background: rgba(34,197,94,0.1); color: var(--accept); font-weight: 400; }
  .badge-differs { background: rgba(239,68,68,0.1); color: var(--reject); font-weight: 400; }

  .seg-body { padding: 16px; display: grid; grid-template-columns: 1fr 1fr; gap: 16px; }
  @media (max-width: 900px) { .seg-body { grid-template-columns: 1fr; } }

  .audio-col { display: flex; flex-direction: column; gap: 10px; }
  .text-col { display: flex; flex-direction: column; gap: 8px; }

  audio { width: 100%; height: 40px; border-radius: 6px; }
  audio::-webkit-media-controls-panel { background: var(--surface2); }

  .scores { display: flex; gap: 12px; font-size: 12px; }
  .score { display: flex; flex-direction: column; align-items: center; }
  .score-val { font-size: 18px; font-weight: 700; }
  .score-label { color: var(--text2); font-size: 10px; text-transform: uppercase; }
  .score-bar { width: 60px; height: 4px; background: var(--surface2); border-radius: 2px;
               margin-top: 3px; overflow: hidden; }
  .score-fill { height: 100%; border-radius: 2px; transition: width 0.3s; }

  .tx-block { background: var(--surface2); padding: 10px 12px; border-radius: 6px;
              font-size: 14px; line-height: 1.7; }
  .tx-label { font-size: 10px; text-transform: uppercase; color: var(--text2);
              margin-bottom: 4px; letter-spacing: 0.5px; }
  .tx-native { font-family: 'Noto Sans Telugu', 'Noto Sans Devanagari', 'Noto Sans',
               sans-serif; font-size: 16px; }
  .tx-roman { color: var(--text2); font-size: 13px; }
  .tx-lang { font-size: 11px; color: var(--accent); }

  .runs-toggle { font-size: 11px; color: var(--accent); cursor: pointer;
                 border: none; background: none; font-family: inherit; padding: 4px 0; }
  .runs-detail { display: none; padding: 8px 12px; background: var(--bg);
                 border-radius: 6px; font-size: 12px; margin-top: 6px; }
  .runs-detail.open { display: block; }
  .run-item { padding: 4px 0; border-bottom: 1px solid var(--border); }
  .run-item:last-child { border-bottom: none; }

  .empty { text-align: center; padding: 60px 24px; color: var(--text2); }
  .keyboard-hint { font-size: 11px; color: var(--text2); }
  kbd { padding: 1px 6px; background: var(--surface2); border: 1px solid var(--border);
        border-radius: 3px; font-size: 10px; }
</style>
</head>
<body>

<div class="header">
  <h1>Transcription Review</h1>
  <div class="stats">
    <span class="stat-pill" id="stat-total">-- segments</span>
    <span class="stat-pill" id="stat-accept">-- accept</span>
    <span class="stat-pill" id="stat-avg">avg S: --</span>
    <span class="keyboard-hint"><kbd>J</kbd>/<kbd>K</kbd> navigate &middot; <kbd>Space</kbd> play/pause</span>
  </div>
</div>

<div class="filters" id="filters">
  <button class="filter-btn active" data-filter="all">All</button>
  <button class="filter-btn" data-filter="accept">Accept</button>
  <button class="filter-btn" data-filter="review">Review</button>
  <button class="filter-btn" data-filter="retry">Retry</button>
  <button class="filter-btn" data-filter="reject">Reject</button>
</div>

<div class="segments" id="segments"></div>

<script>
let DATA = null;
let currentFilter = 'all';
let currentIdx = -1;

async function init() {
  const resp = await fetch('/api/data');
  DATA = await resp.json();
  renderStats();
  renderSegments();
  setupKeyboard();
  setupFilters();
}

function renderStats() {
  const segs = DATA.segments;
  const statuses = {};
  let totalS = 0, countS = 0;
  segs.forEach(s => {
    statuses[s.status] = (statuses[s.status] || 0) + 1;
    if (s.combined > 0) { totalS += s.combined; countS++; }
  });
  document.getElementById('stat-total').textContent = `${segs.length} segments`;
  document.getElementById('stat-accept').textContent =
    `${statuses.accept||0} accept / ${statuses.review||0} review / ${statuses.retry||0} retry / ${statuses.reject||0} reject`;
  document.getElementById('stat-avg').textContent = `avg S: ${countS ? (totalS/countS).toFixed(3) : '--'}`;
}

function scoreColor(val) {
  if (val >= 0.80) return 'var(--accept)';
  if (val >= 0.65) return 'var(--review)';
  if (val >= 0.55) return 'var(--retry)';
  return 'var(--reject)';
}

function renderSegments() {
  const container = document.getElementById('segments');
  const filtered = DATA.segments.filter(s =>
    currentFilter === 'all' || s.status === currentFilter
  );

  if (filtered.length === 0) {
    container.innerHTML = '<div class="empty">No segments match this filter</div>';
    return;
  }

  container.innerHTML = filtered.map((s, i) => {
    const shortId = s.id.replace('SPEAKER_00_','').replace('.flac','');
    const badgeCls = `badge-${s.status || 'unknown'}`;
    const consistCls = s.consistent ? 'badge-consistent' : 'badge-differs';
    const consistTxt = s.consistent ? `${s.num_runs}/${s.num_runs} identical` : 'DIFFERS';

    const runsHtml = s.runs && s.runs.length > 1 ? s.runs.map((r, ri) =>
      `<div class="run-item">Run ${ri+1}: ${(r.transcription||'').substring(0,100)}${(r.transcription||'').length>100?'...':''}</div>`
    ).join('') : '';

    return `
    <div class="segment" data-idx="${i}" data-status="${s.status}" id="seg-${i}">
      <div class="seg-header">
        <span class="seg-id">${shortId}</span>
        <div class="seg-badges">
          <span class="tx-lang">${s.detected_language || '?'}</span>
          ${s.num_runs > 1 ? `<span class="badge ${consistCls}">${consistTxt}</span>` : ''}
          <span class="badge ${badgeCls}">${s.status}</span>
        </div>
      </div>
      <div class="seg-body">
        <div class="audio-col">
          ${s.audio_exists
            ? `<audio controls preload="none" src="${s.audio_url}" data-seg="${i}"></audio>`
            : `<div style="color:var(--text2);font-size:12px">Audio not found</div>`}
          <div class="scores">
            <div class="score">
              <div class="score-val" style="color:${scoreColor(s.combined)}">${s.combined.toFixed(3)}</div>
              <div class="score-label">Combined</div>
              <div class="score-bar"><div class="score-fill" style="width:${s.combined*100}%;background:${scoreColor(s.combined)}"></div></div>
            </div>
            <div class="score">
              <div class="score-val" style="color:${scoreColor(s.native_ctc)}">${s.native_ctc.toFixed(3)}</div>
              <div class="score-label">CTC</div>
              <div class="score-bar"><div class="score-fill" style="width:${s.native_ctc*100}%;background:${scoreColor(s.native_ctc)}"></div></div>
            </div>
            <div class="score">
              <div class="score-val" style="color:${scoreColor(s.roman_mms)}">${s.roman_mms.toFixed(3)}</div>
              <div class="score-label">MMS</div>
              <div class="score-bar"><div class="score-fill" style="width:${s.roman_mms*100}%;background:${scoreColor(s.roman_mms)}"></div></div>
            </div>
          </div>
        </div>
        <div class="text-col">
          <div>
            <div class="tx-label">Transcription</div>
            <div class="tx-block tx-native">${escHtml(s.transcription)}</div>
          </div>
          ${s.romanized ? `<div>
            <div class="tx-label">Romanized (uroman)</div>
            <div class="tx-block tx-roman">${escHtml(s.romanized)}</div>
          </div>` : ''}
          ${s.tagged ? `<div>
            <div class="tx-label">Tagged</div>
            <div class="tx-block tx-roman">${escHtml(s.tagged)}</div>
          </div>` : ''}
          ${runsHtml ? `<div>
            <button class="runs-toggle" onclick="this.nextElementSibling.classList.toggle('open')">
              Show ${s.runs.length} runs
            </button>
            <div class="runs-detail">${runsHtml}</div>
          </div>` : ''}
        </div>
      </div>
    </div>`;
  }).join('');
}

function escHtml(s) {
  if (!s) return '';
  return s.replace(/&/g,'&amp;').replace(/</g,'&lt;').replace(/>/g,'&gt;');
}

function setupFilters() {
  document.querySelectorAll('.filter-btn').forEach(btn => {
    btn.addEventListener('click', () => {
      document.querySelectorAll('.filter-btn').forEach(b => b.classList.remove('active'));
      btn.classList.add('active');
      currentFilter = btn.dataset.filter;
      currentIdx = -1;
      renderSegments();
    });
  });
}

function setupKeyboard() {
  document.addEventListener('keydown', e => {
    if (e.target.tagName === 'INPUT') return;
    const segs = document.querySelectorAll('.segment');
    if (!segs.length) return;

    if (e.key === 'j' || e.key === 'ArrowDown') {
      e.preventDefault();
      currentIdx = Math.min(currentIdx + 1, segs.length - 1);
      focusSeg(segs[currentIdx]);
    } else if (e.key === 'k' || e.key === 'ArrowUp') {
      e.preventDefault();
      currentIdx = Math.max(currentIdx - 1, 0);
      focusSeg(segs[currentIdx]);
    } else if (e.key === ' ') {
      e.preventDefault();
      if (currentIdx >= 0) {
        const audio = segs[currentIdx].querySelector('audio');
        if (audio) audio.paused ? audio.play() : audio.pause();
      }
    }
  });
}

function focusSeg(el) {
  document.querySelectorAll('.segment').forEach(s => s.classList.remove('playing'));
  el.classList.add('playing');
  el.scrollIntoView({ behavior: 'smooth', block: 'center' });
}

init();
</script>
</body>
</html>c                  C   s   t jdd} | jddtdd | jddttd	d
 | jddtdd
 |  }tj	
|js?td|j  td td td|j d t|j|j}tdt|d  d |t_|jt_td|jft}td|j  td z|  W d S  ty   td |  Y d S w )NzTranscription Review Dashboard)descriptionz--portz-pi="  )typedefaultz--dataz-dzPath to transcription JSON)rz   helpz
--segmentsz-sz Path to audio segments directoryzData file not found: z:Run a consistency test or pipeline first to generate data.r+   zLoading data from z...zLoaded r,   z	 segmentsz0.0.0.0z'
Dashboard running at http://localhost:zPress Ctrl+C to stop
z
Shutting down...)argparseArgumentParseradd_argumentintr?   DEFAULT_DATADEFAULT_SEG_DIR
parse_argsr7   r8   r;   rP   printsysexitrI   r,   r<   rJ   r	   r   portserve_foreverKeyboardInterruptserver_close)parserrp   rP   serverr   r   r   main  s6   

r   __main__)rv   r7   r   r/   r|   rf   pathlibr   http.serverr   r   urllib.parser   r   __file__parentPROJECT_ROOTr   r   r?   r>   rI   rJ   rY   r   rs   r   r   r   r   <module>   s*   J8   
