#!/usr/bin/env python3
from openai import OpenAI
import subprocess
import re
import os

# OpenAI client; picks up the key from the environment (None if unset — the
# first API call below will fail with an auth error in that case).
client = OpenAI(api_key=os.environ.get('OPENAI_API_KEY'))
# Audio file under test; expected to exist in the current working directory.
audio_file = 'test5_final_optimized.wav'

# Reference text the audio was synthesized from; used as ground truth for the
# word-coverage check further down.
ORIGINAL_ENGLISH = "Once upon a time, in a small village nestled in the mountains, there lived a young girl named Maya. She had a special gift - she could understand the language of birds. Every morning, she would wake up to the melodious songs of the sparrows outside her window. They would tell her stories about their travels and adventures. One day, a majestic eagle landed on her windowsill. It brought news of a distant land where all the birds were disappearing. Maya knew she had to help. She packed her bag with essentials and set off on a long journey. The path was treacherous, winding through dense forests and across rushing rivers. But Maya was determined. She asked every bird she met for directions. The crows guided her through the forest. The geese helped her cross the rivers. The owls kept watch over her at night, ensuring her safety. After many days of travel, Maya finally reached the land the eagle had spoken of. It was a barren wasteland, with no trees or vegetation. The sky was eerily silent. She discovered that a powerful sorcerer had cast a spell, trapping all the birds in cages of light. Using her gift, Maya spoke to the imprisoned birds, learning the spell weakness. With courage and determination, she confronted the sorcerer and broke the spell. The birds were freed, and the land came alive with their joyous songs once more. Maya returned home as a hero. The birds celebrated her bravery with a grand festival. From that day forward, she was known as the Guardian of Birds."

# Header banner for the report.
banner = '=' * 80
print(banner)
print('FINAL ASR VALIDATION - OPTIMIZED VERSION')
print(banner)
print(f'Original English: {len(ORIGINAL_ENGLISH)} chars')
print()

# Run Whisper over the WAV. verbose_json + segment-level timestamps are
# required so the silence/gap analysis below can read per-segment start/end.
print('Transcribing...')
with open(audio_file, 'rb') as audio:
    transcript = client.audio.transcriptions.create(
        file=audio,
        model='whisper-1',
        response_format='verbose_json',
        timestamp_granularities=['segment'],
    )

# Probe the container duration (seconds) with ffprobe.
# check=True surfaces a CalledProcessError (with ffprobe's stderr attached)
# if the probe fails; previously a failure produced empty stdout and an
# opaque ValueError from float('').
probe = subprocess.run(
    ['ffprobe', '-v', 'error', '-show_entries', 'format=duration',
     '-of', 'default=noprint_wrappers=1:nokey=1', audio_file],
    capture_output=True, text=True, check=True,
)
duration = float(probe.stdout.strip())

segments = transcript.segments
# Total voiced time = sum of segment spans; the rest of the clip is silence.
speech_time = sum(seg.end - seg.start for seg in segments)
speech_pct = (speech_time / duration) * 100

print(f'\nRESULTS:')
print(f'  Duration: {duration:.1f}s ({duration/60:.1f} min)')
print(f'  Speech: {speech_time:.1f}s ({speech_pct:.1f}%)')
print(f'  Silence: {duration - speech_time:.1f}s ({100-speech_pct:.1f}%)')
print(f'  Segments: {len(segments)}')

# Inter-segment silences: pair each segment with its successor and take the
# gap between one segment's end and the next one's start.
gaps = [nxt.start - prev.end for prev, nxt in zip(segments, segments[1:])]
large_gaps = [g for g in gaps if g > 0.5]
if large_gaps:
    print(f'  Large gaps: {len(large_gaps)} (max: {max(large_gaps):.1f}s)')
else:
    print(f'  Large gaps: NONE')

# Coverage
def norm(t):
    """Normalize text for word-overlap comparison.

    Lowercases *t*, removes every character that is neither a word
    character nor whitespace, and trims surrounding whitespace.
    """
    cleaned = re.sub(r'[^\w\s]', '', t.lower())
    return cleaned.strip()

# Word-level coverage: fraction of the unique reference vocabulary that also
# appears somewhere in the transcription (order-insensitive, bag-of-words).
hyp_vocab = set(norm(transcript.text).split())
ref_vocab = set(norm(ORIGINAL_ENGLISH).split())
matched = ref_vocab.intersection(hyp_vocab)
coverage = len(matched) / len(ref_vocab) * 100

print(f'  Coverage: {coverage:.1f}% ({len(matched)}/{len(ref_vocab)} words)')

# Verdict: pick the label first, print once.
print()
if speech_pct > 90 and not large_gaps and coverage > 85:
    verdict = 'EXCELLENT!'
elif speech_pct > 85 and coverage > 75:
    verdict = 'GOOD'
else:
    verdict = 'NEEDS IMPROVEMENT'
print(f'VERDICT: {verdict}')

# Show the first 600 characters of the transcription as a spot check.
print(f'\nTranscription ({len(transcript.text)} chars):')
print(transcript.text[:600])
print('...')

