#!/usr/bin/env python3
"""
ASR Validation for Chunked Audio

Verifies that all text chunks were processed and spoken correctly
by transcribing the audio and comparing to original text.

This confirms:
1. No chunks were skipped
2. All text was synthesized
3. Chunking boundaries don't cause text loss
"""

import os
import sys
from pathlib import Path

# Whisper is a hard requirement for this script: stop immediately with an
# install hint when it cannot be imported, otherwise confirm it was found.
try:
    import whisper
except ImportError:
    print("❌ Whisper not found. Install with: pip install openai-whisper")
    sys.exit(1)
else:
    print("✅ Whisper found")

# Test fixtures, keyed by test name. Each entry holds:
#   "audio"            - path of the synthesized WAV file to transcribe
#   "original"         - the reference text the transcript is compared against
#   "expected_chunked" - whether the text was expected to be chunked (reported only)
#
# NOTE: every "original" value must stay on ONE physical line. A normal
# double-quoted string cannot contain a raw line break, so splitting the long
# story text across lines makes the module fail to parse.
TEST_FILES = {
    "test1_short": {
        "audio": "/tmp/chunking_tests/test1_short.wav",
        "original": "Hello world. This is a short test.",
        "expected_chunked": False
    },
    "test2_medium": {
        "audio": "/tmp/chunking_tests/test2_medium.wav",
        "original": "This is a medium length text that should trigger chunking. It contains multiple sentences spanning several lines. Each sentence provides meaningful content that needs to be preserved. The chunking algorithm should split this at natural sentence boundaries. This ensures good prosody and natural-sounding speech. The final audio should be seamlessly stitched together. No pops or clicks should be audible at chunk boundaries. The crossfade algorithm handles smooth transitions. This is the final sentence of our medium test. We continue with more sentences to ensure we exceed the 800 character threshold that triggers chunking. Here we add even more content to make absolutely sure. The algorithm will intelligently split this text. Each chunk will be processed separately. Then the audio will be stitched with crossfade. The result should sound natural and continuous.",
        "expected_chunked": True
    },
    "test3_long_story": {
        "audio": "/tmp/chunking_tests/test3_long_story.wav",
        "original": "Once upon a time, in a small village nestled in the mountains, there lived a young girl named Maya. She had a special gift - she could understand the language of birds. Every morning, she would wake up to the melodious songs of the sparrows outside her window. They would tell her stories about their travels and adventures. One day, a majestic eagle landed on her windowsill. It brought news of a distant land where all the birds were disappearing. एक बार की बात है, पहाड़ों में बसे एक छोटे से गाँव में माया नाम की एक युवा लड़की रहती थी। उसके पास एक विशेष उपहार था - वह पक्षियों की भाषा समझ सकती थी। हर सुबह, वह अपनी खिड़की के बाहर गौरैयों के मधुर गीतों के साथ उठती थी। वे उसे अपनी यात्राओं और रोमांचों की कहानियां सुनाते थे। एक दिन, एक राजसी चील उसकी खिड़की पर उतरी। यह एक दूर देश की खबर लाई जहाँ सभी पक्षी गायब हो रहे थे। Maya knew she had to help. She packed her bag with essentials and set off on a long journey. The path was treacherous, winding through dense forests and across rushing rivers. But Maya was determined. She asked every bird she met for directions. The crows guided her through the forest. The geese helped her cross the rivers. The owls kept watch over her at night, ensuring her safety. माया जानती थी कि उसे मदद करनी है। उसने अपना बैग आवश्यक चीजों से भर लिया और एक लंबी यात्रा पर निकल पड़ी। रास्ता कठिन था, घने जंगलों और तेज़ नदियों के पार घूमता हुआ। लेकिन माया दृढ़ थी। उसने हर पक्षी से दिशा-निर्देश पूछे। कौवों ने उसे जंगल के माध्यम से मार्गदर्शन किया। हंसों ने उसे नदियाँ पार करने में मदद की। उल्लुओं ने रात में उस पर नज़र रखी, उसकी सुरक्षा सुनिश्चित करते हुए। After many days of travel, Maya finally reached the land the eagle had spoken of. It was a barren wasteland, with no trees or vegetation. The sky was eerily silent. She discovered that a powerful sorcerer had cast a spell, trapping all the birds in cages of light. Using her gift, Maya spoke to the imprisoned birds, learning the spell weakness. With courage and determination, she confronted the sorcerer and broke the spell. The birds were freed, and the land came alive with their joyous songs once more. कई दिनों की यात्रा के बाद, माया आखिरकार उस भूमि पर पहुंची जिसके बारे में चील ने बात की थी। यह एक बंजर भूमि थी, जिसमें न पेड़ थे और न ही वनस्पति। आकाश भयानक रूप से शांत था। उसने पाया कि एक शक्तिशाली जादूगर ने एक मंत्र डाला था, सभी पक्षियों को प्रकाश के पिंजरों में फंसाया था। अपने उपहार का उपयोग करते हुए, माया ने कैद पक्षियों से बात की। साहस और दृढ़ संकल्प के साथ, उसने जादूगर का सामना किया और मंत्र तोड़ दिया। पक्षियों को मुक्त कर दिया गया। Maya returned home as a hero. The birds celebrated her bravery with a grand festival. From that day forward, she was known as the Guardian of Birds.",
        "expected_chunked": True
    }
}


def normalize_text(text):
    """Return *text* prepared for comparison.

    Every run of whitespace is collapsed to a single space, the result is
    lowercased, and leading/trailing whitespace is trimmed. Punctuation is
    left untouched.
    """
    import re

    collapsed = re.sub(r'\s+', ' ', text)
    return collapsed.lower().strip()


def _word_counts(text):
    """Return a ``Counter`` of the comparable words in *text*.

    The text is lowercased and split on whitespace. Punctuation is then
    trimmed from both ends of every token (any character whose Unicode
    category starts with "P", which covers ASCII punctuation as well as the
    Devanagari danda), and tokens that end up empty are dropped.
    """
    from collections import Counter
    import unicodedata

    def is_punctuation(char):
        return unicodedata.category(char).startswith("P")

    counts = Counter()
    for token in text.lower().split():
        start, end = 0, len(token)
        while start < end and is_punctuation(token[start]):
            start += 1
        while end > start and is_punctuation(token[end - 1]):
            end -= 1
        if start < end:
            counts[token[start:end]] += 1
    return counts


def calculate_similarity(original, transcribed):
    """Calculate word-level similarity between texts.

    Both texts are reduced to lowercase words with surrounding punctuation
    removed, so "test." in the reference matches "test" in the transcript.
    Words are compared as multisets rather than sets: a word spoken three
    times in the reference must appear three times in the transcript to
    count fully, which lets a dropped chunk lower the score even when its
    vocabulary also occurs elsewhere in the text.

    Args:
        original: Reference text that should have been spoken.
        transcribed: Text produced by the speech recognizer.

    Returns:
        The F1 score (harmonic mean of word precision and recall) as a float
        in [0.0, 1.0]. Returns 0.0 when either text has no words or when the
        texts share no words.
    """
    orig_counts = _word_counts(original)
    trans_counts = _word_counts(transcribed)

    orig_total = sum(orig_counts.values())
    trans_total = sum(trans_counts.values())
    if not orig_total or not trans_total:
        return 0.0

    # Counter intersection keeps the minimum count per word, i.e. the number
    # of word occurrences the two texts actually have in common.
    matched = sum((orig_counts & trans_counts).values())
    if not matched:
        return 0.0

    # Precision: what fraction of transcribed words are correct
    precision = matched / trans_total
    # Recall: what fraction of original words were transcribed
    recall = matched / orig_total

    return 2 * precision * recall / (precision + recall)


def _contains_devanagari(text):
    """Return True when *text* has at least one Devanagari-block character."""
    return any('\u0900' <= char <= '\u097f' for char in text)


def _similarity_threshold(original):
    """Pick the minimum similarity required for *original* to count as a pass.

    Short texts (< 200 chars) need 0.70, texts containing Devanagari need
    0.40 (the transcription is pinned to English, so those words are
    expected to be missing or wrong), and everything else needs 0.60.
    """
    if len(original) < 200:
        return 0.70
    if _contains_devanagari(original):
        return 0.40
    return 0.60


def _transcription_path(audio_file):
    """Return the sibling ``<stem>_transcription.txt`` path for *audio_file*.

    Built from the file stem instead of a string replace so that only the
    final suffix is swapped and the result can never equal the audio path.
    """
    audio_path = Path(audio_file)
    return audio_path.with_name(f"{audio_path.stem}_transcription.txt")


def main():
    """Transcribe every test audio file and report how well it matches its text.

    Loads the Whisper "large-v2" model, then for each entry in ``TEST_FILES``
    whose audio file exists: transcribes it, scores the transcript against
    the reference text with ``calculate_similarity``, prints PASS/WARNING
    against a length/language dependent threshold, and writes the reference,
    transcript and score next to the audio as ``<name>_transcription.txt``.
    A summary and an overall conclusion are printed at the end; the
    conclusion only reports success when every test was actually run and
    passed.
    """
    banner = "=" * 80

    print(banner)
    print("ASR VALIDATION FOR CHUNKED AUDIO")
    print(banner)
    print()

    # Load Whisper model
    print("📥 Loading Whisper model (large-v2)...")
    print("   This may take a minute...")
    model = whisper.load_model("large-v2")
    print("✅ Model loaded\n")

    results = {}
    missing = []  # tests skipped because their audio file does not exist

    for test_name, test_data in TEST_FILES.items():
        audio_file = test_data["audio"]
        original = test_data["original"]

        # Check if file exists
        if not os.path.exists(audio_file):
            print(f"⚠️  {test_name}: Audio file not found: {audio_file}")
            missing.append(test_name)
            continue

        print(banner)
        print(f"TEST: {test_name}")
        print(banner)
        print(f"Audio: {audio_file}")
        print(f"Original length: {len(original)} chars")
        print(f"Expected chunked: {test_data['expected_chunked']}")
        print()

        # Transcribe with Whisper
        print("🎧 Transcribing audio (this may take 30-60s for long files)...")
        result = model.transcribe(
            audio_file,
            language="en",  # Pins decoding to English; no language auto-detection
            task="transcribe",
            verbose=False
        )

        transcribed = result["text"]

        print("✅ Transcription complete")
        print(f"   Transcribed length: {len(transcribed)} chars")
        print()

        # Show preview
        print("Original (first 200 chars):")
        print(f"   {original[:200]}...")
        print()
        print("Transcribed (first 200 chars):")
        print(f"   {transcribed[:200]}...")
        print()

        # Calculate similarity
        similarity = calculate_similarity(original, transcribed)

        print(f"📊 Similarity Score: {similarity:.1%}")

        # Determine pass/fail
        threshold = _similarity_threshold(original)

        if similarity >= threshold:
            print(f"✅ PASS (>= {threshold:.0%} threshold)")
            status = "PASS"
        else:
            print(f"⚠️  WARNING: Low similarity (< {threshold:.0%} threshold)")
            print("   This may indicate missing chunks or poor transcription")
            status = "WARNING"

        results[test_name] = {
            "similarity": similarity,
            "status": status,
            "transcribed_length": len(transcribed),
            "original_length": len(original)
        }

        # Save transcription. UTF-8 is forced because the reference text
        # contains Hindi, which the platform default encoding may not handle.
        trans_file = _transcription_path(audio_file)
        with open(trans_file, 'w', encoding='utf-8') as f:
            f.write(f"Original Text:\n{original}\n\n")
            f.write(f"Transcribed Text:\n{transcribed}\n\n")
            f.write(f"Similarity: {similarity:.1%}\n")

        print(f"   Saved transcription to: {trans_file}")
        print()

    # Summary
    print(banner)
    print("SUMMARY")
    print(banner)
    print()

    for test_name, outcome in results.items():
        status_emoji = "✅" if outcome['status'] == "PASS" else "⚠️"
        print(f"{status_emoji} {test_name}: {outcome['similarity']:.1%} similarity ({outcome['status']})")
    for test_name in missing:
        print(f"⚠️ {test_name}: skipped (audio file not found)")

    print()
    print(banner)
    print("INTERPRETATION")
    print(banner)
    print()
    print("✅ High similarity (>70%) = All text was spoken correctly")
    print("⚠️  Medium similarity (40-70%) = Most text spoken (expected for mixed language)")
    print("❌ Low similarity (<40%) = Possible missing chunks or errors")
    print()
    print("Note: Hindi text may not transcribe well with Whisper")
    print("      (it was spoken, but Whisper expects English)")
    print()
    print("CONCLUSION:")
    all_pass = all(r['status'] == 'PASS' for r in results.values())
    if not results:
        # all() over an empty collection is True, so an empty run must be
        # reported explicitly rather than as a success.
        print("❌ No audio files were transcribed - nothing was validated")
    elif all_pass and not missing:
        print("✅ All tests passed - Chunking works correctly!")
    else:
        if not all_pass:
            print("⚠️  Some tests have low similarity - Check transcriptions manually")
        if missing:
            print(f"⚠️  {len(missing)} test(s) skipped because audio was missing: {', '.join(missing)}")
    print()


# Run the validation only when executed as a script, not when imported.
if __name__ == '__main__':
    main()