o
    5i                     @   s4   d Z ddlmZmZmZmZmZmZmZ g dZ	dS )a  
Validators Module v4 - Transcription Validation (Dual Scoring + Structural)
=============================================================================

Validates transcriptions via:
  Step 0: Structural sanity checks (no ML, instant)
  Step 1: Character validation (unicode block check)
  Step 2: Native CTC alignment (language-specific wav2vec2)
  Step 3: Romanized MMS alignment (uroman-derived, deterministic)

v4 change: romanized text is derived internally via uroman from native
transcription. Validator no longer depends on Gemini's romanization output.

Combined scoring: S = 0.45*N + 0.55*R - 0.10*abs(N-R)

Usage:
```python
from src.validators import validate_transcription, quick_validate

# Full validation (derives uroman romanization internally)
result = validate_transcription("audio.flac", native_text, language="te")
# result.status: "accept" / "review" / "retry" / "reject"
# result.combined_score: 0-1

# Quick check (character + structural only - instant)
result = quick_validate(text, language="te")
```
   )validate_transcriptionquick_validatecheck_characterscompute_combined_scorestructural_sanity_checkValidationResultcleanupN)
__doc__simple_validatorr   r   r   r   r   r   r   __all__ r   r   8/home/ubuntu/maya3_transcribe/src/validators/__init__.py<module>   s    $
