o
    [iA2                  	   @   s  d Z ddlZddlZddlZddlmZmZmZmZ ddl	m
Z
mZ ddlmZ ddlmZ ddlmZmZmZ e
G d	d
 d
Ze
G dd dZG dd dZ		d@dedee dedefddZedkrLddlZejddZejddd ejdddd ejdddd d! ejd"d#d$d%d! ejd&d'd( ejd)d'd( e Zeej ej  ej!ej"d*Z#e#j$ej%ej&ej!d+Z'e(d,d-  e(d. e(d-  e(d/ej%  e(d0ej!  e(d1e'j)d2d3 e(  e'j*+ D ]O\Z,Z-e-j.rd4nd5Z/e(e/ d6e, d7 e-j.r7e-j0re(d8e-j0dd9  d: e-j1r)e(d;e2e-j1 d< e-j3r6e(d=e-j3d> qe(d?e-j4  qe#5e'g e#6  dS dS )Az
Validator Runner
================

Coordinates running multiple validators on audio segments.
Supports toggling individual validators and collecting aggregated results.
    N)ListDictAnyOptional)	dataclassfield)datetime)Path   )BaseValidatorValidationResultnormalize_language_codec                   @   s   e Zd ZU dZdZeed< dZeed< dZe	ed< dZ
e	ed< d	Ze	ed
< dZeed< eedZee	ef ed< eedZee	ef ed< dS )RunnerConfigz(Configuration for the validation runner.Tenable_indicmfaenable_indic_conformertelanguageautodevice./validation_results
output_dirsave_resultsdefault_factoryindicmfa_optionsindic_conformer_optionsN)__name__
__module____qualname____doc__r   bool__annotations__r   r   strr   r   r   r   dictr   r   r   r    r$   r$   6/home/ubuntu/maya3_transcribe/src/validators/runner.pyr      s   
 r   c                   @   sj   e Zd ZU dZeed< ee ed< eed< eedZ	e
eef ed< dZeed< d	e
eef fd
dZdS )AggregatedResultz?Aggregated results from all validators for a single audio file.
audio_pathreference_textr   r   results        total_processing_time_secreturnc                 C   s(   | j | j| j| jdd | j D dS )Nc                 S      i | ]	\}}||  qS r$   to_dict).0nameresultr$   r$   r%   
<dictcomp>:       z,AggregatedResult.to_dict.<locals>.<dictcomp>)r'   r(   r   r+   r)   )r'   r(   r   r+   r)   itemsselfr$   r$   r%   r/   4   s   zAggregatedResult.to_dictN)r   r   r   r   r"   r!   r   r   r#   r)   r   r   r+   floatr   r/   r$   r$   r$   r%   r&   '   s   
 r&   c                   @   s  e Zd ZdZ					d#dededed	ed
ef
ddZdd Zdeee	f fddZ
			d$dedee dee deee  def
ddZ				d%dee deee  dee deee  dee dee fddZ	d&dee dee defdd Zd!d" ZdS )'ValidatorRunnerai  
    Runs multiple validators on audio files.
    
    Example:
    ```python
    runner = ValidatorRunner(
        enable_indicmfa=True,
        enable_indic_conformer=True,
        language="te"
    )
    
    result = runner.validate(
        audio_path="audio.flac",
        reference_text="reference transcription",
        language="te"
    )
    ```
    Tr   r   r   r   r   r   r   r   c                 K   s:   t d|||||ddd | D | _i | _d| _dS )a|  
        Initialize the validator runner.
        
        Args:
            enable_indicmfa: Enable IndicMFA validator (forced alignment)
            enable_indic_conformer: Enable IndicConformer validator (ASR)
            language: Default language code
            device: Device for ML models ("cuda", "cpu", "auto")
            output_dir: Directory to save results
        )r   r   r   r   r   c                 S   s    i | ]\}}| d r||qS )_options)endswith)r0   kvr$   r$   r%   r3   n   s     z,ValidatorRunner.__init__.<locals>.<dictcomp>FNr$   )r   r5   config
validators_initialized)r7   r   r   r   r   r   kwargsr$   r$   r%   __init__U   s   	
zValidatorRunner.__init__c              
   C   s  | j rdS td | jjrAzddlm} |dddi| jj| jd< td W n ty@ } ztd	|  W Y d}~nd}~ww | jj	r|zdd
l
m} |dd| jjd| jj| jd< td W n ty{ } ztd|  W Y d}~nd}~ww tdt| j d d| _ dS )z"Initialize all enabled validators.Nz#[Runner] Initializing validators...r
   )IndicMFAValidatorenabledTindicmfau%   [Runner] ✓ IndicMFA validator addedu#   [Runner] ✗ IndicMFA unavailable: )IndicConformerValidator)rD   r   indic_conformeru+   [Runner] ✓ IndicConformer validator addedu)   [Runner] ✗ IndicConformer unavailable: 	[Runner] z validators initializedr$   )r@   printr>   r   indicmfa_validatorrC   r   r?   ImportErrorr   indic_conformer_validatorrF   r   r   len)r7   rC   erF   r$   r$   r%   _init_validatorst   s@   
z ValidatorRunner._init_validatorsr,   c                 C   s   |    dd | j D S )z%Get information about all validators.c                 S   r-   r$   )get_info)r0   r1   	validatorr$   r$   r%   r3      r4   z6ValidatorRunner.get_validator_info.<locals>.<dictcomp>)rO   r?   r5   r6   r$   r$   r%   get_validator_info   s   z"ValidatorRunner.get_validator_infoNr'   r(   r?   c                 C   s@  |    t|p
| jj}t }t|||d}|pt| j }|D ]t}	|	| jvr2t	d|	 d q"| j|	 }
|
j
s;q"t	d|	 d z*|
j|||d}||j|	< |jrVdnd}|jp\d}t	d	| d
|	 d|dd W q" ty } zt	d|	 d|  t|	|dt|d|j|	< W Y d}~q"d}~ww t | |_|S )a  
        Run validation on a single audio file.
        
        Args:
            audio_path: Path to audio file
            reference_text: Reference transcription (required for MFA)
            language: Language code (defaults to config)
            validators: Specific validators to run (None = all enabled)
            
        Returns:
            AggregatedResult with results from all validators
        r'   r(   r   z%[Runner] Warning: Unknown validator ''z[Runner] Running ...   ✓   ✗r*   rH    : .2fsu   [Runner] ✗ z	 failed: F)validator_namer'   successerror_messageN)rO   r   r>   r   timer&   listr?   keysrI   rD   validater)   r]   processing_time_sec	Exceptionr   r"   r+   )r7   r'   r(   r   r?   lang
start_timer2   validators_to_runr1   rQ   validation_resultstatus	proc_timerN   r$   r$   r%   rb      sN   



"	zValidatorRunner.validateaudio_pathsreference_textsprogress_callbackc              	   C   s   g }|p
dgt | }t |}td| d tt||D ]0\}	\}
}|r.||	d | ntd|	d  d| dt|
j  | j|
|||d}|| q|S )	a  
        Run validation on multiple audio files.
        
        Args:
            audio_paths: List of audio file paths
            reference_texts: List of reference transcriptions
            language: Language code
            validators: Specific validators to run
            progress_callback: Called with (current, total) for progress
            
        Returns:
            List of AggregatedResults
        Nz[Runner] Processing z	 files...r
   z[Runner] File /rY   )r'   r(   r   r?   )rM   rI   	enumeratezipr	   r1   rb   append)r7   rk   rl   r   r?   rm   r)   	ref_textstotalir'   ref_textr2   r$   r$   r%   validate_batch   s    $zValidatorRunner.validate_batchr)   filenamec                 C   s   t j| jjdd |du rt d}d| d}t j| jj|}t 	 | jj
t| j dt|dd	 |D d
}t|ddd}tj||ddd W d   n1 sZw   Y  td|  |S )z
        Save validation results to JSON file.
        
        Args:
            results: List of AggregatedResults
            filename: Optional filename (auto-generated if None)
            
        Returns:
            Path to saved file
        T)exist_okNz%Y%m%d_%H%M%Svalidation_results_z.json)r   r?   c                 S   s   g | ]}|  qS r$   r.   )r0   rr$   r$   r%   
<listcomp>-  s    z0ValidatorRunner.save_results.<locals>.<listcomp>)	timestampr>   results_countr)   wzutf-8)encodingF   )ensure_asciiindentz[Runner] Results saved to: )osmakedirsr>   r   r   nowstrftimepathjoin	isoformatr   r`   r?   ra   rM   openjsondumprI   )r7   r)   rw   r|   output_pathoutput_datafr$   r$   r%   r     s"   

zValidatorRunner.save_resultsc                 C   s$   | j  D ]}|  qtd dS )z&Release resources from all validators.z[Runner] Cleanup completeN)r?   valuescleanuprI   )r7   rQ   r$   r$   r%   r   6  s   
zValidatorRunner.cleanup)TTr   r   r   )NNN)NNNN)N)r   r   r   r   r    r"   rB   rO   r   r   rR   r   r   r&   rb   callablerv   r   r   r$   r$   r$   r%   r9   A   sx    
#

H


.
'r9   r   r'   r(   r   r,   c                 K   s.   t dd|i|}|j| ||d}|  |S )a4  
    Convenience function to run all validators on a single file.
    
    Args:
        audio_path: Path to audio file
        reference_text: Reference transcription
        language: Language code
        **kwargs: Additional options for ValidatorRunner
        
    Returns:
        AggregatedResult
    r   rS   Nr$   )r9   rb   r   )r'   r(   r   rA   runnerr2   r$   r$   r%   run_all_validators=  s   r   __main__zRun transcription validators)descriptionzPath to audio file)helpz--referencez-rzReference transcriptionz
--languagez-lzLanguage code)defaultr   z--outputz-or   zOutput directoryz--disable-mfa
store_true)actionz--disable-conformer)r   r   r   r   rS   
z<============================================================zValidation SummaryzAudio: z
Language: zTotal time: rZ   r[   rV   rW   rX   :z   Transcription: d   rU   z   Word alignments: z wordsz   Confidence: z.3fz
   Error: )Nr   )7r   r   r   r_   typingr   r   r   r   dataclassesr   r   r   pathlibr	   baser   r   r   r   r&   r9   r"   r   r   argparseArgumentParserparseradd_argument
parse_argsargsdisable_mfadisable_conformerr   outputr   rb   r'   	referencer2   rI   r+   r)   r5   r1   vrr]   ri   transcriptionword_alignmentsrM   overall_confidencer^   r   r   r$   r$   r$   r%   <module>   s     



