o
    =i@1                     @   sh   d Z ddlZddlZddlmZmZmZmZ ddlm	Z	 ddl
mZmZmZmZmZ G dd deZdS )	a  
IndicConformer Validator
========================

Uses AI4Bharat's IndicConformer models for ASR.

Two model types supported:
1. NeMo-based language-specific models (ai4bharat/indicconformer_stt_*_hybrid_ctc_rnnt_large)
2. HuggingFace AutoModel multilingual (ai4bharat/indic-conformer-600m-multilingual)

Features:
- 600M parameter Conformer architecture
- Hybrid CTC-RNNT decoding
- Support for 22 Indian languages
    N)OptionalListDictAny)Path   )BaseValidatorValidationResultWordAlignmentValidatorStatusnormalize_language_codec                       s  e Zd ZdZdZdZdddddd	d
dddd
ZdZ					d1dede	de	dede	f
 fddZ
d2de	defddZde	defdd Zde	defd!d"Zd3d$e	d%efd&d'Zd$e	de	dee	ef fd(d)Zd$e	dee	ef fd*d+Z		d4d$e	d,ee	 de	defd-d.Zd/d0 Z  ZS )5IndicConformerValidatorz
    Validator using AI4Bharat's IndicConformer models.
    
    Supports:
    - Multilingual model (600M params, 22 languages)
    - Language-specific NeMo models
    indic_conformerz7AI4Bharat IndicConformer - 600M Multilingual Indian ASRz5ai4bharat/indicconformer_stt_te_hybrid_ctc_rnnt_largez5ai4bharat/indicconformer_stt_hi_hybrid_ctc_rnnt_largez5ai4bharat/indicconformer_stt_bn_hybrid_ctc_rnnt_largez5ai4bharat/indicconformer_stt_ta_hybrid_ctc_rnnt_largez5ai4bharat/indicconformer_stt_kn_hybrid_ctc_rnnt_largez5ai4bharat/indicconformer_stt_ml_hybrid_ctc_rnnt_largez5ai4bharat/indicconformer_stt_mr_hybrid_ctc_rnnt_largez5ai4bharat/indicconformer_stt_gu_hybrid_ctc_rnnt_largez5ai4bharat/indicconformer_stt_pa_hybrid_ctc_rnnt_largez5ai4bharat/indicconformer_stt_or_hybrid_ctc_rnnt_large)
tehibntaknmlmrgupaorz+ai4bharat/indic-conformer-600m-multilingualTautor   rnntenableddevicelanguageuse_multilingualdecodingc                    sT   t  jdd|i| || _t|| _|| _|| _d| _d| _d| _	d| _
d| _dS )a_  
        Initialize IndicConformer validator.
        
        Args:
            enabled: Whether validator is active
            device: "cuda", "cpu", or "auto"
            language: Default language code
            use_multilingual: Use multilingual model (recommended, no gating)
            decoding: Decoding strategy - "ctc" or "rnnt"
        r   N )super__init__device_preferencer   r   r   r   modelr   _model_name_model_type_current_language)selfr   r   r   r   r   kwargs	__class__r    I/home/ubuntu/maya3_transcribe/src/validators/indic_conformer_validator.pyr"   ;   s   

z IndicConformerValidator.__init__Nreturnc              
   C   s  zfddl }| jdkr|j rdnd| _n| j| _td| j d| j d t|p+| j}|| _	| j
raz| |W W S  ty` } ztd| j d	|  td| j d
 W Y d}~nd}~ww | |W S  ty } ztd| j d|  ddl}|  W Y d}~dS d}~ww )zLoad IndicConformer model.r   Nr   cudacpu[z] Loading model on z...z] Multilingual setup failed: z(] Trying language-specific NeMo model...z] Setup error: F)torchr#   r.   is_availabler   printnamer   r   r'   r   _setup_multilingual	Exception_setup_nemo	traceback	print_exc)r(   r   r1   	lang_codeer8   r    r    r,   setupY   s0   
zIndicConformerValidator.setupc                 C   s   ddl m} ddl}| j| _td| j d| j  |  |jddd |jdd	d |j	| jd
d| _
W d   n1 s@w   Y  | j
  d| _td| j d d
S )z+Setup using HuggingFace multilingual model.r   )	AutoModelNr0   z] Loading multilingual model: ignorez.*CUDAExecutionProvider.*)messagez.*FRAME_DURATION.*T)trust_remote_codemultilingualz7] Multilingual model loaded successfully (ONNX backend))transformersr=   warningsMULTILINGUAL_MODELr%   r3   r4   catch_warningsfilterwarningsfrom_pretrainedr$   evalr&   )r(   r   r=   rC   r    r    r,   r5   z   s    


z+IndicConformerValidator._setup_multilingualc              
   C   s  zddl m  m} W n ty   td| j d Y dS w | j|| _| js6td| j d|  dS td| j d| j  z)|j	j
j| jd| _| jd	krX| j | _| j  d
| _td| j d W dS  ty } ztd| j d|  W Y d}~dS d}~ww )z)Setup using NeMo language-specific model.r   Nr0   zA] NeMo not installed. Install with: pip install nemo_toolkit[asr]Fz] No NeMo model for z] Loading NeMo model: )
model_namer.   nemoz ] NeMo model loaded successfullyTz] NeMo model load failed: )nemo.collections.asrcollectionsasrImportErrorr3   r4   NEMO_MODELSgetr%   modelsASRModelrG   r$   r   r.   rH   r&   r6   )r(   r   nemo_asrr;   r    r    r,   r7      s4   

z#IndicConformerValidator._setup_nemo>  
audio_path	target_src           
      C   s   ddl }ddl}ddl}||\}}|| }t|jdkr'|d}n|jd dkr5|j	ddd}||krD|j
||}	|	|}||fS )zLoad and resample audio.r   Nr   T)dimkeepdim)	soundfiler1   
torchaudioread
from_numpyfloatlenshape	unsqueezemean
transformsResample)
r(   rU   rV   sfr1   rZ   datasample_ratewaveform	resamplerr    r    r,   _load_audio   s   z#IndicConformerValidator._load_audioc              
   C   s   ddl }| |\}}|jd | }|  | ||| j}W d   n1 s)w   Y  t|ttfr=|r;|d nd}t	|
 }g }	t|D ]#\}
}|	t||
| tt|d |
d | tt|d dd qIt	||	|dS )z?Transcribe using multilingual HuggingFace model (ONNX backend).r   Nr    word
start_timeend_time
confidencetranscription
alignmentsduration)r1   ri   r_   no_gradr$   r   
isinstancelisttuplestrsplit	enumerateappendr
   maxr^   )r(   rU   r   r1   rg   srrs   rq   wordsrr   irl   r    r    r,   _transcribe_multilingual   s*   

z0IndicConformerValidator._transcribe_multilingualc              
   C   s   | j |gd }ddl}||\}}t|| }| }g }t|D ]#\}	}
|t|
|	| t	t|d |	d | t	t|d dd q$|||dS )zTranscribe using NeMo model.r   Nr   rk   rp   )
r$   
transcriberY   r[   r^   ry   rz   r{   r
   r|   )r(   rU   rq   rd   re   r}   rs   r~   rr   r   rl   r    r    r,   _transcribe_nemo   s"   
z(IndicConformerValidator._transcribe_nemoreference_textc           
      C   s  | j st| j|dddS t }t|}| j|ks| jdu r,| |s,t| j|dddS z5| jdkr9| 	||}n| 
|}t | }t| j|d|d |d	 d||d
|| j| j| jdd	W S  ty } zddl}	|	  t| j|dt|t | dW  Y d}~S d}~ww )a+  
        Transcribe audio using IndicConformer.
        
        Args:
            audio_path: Path to audio file
            reference_text: Optional reference (not used for ASR)
            language: Language code
            
        Returns:
            ValidationResult with ASR output
        FzValidator disabled)validator_namerU   successerror_messageNzModel not loadedrA   Trq   rr   rs   )r   r$   
model_typer   )	r   rU   r   rq   word_alignmentsoverall_confidenceprocessing_time_secaudio_duration_sec
raw_outputr   )r   rU   r   r   r   )r   r	   r4   timer   r'   r$   r<   r&   r   r   rP   r%   r   r6   r8   r9   rx   )
r(   rU   r   r   rm   r:   resultprocessing_timer;   r8   r    r    r,   validate  sb   



z IndicConformerValidator.validatec                 C   s8   | j dur
| ` d| _ ddl}|j r|j  dS dS )zRelease resources.Nr   )r$   r1   r.   r2   empty_cache)r(   r1   r    r    r,   cleanup[  s   

zIndicConformerValidator.cleanup)Tr   r   Tr   )N)rT   )Nr   )__name__
__module____qualname____doc__r4   descriptionrO   rD   boolrx   r"   r<   r5   r7   intri   r   r   r   r   r   r	   r   r   __classcell__r    r    r*   r,   r      sd    !$&
Kr   )r   osr   typingr   r   r   r   pathlibr   baser   r	   r
   r   r   r   r    r    r    r,   <module>   s    	