o
    ?؂i                     @   s$  d Z ddlZddlmZmZ ddlmZmZ ddlm	Z	m
Z
mZmZ ddlmZ G dd deZeG d	d
 d
ZeG dd dZG dd deZi dddddddddddddddddd d!d"d#d$ddddddddddddddd d"d$d%Zd&ed'efd(d)ZdS )*z|
Base classes for transcription validators.
All validators inherit from BaseValidator and produce ValidationResult objects.
    N)ABCabstractmethod)	dataclassfield)ListOptionalDictAny)Enumc                   @   s    e Zd ZdZdZdZdZdZdS )ValidatorStatuszStatus of a validator.not_initializedreadyerrordisabledN)__name__
__module____qualname____doc__NOT_INITIALIZEDREADYERRORDISABLED r   r   4/home/ubuntu/maya3_transcribe/src/validators/base.pyr      s    r   c                   @   sL   e Zd ZU dZeed< eed< eed< dZee ed< e	defdd	Z
dS )
WordAlignmentz3Represents alignment information for a single word.word
start_timeend_timeN
confidencereturnc                 C   s   | j | j S )N)r   r   selfr   r   r   duration   s   zWordAlignment.duration)r   r   r   r   str__annotations__floatr   r   propertyr"   r   r   r   r   r      s   
 r   c                   @   s   e Zd ZU dZeed< eed< eed< dZee ed< dZ	ee ed< e
edZee ed	< dZee ed
< dZee ed< dZee ed< dZee ed< dZeeeef  ed< edee fddZdeeef fddZdS )ValidationResultz:Result from a single validator for a single audio segment.validator_name
audio_pathsuccessNerror_messagetranscription)default_factoryword_alignmentsoverall_confidencealignment_scoreprocessing_time_secaudio_duration_sec
raw_outputr   c                 C   s2   | j r| jrt| j  }| jdkr|| j S dS )z3Calculate speaking rate if transcription available.r   N)r,   r2   lensplit)r!   
word_countr   r   r   words_per_second<   s
   

z!ValidationResult.words_per_secondc                 C   s<   | j | j| j| j| jdd | jD | j| j| j| j	| j
dS )z(Convert to dictionary for serialization.c                 S   s&   g | ]}|j |j|j|j|jd qS )r   r   r   r   r"   r8   ).0war   r   r   
<listcomp>M   s    z,ValidationResult.to_dict.<locals>.<listcomp>r(   r)   r*   r+   r,   r.   r/   r0   r1   r2   r7   r<   r    r   r   r   to_dictE   s   
zValidationResult.to_dict)r   r   r   r   r#   r$   boolr+   r   r,   r   listr.   r   r   r/   r%   r0   r1   r2   r3   r   r	   r&   r7   r=   r   r   r   r   r'   !   s    
 r'   c                
   @   s   e Zd ZU dZdZeed< dZeed< ddefdd	Z	e
d
efddZe
		ddedee ded
efddZ		ddee deee  ded
ee fddZd
efddZdd Zd
eeef fddZdS ) BaseValidatora  
    Abstract base class for all transcription validators.
    
    Each validator should:
    1. Initialize model/resources in __init__ or setup()
    2. Implement validate() to process a single audio file
    3. Return ValidationResult with relevant metrics
    base_validatornamezBase validator classdescriptionTenabledc                 K   s   || _ tj| _|| _d| _dS )z
        Initialize the validator.
        
        Args:
            enabled: Whether this validator is active
            **kwargs: Validator-specific configuration
        N)rD   r   r   statusconfig_model)r!   rD   kwargsr   r   r   __init__l   s   
zBaseValidator.__init__r   c                 C      dS )z
        Initialize models and resources.
        Called lazily before first validation.
        
        Returns:
            True if setup successful, False otherwise
        Nr   r    r   r   r   setupy   s   	zBaseValidator.setupNter)   reference_textlanguagec                 C   rJ   )am  
        Validate/process a single audio file.
        
        Args:
            audio_path: Path to audio file
            reference_text: Optional reference transcription for alignment
            language: Language code (te=Telugu, hi=Hindi, kn=Kannada, etc.)
            
        Returns:
            ValidationResult with transcription/alignment data
        Nr   )r!   r)   rM   rN   r   r   r   validate   s   zBaseValidator.validateaudio_pathsreference_textsc           	      C   sF   g }|p
dgt | }t||D ]\}}| |||}|| q|S )z
        Validate multiple audio files.
        Default implementation calls validate() for each file.
        Override for batch-optimized processing.
        N)r4   ziprO   append)	r!   rP   rQ   rN   results	ref_textsr)   ref_textresultr   r   r   validate_batch   s   zBaseValidator.validate_batchc              
   C   s|   | j tjkr8z|  }|rtjntj| _ W n" ty7 } ztj| _ td| j d|  W Y d}~dS d}~ww | j tjkS )z&Ensure validator is set up before use.[z] Setup failed: NF)	rE   r   r   rK   r   r   	ExceptionprintrB   )r!   r*   er   r   r   ensure_setup   s   zBaseValidator.ensure_setupc                 C   s
   d| _ dS )z&Release resources. Override if needed.N)rG   r    r   r   r   cleanup   s   
zBaseValidator.cleanupc                 C   s   | j | j| j| jj| jdS )zGet validator information.)rB   rC   rD   rE   rF   )rB   rC   rD   rE   valuerF   r    r   r   r   get_info   s   zBaseValidator.get_info)T)NrL   )r   r   r   r   rB   r#   r$   rC   r>   rI   r   rK   r   r'   rO   r   rX   r]   r^   r   r	   r`   r   r   r   r   r@   _   sB   
 	


r@   telugurL   hindihikannadakntamilta	malayalammlbengalibngujaratigumarathimrpunjabipaodiaorenglishen)rm   ro   rq   rs   ru   rN   r   c                 C   s   t |  |  S )z(Convert language name to ISO 639-1 code.)LANGUAGE_CODESgetlower)rN   r   r   r   normalize_language_code   s   ry   )r   osabcr   r   dataclassesr   r   typingr   r   r   r	   enumr
   r   r   r'   r@   rv   r#   ry   r   r   r   r   <module>   sl    =i	
