o
    %i
                     @  sF   d Z ddlmZ ddlZddlZeeZ	ddddZdddZ	dS )u   
CTC scoring: compute log-likelihood of reference text given audio log-probabilities.
This is NOT ASR — we don't decode. We ask "how likely is this text given this audio?"
    )annotationsN	log_probstorch.Tensortarget_tokens	list[int]blank_idintreturntuple[float, float]c           
      C  s   | j d }t|}|dkrdS ||k rtdtdfS |    }tjj|ddd}||dtj	|gtj
dtj	|gtj
dtj	|gtj
d}|  }|| }	t|d	t|	d	fS )
a  
    Compute CTC log-likelihood of target_tokens given log_probs.

    Args:
        log_probs: [T, V] log-softmax output from CTC model (on any device)
        target_tokens: list of token IDs for the reference text
        blank_id: blank token index (usually 0)

    Returns:
        (raw_score, normalized_score) where normalized = raw / len(tokens)
        Higher is better. Typical range: raw ~ -50 to -5, norm ~ -2.0 to -0.1
    r   )        r   z-infnoneT)blank	reductionzero_infinity   )dtype   )shapelenfloatdetachcputorchnnCTCLoss	unsqueezetensorlongitemround)
r   r   r   TSlog_probs_cpuctc_loss_fnloss	raw_scorenormalized_score r'   1/home/ubuntu/transcripts/validations/ctc_score.pycompute_ctc_score   s&   

r)   	referencestr
hypothesisr   c              	   C  s   | s|sdS dS |sdS t |  }t | }t|}t|}t t|d }td|d D ];}|d }||d< td|d D ])}	||	 }
||d  ||	d  krW|||	< ndt|||	 ||	d   ||	< |
}q@q/|dkru|| | S dS )z
    Compute Character Error Rate between reference and hypothesis.
    Returns 0.0 (perfect match) to 1.0+ (completely wrong).
    Used as CTC score fallback when log-probabilities aren't accessible.
    r   g      ?r   r   )liststripr   rangemin)r*   r,   	ref_chars	hyp_charsnmdpiprevjtempr'   r'   r(   character_error_rate=   s(   
 r:   )r   )r   r   r   r   r   r   r	   r
   )r*   r+   r,   r+   r	   r   )
__doc__
__future__r   loggingr   	getLogger__name__loggerr)   r:   r'   r'   r'   r(   <module>   s    
/