o
    }oi}                     @   s~   d dl mZ d dlZd dlZd dlmZ d dlmZ d dlm	Z	 d dl
mZ d dlmZ d dlmZ G d	d
 d
Zdd ZdS )    )defaultdictN)EnglishTextNormalizer)ASRModel)WithOptionalCudaGraphs)fp32_precision)load_pretrained_nemo)loggingc                   @   sv   e Zd ZdZddedededdfdd	Zd
d Z	ddedee de	j
de	j
ddf
ddZdeee	j
f fddZdS )ASRBLEUz
    Computes BLEU scores on ASR predictions on generated audio with pretrained NeMo ASR.
    By default, uses Whisper's EnglishTextNormalizer on hypotheses and references.
    TNpretrained_asr	normalizeverbosereturnc                 C   sN   d | _ || _|| _|r|d u rt | _n|| _nt| _tt| _tt| _	d S N)
asrpretrained_asr_namer   r   
normalizer	_identityr   list_refs_hyps)selfr
   r   r   r    r   e/home/ubuntu/.local/lib/python3.10/site-packages/nemo/collections/speechlm2/parts/metrics/asr_bleu.py__init__!   s   

zASRBLEU.__init__c                 C   sX   t jj  t  tt| j | _	W d    n1 sw   Y  t
j| j	dd | S )Nzdecoding.decoding)attribute_path)torchcudamemoryempty_cacher   r   r   r   evalr   r   disable_cuda_graphs_recursive)r   r   r   r   reset0   s   zASRBLEU.resetnamerefs
pred_audiopred_audio_lensc           	   
   C   s   | j d u r	|   |d u r|jd g|jd  }t  | j jdd t||D |jd dd}W d    n1 s:w   Y  t||D ]8\}}|j}| j| | 	| | j
| | 	| | jr|t||gj}td| d| d	|d
d qDd S )N   r   c                 S   s   g | ]
\}}|d | qS r   r   ).0audioalenr   r   r   
<listcomp>E   s    z"ASRBLEU.update.<locals>.<listcomp>F)
batch_sizer   z[REF]	z
[ASR]	z [z.2f])r   r!   shaper   
transcribeziptextr   appendr   r   r   	sacrebleusentence_bleuscorer   info)	r   r"   r#   r$   r%   asr_hypsrefasr_hypasrbr   r   r   update:   s(   
 zASRBLEU.updatec                 C   s   i }| j  D ]}tt| j| | j | gj}||d| < qtt	|
  |d< | j   | j  d| _tjj  |S )zAComputes the final score and deallocates ASR and partial results.	asr_bleu_asr_bleuN)r   keysr   tensorr2   corpus_bleur   r4   stackr   valuesmeanclearr   r   r   r   )r   corpus_metricr"   metricr   r   r   computeR   s   "

zASRBLEU.compute)TNTr   )__name__
__module____qualname____doc__strboolr   r!   r   r   Tensorr:   dictrF   r   r   r   r   r	      s"    
r	   c                 C   s   | S r   r   )xr   r   r   r   `   s   r   )collectionsr   r2   r   whisper_normalizer.englishr   nemo.collections.asr.modelsr   2nemo.collections.common.parts.optional_cuda_graphsr   *nemo.collections.speechlm2.parts.precisionr   +nemo.collections.speechlm2.parts.pretrainedr   
nemo.utilsr   r	   r   r   r   r   r   <module>   s   E