o
    wi7                     @   s  d dl mZmZmZmZ d dlZd dlZd dlZd dlm	Z	 d dl
mZ d dlmZ d dlmZ d dlmZ g dZd	d
 Zddee dee defddZ	ddee dee deeeeeef fddZddee dee deee ef fddZG dd de	ZdS )    )ListOptionalTupleUnionN)Metric)AbstractCTCDecoding)AbstractMultiTaskDecoding)AbstractRNNTDecoding)logging)word_error_rateword_error_rate_detailWERc                 C   s6   t t| j}| j|g|d |  ||d d    S )N   )listrangendimpermute)tensor	dim_indexall_dims r   ]/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/nemo/collections/asr/metrics/wer.pymove_dimension_to_the_front   s   (r   F
hypotheses
referencesreturnc           
      C   s   d}d}t | t |krtdt | t |t| |D ]%\}}|r,t|}t|}n| }| }|t |7 }|t||7 }q|dkrOd| | }	|	S td}	|	S )au  
    Computes Average Word Error rate between two texts represented as
    corresponding lists of string.

    Hypotheses and references must have same length.

    Args:
        hypotheses (list): list of hypotheses
        references(list) : list of references
        use_cer (bool): set True to enable cer

    Returns:
        wer (float): average word error rate
    r   In word error rate calculation, hypotheses and reference lists must have the same number of elements. But I got:{0} and {1} correspondingly      ?inf)	len
ValueErrorformatzipr   spliteditdistanceevalfloat)
r   r   use_cerscoreswordshrh_listr_listwerr   r   r   r   #   s(   
r   c                 C   s  d}d}dddd}t | t |krtdt | t |t| |D ]x\}}|r2t|}t|}	n| }| }	t |	dkrVt |dkrSt |}
|d  |
7  < n>d}
n;|ratj||dd}nt||}|d |d  |d  }
|d  |d 7  < |d  |d 7  < |d  |d 7  < ||
7 }|t |	7 }q#|dkrd	| | }d	|d  | }d	|d  | }d	|d  | }nt	d
t	d
t	d
t	d
f\}}}}|||||fS )a  
    Computes Average Word Error Rate with details (insertion rate, deletion rate, substitution rate)
    between two texts represented as corresponding lists of string.

    Hypotheses and references must have same length.

    Args:
        hypotheses (list): list of hypotheses
        references(list) : list of references
        use_cer (bool): set True to enable cer

    Returns:
        wer (float): average word error rate
        words (int):  Total number of words/charactors of given reference texts
        ins_rate (float): average insertion error rate
        del_rate (float): average deletion error rate
        sub_rate (float): average substitution error rate
    r   )substitutions
insertions	deletionsr   r0   Treturn_dictr1   r/   r   r   )
r   r    r!   r"   r   r#   jiwercercompute_measuresr&   )r   r   r'   r(   r)   	ops_countr*   r+   r,   r-   errorsmeasuresr.   ins_ratedel_ratesub_rater   r   r   r   L   sF   
$r   c                 C   s2  d}d}g }t | t |krtdt | t |t| |D ]c\}}|r.t|}t|}	n| }| }	t |	dkrNt |dkrMt |}
|td n*|r]tj	||dd}|d }n
t
||}|d }|d |d	  |d
  }
|| ||
7 }|t |	7 }q|dkrd| | }||fS td}||fS )a  
    Computes Word Error Rate per utterance and the average WER
    between two texts represented as corresponding lists of string.

    Hypotheses and references must have same length.

    Args:
        hypotheses (list): list of hypotheses
        references(list) : list of references
        use_cer (bool): set True to enable cer

    Returns:
        wer_per_utt (List[float]): word error rate per utterance
        avg_wer (float): average word error rate
    r   r   r   Tr2   r5   r.   r0   r1   r/   r   )r   r    r!   r"   r   r#   appendr&   r4   r5   r6   )r   r   r'   r(   r)   wer_per_uttr*   r+   r,   r-   r8   r9   eravg_werr   r   r   word_error_rate_per_utt   sB   


rA   c                       s   e Zd ZU dZdZeed< 						ddeee	e
f f fddZ				dd
ejdejdejdejdeej deej fddZdd Z  ZS )r   a  
    This metric computes numerator and denominator for Overall Word Error Rate (WER) between prediction and reference
    texts. When doing distributed training/evaluation the result of ``res=WER(predictions, predictions_lengths, targets, target_lengths)``
    calls will be all-reduced between all workers using SUM operations. Here ``res`` contains three numbers
    ``res=[wer, total_levenstein_distance, total_number_of_words]``.

    If used with PytorchLightning LightningModule, include wer_numerator and wer_denominators inside validation_step
    results. Then aggregate (sum) then at the end of validation epoch to correctly compute validation WER.

    Example:
        def validation_step(self, batch, batch_idx):
            ...
            wer_num, wer_denom = self.__wer(predictions, predictions_len, transcript, transcript_len)
            self.val_outputs = {'val_loss': loss_value, 'val_wer_num': wer_num, 'val_wer_denom': wer_denom}
            return self.val_outputs

        def on_validation_epoch_end(self):
            ...
            wer_num = torch.stack([x['val_wer_num'] for x in self.val_outputs]).sum()
            wer_denom = torch.stack([x['val_wer_denom'] for x in self.val_outputs]).sum()
            tensorboard_logs = {'validation_loss': val_loss_mean, 'validation_avg_wer': wer_num / wer_denom}
            self.val_outputs.clear()  # free memory
            return {'val_loss': val_loss_mean, 'log': tensorboard_logs}

    Args:
        decoding: An instance of CTCDecoding or RNNTDecoding.
        use_cer: Whether to use Character Error Rate instead of Word Error Rate.
        log_prediction: Whether to log a single decoded sample per call.
        batch_dim_index: Index corresponding to batch dimension. (For RNNT.)
        dist_dync_on_step: Whether to perform reduction on forward pass of metric.

    Returns:
        res: a tuple of 3 zero dimensional float32 ``torch.Tensor` objects: a WER score, a sum of Levenstein's
            distances for all prediction - reference pairs, total number of words in all references.
    Tfull_state_updateFr   decodingc           	         s   t  j||d | _| _| _| _| _d  _t jt	r( fdd _n&t jt
r6 fdd _nt jtrD fdd _n
tdt j  jdtdd	d
d  jdtdd	d
d d S )N)dist_sync_on_stepsync_on_computec                    s    j j| |dS )N)encoder_outputencoded_lengths)rC   rnnt_decoder_predictions_tensorpredictionspredictions_lengthspredictions_mask	input_idsselfr   r   <lambda>  s    zWER.__init__.<locals>.<lambda>c                    s    j j| | jdS )N)decoder_outputsdecoder_lengthsfold_consecutive)rC   ctc_decoder_predictions_tensorrS   rI   rN   r   r   rP     s
    c                    s    j j| ||ddS )NF)encoder_hidden_statesencoder_input_maskdecoder_input_idsreturn_hypotheses)rC   decode_predictions_tensor)rJ   prediction_lengthsrL   rM   rN   r   r   rP     s    z-WER metric does not support decoding of type r(   r   sumF)defaultdist_reduce_fx
persistentr)   )super__init__rC   r'   log_predictionrS   batch_dim_indexdecode
isinstancer	   r   r   	TypeErrortype	add_statetorchr   )	rO   rC   r'   ra   rS   rb   rD   rE   kwargs	__class__rN   r   r`      s    zWER.__init__NrJ   rK   targetstargets_lengthsrL   rM   c                 K   s  d}d}	g }
t  V |  }|  }| jdkr"t|| j}t|jd D ]}||  }|| d| 	 
 }| j|}|
| q)| dkrW| ||||ng }W d   n1 scw   Y  |r| jrtd td|
d   td|d j  t||
D ]1\}}t|tr|d }| jrt|j}t|}n	|j }| }|t|7 }|	t||7 }	qt j|	| jj| jjd| _t j|| j j| j jd| _ dS )a}  
        Updates metric state.
        Args:
            predictions: an integer torch.Tensor of shape ``[Batch, Time, {Vocabulary}]`` (if ``batch_dim_index == 0``) or
                ``[Time, Batch]`` (if ``batch_dim_index == 1``)
            prediction_lengths: an integer torch.Tensor of shape ``[Batch]``
            targets: an integer torch.Tensor of shape ``[Batch, Time]`` (if ``batch_dim_index == 0``) or
                ``[Time, Batch]`` (if ``batch_dim_index == 1``)
            target_lengths: an integer torch.Tensor of shape ``[Batch]``
            predictions_lengths: an integer torch.Tensor of shape ``[Batch]``
        r   N
zWER reference:zWER predicted:)devicedtype)!rh   no_gradlongcpurb   r   r   shapeitemnumpytolistrC   decode_ids_to_strr=   numelrc   ra   r
   infotextr"   rd   r   r'   r#   r   r$   r%   r   r(   ro   rp   r)   )rO   rJ   rK   rl   rm   rL   rM   ri   r)   r(   r   tgt_lenths_cpu_tensortargets_cpu_tensorindtgt_lentarget	referencer   r*   r+   r,   r-   r   r   r   update$  sD   







z
WER.updatec                 C   s*   | j   }| j  }|| ||fS )N)r(   detachr&   r)   )rO   r(   r)   r   r   r   computed  s   zWER.compute)FTTr   FT)NN)__name__
__module____qualname____doc__rB   bool__annotations__r   r   r	   r   r`   rh   Tensorr   r   r   __classcell__r   r   rj   r   r      s8   
 $1
@r   )F)typingr   r   r   r   r$   r4   rh   torchmetricsr   2nemo.collections.asr.parts.submodules.ctc_decodingr   8nemo.collections.asr.parts.submodules.multitask_decodingr   3nemo.collections.asr.parts.submodules.rnnt_decodingr	   
nemo.utilsr
   __all__r   strr&   r   intr   rA   r   r   r   r   r   <module>   s,    *
,H>