o
    Gi)                     @   s   zd dl mZ W n ey   d dlmZ Y nw d dl mZmZ d dlZd dlZG dd dejj	Z
						
dded deejejf fddZdS )    )Literal)ListUnionNc                       sx   e Zd ZdZ			ddededed f fd	d
Zdej	de
ejeee  f dedede
ejejf f
ddZ  ZS )MWERLosszMinimum Word Error Rate Loss compuration in k2.

    See equation 2 of https://arxiv.org/pdf/2106.02302.pdf about its definition.
          ?Tsumtemperatureuse_double_scores	reductionnonemeanr   c                    s0   |dv sJ |t    || _|| _|| _dS )a  
        Args:
          temperature:
            For long utterances, the dynamic range of scores will be too large
            and the posteriors will be mostly 0 or 1.
            To prevent this it might be a good idea to have an extra argument
            that functions like a temperature.
            We scale the logprobs by before doing the normalization.
          use_double_scores:
            True to use double precision floating point.
            False to use single precision.
          reduction:
            Specifies the reduction to apply to the output:
            'none' | 'sum' | 'mean'.
            'none': no reduction will be applied.
                    The returned 'loss' is a k2.RaggedTensor, with
                    loss.tot_size(0) == batch_size.
                    loss.tot_size(1) == total_num_paths_of_current_batch
                    If you want the MWER loss for each utterance, just do:
                    `loss_per_utt = loss.sum()`
                    Then loss_per_utt.shape[0] should be batch_size.
                    See more example usages in 'k2/python/tests/mwer_test.py'
            'sum': sum loss of each path over the whole batch together.
            'mean': divide above 'sum' by total num paths over the whole batch.
        r   N)super__init__r   r	   r
   )selfr   r	   r
   	__class__ @/home/ubuntu/.local/lib/python3.10/site-packages/k2/mwer_loss.pyr      s
   

zMWERLoss.__init__lattice	ref_textsnbest_scale	num_pathsreturnc                 C   s  t jj||| j|d}|jj}|jj|}|j|}|	 }	t j
||	jd}
t j|
|	|jddd}|j| jdd}|| }t ||jj}| | j }t ||}|jddj }|| }| jd	krp| }|S | jd
kr{| }|S t ||}|S )ai  Compute the Minimum Word Error loss given
        a lattice and corresponding ref_texts.

        Args:
          lattice:
            An FsaVec with axes [utt][state][arc].
          ref_texts:
            It can be one of the following types:
              - A list of list-of-integers, e..g, `[ [1, 2], [1, 2, 3] ]`
              - An instance of :class:`k2.RaggedTensor`.
                Must have `num_axes == 2` and with dtype `torch.int32`.
          nbest_scale:
            Scale `lattice.score` before passing it to :func:`k2.random_paths`.
            A smaller value leads to more unique paths at the risk of being not
            to sample the path with the best score.
          num_paths:
            Number of paths to **sample** from the lattice
            using :func:`k2.random_paths`.
        Returns:
            Minimum Word Error Rate loss.
        )r   r   r	   r   )device   T)refshypshyp_to_ref_mapsorted_match_refF)r	   log_semiring)use_logr   r   )k2Nbestfrom_latticer	   scoresr   	kept_pathshapetobuild_levenshtein_graphslevenshtein_graphlevenshtein_alignmentrow_idsget_tot_scoresRaggedTensorfsar   r   	normalizevaluesexpr
   r   )r   r   r   r   r   nbestr   path_arc_shapestream_path_shaper   r   r+   
tot_scoreswersragged_nbest_logp	path_logpragged_path_logpprob_normalizedlossr   r   r   forward8   sD   


zMWERLoss.forward)r   Tr   )__name__
__module____qualname____doc__floatboolr   r   r"   Fsar   r.   r   inttorchTensorr=   __classcell__r   r   r   r   r      s.    $r         ?   r   Tr   r
   r   r   c                 C   s*   |dv sJ |t |||}|| |||S )a  Compute the Minimum loss given a lattice and corresponding ref_texts.

    Args:
       lattice:
         An FsaVec with axes [utt][state][arc].
       ref_texts:
         It can be one of the following types:
           - A list of list-of-integers, e..g, `[ [1, 2], [1, 2, 3] ]`
           - An instance of :class:`k2.RaggedTensor`.
             Must have `num_axes == 2` and with dtype `torch.int32`.
       nbest_scale:
         Scale `lattice.score` before passing it to :func:`k2.random_paths`.
         A smaller value leads to more unique paths at the risk of being not
         to sample the path with the best score.''
       num_paths:
         Number of paths to **sample** from the lattice
         using :func:`k2.random_paths`.
       temperature:
         For long utterances, the dynamic range of scores will be too large
         and the posteriors will be mostly 0 or 1.
         To prevent this it might be a good idea to have an extra argument
         that functions like a temperature.
         We scale the logprobs by before doing the normalization.
       use_double_scores:
         True to use double precision floating point.
         False to use single precision.
       reduction:
         Specifies the reduction to apply to the output:
         'none' | 'sum' | 'mean'.
         'none': no reduction will be applied.
                 The returned 'loss' is a k2.RaggedTensor, with
                 loss.tot_size(0) == batch_size.
                 loss.tot_size(1) == total_num_paths_of_current_batch
                 If you want the MWER loss for each utterance, just do:
                 `loss_per_utt = loss.sum()`
                 Then loss_per_utt.shape[0] should be batch_size.
                 See more example usages in 'k2/python/tests/mwer_test.py'
         'sum': sum loss of each path over the whole batch together.
         'mean': divide above 'sum' by total num paths over the whole batch.
    Returns:
       Minimum Word Error Rate loss.
    r   )r   )r   r   r   r   r   r	   r
   mr   r   r   	mwer_loss   s   2rL   )rI   rJ   r   Tr   )typingr   ImportErrortyping_extensionsr   r   rF   r"   nnModuler   rG   r.   rL   r   r   r   r   <module>   s&   s