o
    Gi&!                     @   s   zd dl mZ W n ey   d dlmZ Y nw d dl mZ d dlZd dlmZ ddlm	Z	 ddl
mZ ddlmZ ddlmZ G d	d
 d
ejZ					ddededededed dedeej dejfddZdS )    )Literal)OptionalN   )intersect_dense)DenseFsaVec)Fsa)RaggedTensorc                       sd   e Zd ZdZ		ddeded def fdd	Z	
	ddede	dede
ej dejf
ddZ  ZS )CtcLossa  Ctc Loss computation in k2. It produces the same output as `torch.CtcLoss`
    if given the same input.

    One difference between `k2.CtcLoss` and `torch.CtcLoss` is that k2 accepts
    a general FSA while PyTorch requires a linear FSA (represented as a list).
    That means, `k2.CtcLoss` supports words with multiple pronunciations.

    See `k2/python/tests/ctc_loss_test.py <https://github.com/k2-fsa/k2/blob/master/k2/python/tests/ctc_loss_test.py>`_
    for usage.

    We assume that the blank label is always 0. The arguments `reduction` and
    `target_lengths` have the same meaning as their counterparts in
    `torch.CtcLoss`.
    sumToutput_beam	reductionnonemeanr
   use_double_scoresc                    s,   t    |dv sJ || _|| _|| _dS )a  
        Args:
          output_beam:
             Beam to prune output, similar to lattice-beam in Kaldi.  Relative
             to best path of output.
          reduction:
            Specifies the reduction to apply to the output:
            'none' | 'mean' | 'sum'.
            'none': no reduction will be applied, 'mean': the output losses
            will be **divided** by the target lengths and then the **mean** over
            the batch is taken. 'sum': sum the output losses over batches.
          use_double_scores:
            True to use double precision floating point in computing
            the total scores. False to use single precision.
        r   N)super__init__r   r   r   )selfr   r   r   	__class__ ?/home/ubuntu/.local/lib/python3.10/site-packages/k2/ctc_loss.pyr   #   s
   

zCtcLoss.__init__        Ndecoding_graphdense_fsa_vecdelay_penaltytarget_lengthsreturnc                 C   sR  t ||| j|dkrdndd}|dkrxt|j d|j}|j|j	}|j
|d? dd}| |j }	t|drQt|jtjsCJ |jd	k|jdkB |jB }
nt|d
sXJ t|jtjsaJ |jd	k|jdkB }
|	|
d | j|	7  _|jd| jd}d| }|tj}| jdkr|S | jdkr| S | jdksJ || }| S )ab  Compute the CTC loss given a decoding graph and a dense fsa vector.

        Args:
          decoding_graph:
            An FsaVec. It can be the composition result of a CTC topology
            and a transcript.
          dense_fsa_vec:
            It represents the neural network output. Refer to the help
            information in :class:`k2.DenseFsaVec`.
          delay_penalty:
            A constant to penalize symbol delay, which is used to make symbol
            emit earlier for streaming models. It is almost the same as the
            `delay_penalty` in our `rnnt_loss`, See
            https://github.com/k2-fsa/k2/issues/955 and
            https://arxiv.org/pdf/2211.00490.pdf for more details.
          target_lengths:
            Used only when `reduction` is `mean`. It is a 1-D tensor of batch
            size representing lengths of the targets, e.g., number of phones or
            number of word pieces in a sentence.
        Returns:
          If `reduction` is `none`, return a 1-D tensor with size equal to batch
          size. If `reduction` is `mean` or `sum`, return a scalar.
        r   	frame_idxN)a_fsasb_fsasr   frame_idx_namer   )valuealpha_is_repeat_token_r   
aux_labelsT)log_semiringr   r   r
   r   )r   r   r   arcsshaperemove_axisr   durationtodeviceaddvalueshasattr
isinstancer%   torchTensorlabelsr&   masked_fill_scoresget_tot_scoresr   float32r   r
   r   )r   r   r   r   r   latticer   r+   offsetpenaltymask
tot_scoreslossr   r   r   forward>   sL   



zCtcLoss.forward)r
   T)r   N)__name__
__module____qualname____doc__floatr   boolr   r   r   r   r2   r3   r?   __classcell__r   r   r   r   r	      s0    r	   
   r   r
   Tr   r   r   r   r   r   r   r   r   c                 C   s   t |||d}|| |||dS )aV  Compute the CTC loss given a decoding graph and a dense fsa vector.

    Args:
      decoding_graph:
        An FsaVec. It can be the composition result of a ctc topology
        and a transcript.
      dense_fsa_vec:
        It represents the neural network output. Refer to the help information
        in :class:`k2.DenseFsaVec`.
      output_beam:
         Beam to prune output, similar to lattice-beam in Kaldi.  Relative
         to best path of output.
      delay_penalty:
        A constant to penalize symbol delay, which is used to make symbol
        emit earlier for streaming models. It is almost the same as the
        `delay_penalty` in our `rnnt_loss`, See
        https://github.com/k2-fsa/k2/issues/955 and
        https://arxiv.org/pdf/2211.00490.pdf for more details.
      reduction:
        Specifies the reduction to apply to the output: 'none' | 'mean' | 'sum'.
        'none': no reduction will be applied, 'mean': the output losses will be
        divided by the target lengths and then the mean over the batch is taken.
        'sum': sum the output losses over batches.
      use_double_scores:
        True to use double precision floating point in computing
        the total scores. False to use single precision.
      target_lengths:
        Used only when `reduction` is `mean`. It is a 1-D tensor of batch
        size representing lengths of the targets, e.g., number of phones or
        number of word pieces in a sentence.
    Returns:
      If `reduction` is `none`, return a 1-D tensor with size equal to batch
      size. If `reduction` is `mean` or `sum`, return a scalar.
    )r   r   r   )r   r   r   r   )r	   )r   r   r   r   r   r   r   mr   r   r   ctc_loss   s   +rI   )rG   r   r
   TN)typingr   ImportErrortyping_extensionsr   r2   torch.nnnnautogradr   r   r   fsar   raggedr   Moduler	   rD   rE   r3   rI   r   r   r   r   <module>   sH    