o
    i                     @   s>   d dl Z d dlm  mZ d dlmZ G dd de jjZdS )    N)check_argument_typesc                       sz   e Zd ZdZ					ddededed	ed
ededef fddZde	j
fddZdd Zdd Zdd Zdd Z  ZS )CTCa  CTC module.

    Args:
        odim: dimension of outputs
        encoder_output_size: number of encoder projection units
        dropout_rate: dropout rate (0.0 ~ 1.0)
        ctc_type: builtin or gtnctc
        reduce: reduce the CTC loss into a scalar
        ignore_nan_grad: Same as zero_infinity (keeping for backward compatiblity)
        zero_infinity:  Whether to zero infinite losses and the associated gradients.
            builtinTNodimencoder_output_sizedropout_ratectc_typereduceignore_nan_gradzero_infinityc           
         s   t  sJ t   |}|| _tj||| _|| _|d ur |}| jdkr/tjj	d|d| _
n| jdkr?ddlm}	 |	j| _
ntd| j || _d S )Nr   none)	reductionr   gtnctcr   )GTNCTCLossFunctionz(ctc_type must be "builtin" or "gtnctc": )r   super__init__r   torchnnLinearctc_lor	   CTCLossctc_loss#espnet.nets.pytorch_backend.gtn_ctcr   apply
ValueErrorr
   )
selfr   r   r   r	   r
   r   r   eprojsr   	__class__ C/home/ubuntu/.local/lib/python3.10/site-packages/espnet2/asr/ctc.pyr      s"   







zCTC.__init__returnc                 C   s   | j dkr(|d}| ||||}|d}| jr"| | }|S || }|S | j dkr?tjjj|dd}| |||ddS t	)Nr         r   dimr   r   )
r	   log_softmaxr   sizer
   sumr   r   
functionalNotImplementedError)r   th_pred	th_targetth_ilenth_olenlossr(   	log_probsr    r    r!   loss_fn4   s   



zCTC.loss_fnc                    sz   |  tj|| jd}| jdkrdd  D }n|dd}t fddt|D }| 	||||j
|j|jd}|S )	a@  Calculate CTC loss.

        Args:
            hs_pad: batch of padded hidden state sequences (B, Tmax, D)
            hlens: batch of lengths of hidden state sequences (B)
            ys_pad: batch of padded character id sequence tensor (B, Lmax)
            ys_lens: batch of lengths of character sequence (B)
        )pr   c                 S   s   g | ]}||d k qS )r    ).0yr    r    r!   
<listcomp>V   s    zCTC.forward.<locals>.<listcomp>r   r$   c                    s    g | ]\}} |d |f qS )Nr    )r5   ilys_padr    r!   r7   [   s     )devicedtype)r   Fdropoutr   r	   	transposer   cat	enumerater2   tor<   r=   )r   hs_padhlensr;   ys_lensys_hatys_truer0   r    r:   r!   forwardH   s   

zCTC.forwardc                 C      t j| |ddS )zsoftmax of frame activations

        Args:
            Tensor hs_pad: 3d tensor (B, Tmax, eprojs)
        Returns:
            torch.Tensor: softmax applied 3d tensor (B, Tmax, odim)
        r#   r%   )r>   softmaxr   r   rD   r    r    r!   rK   c      zCTC.softmaxc                 C   rJ   )zlog_softmax of frame activations

        Args:
            Tensor hs_pad: 3d tensor (B, Tmax, eprojs)
        Returns:
            torch.Tensor: log softmax applied 3d tensor (B, Tmax, odim)
        r#   r%   )r>   r'   r   rL   r    r    r!   r'   m   rM   zCTC.log_softmaxc                 C   rJ   )zargmax of frame activations

        Args:
            torch.Tensor hs_pad: 3d tensor (B, Tmax, eprojs)
        Returns:
            torch.Tensor: argmax applied 2d tensor (B, Tmax)
        r#   r%   )r   argmaxr   rL   r    r    r!   rN   w   rM   z
CTC.argmax)r   r   TNT)__name__
__module____qualname____doc__intfloatstrboolr   r   Tensorr2   rI   rK   r'   rN   __classcell__r    r    r   r!   r      s6    !

r   )	r   torch.nn.functionalr   r*   r>   	typeguardr   Moduler   r    r    r    r!   <module>   s    