o
    i                     @   s:   d dl Z d dlZd dlm  mZ G dd dejjZdS )    Nc                       sz   e Zd ZdZ					ddedededed	ed
edef fddZde	j
fddZdd Zdd Zdd Zdd Z  ZS )CTCa  CTC module.

    Args:
        odim: dimension of outputs
        encoder_output_size: number of encoder projection units
        dropout_rate: dropout rate (0.0 ~ 1.0)
        ctc_type: builtin or warpctc
        reduce: reduce the CTC loss into a scalar
            builtinTodimencoder_output_sizedropout_ratectc_typereduceignore_nan_gradextra_linearc           
         s   t    |}|| _|rtj||| _nd | _|| _|| _| jdkr,tjj	dd| _
n!| jdkrEdd l}	|r<td |	j	d|d| _
ntd	| j || _d S )
Nr   none)	reductionwarpctcr   z4ignore_nan_grad option is not supported for warp_ctcT)size_averager	   z)ctc_type must be "builtin" or "warpctc": )super__init__r   torchnnLinearctc_lor   r
   CTCLossctc_losswarpctc_pytorchloggingwarning
ValueErrorr	   )
selfr   r   r   r   r	   r
   r   eprojswarp_ctc	__class__ I/home/ubuntu/.local/lib/python3.10/site-packages/funasr/models/ctc/ctc.pyr      s"   





zCTC.__init__returnc                 C   s  | j dkr|d}| ||||}|jr| jr|t|}|ddg}t	|}|
  }|dkr<td na||dkrt|d|  d|d d tj|dgdtj|jd}	d}
t|D ]\}}|| s{d|	|
|
| < |
|7 }
qk| |d d |d d f ||	 || || }n|d}| jr| | }|S || }|S | j d	kr|jtjd
}|  }|  }|  }| ||||}| jr| }|S | j dkrtjjj|dd}| |||ddS t)Nr      r   zTAll samples in this mini-batch got nan grad. Returning nan value instead of CTC loss   /z7 samples got nan grad. These were ignored for CTC loss.)dtypedevicer   )r'   gtnctcdimr   )r   log_softmaxr   requires_gradr
   grad_fnr   	ones_likesumisfinitelongr   r   sizefullboolr(   	enumerater	   tofloat32cpuintr   
functionalNotImplementedError)r   th_pred	th_targetth_ilenth_olenlossctc_gradindicesr3   target_masksindle	log_probsr!   r!   r"   loss_fn5   sj   







zCTC.loss_fnc                    s   | j dur|  tj|| jd}n|}| jdkr dd  D }n|dd}t fddt|D }|	|j
}| ||||j	|j
|jd	}|S )
a@  Calculate CTC loss.

        Args:
            hs_pad: batch of padded hidden state sequences (B, Tmax, D)
            hlens: batch of lengths of hidden state sequences (B)
            ys_pad: batch of padded character id sequence tensor (B, Lmax)
            ys_lens: batch of lengths of character sequence (B)
        N)pr)   c                 S   s   g | ]}||d k qS )r!   ).0yr!   r!   r"   
<listcomp>   s    zCTC.forward.<locals>.<listcomp>r   r%   c                    s    g | ]\}} |d |f qS )Nr!   )rL   ilys_padr!   r"   rN      s     )r(   r'   )r   Fdropoutr   r   	transposer   catr6   r7   r(   rI   r'   )r   hs_padhlensrR   ys_lensys_hatys_truerA   r!   rQ   r"   forward   s   


zCTC.forwardc                 C   ,   | j durtj|  |ddS tj|ddS )zsoftmax of frame activations

        Args:
            Tensor hs_pad: 3d tensor (B, Tmax, eprojs)
        Returns:
            torch.Tensor: softmax applied 3d tensor (B, Tmax, odim)
        Nr$   r*   )r   rS   softmaxr   rW   r!   r!   r"   r^         
zCTC.softmaxc                 C   r]   )zlog_softmax of frame activations

        Args:
            Tensor hs_pad: 3d tensor (B, Tmax, eprojs)
        Returns:
            torch.Tensor: log softmax applied 3d tensor (B, Tmax, odim)
        Nr$   r*   )r   rS   r,   r_   r!   r!   r"   r,      r`   zCTC.log_softmaxc                 C   r]   )zargmax of frame activations

        Args:
            torch.Tensor hs_pad: 3d tensor (B, Tmax, eprojs)
        Returns:
            torch.Tensor: argmax applied 2d tensor (B, Tmax)
        Nr$   r*   )r   r   argmaxr_   r!   r!   r"   ra      r`   z
CTC.argmax)r   r   TTT)__name__
__module____qualname____doc__r:   floatstrr5   r   r   TensorrI   r\   r^   r,   ra   __classcell__r!   r!   r   r"   r      s6    #Lr   )r   r   torch.nn.functionalr   r;   rS   Moduler   r!   r!   r!   r"   <module>   s    