o
    i&                     @   sl   d dl Z d dlZd dlZd dlZd dlm  mZ d dl	m
Z d dlmZ G dd dejjZd	ddZdS )
    N)parse)	to_devicec                       sT   e Zd ZdZd fdd	Zdd Zdd	 Zd
d Zdd Zdd Z	dddZ
  ZS )CTCa  CTC module

    :param int odim: dimension of outputs
    :param int eprojs: number of encoder projection units
    :param float dropout_rate: dropout rate (0.0 ~ 1.0)
    :param str ctc_type: builtin
    :param bool reduce: reduce the CTC loss into a scalar
    builtinTc                    s  t    || _d | _tj||| _tj|| _	d | _
ttjtdk r(|nd| _|| jkr:td| j d | jdkrO|rCdnd}tjj|dd| _n,| jd	krc|rXdnd}tjj|d
| _n| jdkrsddlm} |j| _ntd| jd| _|| _d S )Nz1.7.0r   zCTC was set to z due to PyTorch version.sumnoneT)	reductionzero_infinitycudnnctc)r   gtnctcr   )GTNCTCLossFunctionz*ctc_type must be "builtin" or "gtnctc": {})super__init__dropout_ratelosstorchnnLinearctc_loDropoutdropoutprobsV__version__ctc_typeloggingwarningCTCLossctc_loss#espnet.nets.pytorch_backend.gtn_ctcr   apply
ValueErrorformat	ignore_idreduce)selfodimeprojsr   r   r%   reduction_typer   	__class__ S/home/ubuntu/.local/lib/python3.10/site-packages/espnet/nets/pytorch_backend/ctc.pyr      s2   








zCTC.__init__c                 C   s   | j dv r3|d}tjjjdd | ||||}W d    n1 s%w   Y  ||d }|S | j dkrQdd |D }tjj	j|dd	}| |||d
dS t
)N)r   r
      T)deterministic   r   c                 S   s   g | ]}|  qS r,   )tolist).0tr,   r,   r-   
<listcomp>C       zCTC.loss_fn.<locals>.<listcomp>dimr   r   )r   log_softmaxr   backendscudnnflagsr   sizer   
functionalNotImplementedError)r&   th_pred	th_targetth_ilenth_olenr   targets	log_probsr,   r,   r-   loss_fn8   s   


zCTC.loss_fnc           	         s   fdd|D }   |} jdkr|dd} jdkrAt|tdd |D }| }t|} 	|||| _
nKd _
ttj|tjd	}ttjd
d |D tjd	}t|  }|j} jdkrut||} jdkr||}t| 	||||j|d	 _
t jjd dt|d  t jjd dt|d   jrǈ j
  _
tdtt j
   j
S )ac  CTC forward

        :param torch.Tensor hs_pad: batch of padded hidden state sequences (B, Tmax, D)
        :param torch.Tensor hlens: batch of lengths of hidden state sequences (B)
        :param torch.Tensor ys_pad:
            batch of padded character id sequence tensor (B, Lmax)
        :return: ctc loss value
        :rtype: torch.Tensor
        c                    s   g | ]	}|| j k qS r,   )r$   )r2   yr&   r,   r-   r4   T   s    zCTC.forward.<locals>.<listcomp>r   r   r0   r   c                 S   s   g | ]}t |qS r,   )len)r2   sr,   r,   r-   r4   \   r5   Ndtypec                 s   s    | ]}| d V  qdS )r   N)r<   )r2   xr,   r,   r-   	<genexpr>d   s    zCTC.forward.<locals>.<genexpr>r
   z input lengths:   
z output lengths: z	ctc loss:)r   r   r   	transposer   r   
LongTensorlongcatrE   r   
from_numpynpfromiterint32cpuintrK   tor   infor+   __name__joinstrsplitr%   r   float)	r&   hs_padhlensys_padysys_hatolensys_truerK   r,   rG   r-   forwardI   sV   





zCTC.forwardc                 C   s   t j| |dd| _| jS )zsoftmax of frame activations

        :param torch.Tensor hs_pad: 3d tensor (B, Tmax, eprojs)
        :return: log softmax applied 3d tensor (B, Tmax, odim)
        :rtype: torch.Tensor
        r.   r6   )Fsoftmaxr   r   r&   ra   r,   r,   r-   rj      s   zCTC.softmaxc                 C      t j| |ddS )zlog_softmax of frame activations

        :param torch.Tensor hs_pad: 3d tensor (B, Tmax, eprojs)
        :return: log softmax applied 3d tensor (B, Tmax, odim)
        :rtype: torch.Tensor
        r.   r6   )ri   r8   r   rk   r,   r,   r-   r8         zCTC.log_softmaxc                 C   rl   )zargmax of frame activations

        :param torch.Tensor hs_pad: 3d tensor (B, Tmax, eprojs)
        :return: argmax applied 2d tensor (B, Tmax)
        :rtype: torch.Tensor
        r.   r6   )r   argmaxr   rk   r,   r,   r-   rn      rm   z
CTC.argmaxr   c              
   C   s  ddd}|  |}|d}|||}t|dt|fd }tj|dt|ftjdd }|d |d  |d< |d |d  |d< tj	d|dD ]}	tj	t|D ]u}
||
 |ksq|
d	k sq||
 ||
d	  krt
||	d |
f ||	d |
d f g}|
|
d g}n't
||	d |
f ||	d |
d f ||	d |
d	 f g}|
|
d |
d	 g}t|||	 ||
   ||	|
f< |t| ||	|
f< q[qQd
tj|ddftjd }t
|d
t|d f |d
t|d	 f g}t|d t|d	 g}|t| |d
< tj	|dd	 d
d
D ]}	||	d ||	d df f ||	< qg }tj	d|dD ]}	||||	df   q:|S )a  forced alignment.

        :param torch.Tensor h: hidden state sequence, 2d tensor (T, D)
        :param torch.Tensor y: id sequence tensor 1d tensor (L)
        :param int y: blank symbol index
        :return: best alignment results
        :rtype: list
        r   c                 S   sZ   t | d} t j| jd dft jd| }t j|| gdd} | d} t | | d } | S )z1Insert blank token between every two label token.r0   r   rJ   )axisr   )rU   expand_dimszerosshapeint64concatenatereshapeappend)labelblank_idblanksr,   r,   r-   interpolate_blank   s   
z+CTC.forced_align.<locals>.interpolate_blankg   vH7BrJ   r0   )r   r   )r   r0   r.   r   Nr   )r8   squeezerU   rq   r<   rH   int16sixmovesrangearraymaxrn   onesrv   )r&   hrF   rx   rz   lpzy_intlogdelta
state_pathr3   rI   
candidates
prev_state	state_seqoutput_state_seqr,   r,   r-   forced_align   sH   


	

 (*"&$zCTC.forced_align)r   Tr{   )r\   
__module____qualname____doc__r   rE   rh   rj   r8   rn   r   __classcell__r,   r,   r*   r-   r      s    	">
		r   Tc                 C   s   t | dd}|dkrt|| j| j| j|dS |dkrStj }| jr7t|| j| jd | j|d}|	| |S t
|D ]}t|| j| j| | j|d}|	| q;|S td|)zReturns the CTC module for the given args and output dimension

    :param Namespace args: the program args
    :param int odim : The output dimension
    :param bool reduce : return the CTC loss in a scalar
    :return: the corresponding CTC module
    num_encsr0   )r   r%   r   z0Number of encoders needs to be more than one. {})getattrr   r(   r   r   r   r   
ModuleList	share_ctcrv   r   r"   r#   )argsr'   r%   r   	ctcs_listctcidxr,   r,   r-   ctc_for   s<   

r   )T)r   numpyrU   r~   r   torch.nn.functionalr   r=   ri   packaging.versionr   r   &espnet.nets.pytorch_backend.nets_utilsr   Moduler   r   r,   r,   r,   r-   <module>   s     Z