o
    }oi>                     @   sp   d dl mZ d dlZd dlmZ d dlmZmZmZm	Z	m
Z
 G dd deZG dd deZG d	d
 d
eZdS )    )ListN)Loss)
LabelsTypeLengthsTypeLogprobsTypeLossType
NeuralTypec                       sD   e Zd Zedd Zedd Z fddZdd Zd	d
 Z  Z	S )RNNTLossPytorchc                 C   6   t dt t dt t tdt t tdt dS z-Input types definitions for CTCLoss.
        )BTr   D)r   r   r   )actslabelsact_lens
label_lensr   r   r   tupler   self r   \/home/ubuntu/.local/lib/python3.10/site-packages/nemo/collections/asr/losses/rnnt_pytorch.pyinput_types   
   

zRNNTLossPytorch.input_typesc                 C      dt t diS zYOutput types definitions for CTCLoss.
        loss:
            NeuralType(None)
        loss)elements_typer   r   r   r   r   r   output_types$      zRNNTLossPytorch.output_typesc                    s   t    || _|| _d S N)super__init__blank	reduction)r   r%   r&   	__class__r   r   r$   ,   s   

zRNNTLossPytorch.__init__c                 C   s   |j s|jtjkr| }t|d}| ||||}| }| jdkr)| }|S | jdkr8t	|| }|S | jdkrC|
 }|S | jdkrP|
 |
  }|S N
mean_batchmeansummean_volume)is_cudadtypetorchfloat16floatlog_softmaxcompute_forward_probr&   r,   divr-   )r   r   r   r   r   forward_logproblossesr   r   r   forward1   s"   



zRNNTLossPytorch.forwardc                 C   s8  |j \}}}}t|||}	|	|j}	t|D ]}
t|D ]}|dkrR|
dkr2d|	d d |
|f< q|	d d |
d |f |d d |
d d| jf  |	d d |
|f< q|
dkrtj|d d |
|d f d|d d |d f dd	tj
dd}|	d d |
|d f ||	j |	d d |
|f< qtjt|	d d |
d |f |d d |
d || jf  |	d d |
|d f tj|d d |
|d f d|d d |d f dd	tj
dd gdd|	d d |
|f< qqg }t|D ]#}|	||| d || f |||| d || | jf  }|| qt|}|S )Nr              r*   dimindexr=   )shaper1   zerostodeviceranger%   gatherviewtypeint64reshape	logsumexpstackappend)r   r   r   r   r   r   r   U_	log_alphatugathered	log_probsb	to_appendlog_probr   r   r   r5   E   sJ   @82.8	"6
z$RNNTLossPytorch.compute_forward_prob)
__name__
__module____qualname__propertyr   r    r$   r9   r5   __classcell__r   r   r'   r   r	      s    


r	   c                	       sn   e Zd ZdZedd Zedd Zg ddfded	ee d
e	de
f fddZdd Zdd Zdd Z  ZS )TDTLossPytorchzW
    Pure Python implementation of TDT loss (https://arxiv.org/pdf/2304.06795.pdf)
    c                 C   r
   r   r   r   r   r   r   r      r   zTDTLossPytorch.input_typesc                 C   r   r   r   r   r   r   r   r       r!   zTDTLossPytorch.output_typesr-   r:   r%   	durationsr&   sigmac                    s0   t    || _|| _t|| _|| _|| _d S r"   )r#   r$   r%   r]   lenn_durationsr&   r^   )r   r%   r]   r&   r^   r'   r   r   r$      s   


zTDTLossPytorch.__init__c           
      C   s   |d d d d d d d | j  f }|d d d d d d | j  d f }t|d| j }t|d}| |||||\}}| }	| jdkrL|	 }	|	S | jdkr[t|	| }	|	S | jdkrf|	 }	|	S | jdkrs|	 |  }	|	S r)   )	r`   r1   r4   r^   r5   r&   r,   r6   r-   )
r   r   r   r   r   
label_actsduration_actsr7   rN   r8   r   r   r   r9      s$   $$



zTDTLossPytorch.forwardc                 C   s   t jt ||gdd}|S )Nr   r?   )r1   rJ   rK   )r   arT   retr   r   r   rJ      s   zTDTLossPytorch.logsumexpc                 C   s  |j \}}}}	t|||}
|
 }
t|D ]}t|D ]}t|D ]}|dkr|dkr4d|
|||f< q"d|
|||f< t| jD ]>\}}|| dkr~|dkr~|
||| |f |||| || jf  |||| ||f  }| |d|
|||f  |
|||f< q@q"d|
|||f< t| jD ]z\}}|| dkr|dkr|
||| |f |||| || jf  |||| ||f  }| |d|
|||f  |
|||f< |
||| |d f |||| |d |||d f f  |||| |d |f  }| |d|
|||f  |
|||f< qq"qqg }t|D ]]}t	dg d }t| jD ]E\}}|| | dkre|dkre|
||| | || f |||| | || | jf  |||| | || |f  }| |d| }q!|
| qt|}||
fS )zThis function implements Equation 7 in the TDT paper https://arxiv.org/pdf/2304.06795.pdf,
        Simply put, for each alpha(t, u), it sums over the contribution from all incoming blank arcs and non-blank arcs.
        r   r:   g     @      ?r;   )r@   r1   rA   cudarD   	enumerater]   r%   rJ   TensorrL   rK   )r   r   rb   r   r   r   r   r   rM   rN   rO   rT   rP   rQ   nltmprS   ttbbrV   r   r   r   r5      sv   $$"$)
z#TDTLossPytorch.compute_forward_prob)rW   rX   rY   __doc__rZ   r   r    intr   strr3   r$   r9   rJ   r5   r[   r   r   r'   r   r\   z   s    


*r\   c                       sT   e Zd ZdZedd Zedd Zdded	ef fd
dZ	dd Z
dd Z  ZS )MultiblankRNNTLossPytorchzj
    Pure Python implementation of multi-blank transducer loss (https://arxiv.org/pdf/2211.03541.pdf)
    c                 C   r
   r   r   r   r   r   r   r     r   z%MultiblankRNNTLossPytorch.input_typesc                 C   r   r   r   r   r   r   r   r      r!   z&MultiblankRNNTLossPytorch.output_typesr-   r:   r&   r^   c                    s&   t    || _|| _|| _|| _d S r"   )r#   r$   r%   big_blank_durationsr&   r^   )r   r%   rr   r&   r^   r'   r   r   r$     s
   

z"MultiblankRNNTLossPytorch.__init__c                 C   s   t |d| j }| ||||\}}| }| jdkr!| }|S | jdkr0t || }|S | jdkr;| }|S | jdkrH| |  }|S r)   )r1   r4   r^   r5   r&   r,   r6   r-   )r   r   r   r   r   r7   rN   r8   r   r   r   r9     s   



z!MultiblankRNNTLossPytorch.forwardc                 C   s  |j \}}}}tj||||jd}	t|D ]]}
t|D ]U}|dkr|
dkr1d|	d d |
|f< q|	d d |
d |f |d d |
d d| jf  |	d d |
|f< t| jD ]@\}}|
|kr|	d d |
| |f |d d |
| d| jd | f  }tjt	d|	d d |
|f  |gdd|	d d |
|f< qUq|
dkrtj
|d d |
|d f d|d d |d f ddtjdd}|	d d |
|d f | |	d d |
|f< qtjt	|	d d |
d |f |d d |
d || jf  |	d d |
|d f tj
|d d |
|d f d|d d |d f ddtjdd gdd|	d d |
|f< t| jD ]B\}}|
|krp|	d d |
| |f |d d |
| || jd | f  }tjt	d|	d d |
|f  |gdd|	d d |
|f< q/qqg }t|D ]d}|	||| d || f |||| d || | jf  }t| jD ]:\}}|| |kr|	||| | || f |||| | || | jd | f  }tjt	d| |gdd}q|| qzt	|}||	fS )	N)rC   r   r:   r;   re   r?   r*   r<   )r@   r1   rA   rC   rD   r%   rg   rr   rJ   rK   rE   rF   rG   rH   rI   rL   )r   r   r   r   r   r   r   rM   rN   rO   rP   rQ   idrl   rR   rS   rT   rU   rV   r   r   r   r5   +  sx   >8 8*.8	
8 36$
z.MultiblankRNNTLossPytorch.compute_forward_prob)r-   r:   )rW   rX   rY   rn   rZ   r   r    rp   r3   r$   r9   r5   r[   r   r   r'   r   rq      s    


rq   )typingr   r1   nemo.core.classesr   nemo.core.neural_typesr   r   r   r   r   r	   r\   rq   r   r   r   r   <module>   s   b 