o
    ei8:                     @   sn  d Z ddlZddlZddlZddlZddlmZ ddlmZ ddl	m
Z
 dZe
eZz-ddlmZ esQed edZeej dd	lmZ ejd
ed ned W n5 ey   dZed7 Zed7 Zed7 Zed7 Zed7 Zed7 Zed7 Zed7 Zed7 Zed7 Zeew e dd Ze dd Ze dd ZG dd deZG d d! d!eZdS )"zn
Transducer loss implementation (depends on numba)

Authors
 * Abdelwahab Heba 2020
 * Titouan Parcollet 2023
    N)Function)Module)
get_logger)cudazCNumba verbose is deactivated. To enable it, set NUMBA_VERBOSE to 1.numba)NumbaPerformanceWarningignore)categoryzCNumba verbose is enabled. To deactivate it, set NUMBA_VERBOSE to 0.z;The optional dependency Numba is needed to use this module
z,Cannot import numba. To use Transducer loss
z%Please follow the instructions below
=============================
If you use your localhost:
pip install numba
=export NUMBAPRO_LIBDEVICE='/usr/local/cuda/nvvm/libdevice/' 
>export NUMBAPRO_NVVM='/usr/local/cuda/nvvm/lib64/libnvvm.so' 
"================================ 
If you use conda:
zconda install numba cudatoolkitc              	   C   s  t jj}t jj}	d}
|	|| kr
|
|| k r|	dkrH|
dkr7|||
d df | ||
d d|f  |||
df< t j|||	d fd |
d7 }
nt j|||	fddk r|
dkrw||d|	d f | |d|	d |||	d f f  ||d|	f< nC|||
|	d f | ||
|	d |||	d f f  }|||
d |	f | ||
d |	|f  }t||tt	t
||   |||
|	f< |	|| k rt j|||	d fd t j|||	fd |
d7 }
|
|| k s|	|| kr|||| d || f | ||| d || |f  ||  ||< dS dS dS )a  
    Compute forward pass for the forward-backward algorithm using Numba cuda kernel.
    Sequence Transduction with naive implementation : https://arxiv.org/pdf/1211.3711.pdf

    Arguments
    ---------
    log_probs : torch.Tensor
        4D Tensor of (batch x TimeLength x LabelLength x outputDim) from the Transducer network.
    labels : torch.Tensor
        2D Tensor of (batch x MaxSeqLabelLength) containing targets of the batch with zero padding.
    alpha : torch.Tensor
        3D Tensor of (batch x TimeLength x LabelLength) for forward computation.
    log_p : torch.Tensor
        1D Tensor of (batch) for forward cost computation.
    T : torch.Tensor
        1D Tensor of (batch) containing TimeLength of each target.
    U : torch.Tensor
        1D Tensor of (batch) containing LabelLength of each target.
    blank : int
        Blank index.
    lock : torch.Tensor
        2D Tensor of (batch x LabelLength) containing bool(1-0) lock for parallel computation.
    r      Nr   blockIdxx	threadIdxatomicaddmaxmathlog1pexpabs)	log_probslabelsalphalog_pTUblanklockbutemitno_emit r+   c/home/ubuntu/transcripts/venv/lib/python3.10/site-packages/speechbrain/nnet/loss/transducer_loss.pycu_kernel_forward:   sJ   $
$!4&r-   c                 C   s  t jj}t jj}	|| d }
|	|| kr|
dkr|	|| kr[|
|| d kr4| ||
|	|f |||
|	f< n|||
d |	f | ||
|	|f  |||
|	f< t j|||	d fd |
d8 }
nt j|||	fddk r|
|| d kr|||
|	d f | ||
|	|||	f f  |||
|	f< n=|||
|	d f | ||
|	|||	f f  }|||
d |	f | ||
|	|f  }t||tt	t
||   |||
|	f< |	dkrt j|||	d fd t j|||	fd |
d8 }
|
dks|	dkr||ddf ||  ||< dS dS )a  
    Compute backward pass for the forward-backward algorithm using Numba cuda kernel.
    Sequence Transduction with naive implementation : https://arxiv.org/pdf/1211.3711.pdf

    Arguments
    ---------
    log_probs : torch.Tensor
        4D Tensor of (batch x TimeLength x LabelLength x outputDim) from the Transducer network.
    labels : torch.Tensor
        2D Tensor of (batch x MaxSeqLabelLength) containing targets of the batch with zero padding.
    beta : torch.Tensor
        3D Tensor of (batch x TimeLength x LabelLength) for backward computation.
    log_p : torch.Tensor
        1D Tensor of (batch) for backward cost computation.
    T : torch.Tensor
        1D Tensor of (batch) containing TimeLength of each target.
    U : torch.Tensor
        1D Tensor of (batch) containing LabelLength of each target.
    blank : int
        Blank index.
    lock : torch.Tensor
        2D Tensor of (batch x LabelLength) containing bool(1-0) lock for parallel computation.
    r   r   r   Nr   )r   r   betar!   r"   r#   r$   r%   r&   r'   r(   r)   r*   r+   r+   r,   cu_kernel_backward   s<    
((" r/   c              	   C   s  t jj}t jj}	|||	 k r|dkrDt||	||	 d ||	 f | |	||	 d ||	 |f  ||	ddf   ||	||	 d ||	 |f< |||	 d k rt||	 d D ]9}
||	||
f ||	|d |
f  ||	||
|f< t||	||
|f | |	||
|f  ||	ddf   ||	||
|f< qTt||	 D ]C\}
}|
||	 k r||	||
f ||	||
d f  ||	||
|f< t||	||
|f | |	||
|f  ||	ddf   ||	||
|f< qdS dS )a  
    Compute gradient for the forward-backward algorithm using Numba cuda kernel.
    Sequence Transduction with naive implementation : https://arxiv.org/pdf/1211.3711.pdf

    Arguments
    ---------
    log_probs : torch.Tensor
        4D Tensor of (batch x TimeLength x LabelLength x outputDim) from the Transducer network.
    labels : torch.Tensor
        2D Tensor of (batch x MaxSeqLabelLength) containing targets of the batch with zero padding.
    alpha : torch.Tensor
        3D Tensor of (batch x TimeLength x LabelLength) for backward computation.
    beta : torch.Tensor
        3D Tensor of (batch x TimeLength x LabelLength) for backward computation.
    grads : torch.Tensor
        Grads for backward computation.
    T : torch.Tensor
        1D Tensor of (batch) containing TimeLength of each target.
    U : torch.Tensor
        1D Tensor of (batch) containing LabelLength of each target.
    blank : int
        Blank index.
    r   r   N)r   r   r   r   r   r   range	enumerate)r   r   r    r.   gradsr"   r#   r$   r(   r&   r'   lr+   r+   r,   cu_kernel_compute_grad   s<   ,,,r4   c                   @   s(   e Zd ZdZedd Zedd ZdS )
Transducera  
    This class implements the Transducer loss computation with forward-backward algorithm
    Sequence Transduction with naive implementation : https://arxiv.org/pdf/1211.3711.pdf

    This class use torch.autograd.Function. In fact of using the forward-backward algorithm,
    we need to compute the gradient manually.

    This class can't be instantiated, please refer to TransducerLoss class

    It is also possible to use this class directly by using Transducer.apply
    c              	   C   sl  |  }|j\}}}	}
tj|||	|
f|j|jd}tj|||	f|j|jd}tj|||	f|j|jd}tj||	ftj|jd}tj|f|j|jd}tj|f|j|jd}t||	f |||||||| |d }t||	f |||||||| t	||f |||||||| || _
~~~~~~~~tj  |dkr|  S |dkrt| S |dkr| S td|)Computes the transducer loss.)dtypedevice)r8   r7   r   meansumnonezUnexpected reduction {})detachshapetorchzerosr7   r8   int32r-   r/   r4   r2   r   empty_cacher9   r:   	Exceptionformat)ctxr   r   r"   r#   r$   	reductionBmaxTmaxUAr2   r    r.   r%   log_p_alpha
log_p_betar+   r+   r,   forward  sP   





zTransducer.forwardc                 C   s2   | dddd| j}| j|ddddddfS )z.Backward computations for the transducer loss.r   r   N)viewtor2   mul_)rD   grad_outputr+   r+   r,   backward>  s   zTransducer.backwardN)__name__
__module____qualname____doc__staticmethodrL   rQ   r+   r+   r+   r,   r5     s    
,r5   c                       s*   e Zd ZdZd fdd	Zdd Z  ZS )	TransducerLossa  
    This class implements the Transduce loss computation with forward-backward algorithm.
    Sequence Transduction with naive implementation : https://arxiv.org/pdf/1211.3711.pdf

    The TransducerLoss(nn.Module) use Transducer(autograd.Function)
    to compute the forward-backward loss and gradients.

    Input tensors must be on a cuda device.

    Arguments
    ---------
    blank : int
        Token to use as blank token.
    reduction : str
        Type of reduction to use, default "mean"

    Example
    -------
    >>> import torch
    >>> loss = TransducerLoss(blank=0)
    >>> logits = torch.randn((1,2,3,5)).cuda().requires_grad_()
    >>> labels = torch.Tensor([[1,2]]).cuda().int()
    >>> act_length = torch.Tensor([2]).cuda().int()
    >>> # U = label_length+1
    >>> label_length = torch.Tensor([2]).cuda().int()
    >>> l = loss(logits, labels, act_length, label_length)
    >>> l.backward()
    r   r9   c                    s   t    || _|| _tj| _ztj W d S  t	yB   d}|d7 }|d7 }|d7 }|d7 }|d7 }|d7 }|d7 }|d	7 }t	|w )
Nz,cannot import numba. To use Transducer loss
r
   r   r   r   r   r   r   zDconda install numba cudatoolkit=XX (XX is your cuda toolkit version))
super__init__r$   rE   r5   applylossr   
cuda_pathsImportError)selfr$   rE   err_msg	__class__r+   r,   rY   c  s(   
zTransducerLoss.__init__c                 C   s\   t dd ||||fD r|d}| ||||| j| jS td|j|j|j|jg d)r6   c                 s   s    | ]}|j V  qd S )N)is_cuda).0r(   r+   r+   r,   	<genexpr>{  s    z)TransducerLoss.forward.<locals>.<genexpr>r   zFound inputs tensors to be on zB while needed to be on a 'cuda' device to use the transducer loss.)alllog_softmaxr[   r$   rE   
ValueErrorr8   )r^   logitsr   r"   r#   r   r+   r+   r,   rL   x  s   
zTransducerLoss.forward)r   r9   )rR   rS   rT   rU   rY   rL   __classcell__r+   r+   r`   r,   rW   E  s    rW   ) rU   loggingr   warningsr>   torch.autogradr   torch.nnr   speechbrain.utils.loggerr   NUMBA_VERBOSErR   loggerr   r   info	getLogger	nb_loggersetLevelERRORnumba.core.errorsr   simplefilterr]   r_   jitr-   r/   r4   r5   rW   r+   r+   r+   r,   <module>   s\    

K
G
5A