o
    i                     @   s   d Z ddlZddlZddlZddlZddlmZ ddlmZ ddl	m
Z
 ddlmZ ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZ dZG dd deZdS )z<RNN sequence-to-sequence speech recognition model (chainer).    N)reporter)ChainerASRInterface)ctc_for)att_for)decoder_for)encoder_for)label_smoothing_dist)E2E)get_subsamplei'  c                   @   sr   e Zd ZdZedd Zdd ZdddZd	d
 ZdddZ	dd Z
edddZedddZedddZdS )r	   a?  E2E module for chainer backend.

    Args:
        idim (int): Dimension of the inputs.
        odim (int): Dimension of the outputs.
        args (parser.args): Training config.
        flag_return (bool): If True, train() would return
            additional metrics in addition to the training
            loss.

    c                 C   s
   t | S )zAdd arguments.)E2E_pytorchadd_arguments)parser r   W/home/ubuntu/.local/lib/python3.10/site-packages/espnet/nets/chainer_backend/e2e_asr.pyr   &   s   
zE2E.add_argumentsc                 C   s   | j jtt| j S )zGet total subsampling factor.)encconv_subsampling_factorintnpprod	subsample)selfr   r   r   get_total_subsampling_factor+   s   z E2E.get_total_subsampling_factorTc                 C   s*  t j|  |j| _d| j  krdksJ d J d|j| _|j| _|j| _|j| _|d | _|d | _	t
|ddd| _|jrTtd|j  t||j|jd}nd	}|  ( t||| j| _t||| _t|| _t||| j| j	| j|| _W d	   n1 sw   Y  d	| _d	| _|| _d	S )
zConstruct an E2E object.

        :param int idim: dimension of inputs
        :param int odim: dimension of outputs
        :param Namespace args: argument Namespace containing options
        r      zmtlalpha must be [0,1]asrrnn)modearchzUse label smoothing with )
transcriptN)chainerChain__init__mtlalphaetypeverbose	char_listoutdirsoseosr
   r   lsm_typelogginginfor   
train_json
init_scoper   r   r   ctcr   attr   decacclossflag_return)r   idimodimargsr2   	labeldistr   r   r   r    /   s2   &






zE2E.__init__c           	      C   s0  |  ||\}}| jdkrd}n| ||}| jdkr d}d}n| ||\}}|| _| j}|dkr6|| _n|dkr>|| _n|| d| |  | _| jjtk rt	| jjst
d|i|  t
d|i|  t
d|i|  tdt| jj  t
d| ji|  ntd	| jj | jr| j|||fS | jS )
a  E2E forward propagation.

        Args:
            xs (chainer.Variable): Batch of padded character ids. (B, Tmax)
            ilens (chainer.Variable): Batch of length of each input batch. (B,)
            ys (chainer.Variable): Batch of padded target features. (B, Lmax, odim)

        Returns:
            float: Loss that calculated by attention and ctc loss.
            float (optional): Ctc loss.
            float (optional): Attention loss.
            float (optional): Accuracy.

        r   Nr   loss_ctcloss_attr0   z	mtl loss:r1   zloss (=%f) is not correct)r   r!   r-   r/   r0   r1   dataCTC_LOSS_THRESHOLDmathisnanr   reportr)   r*   strwarningr2   )	r   xsilensyshsr7   r8   r0   alphar   r   r   forward]   s2   

zE2E.forwardNc           
   	   C   s  |dd| j d ddf }| jj|jd tjd}t| jj|tjd}t	 O t
dd8 | |g|g\}}|jdkrK| j|jd }nd}| j|d ||||}	|	W  d   W  d   S 1 slw   Y  W d   dS 1 s|w   Y  dS )a  E2E greedy/beam search.

        Args:
            x (chainer.Variable): Input tensor for recognition.
            recog_args (parser.args): Arguments of config file.
            char_list (List[str]): List of Characters.
            rnnlm (Module): RNNLM module defined at `espnet.lm.chainer_backend.lm`.

        Returns:
            List[Dict[str, Any]]: Result of recognition.

        Nr   )dtypetrainFg        )r   xparrayshaper   int32r   Variablefloat32no_backprop_modeusing_configr   
ctc_weightr-   log_softmaxr9   r/   recognize_beam)
r   x
recog_argsr$   rnnlmilenh_lpzyr   r   r   	recognize   s   
RzE2E.recognizec                 C   s"   |  ||\}}| j||}|S )ay  E2E attention calculation.

        Args:
            xs (List): List of padded input sequences. [(T1, idim), (T2, idim), ...]
            ilens (np.ndarray): Batch of lengths of input sequences. (B)
            ys (List): List of character id sequence tensor. [(L1), (L2), (L3), ...]

        Returns:
            float np.ndarray: Attention weights. (B, Lmax, Tmax)

        )r   r/   calculate_all_attentions)r   r@   rA   rB   rC   att_wsr   r   r   r\      s   zE2E.calculate_all_attentionsr   c                 C   s   ddl m} || dS )z!Get customconverter of the model.r   )CustomConverter)subsampling_factor)(espnet.nets.chainer_backend.rnn.trainingr^   )r_   r^   r   r   r   custom_converter   s   
zE2E.custom_converterr   c                 C      ddl m} || ||||dS )z Get custom_updater of the model.r   )CustomUpdater)	converterdevice
accum_grad)r`   rd   )iters	optimizerre   rf   rg   rd   r   r   r   custom_updater   s   
zE2E.custom_updaterc                 C   rc   )z)Get custom_parallel_updater of the model.r   )CustomParallelUpdater)re   devicesrg   )r`   rk   )rh   ri   re   rl   rg   rk   r   r   r   custom_parallel_updater   s   zE2E.custom_parallel_updater)T)N)r   )rb   r   )r   )__name__
__module____qualname____doc__staticmethodr   r   r    rE   r[   r\   ra   rj   rm   r   r   r   r   r	      s    

.
6#r	   )rq   r)   r;   r   numpyr   r   )espnet.nets.chainer_backend.asr_interfacer   espnet.nets.chainer_backend.ctcr   *espnet.nets.chainer_backend.rnn.attentionsr   (espnet.nets.chainer_backend.rnn.decodersr   (espnet.nets.chainer_backend.rnn.encodersr   espnet.nets.e2e_asr_commonr   #espnet.nets.pytorch_backend.e2e_asrr	   r   &espnet.nets.pytorch_backend.nets_utilsr
   r:   r   r   r   r   <module>   s    