o
    i>5                     @   s   d Z ddlZddlZddlZddlZddlZddlZddlZddl	Z	ddlm
Z
 ddlmZ ddlmZ ddlmZ ddlmZmZmZ ddlmZmZmZ dd	lmZ dd
lmZ ddlmZ ddlm Z  G dd dej!Z"G dd dee	j#j$Z%dS )z:RNN sequence-to-sequence text translation model (pytorch).    N)reporter)label_smoothing_dist)MTInterface)uniform_init_parameters)get_subsamplepad_list	to_device)"add_arguments_rnn_attention_common add_arguments_rnn_decoder_common add_arguments_rnn_encoder_common)att_for)decoder_for)encoder_for)fill_missing_argsc                   @   s   e Zd ZdZdd ZdS )ReporterzA chainer reporter wrapper.c                 C   sD   t d|i|  t d|i|  t d|i|  t d|i|  dS )zReport at every step.lossaccpplbleuN)r   report)selfr   r   r   r    r   V/home/ubuntu/.local/lib/python3.10/site-packages/espnet/nets/pytorch_backend/e2e_mt.pyr   #   s   zReporter.reportN)__name__
__module____qualname____doc__r   r   r   r   r   r       s    r   c                       s   e Zd ZdZedd Zedd Zedd Zedd	 Z fd
dZ	dd Z
dd Zdd ZdddZdddZdd Z  ZS )E2EzE2E module.

    :param int idim: dimension of inputs
    :param int odim: dimension of outputs
    :param Namespace args: argument Namespace containing options

    c                 C   s"   t |  t |  t |  | S )zAdd arguments.)r   encoder_add_argumentsattention_add_argumentsdecoder_add_arguments)parserr   r   r   add_arguments4   s   


zE2E.add_argumentsc                 C      |  d}t|}| S )zAdd arguments for the encoder.zE2E encoder setting)add_argument_groupr   r!   groupr   r   r   r   <      
zE2E.encoder_add_argumentsc                 C   r#   )z Add arguments for the attention.zE2E attention setting)r$   r	   r%   r   r   r   r   C   r'   zE2E.attention_add_argumentsc                 C   r#   )zAdd arguments for the decoder.zE2E decoder setting)r$   r
   r%   r   r   r   r    J   r'   zE2E.decoder_add_argumentsc                    s(  t t|   tjj|  t|| j}|j| _|j	| _	t
|dd|_|j| _|j| _|j| _|j| _t | _|d | _|d | _d| _t|ddd| _|jrhtj|jrhtd|j  t||j|jd	}nd}t
|d
d| _ t
|dd| _!tjj"||j#| jd| _$tjj%|j&d| _'t(||j#| j| _)t*|| _+t,||| j| j| j+|| _-|j.r||krt/d|j#|j0krt/d| j-j$j1| j$_1|j2r|j3rt/d| j-j$j1| j-j4_1| 5  |j6r|j7|j8d|j9|j:|j;|j<|j=|j|jdd}t>j?di || _@|j6| _6nd| _6d| _<d| _Ad| _Bd| _CdS )zConstruct an E2E object.

        :param int idim: dimension of inputs
        :param int odim: dimension of outputs
        :param Namespace args: argument Namespace containing options
        	char_listN   r   mtrnn)modearchzUse label smoothing with )
transcriptmultilingualFreplace_sos)padding_idx)pz>When using tie_src_tgt_embedding, idim and odim must be equal.zBWhen using tie_src_tgt_embedding, eunits and dunits must be equal.z?When using tie_classifier, context_residual must be turned off.)	beam_sizepenalty
ctc_weightmaxlenratiominlenratio	lm_weightrnnlmnbestspaceblanktgt_langg    _r   )Dsuperr   __init__torchnnModuler   r"   etypeverbosegetattrr(   outdir	sym_spacer;   	sym_blankr<   r   r   soseospadr   	subsamplelsm_typeospathisfile
train_jsonlogginginfor   r/   r0   	EmbeddingeunitsembedDropoutdropout_ratedropoutr   encr   attr   dectie_src_tgt_embedding
ValueErrordunitsweighttie_classifiercontext_residualoutputinit_like_fairseqreport_bleur3   r4   r6   r7   r8   r9   r:   argparse	Namespace
trans_argslogzeror   r   )r   idimodimargs	labeldistrh   	__class__r   r   r?   Q   s   





zE2E.__init__c                 C   sp   t |  tjj| jjdd tjj| jj| j d tjj| j	jjdd tjj| j	jj| j d dS )znInitialize weight like Fairseq.

        Fairseq basically uses W, b, EmbedID.W ~ Uniform(-0.1, 0.1),
        gg?r   N)
r   r@   rA   inituniform_rV   r`   	constant_rK   r\   r   r   r   r   rd      s
    zE2E.init_like_fairseqc                    s    |||\}}   ||\}}} |||\ _ _ _ js+ j	s/d _
nvd} j|t|| j j j}g }	g }
dd |D }t|D ]I\}}|| } fdd|D } fdd|D }d| jjd}| jjd}d| jjd}|
|dg7 }
|	|dgg7 }	qQtj|	|
d	  _
t j}t|s j| j j j
  jS t d
|  jS )aE  E2E forward.

        :param torch.Tensor xs_pad: batch of padded input sequences (B, Tmax, idim)
        :param torch.Tensor ilens: batch of lengths of input sequences (B)
        :param torch.Tensor ys_pad: batch of padded token id sequence tensor (B, Lmax)
        :return: loss value
        :rtype: torch.Tensor
        g        Nc                 S   s    g | ]}|d  d dd qS )r   yseqr)   r   ).0	nbest_hypr   r   r   
<listcomp>   s     zE2E.forward.<locals>.<listcomp>c                    &   g | ]}t |d kr jt | qS ru   intr(   rv   idxrs   r   r   rx      s   & c                    ry   rz   r{   r}   rs   r   r   rx      s       d   zloss (=%f) is not correct)!target_language_biasingrZ   rY   rV   r\   r   r   r   trainingre   r   recognize_beam_batchr@   tensorrh   r(   r9   	enumeratejoinreplacer;   r<   splitnltk
bleu_scorecorpus_bleufloatmathisnanr   r   rR   warning)r   xs_padilensys_padhs_padhlens_lpz
nbest_hypslist_of_refshypsy_hatsiy_haty_trueseq_hatseq_trueseq_hat_textseq_true_text	loss_datar   rs   r   forward   sF   
	


zE2E.forwardc                 C   s^   | j r+|dddf d}|ddddf }|ddddf }tj||gdd}||fS )a$  Prepend target language IDs to source sentences for multilingual MT.

        These tags are prepended in source/target sentences as pre-processing.

        :param torch.Tensor xs_pad: batch of padded input sequences (B, Tmax, idim)
        :param torch.Tensor ilens: batch of lengths of input sequences (B)
        :return: source text without language IDs
        :rtype: torch.Tensor
        :return: target text without language IDs
        :rtype: torch.Tensor
        :return: target language IDs
        :rtype: torch.Tensor (B, 1)
        Nr   r)   )dim)r/   	unsqueezer@   cat)r   r   r   r   tgt_lang_idsr   r   r   r     s   zE2E.target_language_biasingNc              
   C   s   | j }|   | jr-t|d dd g}t| ttjt	t
|d dd tjd}nt|d g}t| ttjt	t
|d tjd}| | | |d|\}}	}	| j|d d|||}
|rk|   |
S )aO  E2E beam search.

        :param ndarray x: input source text feature (B, T, D)
        :param Namespace trans_args: argument Namespace containing options
        :param list char_list: list of characters
        :param torch.nn.Module rnnlm: language model module
        :return: N-best decoding results
        :rtype: list
        r   r)   Ndtype)r   evalr/   lenr   r@   
from_numpynpfromitermapr|   int64rZ   rY   rV   r   r\   recognize_beamtrain)r   xrh   r(   r9   previlenhhsr   yr   r   r   	translate  s    
( $zE2E.translatec                    s    j }    jr!tjdd |D tjd} fdd|D }ntjdd |D tjd} fdd|D }t| j}  	 
||\}	}
}tttt|
}
 j|	|
d|||}|rg   |S )	a~  E2E batch beam search.

        :param list xs:
            list of input source text feature arrays [(T_1, D), (T_2, D), ...]
        :param Namespace trans_args: argument Namespace containing options
        :param list char_list: list of characters
        :param torch.nn.Module rnnlm: language model module
        :return: N-best decoding results
        :rtype: list
        c                 s   s     | ]}t |d d V  qdS r)   Nr   rv   xxr   r   r   	<genexpr>M  s    z&E2E.translate_batch.<locals>.<genexpr>r   c              	      s$   g | ]}t  t|d d qS r   r   r@   r   r   rs   r   r   rx   N  s   $ z'E2E.translate_batch.<locals>.<listcomp>c                 s   s    | ]}t |V  qd S Nr   r   r   r   r   r   P  s    c                    s   g | ]
}t  t|qS r   r   r   rs   r   r   rx   Q  s    N)r   r   r/   r   r   r   r   rK   rZ   rY   rV   r@   r   listr   r|   r\   r   r   )r   xsrh   r(   r9   r   r   r   xpadr   r   r   r   r   rs   r   translate_batch=  s    zE2E.translate_batchc                 C   s|   |    t ( | |||\}}| | | ||\}}}| j|||}W d   n1 s3w   Y  | 	  |S )a  E2E attention calculation.

        :param torch.Tensor xs_pad: batch of padded input sequences (B, Tmax, idim)
        :param torch.Tensor ilens: batch of lengths of input sequences (B)
        :param torch.Tensor ys_pad: batch of padded token id sequence tensor (B, Lmax)
        :return: attention weights with the following shape,
            1) multi-head case => attention weights (B, H, Lmax, Tmax),
            2) other case => attention weights (B, Lmax, Tmax).
        :rtype: float ndarray
        N)
r   r@   no_gradr   rZ   rY   rV   r\   calculate_all_attentionsr   )r   r   r   r   hpadr   r   att_wsr   r   r   r   _  s   
zE2E.calculate_all_attentionsr   )r   r   r   r   staticmethodr"   r   r   r    r?   rd   r   r   r   r   r   __classcell__r   r   rn   r   r   +   s"    



k9

#"r   )&r   rf   rR   r   rN   chainerr   numpyr   r@   r   espnet.nets.e2e_asr_commonr   espnet.nets.mt_interfacer   *espnet.nets.pytorch_backend.initializationr   &espnet.nets.pytorch_backend.nets_utilsr   r   r   (espnet.nets.pytorch_backend.rnn.argumentr	   r
   r   *espnet.nets.pytorch_backend.rnn.attentionsr   (espnet.nets.pytorch_backend.rnn.decodersr   (espnet.nets.pytorch_backend.rnn.encodersr   espnet.utils.fill_missing_argsr   Chainr   rA   rB   r   r   r   r   r   <module>   s*   