o
    i&                     @   s  d dl Z d dlmZ d dlmZmZmZmZmZ d dl	Z	d dl
mZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZ d dlmZ d dlmZ d dl m!Z! d dl"m#Z# d dl$m%Z% ee	j&edkrzd dl'm(Z( nedddZ(G dd deZ)dS )    N)contextmanager)DictListOptionalTupleUnion)parse)check_argument_types)
AbsDecoder)
AbsEncoder)AbsFrontend)AbsPostEncoder)AbsPreEncoder)force_gatherable)AbsESPnetModel)ErrorCalculator)th_accuracy)add_sos_eos)LabelSmoothingLossz1.6.0)autocastTc                 c   s    d V  d S )N )enabledr   r   K/home/ubuntu/.local/lib/python3.10/site-packages/espnet2/mt/espnet_model.pyr      s   
r   c                %       s  e Zd ZdZdg dddddddddfd	ed
eeedf ee f de	e
 de	e dede	e dededeeedf ee f dededededededededef$ fddZdejdejd ejd!ejd"eejeeejf ejf f
d#d$Zdejdejd ejd!ejd"eeejf f
d%d&Zd ejd!ejd"eejejf fd'd(Zd ejd!ejd"eejejf fd)d*Zd+ejd,ejd-ejd.ejfd/d0Z  ZS )1ESPnetMTModelzEncoder-Decoder modelr   g        FTz<space>z<blank>
vocab_size
token_list.frontend
preencoderencoderpostencoderdecodersrc_vocab_sizesrc_token_list	ignore_id
lsm_weightlength_normalized_lossreport_bleu	sym_space	sym_blankextract_feats_in_collect_stats share_decoder_input_output_embed!share_encoder_decoder_input_embedc                    s  t  sJ t   |d | _|d | _|| _|| _|
| _| | _	|r<|j
d ur7|jd j|j
_td ntd |r^||krR|jd j|jd _td ntd| d| d || _|| _|| _|| _|| _t||
||d	| _|rt||||| _nd | _|| _d S )
N   r   z:Decoder input embedding and output linear layer are sharedzHDecoder has no output layer, so it cannot be shared with input embeddingz/Encoder and decoder input embeddings are sharedzsrc_vocab_size (z!) does not equal tgt_vocab_size (z?), so the encoder and decoder input embeddings cannot be shared)sizepadding_idx	smoothingnormalize_length)r	   super__init__soseosr   r"   r$   copyr   output_layerembedweightlogginginfowarningr   r   r    r   r!   r   criterion_mtMTErrorCalculatormt_error_calculatorr*   )selfr   r   r   r   r   r    r!   r"   r#   r$   r%   r&   r'   r(   r)   r*   r+   r,   	__class__r   r   r3   #   sV   






zESPnetMTModel.__init__texttext_lengthssrc_textsrc_text_lengthsreturnc                 K   s   |  dksJ |j|jd |jd   kr&|jd   kr&|jd ks3n J |j|j|j|jf|jd }|ddd| f }|ddd| f }| ||\}}| ||||\}	}
}|	}t| |
|d}t|||f|j\}}}|||fS )a  Frontend + Encoder + Decoder + Calc loss

        Args:
            text: (Batch, Length)
            text_lengths: (Batch,)
            src_text: (Batch, length)
            src_text_lengths: (Batch,)
            kwargs: "utt_id" is among the input.
        r-   r   N)lossaccbleu)	dimshapemaxencode_calc_mt_att_lossdictdetachr   device)r@   rC   rD   rE   rF   kwargs
batch_sizeencoder_outencoder_out_lensloss_mt_att
acc_mt_attbleu_mt_attrH   statsr9   r   r   r   forwardq   s2   



zESPnetMTModel.forwardc                 K   s>   | j r| ||\}}ntd| j   ||}}||dS )NzkGenerating dummy stats for feats and feats_lengths, because encoder_conf.extract_feats_in_collect_stats is )featsfeats_lengths)r*   _extract_featsr:   r<   )r@   rC   rD   rE   rF   rS   r\   r]   r   r   r   collect_feats   s   

zESPnetMTModel.collect_featsc                 C   s   t d | ||\}}W d   n1 sw   Y  | jdur)| ||\}}| ||\}}}| jdur?| ||\}}|d|dksTJ | |df|d| ksgJ | | f||fS )zFrontend + Encoder. Note that this method is used by mt_inference.py

        Args:
            src_text: (Batch, Length, ...)
            src_text_lengths: (Batch, )
        FNr   r-   )r   r^   r   r   r    r.   rM   )r@   rE   rF   r\   r]   rU   rV   _r   r   r   rN      s&   
	
	
zESPnetMTModel.encodec                 C   s   |  dksJ |j|d d d | f }t|| j| j| j\}}|d }| jd ur8| ||\}}||fS ||}}||fS )Nr-   )rK   rL   rM   r   r4   r5   r$   r   )r@   rE   rF   r`   r\   r]   r   r   r   r^      s   

zESPnetMTModel._extract_featsrU   rV   ys_padys_pad_lensc                 C   s   t || j| j| j\}}|d }| ||||\}}	| ||}
t|d| j|| jd}| j	s5| j
d u r8d }n|jdd}| 
| | }|
||fS )Nr-   r   )ignore_label)rK   )r   r4   r5   r$   r!   r=   r   viewr   trainingr?   argmaxcpu)r@   rU   rV   ra   rb   	ys_in_pad
ys_out_pad
ys_in_lensdecoder_outr`   loss_attacc_attbleu_attys_hatr   r   r   rO      s    
zESPnetMTModel._calc_mt_att_loss)__name__
__module____qualname____doc__intr   r   strr   r   r   r   r   r   r
   floatboolr3   torchTensorr   r[   r_   rN   r^   rO   __classcell__r   r   rA   r   r       s    	
N
5

+
r   )T)*r:   
contextlibr   typingr   r   r   r   r   rx   packaging.versionr   V	typeguardr	   espnet2.asr.decoder.abs_decoderr
   espnet2.asr.encoder.abs_encoderr   !espnet2.asr.frontend.abs_frontendr   'espnet2.asr.postencoder.abs_postencoderr   %espnet2.asr.preencoder.abs_preencoderr    espnet2.torch_utils.device_funcsr   espnet2.train.abs_espnet_modelr   espnet.nets.e2e_mt_commonr   r>   &espnet.nets.pytorch_backend.nets_utilsr   3espnet.nets.pytorch_backend.transformer.add_sos_eosr   <espnet.nets.pytorch_backend.transformer.label_smoothing_lossr   __version__torch.cuda.ampr   r   r   r   r   r   <module>   s,    