o
    i                     @   s   d Z ddlmZ ddlZddlmZ ddlmZ ddlm	Z	 ddl
mZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ G dd deZdS )zMasked LM Decoder definition.    )TupleN)check_argument_types)
AbsDecoder)make_pad_mask)MultiHeadedAttention)DecoderLayer)PositionalEncoding)	LayerNorm)PositionwiseFeedForward)repeatc                       s   e Zd Zdddddddddeddfd	ed
edededededededededededef fddZde	j
de	j
de	j
de	j
dee	j
e	j
f f
ddZ  ZS )
MLMDecoder   i      g?g        embedTF
vocab_sizeencoder_output_sizeattention_headslinear_units
num_blocksdropout_ratepositional_dropout_rateself_attention_dropout_ratesrc_attention_dropout_rateinput_layeruse_output_layernormalize_beforeconcat_afterc              
      s   t  sJ t   | |d7 }|
dkr%tjtj| | || _n*|
dkrHtjtj| tj	 tj
tj | || _ntd|
 | _| jrZt	 | _|retj || _nd | _t| fdd| _d S )N   r   linearz'only 'embed' or 'linear' is supported: c                    s,   t  t t t S )N)r   r   r
   )lnumattention_dimr   r   r   r   r   r   r    S/home/ubuntu/.local/lib/python3.10/site-packages/espnet2/asr/decoder/mlm_decoder.py<lambda>G   s    
z%MLMDecoder.__init__.<locals>.<lambda>)r   super__init__torchnn
Sequential	Embeddingr   Linearr	   DropoutReLU
ValueErrorr   
after_normoutput_layerr   decoders)selfr   r   r   r   r   r   r   r   r   r   r   pos_enc_classr   r   	__class__r    r#   r&      s8   





zMLMDecoder.__init__hs_padhlens	ys_in_pad
ys_in_lensreturnc                 C   s   |}t |dddddf  |j}|d}|dddd|}|d|d|@ }|}	t | dddddf |	j}
| |}| |||	|
\}}}	}
| jrZ| 	|}| j
durd| 
|}|d}||fS )a?  Forward decoder.

        Args:
            hs_pad: encoded memory, float32  (batch, maxlen_in, feat)
            hlens: (batch)
            ys_in_pad:
                input token ids, int64 (batch, maxlen_out)
                if input_layer == "embed"
                input tensor (batch, maxlen_out, #mels) in the other cases
            ys_in_lens: (batch)
        Returns:
            (tuple): tuple containing:
            x: decoded token score before softmax (batch, maxlen_out, token)
                if use_output_layer is True,
            olens: (batch, )
        Nr      )r   todevicesize	transposer   r   r1   r   r/   r0   sum)r2   r6   r7   r8   r9   tgttgt_masktgt_max_lentgt_mask_tmpmemorymemory_maskxolensr"   r"   r#   forwardV   s"   $
$




zMLMDecoder.forward)__name__
__module____qualname__r   intfloatstrboolr&   r'   Tensorr   rJ   __classcell__r"   r"   r4   r#   r      sf    	
?r   )__doc__typingr   r'   	typeguardr   espnet2.asr.decoder.abs_decoderr   &espnet.nets.pytorch_backend.nets_utilsr   1espnet.nets.pytorch_backend.transformer.attentionr   5espnet.nets.pytorch_backend.transformer.decoder_layerr   1espnet.nets.pytorch_backend.transformer.embeddingr   2espnet.nets.pytorch_backend.transformer.layer_normr	   Aespnet.nets.pytorch_backend.transformer.positionwise_feed_forwardr
   .espnet.nets.pytorch_backend.transformer.repeatr   r   r"   r"   r"   r#   <module>   s   