o
    }oi                     @   s@   d dl Z d dlmZ g dZdZdddZddd	Zd
d ZdS )    N)NEG_INFform_attention_masktransformer_weights_initmask_padded_tokensg     c                 C   s~   | du rdS d| j d | j d f}| jtdd}|dur0ttj|tj| jd|}||@ }d|tj t	 }|dS )a  
    Build attention mask with optional masking of future tokens we forbid
    to attend to (e.g. as it is in Transformer decoder).

    Args:
        input_mask: binary mask of size B x L with 1s corresponding to valid
            tokens and 0s corresponding to padding tokens
        diagonal: diagonal where triangular future mask starts
            None -- do not mask anything
            0 -- regular translation or language modeling future masking
            1 -- query stream masking as in XLNet architecture
    Returns:
        attention_mask: mask of size B x 1 x L x L with 0s corresponding to
            tokens we plan to attend to and -10000 otherwise
    N   )dtype)r   device)
shapetobool	unsqueezetorchtrilonesr   floatr   )
input_maskdiagonal
attn_shape	attn_maskfuture_maskattention_mask r   c/home/ubuntu/.local/lib/python3.10/site-packages/nemo/collections/common/parts/transformer_utils.pyr      s   
r   {Gz?Tc                 C   s   t | tjr+|rtj| j n
tjj| jd|d | jdur)tj| jd dS dS t | tj	r=tjj| jd|d dS t | tj
rUtj| jd tj| jd dS dS )a{  
    Initialize different weights in Transformer model.

    Args:
        module: torch.nn.Module to be initialized
        std_init_range: standard deviation of normal initializer
        xavier: if True, xavier initializer will be used in Linear layers
            as was proposed in AIAYN paper, otherwise normal initializer
            will be used (like in BERT paper)
    g        )meanstdNg      ?)
isinstancennLinearinitxavier_uniform_weightnormal_bias	constant_	Embedding	LayerNorm)modulestd_init_rangexavierr   r   r   r   3   s   
r   c                 C   s   | |k}|S Nr   )tokenspad_idmaskr   r   r   r   M   s   r   r*   )r   T)r   torch.nnr   __all__r   r   r   r   r   r   r   r   <module>   s   

