o
    %ݫi                     @   s@   d Z ddlZddlmZ ddlmZ eeZG dd deZdS )zThis lobe enables the integration of huggingface pretrained mBART models.
Reference: https://arxiv.org/abs/2001.08210

Transformer from HuggingFace needs to be installed:
https://huggingface.co/transformers/installation.html

Authors
 * Ha Nguyen 2023
    N)HFTransformersInterface)
get_loggerc                       sV   e Zd ZdZ				d fdd	ZdddZe dd
dZdd Z	dd Z
  ZS )mBARTad  This lobe enables the integration of HuggingFace and SpeechBrain
    pretrained mBART models.

    Source paper mBART: https://arxiv.org/abs/2001.08210
    Transformer from HuggingFace needs to be installed:
    https://huggingface.co/transformers/installation.html

    The model is normally used as a text decoder of seq2seq models. It
    will download automatically the model from HuggingFace or use a local path.

    Arguments
    ---------
    source : str
        HuggingFace hub name: e.g "facebook/mbart-large-50-many-to-many-mmt"
    save_path : str
        Path (dir) of the downloaded model.
    freeze : bool (default: True)
        If True, the model is frozen. If False, the model will be trained
        alongside with the rest of the pipeline.
    target_lang: str (default: fra_Latn (a.k.a French)
        The target language code according to NLLB model.
    decoder_only : bool (default: True)
        If True, only take the decoder part (and/or the lm_head) of the model.
        This is useful in case one wants to couple a pre-trained speech encoder (e.g. wav2vec)
        with a text-based pre-trained decoder (e.g. mBART, NLLB).
    share_input_output_embed : bool (default: True)
        If True, use the embedded layer as the lm_head.

    Example
    -------
    >>> src = torch.rand([10, 1, 1024])
    >>> tgt = torch.LongTensor([[250008,    313,     25,    525,    773,  21525,   4004,      2]])
    >>> model_hub = "facebook/mbart-large-50-many-to-many-mmt"
    >>> save_path = "savedir"
    >>> model = mBART(model_hub, save_path) # doctest: +SKIP
    >>> outputs = model(src, tgt) # doctest: +SKIP
    Tfr_XXc           	         s   t  j|||dd || _|| _|| _| j|d |d |r3| jjjjj	| jj
_	d| jj
_d| jjjj_|r9| jj`| j D ]\}}d|v sJd|v rNd|_q>d|_q>d S )NT)source	save_pathfreeze	seq2seqlm)r   	pad_tokentgt_langFencoder_attn
layer_norm)super__init__target_langdecoder_onlyshare_input_output_embedload_tokenizermodeldecoderembed_tokensweightlm_headrequires_gradencodernamed_parameters)	selfr   r   r   r   r   r   kp	__class__ k/home/ubuntu/.local/lib/python3.10/site-packages/speechbrain/lobes/models/huggingface_transformers/mbart.pyr   <   s,   	
zmBART.__init__r   c                 C   s   |  |d| jjjjj}| jrKt 0 t| jjdr&| jjj	|dj
 }| jjj||dj
 }| j| }|W  d   S 1 sFw   Y  t| jjdr[| jjj	|dj
}| jjj||dj
}| j|}|S )a  This method implements a forward step for mt task using a wav2vec encoder
        (same than above, but without the encoder stack)

        Arguments
        ---------
        src : tensor
            output features from the w2v2 encoder (transcription)
        tgt : tensor
            The sequence to the decoder (translation) (required).
        pad_idx : int
            The index for <pad> token (default=0).

        Returns
        -------
        dec_out : torch.Tensor
            Decoder output.
        r   r   )inputs_embeds)	input_idsencoder_hidden_statesN)custom_paddingr   r   configpad_token_idr   torchno_gradhasattrr   last_hidden_statedetachr   )r   srctgtpad_idxdec_outr!   r!   r"   forwardd   s8   
 zmBART.forwardNc                 C   s\   |j tjtjfvr| }tj| |jd}| jjj|||dd}| j	|j
|jd fS )a  This method implements a decoding step for the transformer model.

        Arguments
        ---------
        tgt : torch.Tensor
            The sequence to the decoder.
        encoder_out : torch.Tensor
            Hidden output of the encoder.
        enc_len : torch.LongTensor
            The actual length of encoder states.

        Returns
        -------
        output : torch.Tensor
            Output of transformer.
        cross_attention : torch.Tensor
            Attention value.
        )deviceT)r$   r%   attention_maskoutput_attentions)dtyper)   longint64onessizer3   r   r   r   r,   cross_attentions)r   r/   encoder_outenc_lentgt_maskoutputr!   r!   r"   decode   s   zmBART.decodec                 C   s   |  }||||k< |S )aA  This method customizes the padding.
        Default pad_idx of SpeechBrain is 0.
        However, it happens that some text-based models like mBART reserves 0 for something else,
        and are trained with specific pad_idx.
        This method change org_pad to custom_pad

        Arguments
        ---------
        x : torch.Tensor
          Input tensor with original pad_idx
        org_pad : int
          Original pad_idx
        custom_pad : int
          Custom pad_idx

        Returns
        -------
        out : torch.Tensor
            Padded outputs.
        )clone)r   xorg_pad
custom_padoutr!   r!   r"   r&      s   zmBART.custom_paddingc                 C   s
   d|_ |S )zIf the config needs to be overridden, here is the place.

        Arguments
        ---------
        config : MBartConfig
            The original config needs to be overridden.

        Returns
        -------
        Overridden config
        g?)decoder_layerdrop)r   r'   r!   r!   r"   override_config   s   zmBART.override_config)Tr   TT)r   )N)__name__
__module____qualname____doc__r   r2   r)   r*   rA   r&   rH   __classcell__r!   r!   r   r"   r      s    *
(,%r   )	rL   r)   =speechbrain.lobes.models.huggingface_transformers.huggingfacer   speechbrain.utils.loggerr   rI   loggerr   r!   r!   r!   r"   <module>   s    
