o
    %ݫi&?                     @   s   d Z ddlZddlZddlZddlmZ ddlmZ ddlmZm	Z	m
Z
mZmZmZmZmZ ddlmZ ddlmZ ddlmZ eeZG d	d
 d
ejZdddZdS )ue  This lobe is the interface for huggingface transformers models
It enables loading config and model via AutoConfig & AutoModel.

Transformer from HuggingFace needs to be installed:
https://huggingface.co/transformers/installation.html

Authors
 * Titouan Parcollet 2021, 2022, 2023
 * Mirco Ravanelli 2021
 * Boumadane Abdelmoumene 2021
 * Ju-Chieh Chou 2021
 * Artem Ploujnikov 2021, 2022
 * Abdel Heba 2021
 * Aku Rouhe 2022
 * Arseniy Gorin 2022
 * Ali Safaya 2022
 * Benoit Wang 2022
 * Adel Moumen 2022, 2023
 * Andreas Nautsch 2022, 2023
 * Luca Della Libera 2022
 * Heitor Guimarães 2022
 * Ha Nguyen 2023
    N)
model_info)nn)
AutoConfigAutoFeatureExtractor	AutoModelAutoModelForCausalLMAutoModelForPreTrainingAutoModelForSeq2SeqLMAutoModelWithLMHeadAutoTokenizer)length_to_mask)fetch)
get_loggerc                       s   e Zd ZdZ									d" fdd	Z	d#dd	Zd
d Zdd Zdd Zdd Z	dd Z
dd Zdd Zdd Zdd Zdd Zdd Zd d! Z  ZS )$HFTransformersInterfacea  This lobe provides an interface for integrating any HuggingFace transformer model within SpeechBrain.

    We use AutoClasses for loading any model from the hub and its necessary components.
    For example, we build Wav2Vec2 class which inherits HFTransformersInterface for working with HuggingFace's wav2vec models.
    While Wav2Vec2 can enjoy some already built features like modeling loading, pretrained weights loading, all weights freezing,
    feature_extractor loading, etc.
    Users are expected to override the essential forward() function to fit their specific needs.
    Depending on the HuggingFace transformer model in question, one can also modify the state_dict by overwriting the _modify_state_dict() method,
    or adapting their config by modifying override_config() method, etc.
    See:
    https://huggingface.co/docs/transformers/model_doc/auto
    https://huggingface.co/docs/transformers/autoclass_tutorial

    Arguments
    ---------
    source : str
        HuggingFace hub name: e.g "facebook/wav2vec2-large-lv60"
    save_path : str
        save directory of the downloaded model.
    for_pretraining: bool (default: False)
        If True, build the model for pretraining
    with_lm_head : bool (default: False)
        If True, build the model with lm_head
    with_casual_lm : bool (default: False)
        If True, build casual lm  model
    seq2seqlm : bool (default: False)
        If True, build a sequence-to-sequence model with lm_head
    quantization_config : dict (default: None)
        Quantization config, extremely useful for deadling with LLM
    freeze : bool (default: True)
        If True, the model is frozen. If False, the model will be trained
        alongside with the rest of the pipeline.
    cache_dir : str or Path (default: None)
        Location of HuggingFace cache for storing pre-trained models, to which symlinks are created.
    device : any, optional
        Device to migrate the model to.
    **kwargs
        Extra keyword arguments passed to the `from_pretrained` function.

    Example
    -------
    >>> model_hub = "facebook/wav2vec2-base-960h"
    >>> save_path = "tmp"
    >>> model = HFTransformersInterface(model_hub, save_path=save_path)
     FNpretrained_modelsc                    s   t    |dd}tj||d|d\| _}| | j| _|| _|| _| jr+t	| _
n|r1t| _
n|r7t| _
n	|r=t| _
nt| _
| j|f||	|
d| || _| jrhtdt| jj d | | j d S | j  | j  d S )Ntrust_remote_codeFT)	cache_dirreturn_unused_kwargsr   )	save_pathr   devicez@speechbrain.lobes.models.huggingface_transformers.huggingface - z is frozen.)super__init__getr   from_pretrainedconfigoverride_configquantization_configfor_pretrainingr   
auto_classr
   r   r	   r   _from_pretrainedfreezeloggerwarningtypemodel__name__freeze_modelgradient_checkpointing_disabletrain)selfsourcer   r   with_lm_headwith_casual_lm	seq2seqlmr   r!   r   r   kwargsr   _unused_kwargs	__class__ q/home/ubuntu/.local/lib/python3.10/site-packages/speechbrain/lobes/models/huggingface_transformers/huggingface.pyr   `   sJ   
	
z HFTransformersInterface.__init__c           
      K   s   |  ||\}}}|s| jr| j| j| _|r*| j  t|||d}	| |	 n| js>| jj	|f| j|| j
d|| _|durJ| j| dS dS )a  This function manages the source checking and loading of the params.

        # 1. Is the model from HF or a local path
        # 2. Is the model pretrained with HF or SpeechBrain
        # 3. Download (if appropriate) and load with respect to 1. and 2.

        Arguments
        ---------
        source : str
            HuggingFace hub name: e.g "facebook/wav2vec2-large-lv60"
        save_path : str
            Path (dir) of the downloaded model.
        cache_dir : str
            Path (dir) in which a downloaded pretrained model configuration should be cached.
        device : any, optional
            Device to migrate the model to.
        **kwargs
            Extra keyword arguments passed to `from_pretrained` function.
        )filenamer+   savedir)r   r   r   N)_check_model_sourcer   r   from_configr   r%   r(   r   _load_sb_pretrained_parametersr   r   to)
r*   r+   r   r   r   r/   is_sb	ckpt_fileis_localckpt_full_pathr3   r3   r4   r       s0   

z(HFTransformersInterface._from_pretrainedc                 C   sl  d}t |}d}| sd}t |d |dd d }| rE|tt|d  }td	d
 tt|D rBd}t|}n|}n|}|rztdd
 t|D r\d}|||fS t|D ]}	|	drxtj	
||	}d}|||f  S qan3t|j}
|
D ]}	|	jdr|	j}d}|||f  S q|
D ]}	|	jdr|	j}d}|||f  S q| d}t|)a  Checks if the pretrained model has been trained with SpeechBrain and
        is hosted locally or on a HuggingFace hub.
        Called as static function in HFTransformersInterface._from_pretrained.

        Arguments
        ---------
        path : str
            Used as "source"; local path or HuggingFace hub name: e.g "facebook/wav2vec2-large-lv60"
        save_path : str
            norm_output (dir) of the downloaded model.

        Returns
        -------
        is_sb : bool
            Whether/not the model is deserializable w/ SpeechBrain or not (then, model conversion is needed).
        checkpoint_filename : str
            as of HuggingFace documentation: file name relative to the repo root (guaranteed to be here).
        is_local : bool
            Whether/not the model is hosted locally or on a HuggingFace hub.

        Raises
        ------
        ValueError
            If file is not found
        r   TFz	/models--/z--z
/snapshotsr   c                 s       | ]}| d V  qdS )).bin.safetensors.ckptNendswith.0Filer3   r3   r4   	<genexpr>  
    
z>HFTransformersInterface._check_model_source.<locals>.<genexpr>c                 s   r@   )rA   rB   NrD   rF   r3   r3   r4   rI     rJ   rC   rK   z< does not contain a .bin, .safetensors or .ckpt checkpoint !)pathlibPathexistsreplaceoslistdirstranyrE   pathjoinr   siblings	rfilenameFileNotFoundError)r*   rT   r   checkpoint_filenamer+   r=   sink
local_pathr;   rH   fileserr_msgr3   r3   r4   r7      sd   




z+HFTransformersInterface._check_model_sourcec                 K   s   dS )a  A custom loading ensures SpeechBrain compatibility for pretrain and model.

        For example, wav2vec2 model pretrained with SB (Wav2Vec2Pretrain) has slightly different keys from Wav2Vec2.
        This method handle the compatibility between the two.

        Users should modify this function according to their own tasks.

        Arguments
        ---------
        path : str
            Checkpoint path, file name relative to the repo root.
        **kwargs : dict
            Args to forward
        Nr3   )r*   rT   r/   r3   r3   r4   _modify_state_dict0  s   z*HFTransformersInterface._modify_state_dictc                 C   s   |  |}|du rtj|dd}| jj|dd}|jD ]}td| j d| d d	|   q|jD ]}td
| ddt	| jj
 d  q5dS )a  Loads the parameter of a HuggingFace model pretrained with SpeechBrain
        and the HuggingFace Pretrain Object. It is necessary to perform a custom
        loading because HuggingFace adds a level to the checkpoint when storing
        the model breaking the compatibility Pretrain and model de/serialization.

        For example, a typical Wav2Vec2 checkpoint for a given parameter
        would be: model.conv.weight.data while for Wav2Vec2Pretrain it
        is: model.wav2vec2.weight.data (wav2vec2 must be removed before loading).

        Arguments
        ---------
        path : pathlib.Path
            The full path to the checkpoint.
        Ncpu)map_locationF)strictzDuring parameter transfer to z loading from z*, the transferred parameters did not have zparameters for the key: zThe param with the key: z is discarded as it zis useless for finetuning this z model.)r^   torchloadr%   load_state_dictmissing_keysr"   r#   unexpected_keysr$   r&   )r*   rT   modified_state_dictincompatible_keysmissing_keyunexpected_keyr3   r3   r4   r9   A  s*   



z6HFTransformersInterface._load_sb_pretrained_parametersc                 K      t z?Users should modify this function according to their own tasks.NotImplementedErrorr*   r/   r3   r3   r4   forwardd     zHFTransformersInterface.forwardc                 K   rk   rl   rm   ro   r3   r3   r4   forward_encoderh  rq   z'HFTransformersInterface.forward_encoderc                 K   rk   rl   rm   ro   r3   r3   r4   forward_decoderl  rq   z'HFTransformersInterface.forward_decoderc                 K   rk   )zMight be useful for models like mbart, which can exploit SB's beamsearch for inference
        Users should modify this function according to their own tasks.rm   ro   r3   r3   r4   decodep     zHFTransformersInterface.decodec                 K   rk   )zeCustom encoding for inference
        Users should modify this function according to their own tasks.rm   ro   r3   r3   r4   encodeu  ru   zHFTransformersInterface.encodec                 C   s    |   | D ]}d|_qdS )a  
        Freezes parameters of a model.
        This should be overridden too, depending on users' needs, for example, adapters use.

        Arguments
        ---------
        model : from AutoModel.from_config
            Valid HuggingFace transformers model object.
        FN)eval
parametersrequires_grad)r*   r%   paramr3   r3   r4   r'   z  s   
z$HFTransformersInterface.freeze_modelc                 C   s   |S )a$  Users should modify this function according to their own tasks.

        Arguments
        ---------
        config : HuggingFace config object
            The original config.

        Returns
        -------
        config : HuggingFace config object
            Overridden config.
        r3   )r*   r   r3   r3   r4   r     s   z'HFTransformersInterface.override_configc                 K   s   t j|fd|i|| _dS )a  Load model's feature_extractor from the hub.

        Arguments
        ---------
        source : str
            HuggingFace hub name: e.g "facebook/wav2vec2-large-lv60"
        cache_dir : str
            Path (dir) in which a downloaded pretrained model configuration should be cached.
        **kwarg
            Keyword arguments to pass to the AutoFeatureExtractor.from_pretrained() method.
        r   N)r   r   feature_extractor)r*   r+   r   kwargr3   r3   r4   load_feature_extractor  s   z.HFTransformersInterface.load_feature_extractorc                 K   s   t j|fi || _dS )a  Load model's tokenizer from the hub.

        Arguments
        ---------
        source : str
            HuggingFace hub name: e.g "facebook/wav2vec2-large-lv60"
        **kwarg
            Keyword arguments to pass to the AutoFeatureExtractor.from_pretrained() method.
        N)r   r   	tokenizer)r*   r+   r|   r3   r3   r4   load_tokenizer  s   
z&HFTransformersInterface.load_tokenizer)	r   FFFFNFr   N)N)r&   
__module____qualname____doc__r   r    r7   r^   r9   rp   rr   rs   rt   rv   r'   r   r}   r   __classcell__r3   r3   r1   r4   r   1   s4    1F
6Y#r   c                 C   s0   d}|durt || jd  }t| }|S )az  This method generates the padding masks.

    Arguments
    ---------
    src : tensor
        The sequence to the encoder (required).
    wav_len : tensor
        The relative length of the wav given in SpeechBrain format.
    pad_idx : int
        The index for <pad> token (default=0).

    Returns
    -------
    src_key_padding_mask : tensor
        The padding mask.
    N   )rb   roundshaper   bool)srcwav_lenpad_idxsrc_key_padding_maskabs_lenr3   r3   r4   make_padding_masks  s
   r   )Nr   )r   rP   rL   rb   huggingface_hubr   r   transformersr   r   r   r   r   r	   r
   r   speechbrain.dataio.dataior   speechbrain.utils.fetchingr   speechbrain.utils.loggerr   r&   r"   Moduler   r   r3   r3   r3   r4   <module>   s     (   