o
    }oi^                  	   @   s  d dl Z d dlZd dlZd dlZd dlmZmZmZmZ d dl	Z	d dl
mZ d dlmZ d dlmZ d dlmZmZ d dlmZ d dlmZ d d	lmZmZ d d
lmZ d dlmZ d dlm Z  d dl!m"Z" d dl#m$Z$ d dl%m&Z&m'Z' d dl(m)Z) d dl*m+Z+ d dl,m-Z- d dl.m/Z/ d dl0m1Z1 d dl2m3Z3m4Z4 zd dl5m6Z6m7Z7 dZ8W n e9e:fy   dZ8Y nw dgZ;ej<=ej<>e?edZ@ejAe@dd G dd de-e/ZBdS )    N)AnyMappingOptionalUnion)_load)Trainer)_load_state)load_hparams_from_tags_csvload_hparams_from_yaml)rank_zero_only)pl_legacy_patch)
DictConfig	OmegaConf)TRANSFORMERS_CACHE)AutoTokenizer)
BertModule)VOCAB_FILE_NAME)get_lm_model)MEGATRON_CONFIG_MAP#get_megatron_pretrained_bert_models)get_tokenizer)NLPSaveRestoreConnector)ModelPT)
Exportable)SaveRestoreConnector)AppStatelogging)dist_checkpointingparallel_stateTFNLPModelnemo_nlp_tmpexist_okc                       sz  e Zd ZdZd5dedef fddZ	d6ded	ed
ef fddZ	e
dd ZdefddZe
			d7dededefddZedededef fddZe fddZedd Zed d! Zed"d# Ze			$d8d%ed&ed'ee d(efd)d*Zd9d+eeef d(ef fd,d-Ze			$				$d:d.ed/eeeef  d&eej d(ed0ed1edee d2ef fd3d4Z   Z!S );r   zBase class for NLP Models.NFcfgtrainerc                    s8  d | _ d | _d }d }d }d}|dr!|jddr!|jdd}t }|drt| ds?|jdr?||vr?| |j n*||v rit	|}	t
|||||	d}
t| dr[| jd u s`t| dsit|
dri|
j| _|drt|ddr|ddr| d|jj}t || t | _|dr|s|dd	rt|jj}|dd
r| d|jj}t
|||||d}
t| dr| jd u st| dst|
dr|
j| _|jj| _|jd ur|jddd urd|jddv s||v r|
jj | _ n|
jj | _ |dr|s|
| _|   d S d S d S )N language_modelpretrained_model_name	tokenizertokenizer_name)config_fileconfig_dict
vocab_filer$   r#   r,   tokenizer.vocab_fileconfigr*   language_model.config_filemegatron)hidden_size
bert_modelgetr&   r   hasattrr(   setup_tokenizercopydeepcopyr   register_artifactr,   super__init__r   _save_restore_connectorr   to_containerr.   r*   r'   r#   register_bert_model)selfr#   r$   
no_lm_initr,   r+   r*   pretrain_model_name#all_pretrained_megatron_bert_modelscopy_cfgr2   	__class__ Y/home/ubuntu/.local/lib/python3.10/site-packages/nemo/collections/nlp/models/nlp_model.pyr:   A   s   


	 



zNLPModel.__init__config_pathsrcverify_src_existsc                    s   t  j|||dS )zrOverrides ModelPT register_artifact default behavior.
        NLP models usually need artifacts that are optional.)rI   )r9   r8   )r>   rG   rH   rI   rC   rE   rF   r8      s   zNLPModel.register_artifactc                 C   sV  | j durt| j tr1| j j}|dd}|d }tjt|d }| j j	
| | d| dS t| drd| jv r| jtv rt| j d	 }|dur| jd }tjt|d }t|d
dd}|tj|dddd  W d   n1 sxw   Y  | d| dS td| j d dS td| j d dS td| j  d dS dS )z0Adds encoder config to .nemo archive for Jarvis.N/__encoder_configz.jsonr/   r'   r0   r.   wutf-8encoding   Tindent	sort_keys
zFor z&, set the config_file in the YAML filez*Registering MegatronBERT model config for zU                         is not yet supported. Please override this method if needed.z"Registering BERT model config for Q is not yet supported.                     Please override this method if needed.)r2   
isinstancer   name_or_pathreplaceospathjoinNEMO_NLP_TMPr.   to_json_filer8   r4   r'   r   openwritejsondumpsr   info)r>   r'   encoder_config_pathencoder_config_srcoutput_configfrE   rE   rF   r=      s4   


zNLPModel.register_bert_modelc                 C   s~   d}| dr| jd|jd}|js|s|jr0t|j||jr$t|jnd| jd|jdd| _	|du r=| j
d|d dS dS )a  Instantiates tokenizer based on config and registers tokenizer artifacts.

           If model is being restored from .nemo file then the tokenizer.vocab_file will
           be used (if it exists).

           Otherwise, we will use the vocab file provided in the config (if it exists).

           Finally, if no vocab file is given (this happens frequently when using HF),
           we will attempt to extract the vocab from the tokenizer object and then register it.

        Args:
            cfg (DictConfig): Tokenizer config
        Nr,   r-   rG   rH   tokenizer.tokenizer_model)r)   r,   special_tokenstokenizer_model)vocab_file_config_pathr#   )r3   r8   r,   r)   rk   r   rj   r   r<   r(   _register_vocab_from_tokenizer)r>   r#   r,   rE   rE   rF   r5      s   
	zNLPModel.setup_tokenizerr-   tokenizer_vocab_dictrl   vocab_dict_config_pathc                 C   sj  | j du r	tdt| j tr| j j  }tt| dd d}t	 }d|v r6|j
dur6||j
  tj|dd }|| | }tjt|}tj|dd	 tj||}	t|	d
dd}
|
tj|dddd  W d   n1 s|w   Y  | j||	d | j j jj}| j | tj|t| }||_| j||d dS td| j  d dS )a  Creates vocab file from tokenizer if vocab file is None.

        Args:
            vocab_file_config_path: path to the vocab_file in the config
            vocab_dict_config_path: path to the vocab_dict in the config
            cfg: tokenizer config
        Nz<Instantiate self.tokenizer before registering vocab from it.c                 S   s   | d S )N   rE   )itemrE   rE   rF   <lambda>   s    z9NLPModel._register_vocab_from_tokenizer.<locals>.<lambda>)keyr)   T)rT   r!   rM   rN   rO   rQ   rR   rU   rh   z Registering tokenizer vocab for rV   )r(   
ValueErrorrW   r   	get_vocabdictsorteditemshashlibmd5r)   updateencodera   rb   	hexdigestrZ   r[   r\   r]   makedirsr_   r`   r8   rD   __name__save_vocabularyr   r,   r   rc   )r>   rl   ro   r#   
vocab_dictmvocab_dict_strvocab_dict_hash	hash_pathvocab_json_srcrg   r)   vocab_file_srcrE   rE   rF   rm      s6   


z'NLPModel._register_vocab_from_tokenizer	path2file
out_folderreturnc                       t tt| |S N)r9   r   _unpack_nemo_file)r   r   rC   rE   rF   r   !     zNLPModel._unpack_nemo_filec                    r   r   )r9   r   _make_nemo_file_from_folder)filename
source_dirrC   rE   rF   r   %  r   z$NLPModel._make_nemo_file_from_folderc                 C      | j S r   )r2   r>   rE   rE   rF   input_module)     zNLPModel.input_modulec                 C   r   r   )
classifierr   rE   rE   rF   output_module-  r   zNLPModel.output_modulec                 C   s   t  }|jdur
dS dS ) NTF)r   model_parallel_group)r>   	app_staterE   rE   rF   is_model_parallel_initialized1  s   
z&NLPModel.is_model_parallel_initializedTcheckpoint_pathmap_locationhparams_filestrictc              	   K   s$  d}z| j dd d}tj|r|}tj|d}d}t  |dur,t||d}nt|dd d}W d   n1 s>w   Y  |durq|d	d
 }| dkrYt	|}	n| dv rdt
|}	ntdd|	d< |	|| j< | j|vr{i || j< || j d|| j }
| }d|v r|d |
| |
ddr|du ri }|d  D ]}|ddd}|d | ||< q||d< d|v rt| |fd|i|}nt| |f||
d|}|durUt sdd }|jjjdur|jjjj||jd |jj  | }|ddr;i }| D ]!\}}|ddd}|}t|dr0|jddd|_|||< q||d< n||d< tj ||d}|!| t|drU|"  t|
d r|
j#d!durl|$d"|
j#j% |
j#d#dur}|$d$|
j#j& |
j#d%dur|$d&|
j#j' t|
d'r|
j(d!dur|$d(|
j(j% |
j(d#dur|$d)|
j(j& |
j(d%dur|$d*|
j(j' t|
d+r |
j)d!dur|$d,|
j)j% |
j)d#dur|$d-|
j)j& |
j)d%dur |$d.|
j)j' |}W | j dd |S | j dd w )/z
        Loads ModelPT from checkpoint, with some maintenance of restoration.
        For documentation, please refer to LightningModule.load_from_checkpoin() documentation.
        NT)is_being_restoredz	common.ptF)r   c                 S   s   | S r   rE   )storagelocrE   rE   rF   rr   ]  s    z/NLPModel.load_from_checkpoint.<locals>.<lambda>.csv)ymlyamlz2.csv, .yml or .yaml is required for `hparams_file`on_gpur#   r$   megatron_amp_O2
state_dictzmodel.zmodel.module.rp   r   )r   r#   c                   S   s   d S r   rE   rE   rE   rE   rF   dummy  s   z,NLPModel.load_from_checkpoint.<locals>.dummy)r$   load_mlmr%   rs   )sharded_state_dictcheckpoint_dir"setup_transformer_engine_tp_groupsr(   rk   ri   r,   r-   
merge_fileztokenizer.merge_fileencoder_tokenizerz!encoder_tokenizer.tokenizer_modelzencoder_tokenizer.vocab_filezencoder_tokenizer.merge_filedecoder_tokenizerz!decoder_tokenizer.tokenizer_modelzdecoder_tokenizer.vocab_filezdecoder_tokenizer.merge_file)*_set_model_restore_staterZ   r[   isdirr\   r   pl_loadsplitlowerr	   r
   rt   CHECKPOINT_HYPER_PARAMS_KEYr3   r6   popr{   keysrY   ptl_load_stater   is_initializedr$   strategylauncherlaunchsetup_environmentr   rx   r4   rs   r   loadon_load_checkpointr   r(   r8   rk   r,   r   r   r   )clsr   r   r   r   kwargs
checkpointr   	extensionhparamsr#   config_kwargsnew_state_dictrs   new_keymodelr   r   mlm_sharded_state_dictkv	new_valuerE   rE   rF   load_from_checkpoint:  s   










zNLPModel.load_from_checkpointr   c              
      s   | j durd| j jjvrd|v r|d= ndd | D }|D ]}||= qztt| j||d}W |S  tyf } z(tt| j|dd}tdd	 |j	D rYt
d
|j	 d n|W Y d}~|S d}~ww )r   Nposition_idsz"bert_model.embeddings.position_idsc                 S   s   g | ]}d |v r|qS )r   rE   ).0xrE   rE   rF   
<listcomp>  s    z,NLPModel.load_state_dict.<locals>.<listcomp>)r   Fc                 s   s    | ]}| d V  qdS )_extra_stateN)endswith)r   srE   rE   rF   	<genexpr>  s    z+NLPModel.load_state_dict.<locals>.<genexpr>znLoding checkpoint created with Transformer Engine version lower than 1.13.                     Missing layers z will be ignored.)r2   
embeddings_modulesr   r9   r   load_state_dictRuntimeErrorallmissing_keysr   warning)r>   r   r   pos_id_keysrs   resultserC   rE   rF   r     s.   

zNLPModel.load_state_dictrestore_pathoverride_config_pathreturn_configsave_restore_connectorvalidate_access_integrityc	           	   
      sf   |d u rt  }tj|r||_t|tr&|ddr&|d u r&t	d d}t
 ||||||||S )Nuse_cpu_initializationFz9use_cpu_initialization is True, loading checkpoint on CPUcpu)r   rZ   r[   r   model_extracted_dirrW   r   r3   r   rc   r9   restore_from)	r   r   r   r   r   r   r   r$   r   rC   rE   rF   r     s*   

zNLPModel.restore_from)NF)F)r-   rn   N)NNT)T)NNTFNNT)"r   
__module____qualname____doc__r   r   r:   strboolr8   r   r=   r5   rm   staticmethodr   r   propertyr   r   r   classmethodr   r   r   r   r   r   r   torchdevicer   r   __classcell__rE   rE   rC   rF   r   >   s    W

' :


  	)Cr6   ry   ra   rZ   typingr   r   r   r   r   #lightning.fabric.utilities.cloud_ior   r   lightning.pytorchr   lightning.pytorch.core.savingr   r   r	   r
   lightning.pytorch.utilitiesr   %lightning.pytorch.utilities.migrationr   	omegaconfr   r   transformersr   =nemo.collections.common.tokenizers.huggingface.auto_tokenizerr   nemo.collections.nlp.modulesr   Anemo.collections.nlp.modules.common.huggingface.huggingface_utilsr   ,nemo.collections.nlp.modules.common.lm_utilsr   ;nemo.collections.nlp.modules.common.megatron.megatron_utilsr   r   3nemo.collections.nlp.modules.common.tokenizer_utilsr   (nemo.collections.nlp.parts.nlp_overridesr   nemo.core.classesr   nemo.core.classes.exportabler   +nemo.core.connectors.save_restore_connectorr   
nemo.utilsr   r   megatron.corer   r   HAVE_MEGATRON_COREImportErrorModuleNotFoundError__all__r[   r\   dirnamer   r]   r~   r   rE   rE   rE   rF   <module>   sF   