o
    }oiO,                     @   s   d dl Z d dlmZ d dlmZmZmZmZ d dlZd dl	Z	d dl
mZ d dlmZ d dlmZ d dlmZmZmZmZmZ d dlmZ d d	lmZ d d
lmZ d dlmZ G dd deZdS )    N)Path)AnyDictOptionalUnion)instantiate)	OmegaConf)
AutoConfig)ModelConfig	ModelImplPoolerConfig_get_and_verify_dtype_get_and_verify_max_len)get_hf_text_config)TarPath)is_nemo2_checkpoint)get_model_converterc                9   @   sb  e Zd ZdZddddddddddddddddddddejfdededed	ed
eeej	f de
dee deeeef  dee dee dee dee dee
 dee dee dedee
 de
dededededee dee deeeef  ded eeef d!df8d"d#Zed$eeef ded!eeef fd%d&Zd'eeef d!eeef fd(d)Zd*d+ ZdS ),NemoModelConfigz
    This class pretents to be a vllm.config.ModelConfig (with extra fields) but skips
    some of its initialization code, and initializes the configuration from a Nemo checkpoint instead.
    NFi       nemo_checkpoint	model_dir
model_typetokenizer_modedtypeseedrevisionoverride_neuron_configcode_revisionrope_scaling
rope_thetatokenizer_revisionmax_model_lenquantizationquantization_param_pathenforce_eagermax_seq_len_to_capturemax_logprobsdisable_sliding_windowdisable_cascade_attnuse_async_output_procdisable_mm_preprocessor_cachelogits_processor_patternoverride_pooler_configoverride_generation_configenable_sleep_mode
model_implreturnc           $   	   C   s  || _ || _|| _d | _|| _d| _d| _|| _|| _|	| _	|| _
|
| _|| _|| _|| _|| _|| _|| _|| _|| _|| _|| _|| _d | _i | _|| _|| _|| _d | _d| _d| _d | _|| _ | jdv rld| _!nd| _!| " | _#| $|| _%|| _&ddl'm(} | j&r|) st*dt+|| _,| j,d u rt-d	| d
t.|rt/|}t01|d j}d|v rt2|d dkr|d= | 3||}|d 4d}t5j1|t5j6d| _7W d    n1 sw   Y  | 8| j7d }t9|} | j:|d< | j,;| j7d | t<j=|fi || _>d|d v sJ |d }!nat?|A}"|"d 4d}#t5j1|#t5j6d| _7| 8| j7}| j,;| j7| W d    n	1 sHw   Y  t<j=|fi || _>W d    n	1 sbw   Y  | j7d d dkssJ | j7d d }!|!| _| j,@ g| j>_A| jd ur|
| j>d< tB| j>| _CtD| jC|| _EtF| jC|| j| G d| _H| I | _J| K | _L| M | _N| O  | P  | Q  d S )NFgenerate)draftr1   leftrightr   )current_platformz-Sleep mode is only supported on CUDA devices.zUnknown model type ""zcontext/model.yamladditional_special_tokensr)Loaderconfig
vocab_sizehuggingface_target_pretrained_model_namezmodel_config.yaml	tokenizerlibrarytyper   )	hf_configr!   r'   sliding_window_len)Rr   modelr   r?   r   skip_tokenizer_inittrust_remote_coder   r   r   r   r   r   r    r/   r"   r#   r$   r%   r&   r'   r(   served_model_namemultimodal_configmm_processor_kwargsr)   r*   r+   generation_configtask	is_hybridattention_chunk_sizer-   truncation_side_get_encoder_configencoder_config_init_pooler_configpooler_configr.   vllm.platformsr5   is_cuda
ValueErrorr   model_converterRuntimeErrorr   r   r   loadlen_change_paths_to_absolute_pathsopenyaml
SafeLoadernemo_model_config_load_hf_argumentsr   original_vocab_sizeconvert_configr	   	for_modelrB   r   get_architecturearchitecturesr   hf_text_configr   r   r   get_hf_config_sliding_windowr!   _init_attention_freeis_attention_free_init_has_inner_statehas_inner_state_init_has_noops	has_noops_verify_tokenizer_mode_verify_quantization_verify_cuda_graph)$selfr   r   r   r   r   r   r   r   r   r   r   r    r!   r"   r#   r$   r%   r&   r'   r(   r)   r*   r+   r,   r-   r.   r/   r5   tokenizer_configconfig_filehf_argsr?   tokenizer_idarchivemodel_config_file rw   Q/home/ubuntu/.local/lib/python3.10/site-packages/nemo/export/vllm/model_config.py__init__&   s   $












zNemoModelConfig.__init__rq   c                 C   sP   |d }ddg}|D ]}|  |d }r%|| }| sq
t| | |< q
| S )a2  
        Creates absolute path to the local tokenizers. Used for NeMo 2.0.

        Args:
            tokenizer_config (dict): Parameters for instantiating the tokenizer.
            nemo_checkpoint (path): Path to the NeMo2 checkpoint.
        Returns:
            dict: Updated tokenizer config.
        contextr>   
model_pathN)getexistsstrresolve)rq   r   context_path	path_keyspath_keypathtokenizer_pathrw   rw   rx   rZ      s   z/NemoModelConfig._change_paths_to_absolute_pathsnemo_configc                 C   s|   ddddddddgd	d
ddd
dddgd}i }|  D ] \}}t|ts'|g}|D ]}||}|dur:|||<  nq)q|S )zV
        Maps argument names used in NeMo to their corresponding names in HF.
        hidden_sizeffn_hidden_size
num_layersnum_attention_headsnum_query_groupsnum_moe_expertsmax_position_embeddingsencoder_seq_length#share_embeddings_and_output_weightslayernorm_epsilonattention_dropoutinit_method_stdrotary_basebiasadd_bias_linear)r   intermediate_sizenum_hidden_layersr   num_key_value_headsnum_local_expertsr   tie_word_embeddingsrms_norm_epsr   initializer_rangenorm_epsilonr   use_biasN)items
isinstancelistr|   )rp   r   hf_to_nemo_dictrs   hf_argnemo_argnemo_arg_optionvaluerw   rw   rx   r_      s6   

z"NemoModelConfig._load_hf_argumentsc                 O   sZ   t | j}|d d d }| r+|d}t|W  d   S 1 s&w   Y  i S )zF
        Prevent vLLM from trying to load a generation config
        rz   	artifactszgeneration_config.jsonr8   N)r   r   r}   r[   jsonrX   )rp   argskwargs	nemo_pathgeneration_config_pathfrw   rw   rx   try_get_generation_config   s   
 z)NemoModelConfig.try_get_generation_config)__name__
__module____qualname____doc__r   AUTOr~   r   torchr   intr   r   r   dictfloatboolr   ry   staticmethodr   rZ   r_   r   rw   rw   rw   rx   r       s    	


 ("$r   )r   pathlibr   typingr   r   r   r   r   r\   hydra.utilsr   	omegaconfr   transformersr	   vllm.configr
   r   r   r   r   vllm.transformers_utils.configr   nemo.export.tarutilsr   nemo.export.utilsr   !nemo.export.vllm.model_convertersr   r   rw   rw   rw   rx   <module>   s   