o
    }oi<                      @   s  d dl Z d dlZd dlZd dlZejdejjdd Zejdejjdd Zejdejjdd Z	ejdejjd	d
 Z
ejdejjdd Zejdejjdd Zejdejjdd Zejdejjdd ZdS )    NGPUc                  C   s   zddl m}  W n ty   td Y d S w t }||d}| |}td}|	 D ]}|
|s=J d| dq.d S )Nr   TensorRTLLMECould not import TRTLLM helpers. tensorrt_llm is likely not installed)z&model.embedding.word_embeddings.weightz8model.decoder.layers.0.self_attention.linear_proj.weightz^model\.[^.].*zKey 'z'' does not properly start with 'model.')nemo.export.tensorrt_llmr   ImportErrorpytestskipobject"get_nemo_to_trtllm_conversion_dictrecompilekeysmatch)r   dummy_statemodel_state_dictnemo_model_conversion_dictpatternkey r   R/home/ubuntu/.local/lib/python3.10/site-packages/tests/export/test_tensorrt_llm.py5test_get_nemo_to_trtllm_conversion_dict_on_nemo_model   s   


r   c                  C   sf   zddl m}  ddlm} W n ty   td Y d S w t }||d}||}|| ks1J d S )Nr   )DEFAULT_CONVERSION_DICTr   r   )z embedding.word_embeddings.weightz2decoder.layers.0.self_attention.linear_proj.weight)	Jmegatron.core.export.trtllm.model_to_trllm_mapping.default_conversion_dictr   r   r   r   r   r	   r
   r   )r   r   r   r   r   r   r   r   6test_get_nemo_to_trtllm_conversion_dict_on_mcore_model0   s   

r   c                  C   s   zddl m}  W n ty   td Y d S w d}| |dd}|j|ks'J |jtj	|dks3J |j
d u s:J |jd u sAJ |jd u sHJ dd	g}| ||dd
}|j|ksZJ | |ddd dd}|jdu sjJ |jdu sqJ |jd u sxJ d S )Nr   r   r   /tmp/test_model_dirF	model_dir
load_modeltrtllm_enginez/path/to/lora1z/path/to/lora2)r   lora_ckpt_listr   )r   use_python_runtimeenable_chunked_contextmax_tokens_in_paged_kv_cacher   )r   r   r   r   r	   r   
engine_dirospathjoinmodel	tokenizerconfigr    r!   r"   r#   )r   r   trt_llmr    r   r   r    test_tensorrt_llm_initializationF   s4   
r,   c                  C   s   zddl m}  W n ty   td Y d S w d}| |dd}|j}t|ts*J t|dks2J t	dd |D s=J |j
}t|tsGJ t|dksOJ d S )	Nr   r   r   r   Fr   c                 s   s    | ]}t |tV  qd S )N)
isinstancestr).0r(   r   r   r   	<genexpr>z   s    z5test_tensorrt_llm_supported_models.<locals>.<genexpr>)r   r   r   r   r	   get_supported_models_listr-   listlenallget_supported_hf_model_mappingdict)r   r   r+   supported_models
hf_mappingr   r   r   "test_tensorrt_llm_supported_modelsj   s   
r9   c                  C   s   zddl m}  W n ty   td Y d S w d}| |dd}ddlm} tj|jftj	|j	ftj
|j
fg}|D ]\}}||}||ksSJ d| d	| d
| q9d S )Nr   r   r   r   Fr   )DataTypez	Expected z for z, got )r   r   r   r   r	   megatron.core.export.data_typer:   torchfloat32float16bfloat16get_input_dtype)r   r   r+   r:   
test_casesstorage_dtypeexpected_dtypeinput_dtyper   r   r   test_tensorrt_llm_input_dtype   s"   




$rE   c                  C   s|   zddl m}  W n ty   td Y d S w d}| |dd}|j}|d ur6t|ts.J |dks4J d S |d u s<J d S )Nr   r   r   r   Fr   )r   r   r   r   r	   get_hidden_sizer-   int)r   r   r+   hidden_sizer   r   r   test_tensorrt_llm_hidden_size   s   
rI   c                  C   sD  zddl m}  W n ty   td Y d S w d}| |dd}|j}t|ts*J |d jdks3J |d jd	ks<J |d
 jdksEJ |d jdksNJ |d jdksWJ |d jdks`J |d jdksiJ |d jdksrJ |d jdks{J |j	}t|tsJ |d jdksJ |d jdksJ |d
 jdksJ d S )Nr   r   r   r   Fr   prompts   max_output_len   top_k   top_p   temperature   random_seed   stop_words_list   bad_words_list   no_repeat_ngram_sizeoutputsgeneration_logitscontext_logits)
r   r   r   r   r	   get_triton_inputr-   tuplenameget_triton_output)r   r   r+   triton_inputtriton_outputr   r   r   test_tensorrt_llm_triton_io   s0   
rd   c                  C   s   zddl m}  W n ty   td Y d S w d}| |dd}d}d}d	}t|||}||}t|tj	s:J |j
d |ksCJ |j
d
 |ksLJ |j
d |ksUJ d S )Nr   r   r   r   Fr   rM   rO   i  rK   )r   r   r   r   r	   r<   randn_pad_logitsr-   Tensorshape)r   r   r+   
batch_sizeseq_len
vocab_sizelogitspadded_logitsr   r   r   test_tensorrt_llm_pad_logits   s"   

rn   )r%   r   r   r<   markrun_only_onunitr   r   r,   r9   rE   rI   rd   rn   r   r   r   r   <module>   s8   



"



