o
    }oih                     @   sf   d dl Z d dlZd dlZd dlmZ d dlmZ d dlm	Z	 dd Z
dd Zed	kr1ee
  dS dS )
    N) get_llama_bidirectional_hf_model)OnnxLLMExporter)loggingc                  C   s   t jdd} | jdtddd | jdtdd	d
 | jddddd | jdtddd
 | jdtddd
 | jdtddd
 | jddddd |  S )Nz7Test ONNX and TensorRT export for LLM embedding models.)descriptionz--hf_model_pathTzHugging Face model id or path.)typerequiredhelpz--pooling_strategyavgzPooling strategy for the model.)r   defaultr   z--normalizeF
store_truez Normalize the embeddings or not.)r
   actionr   z--onnx_export_pathz/tmp/onnx_model/zPath to store ONNX model.z--onnx_opset   zONNX version to use for export.z--trt_model_pathz/tmp/trt_model/zPath to store TensorRT model.z--trt_version_compatiblez7Whether to generate version compatible TensorRT models.)argparseArgumentParseradd_argumentstrint
parse_args)parser r   Q/home/ubuntu/.local/lib/python3.10/site-packages/tests/export/test_export_onnx.pyget_args   s   r   c                 C   sZ  t | j| j| jdd\}}g d}ddddddddid}dg}ddd	di}t| j||d
}|j||| j||dd ddgddgddggddgddgddggdgdgdggdg}d }	| jrft	j
jg}	g d}
d}d}|j| j|||
||	d tj| jsJ tj| jsJ ddg}||}dgg|d< ||}|d u rtd d S d S )NT)model_name_or_path	normalizepooling_modetrust_remote_code)	input_idsattention_mask
dimensions
batch_size
seq_length)r      r   
embeddingsembedding_dim)onnx_model_dirmodel	tokenizerfp32)input_namesoutput_namesopsetdynamic_axes_inputdynamic_axes_outputexport_dtyper!            @      )z/model/norm/z/pooling_modulez	/ReduceL2z/Divlayer_names_only)trt_model_dirprofiles$override_layernorm_precision_to_fp32override_layers_to_fp32profiling_verbositytrt_builder_flagshelloworld   r   z5Output is None because ONNX runtime is not installed.)r   hf_model_pathr   pooling_strategyr   onnx_export_pathexport
onnx_opsettrt_version_compatibletrtBuilderFlagVERSION_COMPATIBLEexport_onnx_to_trttrt_model_pathospathexistsget_tokenizerforwardr   warning)argsr%   r&   r(   r+   r)   r,   onnx_exporterinput_profilesr9   r7   r6   r8   promptoutputr   r   r   export_onnx_trt+   sl   
	
	

rS   __main__)r   rH   tensorrtrC   1nemo.collections.llm.gpt.model.hf_llama_embeddingr   nemo.export.onnx_llm_exporterr   
nemo.utilsr   r   rS   __name__r   r   r   r   <module>   s   T