o
    ٷi/                     @   s   d dl Z d dlZd dlmZ d dlZd dlmZ d dlmZ d dl	m
Z
 d dlmZmZ d dlmZmZ d dlmZmZ d d	lmZ e eZg d
Zg dZG dd dZdS )    N)Path)float_to_float16_max_diff)	OnnxModel)optimize_model)	T5DecoderT5DecoderHelper)T5EncoderDecoderInitT5EncoderDecoderInitHelper)MT5ForConditionalGenerationT5ForConditionalGeneration)InferenceSession)zt5-smallzt5-basezt5-largezt5-3bzt5-11b)zgoogle/mt5-smallzgoogle/mt5-basezgoogle/mt5-largezgoogle/mt5-xlzgoogle/mt5-xxlc                   @   s<  e Zd Ze		d.dededededef
dd	Ze	
		d/dededejdededede	ee
eB f fddZe				d0dee
B dejdededededefddZe			d1dedee dB dedefd d!Ze				d2ded"ed#ed$ed%eded&ed'ed(efd)d*Zedee
B d+edejdefd,d-ZdS )3T5Helper F
output_dirmodel_name_or_pathsuffix
new_folderreturnc                 C   s^   |}t j|rt|jd }n|dd  ||7 }|r$t j| |n| }t j||d S )a  Build onnx path

        Args:
            output_dir (str): output directory
            model_name_or_path (str): pretrained model name, or path to the model checkpoint
            suffix (str, optional): suffix like "_encoder" or "_decoder_fp16" will be appended to file name. Defaults to None.
            new_folder (bool, optional): create a new directory for the model. Defaults to False.

        Returns:
            str: path of onnx model
        /z.onnx)ospathisdirr   partssplitjoin)r   r   r   r   
model_name	directory r   `/home/ubuntu/.local/lib/python3.10/site-packages/onnxruntime/transformers/models/t5/t5_helper.pyget_onnx_path!   s   zT5Helper.get_onnx_patht5	cache_dirdevice
model_typestate_dict_pathencoder_decoder_initc           
      C   s   |dkrt j| |d}n|dkrtj| |d}ntd|r&|t| t|j|j	|j
}| | t|j|j|j	|j
d| d}|rIdnd}	|	|d	|iS )
a{  Load model given a pretrained name or path, then build models for ONNX conversion.

        Args:
            model_name_or_path (str): pretrained model name or path
            cache_dir (str): cache directory
            device (torch.device): device to run the model
            model_type (str, optional): model type "t5" or "mt5"
            state_dict_path(str, optional): state dictionary path
            encoder_decoder_init (bool, optional): combine encoder and decoder kv cache initialization into one model.
        Returns:
            Dict[str, torch.nn.Module]: mapping from name to modules for ONNX conversion.
        r!   )r"   mt5z only support mode_type=t5 or mt5N)decoder_start_token_idoutput_cross_onlyr&   encoderdecoder)r   from_pretrainedr
   
ValueErrorload_state_dicttorchloadr   r+   lm_headconfigevaltor   r*   )
r   r"   r#   r$   r%   r&   modelr+   r*   encoder_namer   r   r   
load_model>   s&   	zT5Helper.load_modelTr5   onnx_model_pathverboseuse_external_data_formatuse_decoder_input_idsuse_int32_inputsc              	   C   s<   t | trt| |||||| d S t| ||||| d S )N)
isinstancer   r	   export_onnxr   )r5   r#   r8   r9   r:   r;   r<   r   r   r   r>   l   s$   


zT5Helper.export_onnxN
onnx_modelop_block_listforce_fp16_logitsuse_symbolic_shape_inferc                    sp  du rg ddd |   D }t}||}td| d|  |  jd j}d}|  }	||	v s9J |	| }
d}|
j	d	krw|
}td
|
j  d}|
j
D ]}| |}|dura nqTt|}td|
j d|  |dk }ntd|
j	 d|
j  g }g  |s|dur|s|g}|jg dvr|  }d}d}| d}|rd}|D ]=}
|
j vr| |
|	}| |
|} fdd|D } fdd|D }t|t| dkr |
j |d7 }d}q|st|| }td| d|  td   | |d}td|  |r%| jdddi| |S ddlm} || jfddi| |S )a  Convert model to mixed precision.
           It detects whether original model has fp16 precision weights, and set parameters for float16 conversion automatically.
        Args:
            onnx_model (OnnxModel): optimized ONNX model
            op_block_list (List[str], optional): operators need to run in fp32.
            force_fp16_logits (bool, optional): force logits and last MatMul node to be in float16. Defaults to False.
            use_symbolic_shape_infer (bool, optional): use symbolic shape inference to convert float to float16. Defaults to True.
        Returns:
            parameters(dict): a dictionary of parameters used in float16 conversion
        N)SimplifiedLayerNormalization SkipSimplifiedLayerNormalizationReluAddc                 S   s   h | ]}|j qS r   op_type).0noder   r   r   	<setcomp>   s    z0T5Helper.auto_mixed_precision.<locals>.<setcomp>z	fp32 op: z
 fp16 op: r   FMatMulz#Found last MatMul node for logits: z3max diff of converting weights in last MatMul node z: gư>z-Failed to find MatMul node for logits. Found z	 of node rF   Tc                    "   g | ]}|j v s| v r|qS r   rG   )rI   childnode_block_listr@   r   r   
<listcomp>       z1T5Helper.auto_mixed_precision.<locals>.<listcomp>c                    rM   r   rG   )rI   parentrO   r   r   rQ      rR      z#node counter of Add operator: fp32=z fp16=znode_block_list: )keep_io_typesr@   rP   force_fp16_initializersz!auto_mixed_precision parameters: rB   )convert_float_to_float16disable_shape_inferr   )nodesset
differenceloggerinfographoutputnameoutput_name_to_noderH   inputget_initializerr   debugwarninginput_name_to_nodesget_nodes_by_op_typeget_parentsget_childrenlenappendrW   float16r5   )r?   r@   rA   rB   op_full_setfp32_op_setfp16_op_setlogits_output_nameis_weight_fp16_precisionra   rJ   last_matmul_nodeinitializerrb   max_diffrU   rf   fp32_addchanged	add_nodesparentschildrenblocked_childrenblocked_parentsfp16_add
parametersrW   r   rO   r   auto_mixed_precision   s   






zT5Helper.auto_mixed_precisionoptimized_model_path
is_float16num_attention_headshidden_sizer~   use_gpuforce_fp16_ioc	              	   C   sr   ddl m}	 d}
|r|	d}
| |
_t| d||d|
|d}|r/|r)tj||d n|j|d |j||dd	 dS )
zHOptimize ONNX model with an option to convert it to use mixed precision.r   )FusionOptionsNr!   )r$   	num_headsr   	opt_leveloptimization_optionsr   )rA   )cast_input_outputT)all_tensors_to_one_file)fusion_optionsr   enable_skip_layer_normr   r   r~    convert_model_float32_to_float16save_model_to_file)r8   r   r   r   r   r:   r~   r   r   r   r   mr   r   r   optimize_onnx   s&   
zT5Helper.optimize_onnxort_sessionc                 C   s*   t | trt| |||S t| |||S )zQCompare the result from PyTorch and OnnxRuntime to verify the ONNX model is good.)r=   r   r	   verify_onnxr   )r5   r   r#   r<   r   r   r   r   #  s   
zT5Helper.verify_onnx)r   F)r!   r   F)TFTF)NFT)FTFF)__name__
__module____qualname__staticmethodstrboolr    r/   r#   dictr   r   r7   r>   r   listr~   intr   r   r   r   r   r   r   r       s    -
p	'r   )loggingr   pathlibr   r/   rl   r   r?   r   	optimizerr   
t5_decoderr   r   t5_encoder_decoder_initr   r	   transformersr
   r   onnxruntimer   	getLoggerr   r\   PRETRAINED_T5_MODELSPRETRAINED_MT5_MODELSr   r   r   r   r   <module>   s   
	