o
    ٷi3                     @   sx   d Z ddlZddlZddlZddlZddlmZ ddlmZ ddl	Z	ddl
Z	ddlmZmZ edZG dd	 d	ZdS )
zClass for ONNX model.    N)deque)Path   )MAXIMUM_PROTOBUFfind_by_nameneural_compressorc                   @   s  e Zd ZdZdd Zdd Zedd Zedd	 Zej	d
d	 Zdd Z
edd Zej	dd Zedd Zedd Zej	dd Zdd Zdd Zdd Zedd Zdd Zdd  Zd!d" Zd#d$ Zd%d& Zd'd( Zd)d* Zd+d, Zd-d. Zd/d0 Zd1d2 Zd3d4 Zd5d6 Zd7d8 Zd9d: Z d;d< Z!d=d> Z"d?d@ Z#ddBdCZ$edDdE Z%dFdG Z&edHdI Z'dJdK Z(dLdM Z)ddOdPZ*ddQdRZ+ddSdTZ,dUdV Z-dWdX Z.dYdZ Z/dd[d\Z0e1d]d^ Z2dd_d`Z3e1dadb Z4ddcddZ5dedf Z6ddgdhZ7ddidjZ8dkdl Z9ddmdnZ:dodp Z;dqdr Z<dsdt Z=dudv Z>ddwdxZ?	N	N	N	NddydzZ@	N	Ndd{d|ZAd}d~ ZBdd ZC	dddZDdd ZEdd ZFdd ZGdd ZHdddZIdddZJdd ZKdd ZLdNS )	ONNXModelzBuild ONNX model.c                 K   s.  t |ts|ntj|dd| _t |tsdn|| _|   | jr0| jdu r0|dds0t	
d | jrKt |trK|ddrKtj| jtj| j d| _t |trrtjt|jd rrd	d
lm} |t|j | _i | _i | _i | _| | jjj | | jjj i | _ | !  d| _"dS )a;  Initialize an ONNX model.

        Args:
            model (str or ModelProto): path to onnx model or loaded ModelProto model object.
            ignore_warning (bool): ignore large model warning. Default is False.
            load_external_data (bool): load external data for large model. Default is True.
        F)load_external_dataNignore_warningzPModel size > 2GB. Please use model path instead of onnx model object to quantizer	   Tconfig.jsonr   )
AutoConfig)#
isinstancestronnxload_model_model_pathcheck_is_large_model_is_large_modelgetloggerwarningexternal_data_helperload_external_data_for_modelospathdirname_configexistsr   parentjoinpathas_posixtransformersr   from_pretrainednode_name_counter_output_name_to_node_input_name_to_nodes_get_input_name_to_nodesgraphnode_get_output_name_to_node_graph_info_get_graph_info	_q_config)selfmodelkwargsr    r1   i/home/ubuntu/.local/lib/python3.10/site-packages/onnxruntime/quantization/neural_compressor/onnx_model.py__init__)   s&   
&
zONNXModel.__init__c                 C   s   d}| j jjD ]K}|dr|jtjjkrd| _ dS z|	 }|t
|7 }W n tyG } zdt|v rAd| _W Y d}~ dS |d}~ww |tkrRd| _ dS qd| _dS )zCheck model > 2GB.r   data_locationTNz$exceeds maximum protobuf size of 2GBF)r   r(   initializerHasFieldr4   r   TensorProtoEXTERNALr   SerializeToStringsys	getsizeof	Exceptionr   r   )r.   	init_sizeinit
init_byteser1   r1   r2   r   J   s(   
zONNXModel.check_is_large_modelc                 C      | j S )z!Check the onnx model is over 2GB.)r   r.   r1   r1   r2   is_large_modela      zONNXModel.is_large_modelc                 C   rA   )zReturn model path.r   rB   r1   r1   r2   
model_pathf   rD   zONNXModel.model_pathc                 C   
   || _ dS )zSet model path.NrE   )r.   r   r1   r1   r2   rF   k      
c                 C   s   dS )zReturn framework.onnxruntimer1   rB   r1   r1   r2   	frameworkp   s   zONNXModel.frameworkc                 C   rA   )zReturn q_config.r-   rB   r1   r1   r2   q_configt   rD   zONNXModel.q_configc                 C   rG   )zSet q_config.NrK   )r.   rL   r1   r1   r2   rL   y   rH   c                 C   rA   )z8Return huggingface config if model is Transformer-based.)r   rB   r1   r1   r2   	hf_config~   rD   zONNXModel.hf_configc                 C   rA   )zReturn model itself.)r   rB   r1   r1   r2   r/      rD   zONNXModel.modelc                 C   sD   || _ i | _|   i | _i | _| | j jj | | j jj dS )zSet model itself.N)	r   r+   r,   r%   r&   r'   r(   r)   r*   )r.   r/   r1   r1   r2   r/      s   c                 C      dd | j jjD S )zReturn input of model.c                 S      g | ]}|j qS r1   name.0ir1   r1   r2   
<listcomp>       z#ONNXModel.input.<locals>.<listcomp>)r   r(   inputrB   r1   r1   r2   rW         zONNXModel.inputc                 C   rN   )zReturn output of model.c                 S   rO   r1   rP   rR   r1   r1   r2   rU      rV   z$ONNXModel.output.<locals>.<listcomp>)r   r(   outputrB   r1   r1   r2   rY      rX   zONNXModel.outputc                 C   s>   i | _ |   i | _i | _| | jjj | | jjj dS )zUpdate model info.N)	r+   r,   r%   r&   r'   r   r(   r)   r*   rB   r1   r1   r2   update   s   zONNXModel.updatec                 C   rA   )zEReturn ORT Graph Info object holding information about backend graph.)r+   rB   r1   r1   r2   
graph_info   rD   zONNXModel.graph_infoc                 C   s(   | j jjD ]}| j|j|ji qdS )zUpdate graph info.N)r   r(   r)   r[   rZ   rQ   op_typer.   r)   r1   r1   r2   r,      s   zONNXModel._get_graph_infoc              	   C   s   t j|d dkrt jt j|d std| jrAtj| j	t j| j
d  tj| j	|dd|dd d dd	d
 nt| j	| | jdurrt| jdsUdn| jj}|| jj_t|jd }| jj|d	d dS dS )zSave ONNX model.r    z!"root" directory does not exists.T/_data   Fsave_as_external_dataall_tensors_to_one_filelocationsize_thresholdconvert_attributeN
model_typer   )use_diff)r   r   splitr   
ValueErrorrC   r   r   r   r   r   
save_modelsaver   hasattrri   	__class__r   r   r    r!   to_json_file)r.   rootri   output_config_filer1   r1   r2   rn      s(   ,


zONNXModel.savec                 C   
   | j jjS )zReturn model nodes.)r   r(   r)   rB   r1   r1   r2   nodes      
zONNXModel.nodesc                 C   rt   )zReturn model initializer.)r   r(   r5   rB   r1   r1   r2   r5      rv   zONNXModel.initializerc                 C      | j jS )zReturn model graph.)r   r(   rB   r1   r1   r2   r(         zONNXModel.graphc                 C   rw   )zReturn model ir_version.)r   
ir_versionrB   r1   r1   r2   ry      rx   zONNXModel.ir_versionc                 C   rw   )zReturn model opset_import.)r   opset_importrB   r1   r1   r2   rz      rx   zONNXModel.opset_importc                 C   &   || j jjv r| j jj| dS dS )zRemove a node from model.N)r   r(   r)   remover]   r1   r1   r2   remove_node      zONNXModel.remove_nodec                 C      |D ]}|  | qdS )zRemove nodes from model.N)r}   )r.   nodes_to_remover)   r1   r1   r2   remove_nodes      zONNXModel.remove_nodesc                 C   s   | j jj|g dS )zAdd a node to model.Nr   r(   r)   extendr]   r1   r1   r2   add_node   s   zONNXModel.add_nodec                 C   s   | j jj| dS )zAdd nodes to model.Nr   )r.   nodes_to_addr1   r1   r2   	add_nodes   rX   zONNXModel.add_nodesc                 C   s0   t |j| jjjdu r| jjj|g dS dS )zAdd a initializer to model.N)r   rQ   r   r(   r5   r   r.   tensorr1   r1   r2   add_initializer   s   zONNXModel.add_initializerc                 C   r   )zAdd initializers to model.N)r   )r.   tensorsr   r1   r1   r2   add_initializers   r   zONNXModel.add_initializersc                 C   &   | j jjD ]}|j|kr|  S qdS )zGet an initializer by name.N)r   r(   r5   rQ   )r.   rQ   r   r1   r1   r2   get_initializer   
   
zONNXModel.get_initializerc                 C   s:   d}|  |du r|S |  D ]}||jv r|d7 }q|S )z(Get the number of shares of initializer.r   Nr   )r   ru   rW   )r.   rQ   numr)   r1   r1   r2   get_initializer_share_num   s   
z#ONNXModel.get_initializer_share_numc                 C   r   )zGet a node by name.N)r   r(   r)   rQ   )r.   rQ   r)   r1   r1   r2   get_node  r   zONNXModel.get_nodec                 C   r{   )z!Remove an initializer from model.N)r   r(   r5   r|   r   r1   r1   r2   remove_initializer  r~   zONNXModel.remove_initializerc                 C   r   )zRemove initializers from model.N)r   )r.   init_to_remover5   r1   r1   r2   remove_initializers  r   zONNXModel.remove_initializersFc                 C   sf   |  |}| | |j}|j}|stj||||  ntjj||||	 |d}| 
| dS )zUpdate initializer.)rawN)r   r   dims	data_typer   helpermake_tensorflattentolisttostringr   )r.   r   arrayr   
old_tensorr   r   
new_tensorr1   r1   r2   set_initializer  s   

zONNXModel.set_initializerc                 C   rA   )zReturn input names of nodes.)r&   rB   r1   r1   r2   input_name_to_nodes&  rD   zONNXModel.input_name_to_nodesc                 C   s   |D ]>}dd |j D }t|dkr|D ]	}| |jj q|jD ]}t| dkr?|| jvr7|g| j|< q!| j| | q!qdS )zGet input names of nodes.c                 S   ,   g | ]}|j tjjks|j tjjkr|qS r1   typer   AttributeProtoGRAPHGRAPHSrS   attrr1   r1   r2   rU   .  
    z6ONNXModel._get_input_name_to_nodes.<locals>.<listcomp>r   N)		attributelenr'   gr)   rW   stripr&   append)r.   ru   r)   attrsr   
input_namer1   r1   r2   r'   +  s   

z"ONNXModel._get_input_name_to_nodesc                 C   rA   )zReturn output names of nodes.)r%   rB   r1   r1   r2   output_name_to_node=  rD   zONNXModel.output_name_to_nodec                 C   sh   |D ]/}dd |j D }t|dkr|D ]	}| |jj q|jD ]}t| dkr0|| j|< q!qdS )zGet output names of nodes.c                 S   r   r1   r   r   r1   r1   r2   rU   E  r   z6ONNXModel._get_output_name_to_node.<locals>.<listcomp>r   N)r   r   r*   r   r)   rY   r   r%   )r.   ru   r)   r   r   output_namer1   r1   r2   r*   B  s   

z"ONNXModel._get_output_name_to_nodec                 C   s>   g }|  |D ]}| |D ]}|j|jkr|| qq|S )zGet siblings nodes.)get_parentsget_childrenrQ   r   )r.   r)   siblingsr   childr1   r1   r2   get_siblingsQ  s   
zONNXModel.get_siblingsNc                 C   sB   |du r| j }g }|jD ]}||v r|| D ]}|| qq|S )zGet children nodes.N)r&   rY   r   )r.   r)   r   childrenrY   r   r1   r1   r2   r   Z  s   
zONNXModel.get_childrenc                 C   s8   |du r| j }g }|jD ]}||v r|||  q|S )zGet parents nodes.N)r%   rW   r   )r.   r)   r   parentsrW   r1   r1   r2   r   f  s   
zONNXModel.get_parentsc                 C   s>   |du r| j }t|j|krdS |j| }||vrdS || S )zGet parent node by idx.N)r%   r   rW   )r.   r)   idxr   rW   r1   r1   r2   
get_parentq  s   
zONNXModel.get_parentc                 C   s"   t |j}|| t||}|S )zFind out node by name.)listr)   r   r   )r.   	node_namenew_nodes_listr(   graph_nodes_listr)   r1   r1   r2   find_node_by_name  s   


zONNXModel.find_node_by_namec                 C   s4   g }|j D ]}|jD ]}||jkr|| q
q|S )z2Find all nodes with given initializer as an input.)r)   rW   rQ   r   )r.   r(   r5   ru   r)   
node_inputr1   r1   r2   find_nodes_by_initializer  s   



z#ONNXModel.find_nodes_by_initializerc                    s   | dstd| d dS  fdd j| d }|jdkr*||jd	 ks6|jd
kr8||jd kr8dS  |\}}|sGJ d| |sPJ d| ||fS )z*Help function to get scale and zero_point.
_quantizedzFind z) in the quantized graph is not quantized.NNc           	         s   j |  d }j| d}g d}|dur-|j|v r-|jd dddddd}n%|jdv rD|jd dddddd}n| dddddd}|d	 }|}|d
 }|}|du sl|du ry|dury |jd \}}||fS )z/Search scale and zero point tensor recursively.r   N)Reshape	TransposeSqueeze	UnsqueezeMaxPoolPadSplitr   r^   _QuantizeLinear_QuantizeInput)Gather_scale_zero_point)r&   r%   r   r\   rW   replacerY   r   )	tensor_namer)   r   direct_int8fp32_tensor_namescalescale_tensorzo	zo_tensor	_searcherr.   r1   r2   r     s2   


z+ONNXModel.get_scale_zero.<locals>._searcherr   QLinearConvr`   QGemmzmissing scale for tensor zmissing zero point for tensor )endswithr   debugr&   r\   rW   )r.   r   r)   r   r   r1   r   r2   get_scale_zero  s   
!zONNXModel.get_scale_zeroc                 C   s4   |rt jj| jdt|jd d t | j| dS )zBSave model to external data, which is needed for model size > 2GB.Tz.data)re   rf   N)r   r   convert_model_to_external_datar   r   rQ   rm   )r.   output_pathuse_external_data_formatr1   r1   r2   save_model_to_file  s
   zONNXModel.save_model_to_filec                 C   H   t |tr
t |tsJ tt| jD ]}| j| |kr!|| j|< qdS )zReplace input of a node.N)r   r   ranger   rW   )r)   old_input_namenew_input_namejr1   r1   r2   replace_node_input     
zONNXModel.replace_node_inputc                 C   |   |du rg }|du rg }t |dkr(| jjjD ]}|j|v r%t||| qdS | jjjD ]}|j|vr;t||| q-dS )zReplace inputs of all nodes.Nr   )r   r/   r(   r)   r\   r   r   )r.   r   r   white_optypeblack_optyper)   r1   r1   r2   replace_input_of_all_nodes     

z$ONNXModel.replace_input_of_all_nodesc                 C   r   )zReplace output of a node.N)r   r   r   r   rY   )r)   old_output_namenew_output_namer   r1   r1   r2   replace_node_output  r   zONNXModel.replace_node_outputc                 C   r   )zReplace outputs of all nodes.Nr   )r   r/   r(   r)   r\   r   r   )r.   r   r   r   r   r)   r1   r1   r2   replace_output_of_all_nodes  r   z%ONNXModel.replace_output_of_all_nodesc           
      C   s  g }|   }|D ]}|jdkr'|jd | jjjvr'|jd | jvr'|| q|jdkrbt| |dkrb| |d jdkrb|j	d | j
vrb| |d jd | jvrb|| || | qd}|jD ]}|| jv st||  v rxd} nqg|j	D ]}| |durq||| j
v s|| 	 v rd} nq||r|| q| | g }| jjjD ]+}|j| jvr|j| jjjvr|| |  j	D ]}	|	j|jkr|  j	|	 qq| | |   dS )	zRemove unused nodes.Constantr   QuantizeLinearr   DequantizeLinearTFN)ru   r\   rY   r   r(   r&   r   r   r   rW   r%   r   r   r   r5   rQ   r|   r   rZ   )
r.   unused_nodesru   r)   unusedrY   rW   ununsed_weightswgraph_inputr1   r1   r2   remove_unused_nodes  sR   








zONNXModel.remove_unused_nodesc                    s  |s@i }i j jjD ]3}|jD ]}t| dkr+||vr$|g||< q|| | q|jD ]}t| dkr=||< q/qnj}j	i  t
 t
 }j jjD ]
}||j  qSj jjD ]}tfdd|jD rw| qcrʈ }t fdd|jD s||vr|| qx| |j< |jD ]}	|	|v r fdd||	 D  qtdkrt|dkrt||  szdd   D }
ttdd	 |
D ttdd	 j jjD ksJ j jd
 j jj|
 dS )zTopological sort the model.r   c                 3   s$    | ]}| vo|  vV  qd S N)rW   rR   )r   r.   r1   r2   	<genexpr>D  s   " z-ONNXModel.topological_sort.<locals>.<genexpr>c                 3   s&    | ]}|v r| j  v V  qd S r   rP   rR   )	all_nodesr   r1   r2   r  I  s   $ c                    s"   g | ]}|j  vr|vr|qS r1   rP   rR   )r  qr1   r2   rU   Q  s   " z.ONNXModel.topological_sort.<locals>.<listcomp>c                 S   s   g | ]}|d  qS )r   r1   rR   r1   r1   r2   rU   U  s    c                 S   s   h | ]}|j qS r1   rP   )rS   nr1   r1   r2   	<setcomp>V  rV   z-ONNXModel.topological_sort.<locals>.<setcomp>r)   N)r/   r(   r)   rW   r   r   r   rY   r&   r%   r   r   rQ   allpopleftcopydeepcopyclearitemsr   
ClearField)r.   enable_subgraphr   r)   r   r   waitinpr  outru   r1   )r  r   r  r.   r2   topological_sort*  s\   






6zONNXModel.topological_sortc           	      C   s   |du rg }t  }|D ]}t|tr|| qt|tjr%||j qJ dg }|D ]}t|tr:|| q-t|tjrG||j q-J d|r{| }||v rVqK||vr`|| nqKt|t	| j
jj}| |D ]}||j qp|sM|S )z4Get nodes chain with given start node and stop node.NFzM'get_nodes_chain' function only support list[string]or list[NodeProto] params)r   r   r   r   r   	NodeProtorQ   r  r   r   r/   r(   r)   r   )	r.   startstopresult_chain
start_noder)   	stop_noder   r   r1   r1   r2   get_nodes_chainZ  s8   

zONNXModel.get_nodes_chainc                 C   s  g }| j jjD ]}d\}}|jdkr*|}| |g dg d| |g dg dg}|jdkr}|}| |g dg d| |g dg d	| j|g d
g d| jg d| |g dg d| |g dg d| |g dg d| |g dg dg}|sqt|sq|| q|S )z,Find split node for layer wise quantization.r   SkipLayerNormalizationMatMulr   r   r   r  Nr   r   r   r   Addr  r   r   r  r   r   r   r   r   r  r   Nr   r   r   r   Nr   r   r   r   Gemmr   r   r   r  Nr   r   r   r   r   r   return_indicer  r  r   r   r   r  r   Nr   r   r   r   r   Nr   r   r   r   )r  Mulr  r*  Divr  )Nr   Nr   Nr   )r  r*  r  SimplifiedLayerNormalizationr  )Nr   Nr   r   )r   r(   r)   r\   match_parent_pathr   anyr   )r.   start_nodesr)   r  qkv_nodes_listr1   r1   r2   +find_split_node_for_layer_wise_quantization  sx   


(z5ONNXModel.find_split_node_for_layer_wise_quantizationc                 C   s  g }| j jjD ]}|jdkr||jg qd\}}|jdkr7|}| |g dg d| |g dg dg}|jdkrv|}| |g dg d	| |g dg d
| j|g dg d| jg d| |g dg d| |g dg dg}|syqt|s~qdd |D d }g }|j	D ]}|| jvrq||d j
d krq|| qt|dkrq|d }	| j}
|
|	 }dd |D }|ddkr|dd |D  |s |S q|S )zFind qkv MatMul in Attention.

        Args:
            find_all (bool, optional): find all qkv MatMul. Defaults to False

        Returns:
            qkv (list): qkv MatMul list
        	Attentionr   r  r  r  r  r  r  r   r!  r"  r$  r%  r'  r(  r)  c                 S   s   g | ]}|d ur|qS r   r1   )rS   qkvr1   r1   r2   rU     s    z3ONNXModel.find_qkv_in_attention.<locals>.<listcomp>r`   r   r   c                 S   rO   r1   )r\   rS   r   r1   r1   r2   rU     rV   r     c                 S   s   g | ]
}|j d kr|jqS )r  )r\   rQ   r4  r1   r1   r2   rU     s    )r   r(   r)   r\   r   rQ   r-  r   r.  rW   rY   r   r   count)r.   find_allr3  r)   r  r0  	qkv_nodesother_inputsrW   
root_inputr   r   children_typesr1   r1   r2   find_qkv_in_attention  s   	




 zONNXModel.find_qkv_in_attentionc                 C   s   g }t t|D ]G}|t|d kr.||d  }|d dkr-|||d  ||d  g q|| }|| d t|k rO|||| d  ||| d  g q|S )a  Find MatMul in FFN.

        Args:
            attention_index (list): index of Attention
            attention_matmul_list (list): list of Attention and MatMul nodes
            block_len (int): block length

        Returns:
            list: list of MatMul in FFN
        r      r   )r   r   r   )r.   attention_indexattention_matmul_list	block_len
ffn_matmulr   indexr1   r1   r2   find_ffn_matmul"  s   zONNXModel.find_ffn_matmulc                 C   s   ddl m} ddlm} t||r=|| j| j\}}}| | | | | 	| | 
  |   |   | | dS td td dS )zExport Qlinear to QDQ model.r   )ONNXQlinear2QDQConfig)onnx_qlinear_to_qdqzGUnsupported config for export, only ONNXQlinear2QDQConfig is supported!N)neural_compressor.configrD  neural_compressor.utils.exportrE  r   r   r&   r   r   r   rZ   r   r  rn   r   r   exit)r.   	save_pathconfrD  rE  r   r   initsr1   r1   r2   export;  s   




zONNXModel.exportc                 C   sH   g }|D ]}||   vrtj }||_|| q| jjj | dS )zAdd the tensors to the model outputs to gets their values.

        Args:
            tensor_names: The names of tensors to be dumped.
        N)	rY   r   r   ValueInfoProtorQ   r   r   r(   r   )r.   tensor_namesadded_outputsr   added_tensorr1   r1   r2   add_tensors_to_outputsM  s   

z ONNXModel.add_tensors_to_outputsc                 C   sV   g }|D ]}||   v r|| jjj |   |  q|D ]
}| jjj | qdS )zRemove the tensors from the model outputs.

        Args:
            tensor_names: The names of tensors to be removed.
        N)rY   r   r   r(   rB  r|   )r.   rN  removed_outputsr   rY   r1   r1   r2   remove_tensors_from_outputs[  s   z%ONNXModel.remove_tensors_from_outputsc                 C   sR   |du rg }t |jD ]\}}||v r&|| }|j|kr&||vr&||f  S qdS )a  Find parent node based on constraints on op_type.

        Args:
            node (str): current node name.
            parent_op_type (str): constraint of parent node op_type.
            output_name_to_node (dict): dictionary with output name as key, and node as value.
            exclude (list): list of nodes that are excluded (not allowed to match as parent).

        Returns:
            parent: The matched parent node. None if not found.
            index: The input index of matched parent node. None if not found.
        Nr   )	enumeraterW   r\   )r.   r)   parent_op_typer   excluderT   rW   r   r1   r1   r2   match_first_parenth  s   zONNXModel.match_first_parentc           	      C   s   |dusJ |du s|dksJ |du rg }|du r| j }|du r6| ||||\}}|dur4|| |S |t|jkr?dS | |||}|durU|j|krU||vrU|S dS )a|  Find parent node based on constraints on op_type and index.

        Args:
            node (str): current node name.
            parent_op_type (str): constraint of parent node op_type.
            input_index (int or None): only check the parent given input index of current node.
            output_name_to_node (dict): dictionary with output name as key, and node as value.
            exclude (list): list of nodes that are excluded (not allowed to match as parent).
            return_indice (list): a list to append the input index when input_index is None.

        Returns:
            parent: The matched parent node.
        Nr   )r%   rW  r   r   rW   r   r\   )	r.   r)   rU  input_indexr   rV  r&  r   rB  r1   r1   r2   match_parent~  s"   
zONNXModel.match_parentc              	   C   sv   t |t |ks
J |du r| j}|}g }t|D ]\}}	| j||	|| |g |d}
|
du r1 dS ||
 |
}q|S )a  Find a sequence of input edges based on constraints on parent op_type and index.

        Args:
            node (str): current node name.
            parent_op_types (str): constraint of parent node op_type of each input edge.
            parent_input_index (list): constraint of input index of each input edge.
                                       None means no constraint.
            output_name_to_node (dict): dictionary with output name as key, and node as value.
            return_indice (list): a list to append the input index when there is
                                  no constraint on input index of an edge.

        Returns:
            parents: a list of matched parent node.
        N)rV  r&  )r   r%   rT  rY  r   )r.   r)   parent_op_typesparent_input_indexr   r&  current_nodematched_parentsrT   r\   matched_parentr1   r1   r2   r-    s&   
zONNXModel.match_parent_pathc                 C   s$   | j jjD ]
}d|jv r dS qdS )z~Check the model is smooth quantized or not.

        Returns:
            bool: the model is smooth quantized or not.
        _smooth_scaleTF)r/   r(   r5   rQ   )r.   r>   r1   r1   r2   is_smoothquant_model  s
   
zONNXModel.is_smoothquant_modelc                 C   s   |   }|S )z-Find split nodes for layer-wise quantization.)r1  )r.   split_nodesr1   r1   r2   find_split_nodes  s   zONNXModel.find_split_nodesTc              
   C   s  t  }|| j |jd t  }|| j |jd d}d}| jjjD ]#}	|dkr7|jj|	 n|dkrB|jj|	 |	j|krL|	j	}d}q)t
|dks`J d| dt
| d|d }
|rzdd	lm} || jd
tj| jd| _W n ty } ztd |d}~ww | |
\}}t j|
||}t|d
d}t|d
d}|  |  |jjj	| |jjj| g }g }|jD ].}||jv r| |\}}t j|||}||jjj	vr|| ||jjjvr|| q|D ]
}|jjj	| q|D ]}|jjj| q|  |  |  |  tj|}|| tj |d}||_!|"| |#  t$d| d |rs|| tj |d}||_!|"| |#  t$d| d ||fS ||fS )a[  Split model into two parts at a given node.

        Args:
            split_node_name (str): name of the node where the model is split at>
            path_of_model_to_split (str): path of model to be split.
            shape_infer (bool): do shape inference. Default is True.
            save_both_split_models (bool): whether to save the two split models.
                False means only save the first split model.
                True means save both the two split models.
                Default id True.

        Returns:
            tuple: the first split model, the second split model
        r)   Nr   r=  zJOnly support split at node with 1 output tensor, while current split node z has z output tensorsr   )infer_shapesT)
auto_mergebase_dirzShape infer fails for layer-wise quantization. We would recommend checking the graph optimization level of your model and setting it to 'DISABLE_ALL' or 'ENABLE_BASIC', as this may help avoid this error.)r
   zsplit_model_part_1.onnxzsave split model part 1 to z for layer wise quantizationzsplit_model_part_2.onnxzsave split model part 2 to )%r   
ModelProtoCopyFromr   r(   r  r)   r   rQ   rY   r   'neural_compressor.adaptor.ox_utils.utilrc  r   r   r   r   r<   r   error%_get_output_type_shape_by_tensor_namer   make_tensor_value_infor   _remove_unused_input_outputr/   rW   r   r   remove_unused_initrZ    load_model_initializer_by_tensorjoinrF   _save_split_modelr   r   )r.   split_node_namepath_of_model_to_splitshape_infersave_both_split_modelssplit_model_part_1split_model_part_2split_node_outputpart_idxr)   split_tensor_namerc  r@   split_tensor_typesplit_tensor_shapesplit_tensorinsert_output_for_model_1insert_input_for_model_2rY   output_typeoutput_shapeoutput_tensorrW   dir_of_model_to_splitsplit_model_part_1_pathsplit_model_part_2_pathr1   r1   r2   split_model_with_node  s   
 	







zONNXModel.split_model_with_nodec              	   C   sJ   t j|d rt |d  tj| j|dd|dd d ddd dS )	zSave split model as external data for layer wise quantization.

        Args:
            save_path (str): the path to save the split model
        ra   Tr_   r`   rb   Frc   N)r   r   r   r|   r   rm   r   rk   )r.   rI  r1   r1   r2   rp  `  s   
zONNXModel._save_split_modelc                 C   sX   t jj}d}| jjjD ]}|j|kr'|jjj	}dd |jjj
jD } ||fS q||fS )zGet output type and shape with a tensor name.

        Args:
            tensor_name (str): name of a tensor

        Returns:
            tuple: output type and shape
        Nc                 S   s    g | ]}| d r|jndqS )	dim_valuer`   )r6   r  )rS   dimr1   r1   r2   rU     s    zCONNXModel._get_output_type_shape_by_tensor_name.<locals>.<listcomp>)r   r7   FLOATr   r(   
value_inforQ   r   tensor_type	elem_typeshaper  )r.   r   r  r  rY   r1   r1   r2   rj  r  s   	


z/ONNXModel._get_output_type_shape_by_tensor_namec                 C   s   g }g }| j jjD ]}|j| jvr|| q	| j jjD ]}|j| jvr)|| q|D ]
}| j jj| q,|D ]
}| j jj| q9dS )z-Remove unused input & output for split model.N)	r   r(   rY   rQ   r   r   rW   r   r|   )r.   remove_outputsremove_inputsrY   rW   r1   r1   r2   rl    s   

z%ONNXModel._remove_unused_input_outputc                 C   s8   g }| j jjD ]}|j| jvr|| q| | dS )zRemove unused init.N)r   r(   r5   rQ   r   r   r   )r.   remov_initsr>   r1   r1   r2   rm    s   
zONNXModel.remove_unused_initc                 C   sP   |du rt j| j}| jjjD ]}|dr%|jt	j
jkr%t	j|| qdS )zLoad model initializer by tensor.

        Args:
            data_path (str, optional): the directory of saved initializer. Defaults to None.
        Nr4   )r   r   r   r   r   r(   r5   r6   r4   r   r7   r8   r   load_external_data_for_tensor)r.   	data_pathr>   r1   r1   r2   rn    s   z*ONNXModel.load_model_initializer_by_tensorexternal.datac                 C   s|   |r!t jt jt j| j|r!t t jt j| j| |   tj	j
| j|d tj	j| jt j| jd dS )a}  Write external data of merged quantized model to new location to save memory.

        Args:
            external_data_location (str, optional): external data location of merged quantized model.
                                                    Defaults to "external.data".
            overwrite (bool, optional): if True, remove existed externa data. Defaults to False.
        )rf   )filepathN)r   r   r   ro  r   r   r|   rn  r   r   r   r   write_external_data_tensors)r.   external_data_location	overwriter1   r1   r2   #write_external_data_to_new_location  s
   $ z-ONNXModel.write_external_data_to_new_locationc                 C   s   |   | t|  | t|  |   | jD ]}|j	|  vr0| j
jj| qg }| j
jjD ]}|j	| v rF|| q8|D ]
}| j
jj| qI| jD ]}|j	|  vrw|j	|  vrw|j	| jvrw| j
jj| qYdS )z'Merge two split model into final model.N)r  r   r   ru   r   r5   rZ   r(   rY   rQ   r   r   rW   r|   r   )r.   to_merge_modelrY   remove_outputrW   r1   r1   r2   merge_split_models  s,   
zONNXModel.merge_split_modelsc                 C   sh   i }g }| j jjD ]}|||j< || q	|D ]
}| j jj| q|D ]}| j jj||  q%dS )z:Re-org output of merged model for layer-wise quantization.N)r   r(   rY   rQ   r   r|   )r.   origin_outputoutputs
tmp_removerY   out_namer1   r1   r2   re_org_output  s   
zONNXModel.re_org_output)Fr   r   )NNNN)TT)r  F)M__name__
__module____qualname____doc__r3   r   propertyrC   rF   setterrJ   rL   rM   r/   rW   rY   rZ   r[   r,   rn   ru   r5   r(   ry   rz   r}   r   r   r   r   r   r   r   r   r   r   r   r   r'   r   r*   r   r   r   r   r   r   r   r   staticmethodr   r   r   r   r   r  r  r1  r<  rC  rL  rQ  rS  rW  rY  r-  r`  rb  r  rp  rj  rl  rm  rn  r  r  r  r1   r1   r1   r2   r   &   s    !








	




	

	
4




1
0'
LU

1
.
x

r   )r  r  loggingr   r:   collectionsr   pathlibr   r   onnx.external_data_helperutilr   r   	getLoggerr   r   r1   r1   r1   r2   <module>   s   
