o
    i%                     @   s   d dl Z d dlZd dlZd dlZed 	ddedefdd	Z				ddeded
efddZ	dddZ
d ddZdd Zdd Zdd Zdd Zdd ZdS )!    NignoreF   onnxquantizeopset_versionc           
      K   s  | j di |}|dtj|d}tj|dd t|ttfs&|f}|D ]}|	  |dkr@t
|f||||d| q(|dkrktj rKdntj rRd	n	tjj rZd
nd}	td|	 t|||	d q(|dkrtj stj stjj sJ dt|drt|drt||dd q(td|  t||dd q(|dkrtj stj stjj sJ dt|drt|drt||dd q(|S )N
output_dir
init_paramT)exist_okr   )data_inr   r   
export_dirtorchscriptcudaxpumpscpuz#Exporting torchscripts on device {})pathdevice	bladediscz=Currently bladedisc optimization for FunASR only supports GPUencoderdecoder)r   enable_fp16zexport_dir: 	onnx_fp16z=Currently onnx_fp16 optimization for FunASR only supports GPU )exportgetosr   dirnamemakedirs
isinstancelisttupleeval_onnxtorchr   is_availabler   backendsr   printformat_torchscriptshasattr_bladedisc_opt_for_encdec_onnx_opt_for_encdec)
modelr
   r   r   typekwargsmodel_scriptsr   mr   r   r   M/home/ubuntu/.local/lib/python3.10/site-packages/funasr/utils/export_utils.pyr   
   s`   0

r   r   c                    s>  | dd |  }t|tjr| }nt fdd|D }| dd}t| jtr3| jd }n|  }t	j
||}	tjj| ||	|d||  |  |  d		 |rzd
dlm}
m} d
d l}W n   td|	dd}||	}dd |jjD }dd |D }td|	| ||	|dgdd|
j|d d S d S )Nr   r   c                       g | ]}|  qS r   )to).0inputr   r   r1   
<listcomp>G       z_onnx.<locals>.<listcomp>verboseFz.onnxTr9   do_constant_foldingr   input_namesoutput_namesdynamic_axesr   )	QuantTypequantize_dynamiczwYou are quantizing the onnx model, please install onnxruntime first. via 
`pip install onnx`
`pip install onnxruntime`.z_quant.onnxc                 S   s   g | ]}|j qS r   )name)r4   nr   r   r1   r7   g   s    c                 S   s(   g | ]}d |v sd|v sd|v r|qS )outputbias_encoderbias_decoderr   r4   r0   r   r   r1   r7   h   s    "zQuantizing model from {} to {}MatMul)model_inputmodel_outputop_types_to_quantizeper_channelreduce_rangeweight_typenodes_to_exclude)r   export_dummy_inputsr   r#   Tensorr3   r    export_namestrr   r   joinr   r   export_input_namesexport_output_namesexport_dynamic_axesonnxruntime.quantizationr?   r@   RuntimeErrorreplaceloadgraphnoder&   r'   QUInt8)r,   r
   r   r   r   r.   dummy_inputr9   rQ   
model_pathr?   r@   r   quant_model_path
onnx_modelnodesrN   r   r6   r1   r"   8   s\   	

r"   r   c              	   C   s   |   }|dkr |  } t|tjr| }n	tdd |D }tj| |}t| jt	r?|
tj|| j dd d S |
tj||   dd d S )Nr   c                 S      g | ]}|  qS r   r   r4   ir   r   r1   r7          z!_torchscripts.<locals>.<listcomp>r   r   )rO   r   r   r#   rP   r    jittracerQ   rR   saver   r   rS   rY   )r,   r   r   r^   model_scriptr   r   r1   r(   w   s   
$r(   Tc              
   C   s   |   } zdd l}W n ty  } z
td W Y d }~nd }~ww |j }||_t + | |j	| d|d}W d    n1 sCw   Y  W d    |S W d    |S 1 s[w   Y  |S )Nr   zhWarning, if you are exporting bladedisc, please install it and try it again: pip install -U torch_blade
T)allow_tracingmodel_inputs)
r!   torch_blade	Exceptionr&   configConfigr   r#   no_gradoptimize)r,   rm   r   rn   etorch_config	opt_modelr   r   r1   _bladedisc_opt   s.   
(rw   c                 C   s6   t |dkr|d | g|dd  R S |d | fS )N   r   )len)r0   xscaler   r   r1   _rescale_input_hook   s   r|   c                 C   s.   t |tr|d | g|dd  R S || S )Nr   rx   )r   r    )r0   rz   yr{   r   r   r1   _rescale_output_hook   s   
r~   c                    s   t d   fdd| jjj}fdd|D }|  } | |  |D ]}|  q%td  d td d	 | jjj	
tjtd
 | jj D ]*\}}|drb|tjtd
 |dryfdd|  D }|| qOd S )Nr   c                    s8   t |tr	|d n|} t |    d S )Nr   )r   r    copy_r#   maxdetachabs)r0   rz   r}   val)absmaxr   r1   stat_input_hook   s   "z/_rescale_encoder_model.<locals>.stat_input_hookc                    r2   r   )register_forward_hookrF   )r   r   r1   r7      r8   z*_rescale_encoder_model.<locals>.<listcomp>   i   z$rescale encoder modules with factor=

)r{   	self_attnzfeed_forward.w_2c                    s   i | ]	\}}||  qS r   r   )r4   kv)
fp16_scaler   r1   
<dictcomp>   s    z*_rescale_encoder_model.<locals>.<dictcomp>)r#   tensorr   r   r,   encodersremoveintr&   	encoders0register_forward_pre_hook	functoolspartialr|   named_modulesendswithr   r~   
state_dictitemsload_state_dict)r,   
input_datar   hookshrA   r0   r   r   )r   r   r   r1   _rescale_encoder_model   s*   





r   c                    s   |   }t|tjr| }n	tdd |D }t   fdd}| j|}|  } | |  |	  |r:t
| | t| j|d d | _t| jt | _tj| |}|tj|| j d d S )Nc                 S   rc   r   rd   re   r   r   r1   r7      rg   z-_bladedisc_opt_for_encdec.<locals>.<listcomp>c                         t| d S Nextendr   r0   rz   decoder_inputsr   r1   get_input_hook      z1_bladedisc_opt_for_encdec.<locals>.get_input_hookr   z_blade.torchscript)rO   r   r#   rP   r   r    r   r   r   r   r   rw   r   rh   ri   rj   r   r   rS   rQ   )r,   r   r   r   r   hookrk   r   r   r1   r*      s    

 r*   c                    sf  |   }t|tjr| }n	tdd |D }t   fdd}| j|}|  } | |  |	  |r:t
| | | d| j d}td td| d	 tj|sjtjj| ||d
dd|  |  |  d	 | d| j d}td td| d	 tj|rtj|szddlm} W n   tdt|}	|j|	dd}
t|
| d S d S d S )Nc                 S   rc   r   rd   re   r   r   r1   r7      rg   z(_onnx_opt_for_encdec.<locals>.<listcomp>c                    r   r   r   r   r   r   r1   r      r   z,_onnx_opt_for_encdec.<locals>.get_input_hook/z
_hook.onnxz2**************************************************z[_onnx_opt_for_encdec(fp32)]: r   FT   r:   z_hook_fp16.onnxz[_onnx_opt_for_encdec(fp16)]: r   )float16z}You are converting the onnx model to fp16, please install onnxconverter-common first. via `pip install onnxconverter-common`.)keep_io_types)rO   r   r#   rP   r   r    r   r   r   r   r   rQ   r&   r   r   existsr   r   rT   rU   rV   onnxconverter_commonr   rX   rZ   convert_float_to_float16rj   )r,   r   r   r   r   r   fp32_model_pathfp16_model_pathr   fp32_onnx_modelfp16_onnx_modelr   r   r1   r+      sR   


r+   )NFr   r   )NFr   Nrd   )T)r   r#   r   warningsfilterwarningsboolr   r   rR   r"   r(   rw   r|   r~   r   r*   r+   r   r   r   r1   <module>   s:    

0

?
