o
    ٷiS                     @   s   d dl Z d dlZd dlZd dlmZ d dlmZ d dlZd dl	Z	d dl
Z
d dlmZ d dlmZ d dl	mZmZ d dlmZ d dlmZ d d	lmZ d d
lmZmZmZmZ d dlmZ e eZ G dd de
j!j"Z#dS )    N)chain)Path)convert_float_to_float16)RepeatedCompositeFieldContainer)
ModelProtoValueInfoProto)	OnnxModel)PastKeyValuesHelper)WhisperConfig)convert_inputs_for_ortget_model_dynamic_axesget_sample_decoder_inputsgroup_past_key_values)InferenceSessionc                       s  e Zd ZdZd5dedejjdede	f fddZ
				d6d
ejdejd	B deeej  d	B fddZ				d6d
ejdejd	B deeej  d	B fddZ				d6d
ejdejd	B deeej  d	B fddZdd Zdd Zdd Zd5de	de	de	fddZd7dede	d e	fd!d"Zd5d#ed e	fd$d%Zdefd&d'Zdede	fd(d)Z	*			*		*d8d+ed,ed-e	d.e	de	de	d/e	d0e	fd1d2Zd+ed,ede	de	fd3d4Z  ZS )9WhisperDecoderz/A Whisper decoder with optional past key valuesFconfigmodel
model_implno_beam_search_opc                    s   t    || _|j| _|| _|| _|dkrd n|jj| _|dkr#d n|j| _|dkr-|nd | _| jj	| _	| jj
| _| jj| j | _d S Nopenai)super__init__r   devicer   r   r   decoderproj_outmax_source_positionsdecoder_attention_heads	num_headsd_model	head_size)selfr   r   r   r   	__class__ k/home/ubuntu/.local/lib/python3.10/site-packages/onnxruntime/transformers/models/whisper/whisper_decoder.pyr   %   s   


zWhisperDecoder.__init__Ndecoder_input_idsencoder_hidden_statespast_key_valuesc           	      C   sJ   | j |||dd}| |j}|j}|d u r||fS t|\}}||fS )NT)r'   	input_idsr(   	use_cache)r   r   last_hidden_stater(   r	   group_by_self_and_cross)	r!   r&   r'   r(   outputslogitspresent_key_valuespresent_selfpresent_crossr$   r$   r%   
hf_forward4   s   zWhisperDecoder.hf_forwardc                    s&  i }|d ur`t |\}}dd |D }dd |D }dd |D }dd |D }t jjjD ]0\}}|d|  ||jj< |d| d  ||jj< |d|  ||jj< |d| d  ||jj< q/ j	 \}	}
 jj|||d}|d ur jjjD ]0}t
j||jj |	|jj gdd	 |	|jj< t
j||jj |	|jj gdd	 |	|jj< qyg g }} jjjD ]*}||	|jj  ||	|jj  |d u r||	|jj  ||	|jj  q fd
d|D } fdd|D }|
D ]}|  q|d u rt|| t|d }||fS ||fS )Nc                 S      g | ]}| d dqS       	transpose.0past_kvr$   r$   r%   
<listcomp>Z       z.WhisperDecoder.oai_forward.<locals>.<listcomp>c                 S   *   g | ]}| g |jd d dR qS Nr6   reshapeshaper9   r$   r$   r%   r<   [      * c                 S   r3   r4   r7   r9   r$   r$   r%   r<   \   r=   c                 S   r>   r?   rA   r9   r$   r$   r%   r<   ]   rD   r6   r5   )xxakv_cache)dimc                    8   g | ]}| g |jd d d jR ddqS Nr6   r@   r5   rB   rC   r    r8   r:   
present_kvr!   r$   r%   r<          *c                    rI   rJ   rK   rL   rN   r$   r%   r<      rO   )r   	enumerater   r   blocksattnkeyvalue
cross_attninstall_kv_cache_hookstorchcatdetachappendremover	   group_by_layerlen)r!   r&   r'   r(   past_kv_cacheself_attn_kv_cachescross_attn_kv_cachesidxblockrG   hooksr.   r0   r1   hookr/   r$   rN   r%   oai_forwardP   s`   






zWhisperDecoder.oai_forwardc                 C   s&   | j dkr| |||S | |||S r   )r   re   r2   )r!   r&   r'   r(   r$   r$   r%   forward   s   
zWhisperDecoder.forwardc                 C   s>   | j r	ddg}|S ddgttdd t| jjD }|S )Nr)   r'   c                 s   4    | ]}d | d| d| d| fV  qdS )past_key_self_past_value_self_past_key_cross_past_value_cross_Nr$   r:   ir$   r$   r%   	<genexpr>   s
    "
z-WhisperDecoder.input_names.<locals>.<genexpr>
first_passlistr   from_iterableranger   decoder_layers)r!   input_namesr$   r$   r%   ru      s   


zWhisperDecoder.input_namesc                 C   sZ   | j rdgttdd t| jjD }|S dgttdd t| jjD }|S )Nr.   c                 s   rg   )present_key_self_present_value_self_present_key_cross_present_value_cross_Nr$   rl   r$   r$   r%   rn      s    
z.WhisperDecoder.output_names.<locals>.<genexpr>c                 s   s$    | ]}d | d| fV  qdS )rv   rw   Nr$   rl   r$   r$   r%   rn      s    
ro   )r!   output_namesr$   r$   r%   rz      s&   



zWhisperDecoder.output_namesc                 C   s*   t | j||}d|v r| js|d d= |S )Nr)   r5   )r   r   r   )r!   ru   rz   dynamic_axesr$   r$   r%   r{      s   
zWhisperDecoder.dynamic_axesuse_fp16_inputsuse_int32_inputsreturn_dictc              	   C   sn   t | j| jd| jrdnd| jrdnd||d}|r!| jr|d= |S | jr,|d |d fS |d |d |d fS )	Nr6   r      r5   )
batch_sizepast_sequence_lengthsequence_lengthuse_fp16	use_int32r(   r&   r'   )r   r   r   rp   )r!   r|   r}   r~   inputsr$   r$   r%   r      s*   	zWhisperDecoder.inputsiois_cross	is_outputc                 C   s   |j jjjd }d|jv r|  | j|_|j jjjd }d|jv r4|  |r-| j|_n|r1dnd|_|j jjjd }d|jv rI|  | j	|_|S )Nr5   _dim_r6   total_sequence_lengthr      )
typetensor_typerC   rH   	dim_paramClearr   	dim_valuer   r    )r!   r   r   r   r   r   r    r$   r$   r%   fix_key_value_cache_dims   s   



z'WhisperDecoder.fix_key_value_cache_dimsio_listc                 C   s   g }g }g }|D ]D}d|j vrd|j vr|| qd|j v r6| j|d|d}| jr0|| q|| q| j|d|d}| jrG|| q|| q| jsV||| 7 }|S )Npastpresentr!   F)r   r   T)namerZ   r   r   )r!   r   r   reordered_ior_   r`   r   new_ior$   r$   r%   fix_io  s$   
zWhisperDecoder.fix_ioc                 C   s   | j |jjdd}t|jjdkr|jj  t|jjdks|jj| | j |jjdd}t|jjdkrE|jj  t|jjdks7|jj| |S )NF)r   r   T)r   graphinputr]   popextendoutput)r!   r   reordered_inputsreordered_outputsr$   r$   r%   fix_inputs_and_outputs0  s   z%WhisperDecoder.fix_inputs_and_outputsc                 C   s   | j dkr|rt|}|S r   )r   r   )r!   r   r|   r$   r$   r%   fix_layernorm_weights>  s   z$WhisperDecoder.fix_layernorm_weightsTonnx_model_pathproviderverboseuse_external_data_formatuse_encoder_hidden_statesuse_kv_cache_inputsc	                 C   s*  |o| | _ | o
|| _| j s| jsJ d| j||d}	|  }
|  }| |
|}t|jjddd t	
 J}tj|d}t|jjddd |rO|n|}tjj| |	|d|
||dd|d
 tj||d}| |}| ||}tj|||dd	 W d
   n1 sw   Y  | |||| d
S )al  Export decoder to ONNX

        Args:
            onnx_model_path (str): path to save ONNX model
            provider (str): provider to use for verifying parity on ONNX model
            verbose (bool, optional): print verbose information. Defaults to True.
            use_external_data_format (bool, optional): use external data format or not. Defaults to False.
            use_fp16_inputs (bool, optional): use float16 inputs for the KV caches. Defaults to False.
            use_int32_inputs (bool, optional): use int32 inputs for the decoder_input_ids. Defaults to True.
            use_encoder_hidden_states (bool, optional): use encoder_hidden_states as model input for decoder-init/decoder-without-past models. Defaults to False.
            use_kv_cache_inputs (bool, optional): use KV caches as model inputs for decoder-with-past models. Defaults to True.
        zVOnly one of `use_encoder_hidden_states` and `use_kv_cache_inputs` can be true at once.)r|   r}   T)parentsexist_okzdecoder.onnx   )	argsfexport_paramsru   rz   r{   opset_versiondo_constant_foldingr   )load_external_data)save_as_external_dataall_tensors_to_one_fileN)rp   
later_passr   ru   rz   r{   r   parentmkdirtempfileTemporaryDirectoryospathjoinrW   onnxexport
load_modelr   r   r   saveverify_onnx)r!   r   r   r   r   r|   r}   r   r   r   ru   rz   r{   tmp_dir_nametemp_onnx_model_pathout_pathr   r$   r$   r%   export_onnxJ  sJ   $

zWhisperDecoder.export_onnxc                 C   sJ  | j ||dd}g }| jr:| jd
i |}||d     |d D ]}|D ]}	||	    q*q&n'| jd
i |}||d     |d D ]}
||
    qSt||gd}|dt	||}z-t
|  D ]#\}}t|| ||  }td| d td	t|  qxW dS    Y dS )aw  Verify ONNX model outputs and PyTorch model outputs match

        Args:
            onnx_model_path (str): path to save ONNX model
            provider (str): execution provider for ONNX model
            use_fp16_inputs (bool, optional): use float16 inputs for the KV caches
            use_int32_inputs (bool, optional): use int32 inputs for the decoder_input_ids
        T)r|   r}   r~   r   r5   )	providersNz
Comparing z...z
Max diff: r$   )r   rp   rf   rZ   rY   cpunumpyr   runr   rP   rz   npabsloggerwarningmax)r!   r   r   r|   r}   r   
pt_outputsoutpresent_key_value_layerpresent_key_valuepresent_self_key_valuesessort_outputsrm   output_namediffr$   r$   r%   r     s0   zWhisperDecoder.verify_onnx)F)NN)FF)TFFTFT)__name__
__module____qualname____doc__r
   rW   nnModulestrboolr   Tensorrq   tupler2   re   rf   ru   rz   r{   r   r   r   r   r   r   r   r   r   r   __classcell__r$   r$   r"   r%   r   "   s    $

]

	
Qr   )$loggingr   r   	itertoolsr   pathlibr   r   r   r   rW   float16r   #google.protobuf.internal.containersr   r   r   
onnx_modelr   past_helperr	   transformersr
   whisper_inputsr   r   r   r   onnxruntimer   	getLoggerr   r   r   r   r   r$   r$   r$   r%   <module>   s$   
