o
    ٷi3:                     @   s   d dl Z d dlZd dlZd dlmZ d dlmZmZmZ d dlm	Z	m
Z
 d dlmZmZ e eZdd Zdd	d
Zdd ZdS )    N)	Precision)get_shared_initializers.update_decoder_subgraph_output_cross_attention?update_decoder_subgraph_share_buffer_and_use_decoder_masked_mha)TensorProtohelper)WhisperConfigWhisperTokenizerc                 C   sP   t tdd | }t|t|ksJ t||ddD ]\}}|j|v s%J qd S )Nc                 S   s   | S )N )
beam_inputr
   r
   i/home/ubuntu/.local/lib/python3.10/site-packages/onnxruntime/transformers/models/whisper/whisper_chain.py<lambda>   s    zverify_inputs.<locals>.<lambda>F)strict)listfilterlenzipname)beam_inputsgraph_inputsbeam_required_inputsgraph_inputr   r
   r
   r   verify_inputs   s
   r   Tc                 C   sN   |rt tdd | S t| dkr%| d dkr|   n	 | S t| dks| S )Nc                 S   s   | dkS )N r
   )elmr
   r
   r   r   #   s    zclean_list.<locals>.<lambda>r   r   )r   r   r   pop)arrremove_all_stringsr
   r
   r   
clean_list    s   
r   c           3      C   sf  t j| jdd}d|j_t j| jdd}d|j_tj| j| j	d}t
j| j| j	d}| jtjkr2dnd}| jtjkr<dnd	d
ddd| jtjkrIdnd| jtjkrRdnd| jrXdnd| jr^dndd| jrednd| jrkdnd| jrqdnd| jrwdnd| jr}|ndg}| jtjkrdnd}| jtjkrdnd}d| jr|nd| jr|nd| jrdnd| jrdndg}	g }
| jtjkr(tjd d	gdgd!tjd"}tjd dgdgd#tjd"}tjd dgdgd$tjd"}|
|||g | jrtjd dgdgd%tjd"}|
| | jrtjd dgdgd&tjd"}|
| | jr(tjd dgdgd'tjd"}|
| td(|jtd)|j td*|j!td+|"d,gd- td.|"d/gd- td0|"d1gd- | jrktd2|"d3gd- ndtd4|"d5gd- td6|"d7gd- td8| j#td9dtd:d;| jrtd<d=ndg}tjd>t$|d?d@t$|	d?d@dAdBdC}|j%t$|dd@ t&d	tjg dD}t&d
tj'd=g}t&dtj'd=g}t&dtj'd=g}t&dtj'd=g}t&dtjd=g}t&dtjd=g}t&dtj'|j(g}t&dtj'dE|j(g}t&dtj'dEdFg}t&dtj'd=g}t&dtj'dGd;g}t&dtj'dEdHg}t&dtjd=g} t$|||||||| jrN|nd| jrU|nd| jr\|nd| jrc|nd| jrj|nd| jrq|nd| jrx| ndg}!t&dtj'g dI}"t&dtjdEg}#t&dtjdEg}$t&dtjg dJ}%t&dKtjdEg}&t$|"| jr|#nd| jr|$nd| j)s| j*s| jr|%nd| jr|&ndg}'t+| dLr| j,rt-|jrt./dM nt.0dN t+| dOr| jrt1|j t2||}(|j%tdP|jtdQ|jg tj3dBd=dRtj3ddSdRg})|
| | jr>tjd dgdKgdTtjd"}*|
|* tj4|
dU|!|'|(dV}+dWdX |!D },dYdX |'D }-| j*rt j| j*dd}.|.j}/|+j5|/j5 |+j6|/j6 |/j7D ]}0|0j|,vr|0j|-vr|0jdkr|+j7|0g qv|+j8|/j8 t9||! |j:|j:ksJ t./dZ|j: d[ tj;|+d\|)|j:d]}1t<j=>| j?rt./d^| j? d_| j?d`   t<j=@| j?rt<A| j? t<j=@| j?d` rt<A| j?d`  t jB|1| j?| jCddt<j=D| j? d`da zt jEjF| j?ddb W d S  tGy2 }2 zt.jHdc|2 ddd W Y d }2~2d S d }2~2ww )eNT)load_external_datazencoderdecoderinit subgraphzdecoder subgraph)	cache_dirtemperature_fp16temperatureinput_features_fp16input_features
max_length
min_length	num_beamsnum_return_sequenceslength_penalty_fp16length_penaltyrepetition_penalty_fp16repetition_penalty
vocab_maskr   prefix_vocab_maskdecoder_input_idslogits_processorcross_qk_layer_headextra_decoding_idssequence_scores_fp16sequence_scoresscores_fp16scores	sequencescross_qkno_speech_probs_beamCastCastInputFeaturesToFp16)inputsoutputsr   toCastLengthPenaltyToFp16CastRepetitionPenaltyToFp16temperature_to_fp16CastOutputSequenceScoresToFp32CastScoresToFp32eos_token_idpad_token_iddecoder_start_token_idtranslate_token_idz<|translate|>r   transcribe_token_idz<|transcribe|>start_of_lm_token_idz<|startoflm|>no_speech_token_idz<|nospeech|>no_timestamps_token_idz<|notimestamps|>beginning_timestamp_token_idz<|0.00|>no_repeat_ngram_sizeearly_stopping
model_type   decoder_output_cross_qk   WhisperBeamSearchF)r   
BeamSearchzcom.microsoft)r=   r>   r   domain)
batch_sizefeature_sizesequence_lengthrW   initial_sequence_lengthnum_layer_headextra_decoding_ids_len)rW   r)   r&   )rW   r)   num_layer_head_cross_qkr&   framesno_speech_probsuse_gpuzUUpdated whisper decoder subgraph to use DecoderMaskedMultiHeadAttention successfully!zPDecoderMaskedMultiHeadAttention could not be applied to whisper decoder subgraphcollect_cross_qkdecoderencoder)rV   version   no_speech_probs_cast_to_fp32zWhisperBeamSearch Graph)r   r=   r>   initializerc                 S      g | ]}|j qS r
   r   ).0gir
   r
   r   
<listcomp>      zchain_model.<locals>.<listcomp>c                 S   rh   r
   ri   )rj   gor
   r
   r   rl     rm   zUsing IR version z for chained modelzonnxruntime.transformers)producer_nameopset_imports
ir_versionzOverwriting z and z.data)save_as_external_dataall_tensors_to_one_fileconvert_attributelocation)
full_checkz2An error occurred while running the ONNX checker: )exc_info)Ionnx
load_modelencoder_pathgraphr   decoder_pathr   from_pretrainedmodel_name_or_pathr!   r	   	precisionr   FLOAT16use_vocab_maskuse_prefix_vocab_maskuse_forced_decoder_idsuse_logits_processorra   r3   use_temperatureoutput_sequence_scoresoutput_scoresoutput_no_speech_probsr   	make_noder   extendappendFLOATmake_attributerE   rF   rG   convert_tokens_to_idsrN   r   	attributemake_tensor_value_infoINT32
vocab_sizeoutput_cross_qkcross_qk_onnx_modelhasattrr`   r   loggerinfowarningr   r   make_opsetid
make_graphrg   nodeinputoutputr   rq   make_model_gen_versionospathisfilebeam_model_output_direxistsremovesaveuse_external_data_formatbasenamecheckercheck_model	Exceptionerror)3argsencoder_modeldecoder_modelconfig	tokenizertemperature_namer   sequence_scores_namescores_namebeam_outputsgraph_nodesinput_features_cast_nodelen_pen_cast_noderep_pen_cast_nodetemp_cast_node output_sequence_scores_cast_nodeoutput_scores_cast_nodebeam_search_attrsr   r%   r&   r'   r(   r)   r+   r-   r.   r/   r0   r1   r2   r3   r#   r   r8   r5   r7   r9   r_   graph_outputsinitializersopset_importprob_cast_node
beam_graphbeam_graph_input_namesbeam_graph_output_namespost_qk_modelpost_qk_graphpgi
beam_modeler
   r
   r   chain_model.   s  













"r   )T)loggingr   rx   benchmark_helperr   convert_generationr   r   r   r   r   transformersr   r	   	getLogger__name__r   r   r   r   r
   r
   r
   r   <module>   s   

	