o
    ٷi-=                     @   s  d dl Z d dlZd dlZd dlmZ d dlmZ e e	Z
		d<dedejded	ed
ef
ddZ	d=dedejded	edef
ddZ	d>dedejded
efddZ	d>dedejdeded
ef
ddZdeeejejf  deeejejf  fddZdeeejejejejf  fddZ		d?dedejdedefdd Z	d>dejd!edefd"d#Z	d>dejd$edefd%d&Z	d>dedejded	ed
ef
d'd(Z		d<dedejded	ed
ef
d)d*Z			d@dedejded+ed,ed
edefd-d.Z		dAdedejded/ed	ed
edefd0d1Z		dAdedejded	eded!ed$ed
edefd2d3Zd4ed5efd6d7Z ded8ee! d9ee! fd:d;Z"dS )B    N)WhisperConfig)InferenceSession  Fconfigdevice
batch_sizesequence_lengthuse_fp16c                 C   s*   |rt jnt j}t j|| j|||d}|S Nr   dtype)torchfloat16float32randnnum_mel_bins)r   r   r   r   r	   torch_dtypeaudio_features r   j/home/ubuntu/.local/lib/python3.10/site-packages/onnxruntime/transformers/models/whisper/whisper_inputs.pyget_sample_audio_features!   s   r   T	use_int32c                 C   s.   |rt jnt j}t jd| j||f||d}|S )Nr   )lowhighsizer   r   )r   int32int64randint
vocab_size)r   r   r   r   r   r   decoder_input_idsr   r   r   get_sample_decoder_input_ids/   s
   r    c                 C   s,   |rt jnt j}t j|| j| j||d}|S r
   )r   r   r   r   max_source_positionsd_model)r   r   r   r	   r   encoder_hidden_statesr   r   r    get_sample_encoder_hidden_states?   s
   r$   past_seq_lenc                    st   | j | j | j|rtjntj fddt| jD } fddt| jD }t||S )Nc                    s8   g | ]}t j d t j d fqS r   r   rand.0_)r   r   	head_size	num_headsr%   r   r   r   
<listcomp>\       z.get_sample_past_key_values.<locals>.<listcomp>c                    s8   g | ]}t j d t j d fqS r&   r'   r)   )r   r   r,   r!   r-   r   r   r   r.   c   r/   )	decoder_attention_headsr"   r!   r   r   r   rangedecoder_layersflatten_past_key_values)r   r   r   r%   r	   self_attention_kv_cachescross_attention_kv_cachesr   )r   r   r,   r!   r-   r%   r   r   get_sample_past_key_valuesO   s   

r6   self_attn_kv_cachescross_attn_kv_cachesc                 C   s>   g }t | |ddD ]\\}}\}}||||f}|| q	|S )NF)strict)zipappend)r7   r8   past_key_valuesself_k_cacheself_v_cachecross_k_cachecross_v_cachelayer_kv_cachesr   r   r   r3   o   s   r3   	kv_cachesc                 C   sL   g g }}| D ]\}}}}| | | | | | | | q||fS N)r;   )rB   r7   r8   r=   r>   r?   r@   r   r   r   group_past_key_values~   s   



rD      num_alignment_headsc                 C   s(   |rt jnt j}t j|df||d}|S )N   r   )r   r   r   ones)r   r   rF   r   r   alignment_headsr   r   r   get_sample_alignment_heads   s   rJ   sot_sequence_lengthc                 C   &   |rt jnt j}t j|g| |d}|S r
   r   r   r   tensor)r   rK   r   r   
sot_lengthr   r   r   get_sample_sot_sequence_length      rP   segment_lengthc                 C   rL   r
   rM   )r   rR   r   r   segment_sizer   r   r   get_sample_segment_length   rQ   rT   c                    s<   j |rtjntj fddtjD }|S )Nc              
      s$   g | ]}t j jd qS r&   )r   r(   r!   r)   r   r   r   r-   r   r   r   r   r.      s    z"get_sample_QKs.<locals>.<listcomp>)r0   r   r   r   r1   r2   )r   r   r   r   r	   QKsr   rU   r   get_sample_QKs   s   rW   c                 C   s   t | ||||}d|iS )Nr   )r   )r   r   r   r   r	   r   r   r   r   get_sample_encoder_inputs   s   rX   decoder_sequence_lengthencoder_sequence_lengthc           	      C   s*   t | ||||}t| ||||}||dS )N)r   r   )r   r    )	r   r   r   rY   rZ   r	   r   r   r   r   r   r   &get_sample_encoder_decoder_init_inputs   s   	
r[   past_sequence_lengthc           
      C   s:   t | ||||}t| |||}t| ||||}	|||	dS )N)r   r#   r<   )r    r$   r6   )
r   r   r   r\   r   r	   r   r   r#   r<   r   r   r   get_sample_decoder_inputs   s   	r]   c	                 C   s@   t | |||}	t||}t||}t| ||||}
|	|||
dS )N)rI   rK   rR   rV   )rJ   rP   rT   rW   )r   r   r   r   rF   rK   rR   r	   r   rI   rV   r   r   r   get_sample_jump_times_inputs   s   

r^   inputsmodelc                 C   sf  d\}}d\}}}}d\}}	d| v r#t | d \}}|d j\}}}}i }
ttdd | }d|v }|D ]}|d	v rI| d
    |
|< q6|dkrZ| d    |
|< q6|dv rk| d    |
|< q6d|v ssd|v r|d   }|rt	j
|||	|f|jd}||d |d |d |d |f< ||
|< q6||
|< q6d|v sd|v r|d   }||
|< q6|dkrt	j|gt	jd|
|< q6|dkrt	j
|||	ft	jd|
|< q6|dkr| d    |
|< q6|dkr| d    |
|< q6|dkr| d    |
|< q6d|v r*| d d   |
|< q6td| |
S )N)NN)r   r   r   r   )   i  r<   r   c                 S   s   | j S rC   )name)ir   r   r   <lambda>  s    z(convert_inputs_for_ort.<locals>.<lambda>cache_indirection>   r   encoder_input_idsr   r#   >   	input_idsr   r   past_key_selfpast_value_self)r   past_key_crosspast_value_crossr\   rI   rK   rR   cross_qkrV   zUnknown name not recognized: )rD   shapelistmap
get_inputsdetachcpunumpypopnpzerosr   arrayr   
ValueError)r_   r`   r7   r8   r   r-   r%   r,   	num_beamsmax_seq_len
ort_inputsmodel_inputsuse_buffer_sharingrb   orig_kv_cachenew_kv_cacher   r   r   convert_inputs_for_ort  sP    





 r   input_namesoutput_namesc                 C   s<  i }|| D ]}|dv rddi||< q|dv rddd||< q|dkr*ddi||< q|d	v r/q|d
kr;ddd||< q|dkrFddi||< qd|v sNd|v rVddd||< qd|v s^d|v rfddd||< qd|v svd|v svd|v svd|v r}ddi||< qd|v rddd||< qd|v rddd||< qt d| |S )N>   r   rf   r   r   >   rg   r   r   )r   ra   rI   rF   >   rR   rK   logitsr#   rh   ri   r\   )r   rG   present_key_selfpresent_value_selftotal_sequence_lengthrj   rk   present_key_crosspresent_value_crossrl   
jump_times
max_lengthz$Unknown input or output name found: )	Exception)r   r   r   dynamic_axesrb   r   r   r   get_model_dynamic_axesL  s:   r   )r   F)T)F)rE   T)r   FT)FT)#loggingrs   ru   r   transformersr   onnxruntimer   	getLogger__name__loggerr   intboolr   r    r$   r6   rn   tupleTensorr3   rD   rJ   rP   rT   rW   rX   r[   r]   r^   dictr   strr   r   r   r   r   <module>   sj  




 








	

>