o
    ٷiN                     @  s   d dl mZ d dlZd dlZd dlmZmZ d dlm	Z	 d dl
mZmZ dHd
dZ		dIdJddZ				dKdLddZ					dMdNd"d#ZdOd%d&ZdPdQd'd(ZdRd+d,Z		 	-dSdTd0d1ZdUd3d4ZdVd7d8ZdWd;d<ZdXd?d@ZdYdFdGZdS )Z    )annotationsN)
AutoConfigAutoTokenizer)DynamicCache)InferenceSessionOrtValueattention_masktorch.Tensoruse_past_kvboolc                 C  s@   |   dd }|| dkd |r|d d df d}|S )N   r   )longcumsummasked_fill_	unsqueeze)r   r
   position_ids r   f/home/ubuntu/.local/lib/python3.10/site-packages/onnxruntime/transformers/models/llama/llama_inputs.pyget_position_ids   s
   r   ptFconfigr   devicetorch.device
batch_sizeintseq_lenenginestrreturn_dictc           
      C  s   t jd| j||ft jd}t j||t jd}t|dd}|dkr$| n||}|dkr1| n||}|dkr>| n||}|sJ|||fS |||d}	|	S )Nr   lowhighsizedtyper$   Fr
   ort	input_idsr   r   )torchrandint
vocab_sizeint64onesr   numpyto)
r   r   r   r   r   r   r)   r   r   inputsr   r   r   get_sample_inputs    s   
r2   r   past_seq_lenuse_fp16
world_sizec                   s&  t jd| j|dft jd}t j||d t jd}	t|	dd}
t| ||||d}|dkr/| n| }|dkr<|	 n|	 }	|dkrI|
 n|
 }
|dkrVt	|n fd	d
|D }|snt
|tshJ ||	|
|fS ||	|
d}|dkrt
|tsJ || |S t
|tsJ ||d< |S )Nr   r   r    r%   Tr&   r5   r'   c                   (   g | ]}|d    |d   fqS r   r   r0   .0kvr   r   r   
<listcomp>X      ( z2get_sample_with_past_kv_inputs.<locals>.<listcomp>r(   past_key_values)r*   r+   r,   r-   r.   r   get_past_kv_inputsr/   r0   flatten_past_kv_inputs
isinstancelistdictupdate)r   r   r   r3   r4   r   r   r5   r)   r   r   past_kvr1   r   r=   r   get_sample_with_past_kv_inputsC   s.   
 
rH   max_seq_lenuse_buffer_sharec                   s:  t jd| j||ft jd}t j||| t jd}t||dkd}t| ||||
d}|dkr1| n| }|dkr>| n| }|dkrK| n| }|dkrXt	|n fdd|D }|	spt
|tsjJ ||||fS |||d	}|dkrt
|tsJ || |rt|||}|S t
|tsJ ||d
< |S )Nr   r    r%   r&   r6   r'   c                   r7   r8   r9   r:   r=   r   r   r>      r?   z9get_merged_sample_with_past_kv_inputs.<locals>.<listcomp>r(   r@   )r*   r+   r,   r-   r.   r   rA   r/   r0   rB   rC   rD   rE   rF    enable_past_present_share_buffer)r   r   r   r   r3   rI   r4   rJ   r   r   r5   r)   r   r   rG   r1   r   r=   r   %get_merged_sample_with_past_kv_inputsy   s2    
rL   split_kvc                 C  sn  |rt jnt j}| j| j }	|sTt j||| j|dt jt 	|||fdd |t j|| j
|| j|	|t j|| j
|| j|	|t j|t jdd}
|
S t j||| j|t jt j	|||ft jdddd t jt j|t jdd}
t| j
D ])}|
d| dt j|| j||	|d	| dt j|| j||	|i q|rt|
||}
|
S )
Ng     r   )kr%   )x	attn_maskk_cachev_cachepos)rO   rP   rS   k__cachev_)npfloat16float32hidden_sizenum_attention_headsrandomrandastypetriur.   num_hidden_layersarrayr-   int32rangerF   rK   )r   r   r3   r   rI   r4   rJ   rM   np_dtype	head_size
ort_inputsir   r   r   get_msft_sample_inputs   sJ   
""$

rh   c                   sZ   | j | t| dr| jn| j| j |rtjntj fddt| j	D }|S )Nhead_dimc              
     s4   g | ]}t j d t j d fqS )r%   )r*   r]   )r;   _r   re   	num_headsr3   torch_dtyper   r   r>      s    z&get_past_kv_inputs.<locals>.<listcomp>)
num_key_value_headshasattrri   rZ   r[   r*   rX   rY   rc   r`   )r   r   r3   r4   r5   rG   r   rk   r   rA      s   
rA   r@   'list[tuple[torch.Tensor, torch.Tensor]]c                 C  s   i }t | D ]B\}\}}t| tr,|   |d| < |   |d| < q|   |d| d< |   |d| d< q|S )Npast_key_values_key_cache_past_key_values_value_cache_past_key_values..key.value)	enumeraterC   r   detachcpur/   )r@   rG   rg   past_kpast_vr   r   r   rB      s   
rB      	pt_inputsrE   c                 C  sl   i }|   D ]%\}}t|tjr|||< q|dkr!|t| q|   ||< q|r4t	|||}|S )Nr@   )
itemsrC   rW   ndarrayrF   rB   rw   rx   r/   rK   )r|   rJ   r3   rI   rf   rN   vr   r   r   convert_inputs_for_ort   s   
r   rf   c           
      C  st   |   D ]3\}}d|v sd|v r7|j\}}}}tj||||f|jd}	||	d |d |d |d |f< |	| |< q| S )Ncacher@   r%   )r}   shaperW   zerosr$   )
rf   r3   rI   rN   r   r   rl   rj   re   new_vr   r   r   rK     s    rK   modelr   c                 C  sh   dd |   D }t| }|| }t|r"td|  td|| }t|r2|D ]}||= q,|S )Nc                 S     h | ]}|j qS r   name)r;   model_inputr   r   r   	<setcomp>%      z$verify_ort_inputs.<locals>.<setcomp>z(The following model inputs are missing: zEThere are missing inputs to the model. Please add them and try again.)
get_inputssetkeyslenprint	Exception)r   rf   model_inputsuser_inputsmissing_inputsunnecessary_inputsunnecessary_inputr   r   r   verify_ort_inputs#  s   r   	device_idkv_cache_ortvaluesc                 C  s  |   }dd |  D }| D ]H\}}	||vrq|rKd|v s$d|v rK||vr;tj|	||d}
|||
 |
||< q|| |	 ||||  qtj|	||d}
|||
 q|  D ]*}|j}|rd|v smd|v r|	dd	dd}|
|||  q^|j|||d q^||fS )Nc                 S  r   r   r   )r;   rg   r   r   r   r   A  r   z/add_io_bindings_as_ortvalues.<locals>.<setcomp>r   r@   )device_typer   outpresent)
io_bindingr   r}   r   ortvalue_from_numpybind_ortvalue_inputupdate_inplaceget_outputsr   replacebind_ortvalue_outputbind_output)r   rf   r   r   rJ   r   r   r   rN   r   v_deviceoutputr   
input_namer   r   r   add_io_bindings_as_ortvalues7  s*   
r   r1   outputsc              	   C  s   t | |}d }tjtjtjtjd}|  }| D ](\}}	|j||	j	j
|	j	j
dkr,dn|	j	j|t|	j t|	j|	 d |	j	}q|  D ]6}
|
j}|rZd|v rZ||dd n|| }	|j||j
|j
dkrjdn|j|rqtjntjt|	j|	 d qG|S )N)ztorch.int32ztorch.int64ztorch.float16ztorch.float32rx   r   )r   r   r   element_typer   
buffer_ptrr   r@   )r   rW   rb   r-   rX   rY   r   r}   
bind_inputr   typeindexreprr$   tupler   data_ptrr   r   r   r   )r   r1   r   r4   rJ   r   pt_to_npr   rN   r   r   r   r   r   r   add_io_bindings_as_tensorsd  sF   
	r   	tokenizerr   requested_lengthprompt	list[str]c              	   C  s  |j |_|j|dd}|rtjntj}	tj|d |tjd}
tj|d |tjd}t|dd}|
j	d }||krS|
d d d |f }
|d d d |f }t|dd}n9||k r|
d d d	f 
d	j}|d d d	f 
d	j}t|| D ]}t||
f}
t||f}qut|dd}|
j	d }||ksJ |d
kr|
 n|
|d
kr| n||d
kr| n|d}|d
krg |d< |
j	\}}| j}| j}t| dr| jn| j| j }t| jD ]D}tj|||r|nd	|||	d}tj|||r|nd	|||	d}|d
kr|d| d| d| d| i q|d ||f qd }|d
krotj||| j||	d}d| i}|sot| jD ].}tj||||||	d}tj||||||	d}|d| d| d| d| i q@||fS )NT)paddingr)   )r   r$   r   Fr&   r   r   r'   r(   r@   ri   rs   rt   ru   logitszpresent.)	eos_token	pad_tokenbatch_encode_plusr*   rX   rY   tensorr-   r   r   r   Trc   hstack
contiguousmax_position_embeddingsrn   ro   ri   rZ   r[   r`   r   rF   appendr,   )r   r   r   r   r   r4   rJ   r   encodings_dictrm   r)   r   r   tokenized_lengthinput_ids_first_colattention_mask_first_colrj   r1   r   sequence_lengthmax_sequence_lengthrl   re   rg   past_key
past_valuer   r   present_keypresent_valuer   r   r   get_initial_inputs_and_outputs  s   







"r   )r   r	   r
   r   )r   F)r   r   r   r   r   r   r   r   r   r   r   r   )Fr   Fr   )r   r   r   r   r   r   r3   r   r4   r   r   r   r   r   r5   r   )FFr   Fr   )r   r   r   r   r   r   r   r   r3   r   rI   r   r4   r   rJ   r   r   r   r   r   r5   r   )r   r   r   r   r3   r   r   r   rI   r   r4   r   rJ   r   rM   r   )r   )
r   r   r   r   r3   r   r4   r   r5   r   )r@   rp   )Fr   r{   )r|   rE   rJ   r   r3   r   rI   r   )rf   rE   r3   r   rI   r   )r   r   rf   rE   )r   r   rf   rE   r   r   r   r   rJ   r   r   rE   )
r   r   r1   rE   r   rE   r4   r   rJ   r   )r   r   r   r   r   r   r   r   r   r   r4   r   rJ   r   r   r   )
__future__r   r/   rW   r*   transformersr   r   transformers.cache_utilsr   onnxruntimer   r   r   r2   rH   rL   rh   rA   rB   r   rK   r   r   r   r   r   r   r   r   <module>   sB   
(=
45




-0