o
    hiC                     @   s   d dl Z d dlZd dlmZ d dlmZ d dlmZ d dlZd dl	Z	d dl
mZmZ eeeee B f ZeeZG dd dZG dd	 d	ZG d
d dZG dd deZG dd dZdS )    N)OrderedDict)Mapping)Any)InferenceSession
RunOptionsc                   @   s   e Zd ZedededefddZededefddZedefd	d
ZedefddZ	ede
jfddZedejfddZededeee
jf fddZdS )
TypeHelperort_sessionnamereturnc                 C   :   t |  D ]\}}|j|kr|j  S qtd| d)Nzinput name 
 not found)	enumerate
get_inputsr	   type
ValueError)r   r	   _iinput r   h/home/ubuntu/transcripts/venv/lib/python3.10/site-packages/onnxruntime/transformers/io_binding_helper.pyget_input_type   s
   

zTypeHelper.get_input_typec                 C   r   )Nzoutput name r   )r   get_outputsr	   r   r   )r   r	   r   outputr   r   r   get_output_type   s
   

zTypeHelper.get_output_typeort_typec                 C   s:   t jt jt jt jtt jd}| |vrt|  d||  S )N)tensor(int64)tensor(int32)tensor(float)tensor(float16)tensor(bool)tensor(uint8) not found in map)numpylonglongintcfloat32float16booluint8r   )r   ort_type_to_numpy_type_mapr   r   r   ort_type_to_numpy_type"   s   z!TypeHelper.ort_type_to_numpy_typec                 C   s@   t jt jt jt jt jt jt jd}| |vrt|  d||  S )N)r   r   r   r   ztensor(bfloat16)r   r   r    )	torchint64int32r$   r%   bfloat16r&   r'   r   )r   ort_type_to_torch_type_mapr   r   r   ort_type_to_torch_type1   s   	z!TypeHelper.ort_type_to_torch_type
numpy_typec                 C   sX   t jtjt jtjt jtjt jtjt jtjttjt j	tj	i}| |vr(t
|  d||  S Nr    )r!   r"   r*   r+   r#   r,   r$   r%   r&   r'   r   )r0   numpy_type_to_torch_type_mapr   r   r   numpy_type_to_torch_typeA   s   	z#TypeHelper.numpy_type_to_torch_type
torch_typec                 C   sP   t jtjt jtjt jtjt jtjt jtt j	tj	i}| |vr$t
|  d||  S r1   )r*   r+   r!   r"   r,   r#   r$   r%   r&   r'   r   )r4   torch_type_to_numpy_type_mapr   r   r   torch_type_to_numpy_typeQ   s   z#TypeHelper.torch_type_to_numpy_typec                 C   sH   i }|   D ]}t|j||j< q|  D ]}t|j||j< q|S )z:Create a mapping from input/output name to numpy data type)r   r   r)   r   r	   r   )r   name_to_numpy_typer   r   r   r   r   get_io_numpy_type_map`   s   z TypeHelper.get_io_numpy_type_mapN)__name__
__module____qualname__staticmethodr   strr   r   r)   r/   r!   dtyper3   r*   r6   dictr8   r   r   r   r   r      s    "r   c                   @   sZ   e Zd ZedefddZe	ddejdejdejdeej fd	d
Z	edddZ
dS )IOBindingHelperr   c                 C   sJ   i }|  D ]\}}t| |}t|}tjt|||d||< q|S )zpReturns a dictionary of output name as key, and 1D tensor as value. The tensor has enough space for given shape.)r>   device)itemsr   r   r/   r*   emptyr!   prod)r   output_shapesrA   output_buffersr	   shaper   r4   r   r   r   get_output_buffersm   s   
z"IOBindingHelper.get_output_buffersN	input_idsposition_idsattention_maskpastc              
   C   s  |du r	t | }|  }| sJ |d|jjd|d t| |	  |dur^t
|D ].\}	}
|
 s9J |
	 }|dkrE|	 }|d|	 |
jjd|d|	  t|
 | q/|dur|| shJ |d|jjd|d t| |	  |dur| sJ |d|jjd|d t| |	  |  D ].}|j}|| }t| d|jj dt|   |||jjd|| || |	  q|S )	z)Returnas IO binding object for a session.NrI   r   past_rK   rJ   z device type=z shape=)r   r8   
io_bindingis_contiguous
bind_inputrA   r   listsizedata_ptrr   r   r	   loggerdebugbind_output)r   rI   rJ   rK   rL   rF   rE   name_to_np_typerN   ipast_irS   r   output_nameoutput_bufferr   r   r   prepare_io_bindingw   sv   

	
	
	

&	z"IOBindingHelper.prepare_io_bindingTc           
      C   sn   g }|   D ].}|j}|| }|| }|dt| |  }	|r/||	   q||	 q|S )z3Copy results to cpu. Returns a list of numpy array.r   )	r   r	   r!   rD   reshapeclonedetachappendcpu)
r   rF   rE   return_numpyort_outputsr   rZ   bufferrG   copy_tensorr   r   r   "get_outputs_from_io_binding_buffer   s    z2IOBindingHelper.get_outputs_from_io_binding_bufferN)T)r9   r:   r;   r<   r   rH   r*   TensorrQ   r\   rf   r   r   r   r   r@   l   s     		Ur@   c                   @   s   e Zd ZdZd"dedejfddZdedefd	d
Z	dd Z
dedejfddZdefddZd#deeejf dedefddZed$dedededeeef fd d!ZdS )%CudaSessionzLInference Session with IO Binding for ONNX Runtime CUDA or TensorRT providerFr   rA   c                 C   sr   || _ dd | j  D | _dd | j  D | _t| j | _| j  | _|| _	t
 | _t
 | _|| _i | _d S )Nc                 S      g | ]}|j qS r   r	   ).0r   r   r   r   
<listcomp>       z(CudaSession.__init__.<locals>.<listcomp>c                 S   rj   r   rk   )rl   r   r   r   r   rm      rn   )r   r   input_namesr   output_namesr   r8   io_name_to_numpy_typerN   enable_cuda_graphr   input_tensorsoutput_tensorsrA   buffer_sharing)selfr   rA   rr   r   r   r   __init__   s   
zCudaSession.__init__
input_namerZ   c                 C   s4   || j v sJ || jv sJ || j|< || j|< d S rg   )ro   rp   ru   )rv   rx   rZ   r   r   r   set_buffer_sharing   s   
zCudaSession.set_buffer_sharingc                 C   s   | ` | `| `d S rg   )rs   rt   rN   )rv   r   r   r   __del__   s   zCudaSession.__del__r	   tensorc              	   C   s   |j jd ur
|j jnd}t|jdkrdgnt|j}| j||j j|| j| ||	  || j
v rQ| j| j
| |j j|| j| ||	  || j| j
| < d S d S )Nr      )rA   indexlenrG   rQ   rN   rP   r   rq   rS   ru   rV   rt   )rv   r	   r{   	device_idtensor_shaper   r   r   bind_input_and_buffer_sharing   s*   
	z)CudaSession.bind_input_and_buffer_sharing
shape_dictc              
   C   sP  | j rH| D ]@\}}|| jv rG|| jv r&t| j| jt|kr"qtd| j| }tj	t|t
|dj| jd}|| j|< | || q| D ]Y\}}|| jv r|| jv rgt| j| jt|krgqL|| jv rmqL| j| }tj	t|t
|dj| jd}|| j|< | j||jj|jjdur|jjnd|t| |  qLdS )z Allocate tensors for I/O Bindingz(Expect static input shape for cuda graph)r>   )rA   Nr   )rr   rB   ro   rs   tuplerG   RuntimeErrorrq   r*   rC   r   r3   torA   r   rp   rt   ru   rN   rV   r   r}   rQ   rR   rS   )rv   r   r	   rG   numpy_dtyper{   r   r   r   allocate_buffers  sF   




"



zCudaSession.allocate_buffersNT	feed_dictrun_optionssynchronizec                 C   s   |  D ]G\}}t|tjr| sJ || jv rK| jrE| j|  | ks)J | j| j	|j	ks4J |j
jdks<J | j| | q| || q|rc| j  | j| j| | j  | jS | j| j| | jS )z$Bind input tensors and run inferencecuda)rB   
isinstancer*   rh   rO   ro   rr   rs   nelementr>   rA   r   copy_r   rN   synchronize_inputsr   run_with_iobindingsynchronize_outputsrt   )rv   r   r   r   r	   r{   r   r   r   infer<  s"   


zCudaSession.inferr   r   rr   streamr
   c                 C   s$   | d|d}|dkrt ||d< |S )NkSameAsRequested)r   arena_extend_strategyrr   r   user_compute_stream)r=   )r   rr   r   optionsr   r   r   get_cuda_provider_optionsR  s   z%CudaSession.get_cuda_provider_optionsF)NT)r   )r9   r:   r;   __doc__r   r*   rA   rw   r=   ry   rz   rh   r   	ShapeDictr   r?   r   r&   r   r<   intr   r   r   r   r   r   ri      s    "**ri   c                       s   e Zd Z				ddedejdeded	ed
ede	e
e
f dB f fddZddedefddZdde	e
ejf def fddZ  ZS )
GpuBindingFr   Nr   rA   r   enable_gpu_graphgpu_graph_idr   ru   c           
         sf   t  ||| |r| D ]
\}}	| ||	 q| | || _|r(t|nd | _|| _	d | _
d S rg   )superrw   rB   ry   r   r   copydeepcopyr   r   last_run_gpu_graph_id)
rv   r   rA   r   r   r   r   ru   rx   rZ   	__class__r   r   rw   b  s   


zGpuBinding.__init__disable_cuda_graph_in_runr
   c                 C   s.   t  }|rdn| j}|dt| || _|S )Nr   r   )r   r   add_run_config_entryr=   r   )rv   r   r   r   r   r   r   get_run_optionsy  s
   zGpuBinding.get_run_optionsr   c                    s*   |  |}| jr|dd t ||S )N'disable_synchronize_execution_providers1)r   r   r   r   r   )rv   r   r   r   r   r   r   r     s   
zGpuBinding.infer)Fr   r   Nr   )r9   r:   r;   r   r*   rA   r   r&   r   r?   r=   rw   r   r   rh   r   __classcell__r   r   r   r   r   a  s,    *r   c                	   @   sZ   e Zd ZdZddedejdedefdd	Z	
	dde	de
deeef dB defddZdS )GpuBindingManagerzA manager for I/O bindings that support multiple CUDA Graphs.
    One cuda graph is reused for same input shape. Automatically add a new cuda graph for new input shape.
    r   r|   r   rA   r   max_cuda_graphsc                 C   s(   || _ || _g | _d | _|| _|| _d S rg   )r   rA   graph_bindingsno_graph_bindingr   r   )rv   r   rA   r   r   r   r   r   rw     s   
zGpuBindingManager.__init__FNr   use_cuda_graphru   r
   c              	   C   s   | j D ]}|j|kr|  S qt| j | jks|s7| jd u r.t| j| j|| j|d| _| jS | j	| | jS t| j| j|dt| j | j|d}| j 
| |S )N)r   ru   T)r   r   r   ru   )r   r   r~   r   r   r   r   rA   r   r   r`   )rv   r   r   ru   gpu_graph_bindingr   r   r   get_binding  s.   


	zGpuBindingManager.get_binding)r   r|   )FN)r9   r:   r;   r   r   r*   rA   r   rw   r   r&   r?   r=   r   r   r   r   r   r   r     s    r   )r   loggingcollectionsr   collections.abcr   typingr   r!   r*   onnxruntimer   r   r=   r   rQ   r   r   	getLoggerr9   rT   r   r@   ri   r   r   r   r   r   r   <module>   s     
Zq ,