o
    wi~                     @   s   d dl Z d dlZd dlZd dlZd dlmZ d dlT d dlmZm	Z	m
Z
 ddlmZ ddlmZ ddlmZ d	d
lmZmZmZ d	dlmZmZmZmZ G dd deZdS )    N)MessageToJson)*)model_config_pb2service_pb2service_pb2_grpc   )_auth)InferenceServerClientPlugin)Request   )MAX_GRPC_MESSAGE_SIZEInferenceServerClientBaseKeepAliveOptions)_get_inference_request_grpc_compression_typeraise_errorraise_error_grpcc                       s  e Zd ZdZ								d> fdd	Zdd Zdd	 Zd
d Zdd Zdd Z	d?ddZ
d?ddZ	d@ddZ	dAddZ				dBddZ				dBddZ	dAddZ				dCdd Z			dAd!d"Z					dDd#d$Zdi dddfd%d&Z	dEd'd(Z	dAd)d*ZdAd+d,Z	dBd-d.Z	dFd0d1Z	d@d2d3Z	dBd4d5Z		d?d6d7Z	d@d8d9Z				/			/					dGd:d;Z			dHd<d=Z  Z S )IInferenceServerClientad  This feature is currently in beta and may be subject to change.

    An analogy of the :py:class:`tritonclient.grpc.InferenceServerClient` to enable
    calling via asyncio syntax. The object is intended to be used by a single
    thread and simultaneously calling methods with different threads is not
    supported and can cause undefined behavior.

    FNc
                    sz  t    |	d ur|	}
n|st }dtfdtfd|jfd|jfd|jfd|jfg}
|r6tj	j
|||
d| _n{|rd  } }}|d ur[t|d}| }W d    n1 sVw   Y  |d urxt|d}| }W d    n1 ssw   Y  |d urt|d}| }W d    n1 sw   Y  tj|||d	}tj	j
|||
d| _n	tj	j||
d| _t| j| _|| _d S )
Nzgrpc.max_send_message_lengthzgrpc.max_receive_message_lengthzgrpc.keepalive_time_mszgrpc.keepalive_timeout_msz#grpc.keepalive_permit_without_callsz!grpc.http2.max_pings_without_data)optionsrb)root_certificatesprivate_keycertificate_chain)super__init__r   r   keepalive_time_mskeepalive_timeout_mskeepalive_permit_without_callshttp2_max_pings_without_datagrpcaiosecure_channel_channelopenreadssl_channel_credentialsinsecure_channelr   GRPCInferenceServiceStub_client_stub_verbose)selfurlverbosesslr   r   r   credskeepalive_optionschannel_argschannel_optrc_bytespk_bytescc_bytesrc_fspk_fscc_fs	__class__ [/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/tritonclient/grpc/aio/__init__.pyr   <   sR   




zInferenceServerClient.__init__c                 C   s   |rt t|ddS |S )NT)preserving_proto_field_name)jsonloadsr   )r*   responseas_jsonr:   r:   r;   _return_response{   s   z&InferenceServerClient._return_responsec                    s   | S Nr:   r*   r:   r:   r;   
__aenter__   s   z InferenceServerClient.__aenter__c                    s   |   I d H  d S rB   )close)r*   typevalue	tracebackr:   r:   r;   	__aexit__   s   zInferenceServerClient.__aexit__c                    s   | j  I dH  dS )zWClose the client. Any future calls to server
        will result in an Error.

        N)r"   rE   rC   r:   r:   r;   rE      s   zInferenceServerClient.closec                 C   s2   t |}| | |jd ur|j }|S d}|S )Nr:   )r
   _call_pluginheadersitems)r*   rK   requestrequest_metadatar:   r:   r;   _get_metadata   s   
z#InferenceServerClient._get_metadatac              
         |  |}z&t }| jrtd|| | jj|||dI dH }| jr)t| |jW S  t	j
yD } zt| W Y d}~dS d}~ww )zJRefer to :py:meth:`tritonclient.grpc.InferenceServerClient.is_server_live`zis_server_live, metadata {}
{}rM   metadatatimeoutN)rO   r   ServerLiveRequestr)   printformatr(   
ServerLiveliver   RpcErrorr   r*   rK   client_timeoutrR   rM   r?   	rpc_errorr:   r:   r;   is_server_live       
z$InferenceServerClient.is_server_livec              
      rP   )zKRefer to :py:meth:`tritonclient.grpc.InferenceServerClient.is_server_ready`zis_server_ready, metadata {}
{}rQ   N)rO   r   ServerReadyRequestr)   rU   rV   r(   ServerReadyreadyr   rY   r   rZ   r:   r:   r;   is_server_ready   r^   z%InferenceServerClient.is_server_ready c           	   
      s   |  |}z3t|tkrtd tj||d}| jr#td|| | j	j
|||dI dH }| jr6t| |jW S  tjyQ } zt| W Y d}~dS d}~ww )zJRefer to :py:meth:`tritonclient.grpc.InferenceServerClient.is_model_ready`model version must be a stringnameversionzis_model_ready, metadata {}
{}rQ   N)rO   rF   strr   r   ModelReadyRequestr)   rU   rV   r(   
ModelReadyra   r   rY   r   )	r*   
model_namemodel_versionrK   r[   rR   rM   r?   r\   r:   r:   r;   is_model_ready   s(   
z$InferenceServerClient.is_model_readyc              
         |  |}z)t }| jrtd|| | jj|||dI dH }| jr)t| | ||W S  t	j
yG } zt| W Y d}~dS d}~ww )zPRefer to  :py:meth:`tritonclient.grpc.InferenceServerClient.get_server_metadata`z#get_server_metadata, metadata {}
{}rQ   N)rO   r   ServerMetadataRequestr)   rU   rV   r(   ServerMetadatarA   r   rY   r   r*   rK   r@   r[   rR   rM   r?   r\   r:   r:   r;   get_server_metadata   s    
z)InferenceServerClient.get_server_metadatac           
   
         |  |}z6t|tkrtd tj||d}| jr#td|| | j	j
|||dI dH }| jr6t| | ||W S  tjyT }	 zt|	 W Y d}	~	dS d}	~	ww )zNRefer to :py:meth:`tritonclient.grpc.InferenceServerClient.get_model_metadata`rd   re   z"get_model_metadata, metadata {}
{}rQ   N)rO   rF   rh   r   r   ModelMetadataRequestr)   rU   rV   r(   ModelMetadatarA   r   rY   r   
r*   rk   rl   rK   r@   r[   rR   rM   r?   r\   r:   r:   r;   get_model_metadata   (   
	z(InferenceServerClient.get_model_metadatac           
   
      rs   )zLRefer to :py:meth:`tritonclient.grpc.InferenceServerClient.get_model_config`rd   re   z get_model_config, metadata {}
{}rQ   N)rO   rF   rh   r   r   ModelConfigRequestr)   rU   rV   r(   ModelConfigrA   r   rY   r   rv   r:   r:   r;   get_model_config   rx   z&InferenceServerClient.get_model_configc              
      rn   )zVRefer to :py:meth:`tritonclient.grpc.InferenceServerClient.get_model_repository_index`z*get_model_repository_index, metadata {}
{}rQ   N)rO   r   RepositoryIndexRequestr)   rU   rV   r(   RepositoryIndexrA   r   rY   r   rq   r:   r:   r;   get_model_repository_index  s(   
z0InferenceServerClient.get_model_repository_indexc              
      s   |  |}zJtj|d}|dur||jd _| jr"td|| |dur5| D ]
\}}	|	|j| _	q*| j
j|||dI dH  | jrNtd| W dS W dS  tjyh }
 zt|
 W Y d}
~
dS d}
~
ww )zFRefer to :py:meth:`tritonclient.grpc.InferenceServerClient.load_model`rk   Nconfigz2load_model, metadata {}
override files omitted:
{}rQ   zLoaded model '{}')rO   r   RepositoryModelLoadRequest
parametersstring_paramr)   rU   rV   rL   bytes_paramr(   RepositoryModelLoadr   rY   r   )r*   rk   rK   r   filesr[   rR   rM   pathcontentr\   r:   r:   r;   
load_model+  s2   
	z InferenceServerClient.load_modelc              
      s   |  |}z3tj|d}||jd _| jrtd|| | jj	|||dI dH  | jr7td| W dS W dS  t
jyQ } zt| W Y d}~dS d}~ww )zHRefer to :py:meth:`tritonclient.grpc.InferenceServerClient.unload_model`r   unload_dependentszunload_model, metadata {}
{}rQ   NzUnloaded model '{}')rO   r   RepositoryModelUnloadRequestr   
bool_paramr)   rU   rV   r(   RepositoryModelUnloadr   rY   r   )r*   rk   rK   r   r[   rR   rM   r\   r:   r:   r;   unload_modelK  s"   
z"InferenceServerClient.unload_modelc           
   
      rs   )zURefer to ::py:meth:`tritonclient.grpc.InferenceServerClient.get_inference_statistics`rd   re   z(get_inference_statistics, metadata {}
{}rQ   N)rO   rF   rh   r   r   ModelStatisticsRequestr)   rU   rV   r(   ModelStatisticsrA   r   rY   r   rv   r:   r:   r;   get_inference_statisticsa  s0   
	z.InferenceServerClient.get_inference_statisticsc              
      s   |  |}zXt }|dur|dkr||_| D ]\}}	|	du r(|j|  q|j| jt|	t	r5|	n|	g q| j
rEtd|| | jj|||dI dH }
| j
rXt|
 | |
|W S  tjyv } zt| W Y d}~dS d}~ww )zQRefer to :py:meth:`tritonclient.grpc.InferenceServerClient.update_trace_settings`Nrc   z%update_trace_settings, metadata {}
{}rQ   )rO   r   TraceSettingRequestrk   rL   settingsrG   extend
isinstancelistr)   rU   rV   r(   TraceSettingrA   r   rY   r   )r*   rk   r   rK   r@   r[   rR   rM   keyrG   r?   r\   r:   r:   r;   update_trace_settings  s4   
	
z+InferenceServerClient.update_trace_settingsc           	   
      s   |  |}z4t }|dur|dkr||_| jr!td|| | jj|||dI dH }| jr4t| | 	||W S  t
jyR } zt| W Y d}~dS d}~ww )zNRefer to :py:meth:`tritonclient.grpc.InferenceServerClient.get_trace_settings`Nrc   z"get_trace_settings, metadata {}
{}rQ   )rO   r   r   rk   r)   rU   rV   r(   r   rA   r   rY   r   )	r*   rk   rK   r@   r[   rR   rM   r?   r\   r:   r:   r;   get_trace_settings  s$   
z(InferenceServerClient.get_trace_settingsc              
      s   |  |}z\t }| D ].\}}|du r|j|  q|dks%|dkr,||j| _q|dkr7||j| _q||j| _q| jrIt	d
|| | jj|||dI dH }	| jr\t	|	 | |	|W S  tjyz }
 zt|
 W Y d}
~
dS d}
~
ww )zORefer to :py:meth:`tritonclient.grpc.InferenceServerClient.update_log_settings`Nlog_file
log_formatlog_verbose_levelz#update_log_settings, metadata {}
{}rQ   )rO   r   LogSettingsRequestrL   r   r   uint32_paramr   r)   rU   rV   r(   LogSettingsrA   r   rY   r   )r*   r   rK   r@   r[   rR   rM   r   rG   r?   r\   r:   r:   r;   update_log_settings  s0   
z)InferenceServerClient.update_log_settingsc              
      rn   )zLRefer to :py:meth:`tritonclient.grpc.InferenceServerClient.get_log_settings`z get_log_settings, metadata {}
{}rQ   N)rO   r   r   r)   rU   rV   r(   r   rA   r   rY   r   rq   r:   r:   r;   get_log_settings  s    
z&InferenceServerClient.get_log_settingsc           	   
         |  |}z+tj|d}| jrtd|| | jj|||dI dH }| jr+t| | ||W S  t	j
yI } zt| W Y d}~dS d}~ww )z[Refer to :py:meth:`tritonclient.grpc.InferenceServerClient.get_system_shared_memory_status`rf   z/get_system_shared_memory_status, metadata {}
{}rQ   N)rO   r   SystemSharedMemoryStatusRequestr)   rU   rV   r(   SystemSharedMemoryStatusrA   r   rY   r   	r*   region_namerK   r@   r[   rR   rM   r?   r\   r:   r:   r;   get_system_shared_memory_status  s(   
z5InferenceServerClient.get_system_shared_memory_statusr   c           
   
      s   |  |}z0tj||||d}| jrtd|| | jj|||dI dH  | jr4td| W dS W dS  tj	yN }	 zt
|	 W Y d}	~	dS d}	~	ww )zYRefer to :py:meth:`tritonclient.grpc.InferenceServerClient.register_system_shared_memory`)rf   r   offset	byte_sizez-register_system_shared_memory, metadata {}
{}rQ   Nz.Registered system shared memory with name '{}')rO   r   !SystemSharedMemoryRegisterRequestr)   rU   rV   r(   SystemSharedMemoryRegisterr   rY   r   )
r*   rf   r   r   r   rK   r[   rR   rM   r\   r:   r:   r;   register_system_shared_memory  s,   
z3InferenceServerClient.register_system_shared_memoryc              
         |  |}z8tj|d}| jrtd|| | jj|||dI dH  | jr<|dkr5td| W dS td W dS W dS  tj	yV } zt
| W Y d}~dS d}~ww )z\Refer to  :py:meth:`tritonclient.grpc.InferenceServerClient.unregister_system_shared_memory`r   z/unregister_system_shared_memory, metadata {}
{}rQ   Nrc   z0Unregistered system shared memory with name '{}'z-Unregistered all system shared memory regions)rO   r   #SystemSharedMemoryUnregisterRequestr)   rU   rV   r(   SystemSharedMemoryUnregisterr   rY   r   r*   rf   rK   r[   rR   rM   r\   r:   r:   r;   unregister_system_shared_memory  s0   

z5InferenceServerClient.unregister_system_shared_memoryc           	   
      r   )zZRefer to  :py:meth:`tritonclient.grpc.InferenceServerClient.get_cuda_shared_memory_status`r   z-get_cuda_shared_memory_status, metadata {}
{}rQ   N)rO   r   CudaSharedMemoryStatusRequestr)   rU   rV   r(   CudaSharedMemoryStatusrA   r   rY   r   r   r:   r:   r;   get_cuda_shared_memory_status+  s(   
z3InferenceServerClient.get_cuda_shared_memory_statusc           
   
      s   |  |}z3tj|t|||d}| jrtd|| | jj	|||dI dH  | jr7td| W dS W dS  t
jyQ }	 zt|	 W Y d}	~	dS d}	~	ww )zXRefer to  :py:meth:`tritonclient.grpc.InferenceServerClient.register_cuda_shared_memory`)rf   
raw_handle	device_idr   z+register_cuda_shared_memory, metadata {}
{}rQ   Nz,Registered cuda shared memory with name '{}')rO   r   CudaSharedMemoryRegisterRequestbase64	b64decoder)   rU   rV   r(   CudaSharedMemoryRegisterr   rY   r   )
r*   rf   r   r   r   rK   r[   rR   rM   r\   r:   r:   r;   register_cuda_shared_memoryB  s2   

z1InferenceServerClient.register_cuda_shared_memoryc              
      r   )zZRefer to  :py:meth:`tritonclient.grpc.InferenceServerClient.unregister_cuda_shared_memory`r   z-unregister_cuda_shared_memory, metadata {}
{}rQ   Nrc   z.Unregistered cuda shared memory with name '{}'z+Unregistered all cuda shared memory regions)rO   r   !CudaSharedMemoryUnregisterRequestr)   rU   rV   r(   CudaSharedMemoryUnregisterr   rY   r   r   r:   r:   r;   unregister_cuda_shared_memoryb  s,   
z3InferenceServerClient.unregister_cuda_shared_memoryc                    s   |  |}t|tkrtd t|||||||||	|
|d}| jr*td|| z| jj	|||t
|dI dH }| jrAt| t|}|W S  tjy_ } zt| W Y d}~dS d}~ww )zARefer to :py:meth:`tritonclient.grpc.InferenceServerClient.infer`rd   rk   inputsrl   
request_idoutputssequence_idsequence_startsequence_endpriorityrS   r   zinfer, metadata {}
{})rM   rR   rS   compressionN)rO   rF   rh   r   r   r)   rU   rV   r(   
ModelInferr   InferResultr   rY   r   )r*   rk   r   rl   r   r   r   r   r   r   rS   r[   rK   compression_algorithmr   rR   rM   r?   resultr\   r:   r:   r;   inferz  sD   
zInferenceServerClient.inferc           
   
   C   s|   |  |}dd }G dd d}z| jj||||t|d}||| jW S  tjy= }	 zt|	 W Y d}	~	dS d}	~	ww )a  Runs an asynchronous inference over gRPC bi-directional streaming
        API.

        Parameters
        ----------
        inputs_iterator : asynchronous iterator
            Async iterator that yields a dict(s) consists of the input
            parameters to the :py:meth:`tritonclient.grpc.InferenceServerClient.async_stream_infer` function defined in
            :py:class:`tritonclient.grpc.InferenceServerClient`.
        stream_timeout : float
            Optional stream timeout. The stream will be closed once the
            specified timeout expires.
        headers: dict
            Optional dictionary specifying additional HTTP headers to include
            in the request.
        compression_algorithm : str
            Optional grpc compression algorithm to be used on client side.
            Currently supports "deflate", "gzip" and None. By default, no
            compression is used.

        Returns
        -------
        asynchronous iterator
            Yield tuple holding (:py:class:`tritonclient.grpc.InferResult`, :py:class:`tritonclient.grpc.InferenceServerException`) objects.

            Note
            ----
            This object can be used to cancel the inference request like below:

            >>> it = stream_infer(...)
            >>> ret = it.cancel()

        Raises
        ------
        :py:class:`tritonclient.grpc.InferenceServerException`
            If inputs_iterator does not yield the correct input.

        c                 S  s:  | 2 z3 d H W }t |tkrtd d|vsd|vrtd d|vr'd|d< t |d tkr3td d|vr;d |d< d	|vrCd|d	< d
|vrKd|d
< d|vrSd|d< d|vr[d|d< d|vrcd|d< d|vrkd |d< d|vrsd |d< t|d |d |d |d	 |d |d
 |d |d |d |d |d dV  q6 d S )Nz&inputs_iterator is not yielding a dictrk   r   zGmodel_name and/or inputs is missing from inputs_iterator's yielded dictrl   rc   zmodel_version must be a stringr   r   r   r   r   Fr   r   rS   r   r   )rF   dictr   rh   r   )inputs_iteratorr   r:   r:   r;   _request_iterator  sT   
z=InferenceServerClient.stream_infer.<locals>._request_iteratorc                   @   s,   e Zd Zdd Zdd Zdd Zdd Zd	S )
z=InferenceServerClient.stream_infer.<locals>._ResponseIteratorc                 S   s   || _ || _d S rB   )
_grpc_callr)   )r*   	grpc_callr,   r:   r:   r;   r     s   
zFInferenceServerClient.stream_infer.<locals>._ResponseIterator.__init__c                 S   s   | S rB   r:   rC   r:   r:   r;   	__aiter__  s   zGInferenceServerClient.stream_infer.<locals>._ResponseIterator.__aiter__c                    s\   | j   I d H }| jrt| d  }}|jdkr%t|jd}||fS t|j}||fS )Nrc   )msg)	r   r   	__anext__r)   rU   error_messageInferenceServerExceptionr   infer_response)r*   r?   r   errorr:   r:   r;   r     s   

zGInferenceServerClient.stream_infer.<locals>._ResponseIterator.__anext__c                 S   s
   | j  S rB   )r   cancelrC   r:   r:   r;   r     s   
zDInferenceServerClient.stream_infer.<locals>._ResponseIterator.cancelN)__name__
__module____qualname__r   r   r   r   r:   r:   r:   r;   _ResponseIterator  s
    r   )rR   rS   r   N)rO   r(   ModelStreamInferr   r)   r   rY   r   )
r*   r   stream_timeoutrK   r   rR   r   r   r   r\   r:   r:   r;   stream_infer  s   
-,z"InferenceServerClient.stream_infer)FFNNNNNN)NN)rc   NN)NFN)rc   NFN)NNNN)rc   rc   NFN)NNFN)r   NN)rc   Nrc   r   FFr   NNNNN)NNN)!r   r   r   __doc__r   rA   rD   rI   rE   rO   r]   rb   rm   rr   rw   r{   r~   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   __classcell__r:   r:   r8   r;   r   2   s    ?
	






#

!
$







!

9r   )r   sysr   	rapidjsonr=   google.protobuf.json_formatr   tritonclient.grpcr   r   r   rc   r   auth_pluginr	   _requestr
   _clientr   r   r   _utilsr   r   r   r   r   r:   r:   r:   r;   <module>   s   