o
    }oi                     @   s   d dl Z d dlmZ d dlZd dlmZmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZ G dd	 d	e
Ze Ze ZG d
d deZeddd Zeddd ZeddefddZdS )    N)Path)FastAPIHTTPException)	BaseModel)BaseSettings)NemoQueryLLM)loggingc                       sr   e Zd ZU eed< eed< eed<  fddZedd Zedd	 Z	ed
d Z
edd Zedd Z  ZS )TritonSettings_triton_service_port_triton_service_ip_triton_request_timeoutc              
      s   t t|   z7ttjdd| _tjdd| _ttjdd| _	tjdd
 d	k| _tjd
d
 d	k| _W d S  tyW } ztd| W Y d }~d S d }~ww )NTRITON_PORTi  TRITON_HTTP_ADDRESSz0.0.0.0TRITON_REQUEST_TIMEOUT<   OPENAI_FORMAT_RESPONSEFalsetrueOUTPUT_GENERATION_LOGITSzQAn exception occurred trying to retrieve set args in TritonSettings class. Error:)superr	   __init__intosenvirongetr
   r   r   lower_openai_format_response_output_generation_logits	Exceptionr   error)selfr   	__class__ V/home/ubuntu/.local/lib/python3.10/site-packages/nemo/deploy/service/rest_model_api.pyr      s   zTritonSettings.__init__c                 C      | j S N)r
   r    r#   r#   r$   triton_service_port(      z"TritonSettings.triton_service_portc                 C   r%   r&   )r   r'   r#   r#   r$   triton_service_ip,   r)   z TritonSettings.triton_service_ipc                 C   r%   r&   )r   r'   r#   r#   r$   triton_request_timeout0   r)   z%TritonSettings.triton_request_timeoutc                 C   r%   )zd
        Retuns the response from Triton server in OpenAI compatible format if set to True.
        )r   r'   r#   r#   r$   openai_format_response4      z%TritonSettings.openai_format_responsec                 C   r%   )zf
        Retuns the generation logits along with text in Triton server output if set to True.
        )r   r'   r#   r#   r$   output_generation_logits;   r-   z'TritonSettings.output_generation_logits)__name__
__module____qualname__r   __annotations__strr   propertyr(   r*   r+   r,   r.   __classcell__r#   r#   r!   r$   r	      s   
 



r	   c                   @   sv   e Zd ZU eed< eed< dZeed< dZeed< dZ	eed< d	Z
eed
< dZeed< dZedB ed< dZeed< dS )CompletionRequestmodelprompti   
max_tokensg      ?temperatureg        top_p   top_kFstreamNstopfrequency_penalty)r/   r0   r1   r3   r2   r9   r   r:   floatr;   r=   r>   boolr?   r@   r#   r#   r#   r$   r6   G   s   
 r6   z
/v1/healthc                   C   s   ddiS )Nstatusokr#   r#   r#   r#   r$   health_checkS   s   rE   z/v1/triton_healthc               
      s   dt j dtt j d} td|   ztj| dd}|jdkr(dd	iW S t	d
dd tj
yE } zt	d
dt| dd}~ww )a:  
    This method exposes endpoint "/triton_health" which can be used to verify if Triton server is accessible while running the REST or FastAPI application.
    Verify by running: curl http://service_http_address:service_port/v1/triton_health and the returned status should inform if the server is accessible.
    zhttp://:z/v2/health/readyz+Attempting to connect to Triton server at:    )timeout   rC   z$Triton server is reachable and readyi  zTriton server is not ready)status_codedetailzCannot reach Triton server: N)triton_settingsr*   r3   r(   r   inforequestsr   rJ   r   RequestException)
triton_urlresponseer#   r#   r$   check_triton_healthX   s   

rS   z/v1/completions/requestc              
   C   s   z6t jd tt j }t|| jd}|j| jg| j| j	| j
| jt jt jt jd}t jr.|W S d|d d iW S  tyR } ztd| ddiW  Y d }~S d }~ww )	NrF   )url
model_name)promptsmax_output_lenr=   r;   r:   init_timeoutr,   r.   outputr   zIAn exception occurred with the post request to /v1/completions/ endpoint:r   zAn exception occurred)rL   r*   r3   r(   r   r7   	query_llmr8   r9   r=   r;   r:   r+   r,   r.   r   r   r   )rT   rU   nqrZ   r   r#   r#   r$   completions_v1l   s,   r]   )r   pathlibr   rN   fastapir   r   pydanticr   pydantic_settingsr   nemo.deploy.nlpr   
nemo.utilsr   r	   apprL   r6   r   rE   rS   postr]   r#   r#   r#   r$   <module>   s$   
,

