o
    iC                     @   s  d dl Z d dlmZ d dlmZ d dlmZmZmZm	Z	m
Z
 d dlmZ d dlmZ d dlmZmZ d dlmZ d dlmZ d d	lmZ eeZd
e
defddZe Zedde	ddfde
deee	 f dedeee	 f def
ddZedde
defddZe dde
defddZ!edde
fdd Z"ed!de
fd"d#Z#e d$e	dfde
d%efd&d'Z$d(efd)d*Z%dS )+    N)
HTTPStatus)	Annotated)	APIRouterFastAPIHTTPExceptionQueryRequest)JSONResponse)WeightTransferInitRequestWeightTransferUpdateRequest)EngineClient)init_logger)	PauseModerequestreturnc                 C   s
   | j jjS N)appstateengine_clientr    r   \/home/ubuntu/vllm_env/lib/python3.10/site-packages/vllm/entrypoints/serve/rlhf/api_router.pyr      s   
r   z/pauseabortFTraw_requestmodewait_for_inflight_requestsclear_cachec              
      s   t | }z|j|||dI dH  tdditjjdW S  ty9 } ztdt|itjjdW  Y d}~S d}~w t	y\ } zt
d tdd| itjjdW  Y d}~S d}~ww )	a  Pause generation requests to allow weight updates.

    Args:
        mode: How to handle in-flight requests:
            - ``"abort"``: Abort all in-flight requests immediately (default).
            - ``"wait"``: Wait for in-flight requests to complete.
            - ``"keep"``: Freeze requests in queue; they resume on /resume.
        wait_for_inflight_requests: DEPRECATED. Use ``mode="wait"`` instead.
        clear_cache: DEPRECATED. Whether to clear KV/prefix caches after
            draining. Ignored when mode="keep".
    )r   r   r   Nstatuspausedcontentstatus_codeerrorzFailed to pause generationzFailed to pause generation: )r   pause_generationr	   r   OKvalue
ValueErrorstrBAD_REQUEST	Exceptionlogger	exceptionINTERNAL_SERVER_ERROR)r   r   r   r   engineerrr   r   r   r#      s4   

r#   z/resumec              
      sx   t | }z| I dH  tdditjjdW S  ty; } ztd tdd| itj	jdW  Y d}~S d}~ww )z Resume generation after a pause.Nr   resumedr   zFailed to resume generationr"   zFailed to resume generation: )
r   resume_generationr	   r   r$   r%   r)   r*   r+   r,   )r   r-   r.   r   r   r   r0   K   s    
r0   z
/is_pausedc              
      st   t | }z	| I dH }W n$ ty2 } ztd tdd| itjjdW  Y d}~S d}~ww td|idS )z Return the current pause status.NzFailed to fetch pause statusr"   zFailed to fetch pause status: r   	is_pausedr    )	r   r1   r)   r*   r+   r	   r   r,   r%   )r   r-   r   r.   r   r   r   r1   _   s   
r1   z/init_weight_transfer_enginec              
      s   z	|   I d H }W n t jy } ztddd|d }~ww |d}|d u r0ttjjddt| t	|dI d H  t
ddid	S )
N  Invalid JSON formatr!   detail	init_infoz#Missing 'init_info' in request body)r7   messagezWeight transfer initializedr2   )jsonJSONDecodeErrorr   getr   r(   r%   r   init_weight_transfer_enginer
   r	   )r   bodyer7   r   r   r   r<   q   s"   

r<   z/update_weightsc              
      s   z	|   I d H }W n t jy } ztddd|d }~ww |d}|d u r0ttjjddt| jt	|ddI d H  t
dd	id
S )Nr3   r4   r5   update_infoz%Missing 'update_info' in request body)r?   r   r8   zWeights updatedr2   )r9   r:   r   r;   r   r(   r%   r   update_weightsr   r	   )r   r=   r>   r?   r   r   r   r@      s"   
r@   z/get_world_size
include_dpc                    s.   t | jj}|r|j}n|j}td|idS )a  Get the world size from the parallel config.

    Args:
        include_dp: If True (default), returns the world size including
            data parallelism (TP * PP * DP). If False, returns the world
            size without data parallelism (TP * PP).
    
world_sizer2   )r   vllm_configparallel_configworld_size_across_dprB   r	   )r   rA   rD   rB   r   r   r   get_world_size   s   rF   r   c                 C   s   t jsd S | t d S r   )envsVLLM_SERVER_DEV_MODEinclude_routerrouter)r   r   r   r   attach_router   s   rK   )&r9   httpr   typingr   fastapir   r   r   r   r   fastapi.responsesr	   	vllm.envsrG   %vllm.distributed.weight_transfer.baser
   r   vllm.engine.protocolr   vllm.loggerr   vllm.v1.enginer   __name__r*   r   rJ   postboolr#   r0   r;   r1   r<   r@   rF   rK   r   r   r   r   <module>   sX   ,