o
    `۷iL                     @   s  d dl Z d dlZd dlZd dlZd dlZd dlmZ d dlZd dlm	  m
Z
 d dlm  mZ d dlmZ d dlmZ d dlmZ d dlZeeZdZdZze jZW n ey`   e jZY nw dd	 Zd
d ZdedefddZ dedefddZ!dd Z"dS )    N)ThreadPoolExecutor)run_background_task)	GcsClient)_PARENT_DEATH_THREASHOLD   i   c                  C   s8   t jdv rd S ttjd } | dksJ td|  | S )Nwin32cygwinRAY_RAYLET_PIDr   zraylet pid is %s)sysplatformintosenvironloggerinfo)
raylet_pid r   R/home/ubuntu/vllm_env/lib/python3.10/site-packages/ray/_private/process_watcher.pyget_raylet_pid%   s   
r   c                 C   s^   t jdv r	tdt }tjrtd t| |||}t
|S td t	|| ||}t
|S )al  
    Creates an asyncio task to periodically check if the raylet process is still
    running. If raylet is dead for _PARENT_DEATH_THREASHOLD (5) times, prepare to exit
    as follows:

    - Write logs about whether the raylet exit is graceful, by looking into the raylet
    log and search for term "SIGTERM",
    - Flush the logs via GcsClient,
    - Exit.
    r   z&can't check raylet process in Windows.check_parent_via_pipe_check_parent)r   r   RuntimeErrorr   dashboard_constsPARENT_HEALTH_CHECK_BY_PIPEr   r   _check_parent_via_piper   r   )log_dir
gcs_clientparent_dead_callbackloopr   check_parent_taskr   r   r   create_check_raylet_task1   s   

	
r!   r   r   c           	   
   C   sT  t j| d}d}d}z_t|dddN}|dtj td| t	 }||tj
 | }tdd	 |D rB|d
7 }t| n|dt d7 }|dd|t d   7 }d}W d    n1 sdw   Y  W n$ ty } z|d| d| d7 }t| d}W Y d }~nd }~ww |rt| tjjjtj||d d S t| d S )Nz
raylet.outFzRaylet is terminated. rzutf-8)encodingr   c                 s   s    | ]}d |v V  qdS )zRaylet received SIGTERMNr   ).0liner   r   r   	<genexpr>]   s    z+report_raylet_error_logs.<locals>.<genexpr>zTermination is graceful.zTermination is unexpected. Possible reasons include: (1) SIGKILL by the user or system OOM killer, (2) Invalid memory access from Raylet causing SIGSEGV or SIGBUS, (3) Other termination signals. Last z lines of the Raylet logs:
z    TzFailed to read Raylet logs at z: !)r   )r   pathjoinopenseekioSEEK_ENDmaxtell_RAYLET_LOG_MAX_TAIL_SIZESEEK_SET	readlinesanyr   r   _RAYLET_LOG_MAX_PUBLISH_LINES	Exception	exceptionerrorray_privateutilspublish_error_to_driverray_constantsRAYLET_DIED_ERROR)	r   r   log_pathr7   msgfposraylet_logser   r   r   report_raylet_error_logsN   sL   	


rD   c              
      s   	 z6t dd}||dd I d H }W d    n1 sw   Y  t|dkr7|d t| | td W n tyR } ztd|  W Y d }~nd }~ww q)	NT   )max_workersc                   S   s
   t j S )N)r   stdinreadliner   r   r   r   <lambda>   s   
 z(_check_parent_via_pipe.<locals>.<lambda>r   z+_check_parent_via_pipe: The parent is dead.zIraylet health checking is failed. The agent process may leak. Exception: )	r   run_in_executorlenrD   r   exitr5   r   r6   )r   r   r   r   executor
input_datarC   r   r   r   r   ~   s,   

r   c           
         s   zjt  }d}	 | }|du }d}d}	|r!|jdk}| |jk}	|s'|s'|	r`|d7 }td| dt d| d	| d
| d|	 d |tk rQtt	j
I dH  q|d t|| td nd}tt	j
I dH  q	 ty   td td Y dS w )z0Check if raylet is dead and fate-share if it is.r   TNFrE   zRaylet is considered dead z X. If it reaches to z&, the agent will kill itself. Parent: z, parent_gone: z, init_assigned_for_parent: z, parent_changed: .z"_check_parent: The parent is dead.z$Failed to check parent PID, exiting.)psutilProcessparentpidr   warningr   asynciosleepr   'DASHBOARD_AGENT_CHECK_PARENT_INTERVAL_SrD   r   rL   r5   r6   )
r   r   r   r   	curr_procparent_death_cntrR   parent_goneinit_assigned_for_parentparent_changedr   r   r   r      sX   




'
r   )#rU   r,   loggingr   r   concurrent.futuresr   r8   ray._private.ray_constantsr9   r<   ray.dashboard.consts	dashboardconstsr   ray._common.utilsr   ray._rayletr   r   rP   	getLogger__name__r   r4   r0   create_taskAttributeErrorensure_futurer   r!   strrD   r   r   r   r   r   r   <module>   s<    


0
