o
    پiEj                     @  sJ  d dl mZ d dlZd dlZd dlZd dlZd dlmZ d dlm	Z	m
Z
mZmZmZ d dlZd dlmZ d dlZd dlZd dlmZ d dlmZmZmZmZmZmZ d dlmZ d dlm Z  d d	l!m"Z"m#Z#m$Z$m%Z% d d
l&m'Z' d dl(m)Z)m*Z*m+Z+m,Z,m-Z- e.e/Z0G dd deZ1G dd deZ2G dd deZ3G dd deZ4dS )    )annotationsN)cache)DictListOptionalTupleUnion)web)BaseKVBootstrapServerBaseKVManagerBaseKVReceiverBaseKVSenderKVArgsKVPoll)DisaggregationMode)get_pp_group)get_attention_dp_rankget_attention_dp_sizeget_attention_tp_rankget_attention_tp_size)
ServerArgs)format_tcp_addressget_local_ip_autoget_zmq_socket_on_hostis_valid_ipv6_addressmaybe_wrap_ipv6_addressc                   @  sF   e Zd Z	dd d
dZdd Zedd!ddZd"ddZd#ddZdS )$CommonKVManagerFargsr   disaggregation_moder   server_argsr   is_mla_backendOptional[bool]c                 C  sN  || _ || _|| _|j| _|j| _|j| _t | _	t
 | _t | _t | _|jr*dn|j| _| j jr6| j jnd| _|j| _| j j| _t | _t }t| j}t|tj|d\| _| _t d| d| j  i | _!| jt"j#kr| $  i | _%i | _&t' | _(d S | jt"j)kri | _*t+, | _-i | _.i | _/i | _0i | _1i | _2d S t3d| j )N   r   )hostzkv manager bind to :z Unsupported DisaggregationMode: )4kv_argsr    r   r#   bootstrap_hostdisaggregation_bootstrap_portbootstrap_portdist_init_addrr   attn_tp_sizer   attn_tp_rankr   attn_dp_sizer   attn_dp_rankenable_dp_attentiondp_sizesystem_dp_sizesystem_dp_rankpp_sizepp_rankr   local_ipzmqContextr   r   PULL	rank_portserver_socketloggerdebugrequest_statusr   PREFILL_register_to_bootstraptransfer_infosdecode_kv_args_tabler   pp_groupDECODEconnection_pool	threadingLockconnection_lock#required_prefill_response_num_tableprefill_attn_tp_size_tableprefill_dp_size_tableprefill_pp_size_tableprefill_page_size_table
ValueError)selfr   r   r   r    contextzmq_bind_host rP   Y/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/disaggregation/common/conn.py__init__-   sP   




zCommonKVManager.__init__c                 C  s6  | j r*| j dr| j dr| j }n| j dd\}}nt| j ddd }n| j}t|}| d| j }d| d}d| j	| j
| j| j| j| j| j| j| j| j| jjd	}z%tj||d
d}|jdkrptd W dS td|j d|j  W dS  ty } ztd|  W Y d}~dS d}~ww )z4Register KVSender to bootstrap server via HTTP POST.[]r$   r"   r   http:///routePrefill)roler*   r+   r,   r-   r2   r3   r0   r1   rank_ipr8   	page_size   )jsontimeout   z4Prefill successfully registered to bootstrap server.z8Prefill instance failed to connect to bootstrap server: , z9Prefill instance failed to register to bootstrap server: N)r)   
startswithendswithrsplitsocketgethostbynamer&   r   r(   r*   r+   r,   r-   r2   r3   r0   r1   r4   r8   r%   rZ   requestsputstatus_coder:   r;   errortext	Exception)rM   r#   _bootstrap_server_urlurlpayloadresponseerP   rP   rQ   r>   e   sH   

z&CommonKVManager._register_to_bootstrapendpointstris_ipv6boolc                 C  s0   t  t j}|r|t jd || |S Nr"   )r5   r6   rc   PUSH
setsockoptIPV6connect)rM   rq   rs   rc   rP   rP   rQ   _connect   s
   
zCommonKVManager._connectsrc_kv_ptrs	List[int]dst_kv_ptrsreturn6Tuple[List[int], List[int], List[int], List[int], int]c                 C  s   | j j}t|d }|| }t|d }|d | }||d  }||kr1|d | }	||d  }
n3||k rT|| dkrT|| }||| }	|| }||| ||  }
n||| }	||| ||  }
t|}|||	|
|fS )N   r   r%   prefill_start_layerlen)rM   r{   r}   start_layernum_kv_layers	end_layerdst_num_total_layers
src_k_ptrs
src_v_ptrs
dst_k_ptrs
dst_v_ptrsmultiplier_ratiov_ptr_offsetlayers_current_pp_stagerP   rP   rQ   get_mha_kv_ptrs_with_pp   s.   z'CommonKVManager.get_mha_kv_ptrs_with_pp Tuple[List[int], List[int], int]c                 C  sH   | j j}|t| }t|t|kr|}n||| }t|}|||fS Nr   )rM   r{   r}   r   r   sliced_dst_kv_ptrsr   rP   rP   rQ   get_mla_kv_ptrs_with_pp   s   
z'CommonKVManager.get_mla_kv_ptrs_with_ppNF)r   r   r   r   r   r   r    r!   rq   rr   rs   rt   )r{   r|   r}   r|   r~   r   )r{   r|   r}   r|   r~   r   )	__name__
__module____qualname__rR   r>   r   rz   r   r   rP   rP   rP   rQ   r   ,   s    8.
!r   c                   @  sB   e Zd Zdd
dZdd ddZ	dd!ddZd"ddZdd ZdS )#CommonKVSendermgrr   bootstrap_addrrr   bootstrap_roomintdest_tp_ranksr|   r3   c                 C  s4   || _ || _d | _|| _d| _| j | jtj d S )Nr   )kv_mgrr   	aux_indexrl   curr_idxupdate_statusr   Bootstrapping)rM   r   r   r   r   r3   rP   rP   rQ   rR      s   zCommonKVSender.__init__Nnum_kv_indicesr   Optional[int]c                 C  s   || _ || _d S r   )r   r   )rM   r   r   rP   rP   rQ   init   s   
zCommonKVSender.init
kv_indicesnpt.NDArray[np.int32]state_indicesOptional[List[int]]c                 C     d S r   rP   )rM   r   r   rP   rP   rQ   send   s   zCommonKVSender.sendr~   r   c                 C  r   r   rP   rM   rP   rP   rQ   poll      zCommonKVSender.pollc                 C     t dNzFake KVReceiver Exceptionrj   r   rP   rP   rQ   failure_exception      z CommonKVSender.failure_exception)
r   r   r   rr   r   r   r   r|   r3   r   r   )r   r   r   r   )r   r   r   r   r~   r   )r   r   r   rR   r   r   r   r   rP   rP   rP   rQ   r      s    

r   c                   @  st   e Zd Ze Zi Zi Ze	 Z
		dd d	d
Zdd Zd!ddZed"d#ddZed$ddZdd Zdd ZdS )%CommonKVReceiverNr   r   r   rr   r   r   prefill_dp_rankc                 C  s  || _ || _|| _| j| j tj | j| jjvr|  \| _| _	| _
| _| jd u s4| j	d u s4| j
d u rN| j| j d| j  | j| j tj d | _d S | jd urp| jjj}| j|krpd| j d| d}t| t|td| j d| j	 d| j d| j
 d	| j 
 | j| jj| j< | j	| jj| j< | j
| jj| j< | j| jj| j< n!| jj| j | _| jj| j | _	| jj| j | _
| jj| j| _| jj| jkr| jjj| jj | _d
| _d
| j
| jj  | _| jg| _n| jj| jkr*| jj st!d | jjj| jj | jj| j  | _| jj| j | _d
| j
| jj  | _| jg| _nY| jj s4t!d dd t"| jjj| jj | j| jj  | jjj| jj d
 | j| jj  D | _| jd | _d
| _| jj rt| j
| jj | _n| j| jj | j
| jj  | _|d urtd|  || _#n|| j	 | _#| j#| _$| jj| j
ks| jjd
ksJ d| jj d| j
 df| j
| jjkr| jj%g| _&ndd t"| j
D | _&| j| jj'| j < | j d| j$ d| j }|| jj(vrsg }| jD ]l}	t)| j&D ]c}
| *|	| j$|
}|d ur@| jj r#t+|	| jkp| jd u  |d< nd|d< td| d| j$ d|	 d|
  |,| q| j| j d| jjj d| j$ d|
  | j| j tj   d S q|| _| j| jj(|< | -  n| jj(| | _t.| jdksJ d S )Nz;Could not fetch prefill parallel info from bootstrap_addr: z1Page size mismatch: prefill server has page_size=z", but decode server has page_size=z3. Both servers must use the same --page-size value.z"Fetch prefill parallel info from [z]: DP size:z
, TP size:z	 PP size:z Page size:r"   zPPerformance is NOT guaranteed when using different TP sizes for non-MLA models. c                 S     g | ]}|qS rP   rP   .0rankrP   rP   rQ   
<listcomp>Z  s    z-CommonKVReceiver.__init__.<locals>.<listcomp>r   zTargeting DP rank: zDecode pp size (z&) should be equal to prefill pp size (z) or 1c                 S  r   rP   rP   r   rP   rP   rQ   r     s    rk   is_dummyFzFetched bootstrap info: z for DP z TP z PP z0Could not fetch bootstrap info for engine rank: z and target_dp_group: z and target_pp_rank )/r   r   r   r   r   r   rI   &_get_prefill_parallel_info_from_serverprefill_attn_tp_sizeprefill_dp_sizeprefill_pp_sizeprefill_page_sizerecord_failureFailedbootstrap_infosr%   rZ   r:   rh   RuntimeErrorr;   rH   rJ   rK   getr*   engine_ranktarget_tp_rankrequired_dst_info_numr2   required_prefill_response_numtarget_tp_ranksr    warning_onceranger   target_dp_groupr3   target_pp_ranksrG   rC   reversed_get_bootstrap_info_from_serverrt   append_register_kv_argsr   )rM   r   r   r   r   decode_page_size	error_msgbootstrap_keyr   r   target_pp_rankbootstrap_inforP   rP   rQ   rR      s  








*



 



zCommonKVReceiver.__init__c              
   C  s   z2d| j  d| d| d| }tj|dd}|jdkr#| }|W S td|j d	|j  W d
S  tyM } ztd|  W Y d
}~d
S d
}~ww )z3Fetch the bootstrap info from the bootstrap server.rU   /route?engine_rank=&target_dp_group=&target_pp_rank=r[   r]   r^   z#Failed to get prefill server info: r_   Nz,Error fetching prefill info from bootstrap: )	r   re   r   rg   r\   r:   rh   ri   rj   )rM   r   r   r   rm   ro   r   rp   rP   rP   rQ   r     s   
z0CommonKVReceiver._get_bootstrap_info_from_serverr~   ATuple[Optional[int], Optional[int], Optional[int], Optional[int]]c              
   C  s   zDd| j  dd dd dd }t|}|jdkr5| }t|d t|d t|d	 t|d
 fW S td|j d|j  W dS  t	y_ } ztd|  W Y d}~dS d}~ww )z:Fetch the prefill parallel info from the bootstrap server.rU   r   r   r   r^   r   r   r   r   z%Failed to get prefill parallel info: r_   )NNNNz5Error fetching prefill parallel info from bootstrap: N)
r   re   r   rg   r\   r   r:   rh   ri   rj   )rM   rm   ro   prefill_parallel_inforp   rP   rP   rQ   r     s&   





z7CommonKVReceiver._get_prefill_parallel_info_from_serverFrq   rs   rt   c                 C  s   | j 8 || jvr*| jtj}|r|tjd || || j|< t	
 | j|< | j| | j| fW  d    S 1 s>w   Y  d S ru   )_global_lock_socket_cache_ctxrc   r5   rv   rw   rx   ry   rD   rE   _socket_locks)clsrq   rs   sockrP   rP   rQ   rz     s   


$zCommonKVReceiver._connectr   dictc                 C  s8   |d }|d }t |}| jt|||d\}}||fS )NrY   r8   )rs   )r   rz   r   )r   r   
ip_addressportis_ipv6_addressr   lockrP   rP   rQ   _connect_to_bootstrap_server  s   

z-CommonKVReceiver._connect_to_bootstrap_serverc                 C  r   r   rP   r   rP   rP   rQ   r     r   z"CommonKVReceiver._register_kv_argsc                 C  r   r   r   r   rP   rP   rQ   r     r   z"CommonKVReceiver.failure_exception)NN)r   r   r   rr   r   r   r   r   )r~   r   r   r   )r   r   )r   r   r   r5   r6   r   r   r   rD   rE   r   rR   r   r   classmethodrz   r   r   r   rP   rP   rP   rQ   r      s"     C
	r   c                   @  sf   e Zd ZdddZdd Zd	d
 Zdd ZdddZdddZdddZ	dd Z
dd Zd ddZdS )!CommonKVBootstrapServerr#   rr   r   r   c                 C  sl   || _ || _t | _t | _t | _	| 
  d | _d | _d | _d | _i | _tj| jdd| _|   d S )NT)targetdaemon)r#   r   r	   Applicationappr   storeasynciorE   r   _setup_routesr2   r*   r/   rZ   prefill_port_tablerD   Thread_run_serverthreadrun)rM   r#   r   rP   rP   rQ   rR     s   

z CommonKVBootstrapServer.__init__c                 C  s   | j   d S r   )r   startr   rP   rP   rQ   r     s   zCommonKVBootstrapServer.runc                 C  s*   | j jdd| j | j jd| j d S )N*rV   z/health)r   router	add_route_handle_routeadd_get_handle_health_checkr   rP   rP   rQ   r     s   z%CommonKVBootstrapServer._setup_routesc                   s   t jdddS )NOKr^   ri   status)r	   Response)rM   requestrP   rP   rQ   r    s   z,CommonKVBootstrapServer._handle_health_checkr  web.Requestc                   sH   |j }|dkr| |I d H S |dkr| |I d H S tjddddS )NPUTGETzMethod not allowedi  zapplication/json)ri   r  content_type)method_handle_route_put_handle_route_getr	   r  )rM   r  r  rP   rP   rQ   r     s   z%CommonKVBootstrapServer._handle_routec                   s  |  I d H }|d }|d }|d }|d }|d }|d }|d }	|d }
|d	 }|d
 }t|d }t|d }| jd u rD|| _| jd u rR|
dkrO|n|
| _| jd u rZ|| _| jd u rf|d urf|| _|dkr|
dkrq|}n|}| j4 I d H # || jvri | j|< || j| vri | j| |< W d   I d H  n1 I d H sw   Y  ||d| j| | |	< t	d| d| d|	 d| d| 
 t
jdddS )NrX   r*   r+   r,   r-   r2   r3   r0   r1   rY   r8   rZ   r"   rW   )rY   r8   zRegister prefill bootstrap: DPz TPz PPz with rank_ip: z and rank_port: r  r^   r  )r\   r   r*   r/   r2   rZ   r   r   r:   r;   r	   r  )rM   r  datarX   r*   r+   r,   r-   r2   r3   r0   r1   rY   r8   rZ   dp_grouprP   rP   rQ   r  &  sN   




( z)CommonKVBootstrapServer._handle_route_putc              	     s  |j d}|j d}|j d}|r|r|s tjdddS t|dkrDt|dkrDt|dkrD| j| j| j| jd}tj	|d	d
S | j
4 I d H  | jt| t| t| }W d   I d H  n1 I d H skw   Y  |d ur{tj	|d	d
S tjdddS )Nr   r   r   z$Missing inputs for bootstrap server.i  r  r   )r   r   r   r   r^   )r  zBootstrap info not Foundi  )queryr   r	   r  r   r*   r/   r2   rZ   json_responser   r   )rM   r  r   r   r   r   r   rP   rP   rQ   r  X  s2   (z)CommonKVBootstrapServer._handle_route_getc              
   C  s,  zzGt  | _t | j d }tt tjkr| j	j
}tj| j	|d| _| j| j  tj| j| j| jd}| j|  | j  W n tyd } zt
dt|  W Y d }~nd }~ww W | j| j  | j  d S W | j| j  | j  d S | j| j  | j  w )N)
access_log)r#   r   zServer error: )r   new_event_loop_loopset_event_looplogging	getLoggerr   getEffectiveLevelDEBUGr   r:   r	   	AppRunner_runnerrun_until_completesetupTCPSiter#   r   r   run_foreverrj   rh   rr   cleanupclose)rM   r  siterp   rP   rP   rQ   r   x  s.   
 z#CommonKVBootstrapServer._run_serverc                 C  sX   | j dur| j  r| j | j j td | j r*| jjdd td dS dS )ShutdownNzStopping server loop...r   r   zServer thread stopped)	r  
is_runningcall_soon_threadsafestopr:   infor   is_alivejoinr   rP   rP   rQ   r!    s   

zCommonKVBootstrapServer.closer~   r   c                 C  r   r   rP   r   rP   rP   rQ   r     s    zCommonKVBootstrapServer.pollN)r#   rr   r   r   )r  r  r   )r   r   r   rR   r   r   r  r   r  r  r   r!  r   rP   rP   rP   rQ   r     s    



2 
r   )5
__future__r   r   r  rc   rD   	functoolsr   typingr   r   r   r   r   numpynpnumpy.typingnptre   r5   aiohttpr	   #sglang.srt.disaggregation.base.connr
   r   r   r   r   r   sglang.srt.disaggregation.utilsr   sglang.srt.distributedr   sglang.srt.layers.dp_attentionr   r   r   r   sglang.srt.server_argsr   sglang.srt.utilsr   r   r   r   r   r  r   r:   r   r   r   r   rP   rP   rP   rQ   <module>   s4     
 #  