o
    پi5                     @  s8  d dl mZ d dlZd dlZd dlmZ d dlmZ d dlm	Z	 d dl
mZmZmZ d dlZd dlZd dlmZ d dlmZ d dlmZ erLd d	lmZ d
ZG dd de	Zeedd Zdd ZG dd dZ G dd dZ!G dd de	Z"G dd de	Z#d0ddZ$d1d"d#Z%d2d%d&Z&d3d(d)Z'd4d5d.d/Z(dS )6    )annotationsN)deque)nullcontext)Enum)TYPE_CHECKINGOptionalType)envs)is_npu)Reqz2.2.2.2c                   @  s   e Zd ZdZdZdZdS )DisaggregationModenullprefilldecodeN)__name__
__module____qualname__NULLPREFILLDECODE r   r   S/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/disaggregation/utils.pyr      s    r    DISAGGREGATION_TEST_FAILURE_PROBc                   sd   t dkrddlm   fdd| D }ndd | D }tj|tjdd}tj|tjj	|d |
 S )	Nr   KVPollc                   s.   g | ]}t   tk rt jnt| qS r   )randomFAILURE_PROBintFailedpoll.0pollerr   r   r   
<listcomp>-   s     z'poll_and_all_reduce.<locals>.<listcomp>c                 S  s   g | ]}t | qS r   )r   r   r    r   r   r   r#   2   s    cpudtypedevice)opgroup)r   sglang.srt.disaggregation.baser   torchtensoruint8dist
all_reduceReduceOpMINtolist)pollers
gloo_grouppollstensor_to_reducer   r   r   poll_and_all_reduce(   s   
r7   c                   @  s6   e Zd ZdZdddZdd Zdd
dZdddZdS )ReqToMetadataIdxAllocatorzEA memory pool that maps a request to its first output token location.sizer   c                 C  s   || _ ttt|| _d S N)r9   r   listrange
free_slots)selfr9   r   r   r   __init__@   s   z"ReqToMetadataIdxAllocator.__init__c                 C  s
   t | jS r:   )lenr=   r>   r   r   r   available_sizeG   s   
z(ReqToMetadataIdxAllocator.available_sizereturnOptional[int]c                 C  s   t | jdkr	d S | j S Nr   )r@   r=   popleftrA   r   r   r   allocJ   s   
zReqToMetadataIdxAllocator.alloc
free_indexc                 C  s   | j | d S r:   )r=   append)r>   rH   r   r   r   freeP   s   zReqToMetadataIdxAllocator.freeN)r9   r   )rC   rD   )rH   r   )r   r   r   __doc__r?   rB   rG   rJ   r   r   r   r   r8   =   s    

r8   c                   @  s8   e Zd Z		ddddZdd ZdddZdddZdS )MetadataBuffers   Nr9   r   hidden_sizehidden_states_dtypetorch.dtypemax_top_logprobs_numcustom_mem_pooltorch.cuda.MemPoolc                 C  sb  || _ d}t rd}n| j rd}n	tj dkrd}| j r$tj| j nt  tj	|dftj
|d| _tj	|dftj
|d| _tj	|dftj|d| _tj	|dftj
|d| _tj	||ftj|d| _tj	||ftj
|d| _tj	|dftj|d| _tj	|dftj|d| _tj	||f||d| _tj	|dftj|d| _W d    d S 1 sw   Y  d S )Nr$   npuINTRA_NODE_NVLINKcuda   r%      )rR   r
   r	   SGLANG_MOONCAKE_CUSTOM_MEM_POOLgetr+   rV   use_mem_poolr   zerosint32
output_idscached_tokensfloat32output_token_logprobs_valoutput_token_logprobs_idxoutput_top_logprobs_valoutput_top_logprobs_idxoutput_topk_pint64output_topk_indexoutput_hidden_statesuint64bootstrap_room)r>   r9   rN   rO   rQ   rR   r'   r   r   r   r?   U   sR   	

"zMetadataBuffers.__init__c                 C  s  | j  | j | j | j | j | j | j | j | j	 | j
 g
}| j j| jj| jj| jj| jj| jj| jj| jj| j	j| j
jg
}| j d j| jd j| jd j| jd j| jd j| jd j| jd j| jd j| j	d j| j
d jg
}|||fS rE   )r^   data_ptrr_   ra   rb   rc   rd   re   rg   rh   rj   nbytes)r>   ptrs	data_lens	item_lensr   r   r   get_buf_infos   sD   










zMetadataBuffers.get_buf_infosidxc                 C  sT   | j | | j| | j| | j| | j| | j| | j| | j| | j| | j	| f
S r:   )
r^   r_   ra   rb   rc   rd   re   rg   rh   rj   )r>   rq   r   r   r   get_buf   s   zMetadataBuffers.get_bufreqr   c                 C  s\  |j d | j |j d< |j| j|j d< |jrk|jr%|jd | j|j d< |jr3|jd | j|j d< |jrOtj|jd tj	dd| j|j d t
|jd < |jrktj|jd tjdd| j|j d t
|jd < |jd ur|jd}| j|jd |f |j | j|jd |f |j | j|j |j |jd ur|jnd| j|jdf< d S )Nr   r$   r%   )r^   metadata_buffer_indexr_   return_logprobra   rb   rc   r+   r,   r`   r@   rd   r]   hidden_states_tensorre   r9   copy_rg   rh   rj   )r>   rs   topkr   r   r   set_buf   sH   


zMetadataBuffers.set_buf)rM   N)
r9   r   rN   r   rO   rP   rQ   r   rR   rS   )rq   r   )rs   r   )r   r   r   r?   rp   rr   ry   r   r   r   r   rL   T   s    :
'rL   c                   @      e Zd ZdZdZdZdZdZdS )TransferBackendmooncakemorinixlascendfakeN)r   r   r   MOONCAKEMORINIXLASCENDFAKEr   r   r   r   r{          r{   c                   @  rz   )KVClassTypekvargsmanagersenderreceiverbootstrap_serverN)r   r   r   KVARGSMANAGERSENDERRECEIVERBOOTSTRAP_SERVERr   r   r   r   r      r   r   transfer_backend
class_typerC   Optional[Type]c              
   C  s  ddl m}m} | tjkr5ddlm} ddlm}m	}m
}m} tj|tj|tj|tj|tj|i}	|	|S | tjkrbddlm} ddlm}
m}m}m} tj|tj|tj|tj|tj|
i}	|	|S | tjkrddlm}m}m}m} ddlm} tj|tj|tj|tj|tj|i}	|	|S | tjkrddlm} ddl m!}m"}m#}m$} tj|tj|tj|tj|tj|i}	|	|S | tj%krddlm} ddl m&}m}m} tj|tj|tj|tj|i}	|	|S t'd	|  )
Nr   )FakeKVReceiverFakeKVSender)KVArgs)MooncakeKVBootstrapServerMooncakeKVManagerMooncakeKVReceiverMooncakeKVSender)MoriKVBootstrapServerMoriKVManagerMoriKVReceiverMoriKVSender)AscendKVBootstrapServerAscendKVManagerAscendKVReceiverAscendKVSender)NixlKVBootstrapServerNixlKVManagerNixlKVReceiverNixlKVSender)FakeKVManagerr   r   zUnsupported transfer backend: )(sglang.srt.disaggregation.faker   r   r{   r   r*   r   "sglang.srt.disaggregation.mooncaker   r   r   r   r   r   r   r   r   r   rZ   r   sglang.srt.disaggregation.morir   r   r   r   r    sglang.srt.disaggregation.ascendr   r   r   r   r   sglang.srt.disaggregation.nixlr   r   r   r   r   r   
ValueError)r   r   r   r   r   r   r   r   r   class_mappingr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   get_kv_class  sf   









r   
kv_indices
np.ndarray	page_sizer   c                 C  s   |dkr| S | d d | | S N   r   )r   r   r   r   r   kv_to_page_indicesh  s   r   num_kv_indicesc                 C  s   | | d | S r   r   )r   r   r   r   r   kv_to_page_numr  s   r   boolc                 C  s   ddl m} t| |S )Nr   )MLATokenToKVPool) sglang.srt.mem_cache.memory_poolr   
isinstance)target_kv_poolr   r   r   r   is_mla_backend|  s   
r   rs   r   error_messagestrc                 C  sJ   ddl m} |||| _| jr#g | _g | _g | _g | _g | _g | _	d S d S )Nr   )FINISH_ABORT)
"sglang.srt.managers.schedule_batchr   finished_reasonru   input_token_logprobs_valinput_token_logprobs_idxinput_top_logprobs_valinput_top_logprobs_idxinput_token_ids_logprobs_valinput_token_ids_logprobs_idx)rs   r   status_coder   r   r   r   prepare_abort  s   
r   )r   r{   r   r   rC   r   )r   r   r   r   )r   r   r   r   )rC   r   r:   )rs   r   r   r   ))
__future__r   osr   collectionsr   
contextlibr   enumr   typingr   r   r   numpynpr+   torch.distributeddistributedr.   sglang.srt.environr	   sglang.srt.utilsr
   r   r   FAKE_BOOTSTRAP_HOSTr   floatgetenvr   r7   r8   rL   r{   r   r   r   r   r   r   r   r   r   r   <module>   s8     $

a



