o
    پi                     @  s   d dl mZ d dlZd dlmZ d dlmZ d dlZd dlm	Z	m
Z
 d dlmZ eeZerAd dlmZ d dlmZ d d	lmZ G d
d dZdS )    )annotationsN)
HTTPStatus)TYPE_CHECKING)CaptureHiddenModeForwardMode)SamplingBatchInfo)	FutureMap)ScheduleBatch)
ServerArgsc                   @  s    e Zd ZdddZdd	d
ZdS )&ScheduleBatchDisaggregationDecodeMixinselfr	   c                 C  s  t j| _| j}dd |D }tdd |D }g }g }g }tdd |D }tj|tj| jd}d}	t	|D ]\}
}|
|j | jj|j d|j }|	|j |ksaJ d	|	 d
|j d| |||	|	|j < |	|j7 }	t|j}t|jtdt|jd  }|
| t|jdkr|| |jksJ d| d| d
|j |js| j||j 7  _||_d|_|
| d|_q6d}tjt|g tj| jd| _tj|tj| jd| _tj|tj| jd| _tj|tjd| _tj|tj| jd| _|| _ t|| _!| j"rdd |D | _#dd |D | _$|| _%dd |D | _&dd |D | _'dd |D | _(|| _)dd |D | _*t+,| | j-j.| _/dS )zl
        Prepare a prebuilt extend by populate metadata
        Adapted from .prepare_for_extend().
        c                 S  s    g | ]}|j t|jd  qS N)fill_idslenprefix_indices.0r r   i/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/disaggregation/decode_schedule_batch_mixin.py
<listcomp>   s     zOScheduleBatchDisaggregationDecodeMixin.prepare_for_prebuilt.<locals>.<listcomp>c                 s  s    | ]}t |V  qd S r   )r   )r   idsr   r   r   	<genexpr>   s    zNScheduleBatchDisaggregationDecodeMixin.prepare_for_prebuilt.<locals>.<genexpr>c                 s  s    | ]}|j V  qd S r   extend_input_lenr   reqr   r   r   r   %   s    )dtypedevicer   NzExceeds total size: offset=z, req.extend_input_len=z, total_size=   zseq_len=z
, pre_len=F)r   c                 S     g | ]}|j qS r   )top_logprobs_numr   r   r   r   r   W       c                 S  r    r   )token_ids_logprobr   r   r   r   r   X   r"   c                 S  s   g | ]}t |jqS r   )r   r   r   r   r   r   r   [   s    c                 S  r    r   r   r   r   r   r   r   \   r"   c                 S  r    r   )extend_logprob_start_lenr   r   r   r   r   ]   r"   c                 S  r    r   )multimodal_inputsr   r   r   r   r   _   r"   )0r   PREBUILTforward_modereqssumtorchemptyint64r   	enumerateappendreq_pool_idxreq_to_token_poolreq_to_tokenr   r   r   origin_input_idsmax
output_idsretracted_staincached_tokensalready_computedis_retractedr$   tensorint32	input_idsreq_pool_indicesseq_lensseq_lens_cpuorig_seq_lensout_cache_locseq_lens_sumreturn_logprobtop_logprobs_numstoken_ids_logprobsextend_num_tokensprefix_lensextend_lensextend_logprob_start_lensextend_input_logprob_token_idsr%   r   from_schedule_batchmodel_config
vocab_sizesampling_info)r   r(   r;   rE   r=   pre_lensr<   
total_sizer@   offsetir   chunkpre_lenseq_lenrI   r   r   r   prepare_for_prebuilt   sx   







z;ScheduleBatchDisaggregationDecodeMixin.prepare_for_prebuiltserver_argsr
   
future_mapr   c                   s  g _ jD ]\}j |j d  j| |jdurbz|jjdu r,|j|j d  W n. ty[ } z"ddl	m
} d|j d|j d  d| }||tj|_W Y d}~nd}~ww | |j_qtjj jd_ j r|j |jr} |j9  tj fd	d
jD dd}tj fdd
jD dd}dd
 jD }	tj|	ddj}
ddlm} ||||
j jd}| tj|_ j!r|"t#j|_$|%|j$| |_&dS dS )z3Assign the buffered last input id to schedule batchNr   )FINISH_ABORTz$Grammar accept_token failed for req z with token z: )r   c                   *   g | ]}t j|jd   jt jdqS N)r   r   )r*   	as_tensoroutput_topk_pr   float32r   
num_statesr   r   r   r          zKScheduleBatchDisaggregationDecodeMixin.process_prebuilt.<locals>.<listcomp>)dimc                   rZ   r[   )r*   r\   output_topk_indexr   r,   r   r_   r   r   r      ra   c                 S  r    r   )hidden_states_tensorr   r   r   r   r      r"   )EagleDraftInput)topk_p
topk_indexhidden_statesverified_idnew_seq_lens)'r4   r(   r.   
tree_cachecache_unfinished_reqgrammarcurrent_tokenaccept_token
ValueError"sglang.srt.managers.schedule_batchrY   ridr   INTERNAL_SERVER_ERROR	to_finishfinishedr*   r9   r   spec_algorithmis_eaglespeculative_eagle_topkenable_multi_layer_eaglespeculative_num_stepsstackto!sglang.srt.speculative.eagle_infore   r=   prepare_for_extendr   LASTcapture_hidden_modeenable_overlapalloc_future_indicesr   future_indicesstore_to_map_for_new_batch	spec_info)r   rV   rW   r   erY   error_messagerf   rg   hidden_states_listrh   re   r   r   r_   r   process_prebuiltg   sr   





z7ScheduleBatchDisaggregationDecodeMixin.process_prebuiltN)r   r	   )r   r	   rV   r
   rW   r   )__name__
__module____qualname__rU   r   r   r   r   r   r      s    
Qr   )
__future__r   logginghttpr   typingr   r*   ,sglang.srt.model_executor.forward_batch_infor   r   'sglang.srt.sampling.sampling_batch_infor   	getLoggerr   logger!sglang.srt.managers.overlap_utilsr   rq   r	   sglang.srt.server_argsr
   r   r   r   r   r   <module>   s    
