o
    پiA                     @  s   d dl mZ d dlmZ d dlmZmZ d dlZd dlm	Z	 d dl
mZmZ er@d dlmZ d dlmZ d d	lmZ d d
lmZ e Zejde eddd ZeG dd dZG dd dZdS )    )annotations)	dataclass)TYPE_CHECKINGOptionalN)spec_need_hidden_states)get_compiler_backendis_npu)ModelWorkerBatch)GenerationBatchResult)EagleDraftInput)SpeculativeAlgorithmT)dynamicbackenddisablec                 C  s.   t | dk |t j|  dd | | d d < d S )Nr   )min)torchwhereclamp)	input_idsfuture_token_ids_map r   U/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/managers/overlap_utils.py_resolve_future_token_ids   s
   r   c                   @  s"   e Zd ZU ded< dZded< dS )FutureIndicesztorch.TensorindicesNzOptional[slice]interval)__name__
__module____qualname____annotations__r   r   r   r   r   r      s   
 r   c                   @  sV   e Zd Z	d%d&d
dZd'ddZd(ddZd)ddZd*ddZd+d!d"Zd,d#d$Z	dS )-	FutureMapNmax_running_requestsintchunked_prefill_sizecontext_lendevicetorch.device	spec_algoOptional[SpeculativeAlgorithm]c                 C  s   d| _ |r|| d | nd}|d|  | _| jd|  | _|| _|| _| j r;d| _tj| jftj	| jd| _
d S d| _d S )Nr            Tdtyper%   F)	future_ctfuture_limitfuture_buffer_lenr%   r'   is_nonebuf_initializedr   emptyint64token_ids_buf)selfr!   r#   r$   r%   r'   max_num_chunksr   r   r   __init__$   s   	

zFutureMap.__init__draft_inputr   c                 C  s   d| _ |jd }|jd }|jd }|jd }tj| jg|jR |j	| j
d| _tj| jg|jR |j	| j
d| _tj| jg|jR |j	| j
d| _tj| jg|jR |j	| j
d| _t rv|jd }tj| jg|jR |j	| j
d| _d S d S )NTr   r,   )r2   topk_p
topk_indexverified_idnew_seq_lensr   r3   r0   shaper-   r%   
topk_p_buftopk_index_bufverified_id_bufnew_seq_lens_bufr   hidden_stateshidden_states_buf)r6   r9   topk_p0topk_index0verified_id0new_seq_lens0hidden_states0r   r   r   _lazy_init_bufI   sB   




zFutureMap._lazy_init_bufbsreturnr   c                 C  sR   | j }|| | j | _ |d }|d | }tj||tj| jd}t|t||dS )z?Update the circular buffer pointer and allocate future indices.r)   r,   )r   r   )r.   r/   r   aranger4   r%   r   slice)r6   rK   cur_future_ctstartendr   r   r   r   alloc_future_indiceso   s   zFutureMap.alloc_future_indicesmodel_worker_batchr	   c                 C  s   | j  rt|j| j d S |j}|d u rd S |jj}|t	
| j  | j| |_| j| |_| j| |_| j| |_t rI| j| |_d S d S N)r'   r1   r   r   r5   	spec_infofuture_indicesr   record_streamr   get_device_moduler%   current_streamr?   r:   r@   r;   rA   r<   rB   r=   r   rD   rC   )r6   rS   r9   r   r   r   r   resolve_futurex   s   
zFutureMap.resolve_futuresrN   boolc                 C  s*   | | j\}}}|dkr||kS ||kS )Nr   )r   r0   )r6   r[   rP   stopstepr   r   r   is_empty_slice   s   zFutureMap.is_empty_slicerV   batch_resultr
   c                 C  s6   | j  r|j}|j| j|< d S |j}| || d S rT   )r'   r1   r   next_token_idsr5   next_draft_inputstore_to_map_for_new_batch)r6   rV   r`   intvr9   r   r   r   store_to_map   s
   
zFutureMap.store_to_mapc                 C  sn   |j }| |r
d S | js| | |j| j|< |j| j|< |j| j	|< |j
| j|< t r5|j| j|< d S d S rT   )r   r_   r2   rJ   r:   r?   r;   r@   r<   rA   r=   rB   r   rC   rD   )r6   rV   r9   rd   r   r   r   rc      s   

z$FutureMap.store_to_map_for_new_batchrT   )
r!   r"   r#   r"   r$   r"   r%   r&   r'   r(   )r9   r   )rK   r"   rL   r   )rS   r	   )r[   rN   rL   r\   )rV   r   r`   r
   )rV   r   r9   r   )
r   r   r   r8   rJ   rR   rZ   r_   re   rc   r   r   r   r   r    #   s    
%
&
	


r    )
__future__r   dataclassesr   typingr   r   r   !sglang.srt.speculative.spec_utilsr   sglang.srt.utilsr   r   "sglang.srt.managers.schedule_batchr	   sglang.srt.managers.schedulerr
   !sglang.srt.speculative.eagle_infor    sglang.srt.speculative.spec_infor   _is_npucompiler   r   r    r   r   r   r   <module>   s"    
