o
    پio                     @  sZ   d dl mZ d dlmZ d dlmZmZ d dlZd dlm	Z	m
Z
mZ eG dd dZdS )    )annotations)	dataclass)DictOptionalN)ForwardBatchPPProxyTensors"compute_local_num_token_non_paddedc                   @  s   e Zd ZU ded< ded< ded< ded< ded< ded< ded< ded	< ded
< ded< ded< ded< ded< ded< ded< ded< ded< ed9d*d+Zd,d-d:d7d8Zd,S );GraphInputBuffersztorch.Tensor	input_idsinput_embedsreq_pool_indicesseq_lensseq_lens_cpuout_cache_loc	positionsmrope_positionsnum_token_non_paddedcustom_masknext_token_logits_bufferOptional[torch.Tensor]mamba_track_indicesmamba_track_maskglobal_num_tokens_gpu!global_num_tokens_for_logprob_gpuencoder_lensz!Optional[Dict[str, torch.Tensor]]pp_proxy_tensorsdevicetorch.devicemax_bsintmax_num_tokenhidden_size
vocab_sizedtypetorch.dtypedp_sizepp_sizeis_encoder_decoderboolrequire_mlp_tp_gatherseq_len_fill_valueencoder_len_fill_valuenum_tokens_per_bscache_loc_dtypeenable_mamba_trackreturn'GraphInputBuffers'c          !      C  s@  t | t j|ft jd}t j||f|d}t j|ft jd}t j|f|t jd}t j|f|d}t j|ft jd}t jd|ft jd}t jdt jd}t j|| | | t jd}t j||ft jd}|rqt j|ft jdnd }|r~t j|ft jdnd }|dkrt j||f|dt j||f|dd}nd }|	rt j|f|t jd}nd }|
rt j|ft jd}t j|ft jd}nt jdt jd}t jdt jd}W d    n1 sw   Y  t j|f|t jdd} | di d|d	|d
|d|d| d|d|d|d|d|d|d|d|d|d|d|d|S )N)r#      )   r2   )hidden_statesresidualcpu)r#   r   r
   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r    )	torchr   zerosint64int32fullonesr(   float)!clsr   r   r    r!   r"   r#   r%   r&   r'   r)   r*   r+   r,   r-   r.   r
   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r6   r6   [/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/model_executor/input_buffers.pycreate#   s   
3	
zGraphInputBuffers.createN)r   forward_batchr   raw_bsraw_num_tokenbsrequire_gathered_buffernsa_enable_prefill_cp enable_num_token_non_padded_flagOptional[PPProxyTensors]c       
         C  s\  ||kr$| j | | j  | jd ur| j  | jd ur$| jd | jd | |j | jd | |j | j d | |j  | jd | |j | j	d | |j	 | jd urp|jd urp| jd | |j | jd ur|jd ur| jd | |j d }|j
d ur||kr| j
| | j
d | |j
 | j
d | }| jd ur|jd ur| jd | |j |jd ur| jd d d |f |j |r| j||  | j||  |	r|r|s|| }t|j|d}| j| n| j|j |
d ur,| jd ur,| j D ]\}}|
j| }|jd }|d | | q|S )NF)global_num_token_non_paddednum_tokens_per_dpr   )r   fill_r   zero_r   r   r
   copy_r   r   r   r   r   r   r   r   r   r   itemstensorsshape)selfrA   rB   rC   rD   r*   rE   r,   rF   rG   r   r   rJ   localkeybufsrcdimr6   r6   r?   populate_from_forward_batch   s\   











z-GraphInputBuffers.populate_from_forward_batch) r   r   r   r   r    r   r!   r   r"   r   r#   r$   r%   r   r&   r   r'   r(   r)   r(   r*   r   r+   r   r,   r   r-   r$   r.   r(   r/   r0   )rA   r   rB   r   rC   r   rD   r   r*   r   rE   r(   r,   r   rF   r(   rG   r(   r   rH   r/   r   )__name__
__module____qualname____annotations__classmethodr@   rW   r6   r6   r6   r?   r	      s,   
 mr	   )
__future__r   dataclassesr   typingr   r   r7   ,sglang.srt.model_executor.forward_batch_infor   r   r   r	   r6   r6   r6   r?   <module>   s    