o
    .i;                     @   s2   d dl Zd dlZd dlmZmZ G dd dZdS )    N)StagedWriteTensorUvaBackedTensorc                   @   s   e Zd ZdedededededejfddZed	efd
dZde	dede
e ded	df
ddZdddZde	d	dfddZdS )RequestStatemax_num_reqsmax_model_lenmax_num_batched_tokensnum_speculative_steps
vocab_sizedevicec                 C   s   || _ || _|| _|| _|| _|| _i | _i | _tt	|| _
tj| j tjd| _t| j | jftj|dd| _t| j tjd| _tj| j tjd| _t| j tj|d| _tj| j dtj|d| _tj| j | jtj|d| _tj| j tj|d| _d S )N)dtypeT)r   r
   uva_instead_of_gpu)r   r
      )r   r   r   r   r	   r
   req_id_to_indexindex_to_req_idlistrangefree_indicesnpzerosint32
prompt_lenr   torchprefill_token_idsr   prefill_lennum_computed_prefill_tokensnum_computed_tokensint64last_sampled_tokensdraft_tokensnext_prefill_tokens)selfr   r   r   r   r	   r
    r!   V/home/ubuntu/veenaModal/venv/lib/python3.10/site-packages/vllm/v1/worker/gpu/states.py__init__
   sH   	


zRequestState.__init__returnc                 C   s
   t | jS N)lenr   r    r!   r!   r"   num_reqsC   s   
zRequestState.num_reqsreq_idr   r   r   Nc                 C   s   t | jdksJ d| j }|| j|< || j|< || j|< t |}||ks1J d| d| || jj|< | j	|d| || j
|< | j|| d S )Nr   zNo free indiceszprefill_len z < prompt_len )r&   r   popr   r   r   r   r   r   stage_writer   r   stage_write_elem)r    r)   r   r   r   req_idxr   r!   r!   r"   add_requestG   s   





zRequestState.add_requestc                 C   s"   | j   | j  | j  d S r%   )r   copy_to_uvar   apply_writer   r'   r!   r!   r"   apply_staged_writes]   s   

z RequestState.apply_staged_writesc                 C   s8   | j |d }|d u rd S | j|d  | j| d S r%   )r   r*   r   r   append)r    r)   r-   r!   r!   r"   remove_requestb   s
   zRequestState.remove_request)r$   N)__name__
__module____qualname__intr   r
   r#   propertyr(   strr   r.   r1   r3   r!   r!   r!   r"   r   	   s:    
9

r   )numpyr   r   vllm.v1.worker.gpu.buffer_utilsr   r   r   r!   r!   r!   r"   <module>   s   