o
    i/                     @   s6  d dl Z d dlZd dlmZ d dlmZmZ d dlmZ d dl	m
Z
mZ d dlZd dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZmZmZmZ d dlmZ d dlmZ e
rfd dlmZ d dl m!Z! eG dd dZ"G dd dZ#G dd de j$Z%e%j&ej'e%j(ej)e%j*ej+e%j,ej)e%j-ej.e%j/ej'iZ0dS )    N)deque)CallableMapping)	dataclass)TYPE_CHECKINGAny)MultiModalFeatureSpec)PoolingParams)SamplingParams)&length_from_prompt_token_ids_or_embeds)EngineCoreEventEngineCoreEventTypeEngineCoreRequestFinishReason)StructuredOutputRequest)ConstantList)LoRARequest)	BlockHashc                   @   s\   e Zd ZU dZee dB ed< ee dB ed< eed< eed< e	dB ed< e
dddZdS )StreamingUpdatezLightweight data for streaming session continuation.

    Contains only the fields needed to update an existing streaming session
    with new input data.
    Nmm_featuresprompt_token_ids
max_tokensarrival_timesampling_paramsrequestRequestreturnStreamingUpdate | Nonec                 C   s&   |j sd S | |j|j|j|j|jdS )N)r   r   r   r   r   )	resumabler   r   r   r   r   )clsr    r    E/home/ubuntu/vllm_env/lib/python3.10/site-packages/vllm/v1/request.pyfrom_request-   s   zStreamingUpdate.from_request)r   r   r   r   )__name__
__module____qualname____doc__listr   __annotations__intfloatr
   classmethodr"   r    r    r    r!   r      s   
 r   c                #   @   s   e Zd Z											d?dedee dB dedB dedB dedB d	ed
edB de	j
dB dee dB dddedB dedeeef dB ded ged f dB dededB ddf"ddZededed ged f dB dd fddZdeee B ddfddZd@dd Zedefd!d"Zedefd#d$Zedefd%d&Zedefd'd(Zedefd)d*Zedefd+d,Zdefd-d.Zdefd/d0ZdedB fd1d2Zd3edefd4d5Z 	dAd6e!d7edB ddfd8d9Z"dee# dB fd:d;Z$d<d defd=d>Z%dS )Br   r   NF
request_idr   r   pooling_paramseos_token_idclient_indexr   prompt_embedsr   lora_requestzLoRARequest | None
cache_saltprioritytrace_headersblock_hasherr   r   reasoning_endedr   c                 C   s  || _ || _|| _|| _|| _|| _|
| _t|| _	| j	d ur$|| j	_
|d ur*|nt | _tj| _g | _d | _d | _|d urDd| _n)|d uri|jd usOJ |j| _| j	d ur\tj| _|jd urh|jd| _ntd|| _|| _t||| _g | _| jd ur| j ndg| j | _d| _d| _ g | _!d| _"|| _#|	pg | _$t%| j| _&t%| j| _'|| _(d| _)d| _*d| _+d| _,d| _-g | _.|| _/| 0  | 1 | _2|| _3d | _4d S )N   kv_transfer_paramsz6sampling_params and pooling_params can't both be unsetr   F)5r,   r/   r3   r   r-   r.   r1   r   from_sampling_paramsstructured_output_requestr6   timer   RequestStatusWAITINGstatuseventsstop_reasonr8   r   WAITING_FOR_FSM
extra_argsget
ValueErrorr   r0   r   num_prompt_tokens_output_token_idscopy_all_token_idsnum_output_placeholdersdiscard_latest_async_tokensspec_token_idsnum_computed_tokensr2   r   r   output_token_idsall_token_idsr4   num_cached_tokensis_prefill_chunknum_nans_in_logitsnum_preemptionsnum_external_computed_tokensblock_hashes_block_hasherupdate_block_hashesget_skip_reading_prefix_cacheskip_reading_prefix_cacher   streaming_queue)selfr,   r   r   r-   r.   r/   r   r0   r   r1   r2   r3   r4   r5   r   r6   r    r    r!   __init__;   sx   








zRequest.__init__r   c                 C   s   | di d|j d|jd|jd|jd|jd|jd|jd|jd	|jd
|j	d|j
d|jd|jd|d|jd|jS )Nr,   r/   r   r0   r   r   r-   r.   r   r1   r2   r3   r4   r5   r   r6   r    )r,   r/   r   r0   r   r   r-   r.   r   r1   r2   r3   r4   r   r6   )r   r   r5   r    r    r!   from_engine_core_request   sB   	
z Request.from_engine_core_request	token_idsc                 C   sH   t |tr| j| | j| n| j| | j| |   d S N)
isinstancer)   rG   appendrI   extendrW   )r[   r^   r    r    r!   append_output_token_ids   s   
zRequest.append_output_token_idsc                 C   s$   | j dur| j|  |  dS dS )z=Compute block hashes for any new full blocks and append them.N)rV   rU   rb   r[   r    r    r!   rW      s   
zRequest.update_block_hashesc                 C   s
   | j d uS r_   )r;   rd   r    r    r!   use_structured_output      
zRequest.use_structured_outputc                 C   
   t | jS r_   )lenrI   rd   r    r    r!   
num_tokens   rf   zRequest.num_tokensc                 C   s   t | jt | j S r_   )rh   rI   rL   rd   r    r    r!   num_tokens_with_spec   s   zRequest.num_tokens_with_specc                 C   rg   r_   )rh   rG   rd   r    r    r!   num_output_tokens   rf   zRequest.num_output_tokensc                 C   rg   r_   )rh   r   rd   r    r    r!   num_encoder_inputs   rf   zRequest.num_encoder_inputsc                 C   s
   | j dkS )Nr   )rl   rd   r    r    r!   has_encoder_inputs   rf   zRequest.has_encoder_inputsc                 C   s@   | j d ur| j jd ur| j jS | jd ur| jjd ur| jjS dS )NF)r   rY   r-   rd   r    r    r!   rX      s   

z%Request.get_skip_reading_prefix_cachec                 C      t | jS r_   )r=   is_finishedr?   rd   r    r    r!   ro        zRequest.is_finishedc                 C   rn   r_   )r=   get_finished_reasonr?   rd   r    r    r!   rq     rp   zRequest.get_finished_reasoninput_idc                 C   s"   |t | jk s	J | j| j S r_   )rh   r   mm_positionget_num_embeds)r[   rr   r    r    r!   get_num_encoder_embeds	  s   zRequest.get_num_encoder_embeds
event_type	timestampc                 C   s   | j t|| d S r_   )r@   ra   r   	new_event)r[   rv   rw   r    r    r!   record_event  s   zRequest.record_eventc                 C   s   | j sd S | j g }| _ |S r_   )r@   )r[   r@   r    r    r!   take_events  s   zRequest.take_eventsotherc                 C   sX   | j |j kr| j |j k S | j|jkr| j|jk S | j|jkr$| j|jk S t| t|k S )z|
        Compare two requests based on priority, arrival time, and request ID.
        Used in priority scheduling.
        )r3   r   r,   id)r[   r{   r    r    r!   __lt__  s   zRequest.__lt__)r   NNNNNr   NNFN)r   Nr_   )&r#   r$   r%   strr'   r)   r
   r	   r*   torchTensorr   r   r   boolr\   r+   r   r]   rc   rW   propertyre   ri   rj   rk   rl   rm   rX   ro   r   rq   ru   r   ry   r   rz   r}   r    r    r    r!   r   :   s    
	


x



r   c                   @   s   e Zd ZdZe Ze Ze Ze Z	e Z
e Ze Ze Ze Ze Ze ZdefddZedd defddZedd dedB fd	d
ZdS )r=   zStatus of a request.r   c                 C   s   | j S r_   )namerd   r    r    r!   __str__9  s   zRequestStatus.__str__r?   c                 C   s
   | t jkS r_   )r=   	PREEMPTEDr?   r    r    r!   ro   <  rf   zRequestStatus.is_finishedNc                 C   s
   t | S r_   )_FINISHED_REASON_MAPrD   r   r    r    r!   rq   @  rf   z!RequestStatus.get_finished_reason)r#   r$   r%   r&   enumautor>   rB   WAITING_FOR_REMOTE_KVSWAITING_FOR_STREAMING_REQRUNNINGr   FINISHED_STOPPEDFINISHED_LENGTH_CAPPEDFINISHED_ABORTEDFINISHED_IGNOREDFINISHED_ERRORr~   r   staticmethodr   ro   r   rq   r    r    r    r!   r=   (  s$    r=   )1r   r<   collectionsr   collections.abcr   r   dataclassesr   typingr   r   r   vllm.multimodal.inputsr   vllm.pooling_paramsr	   vllm.sampling_paramsr
   
vllm.utilsr   vllm.v1.enginer   r   r   r   !vllm.v1.structured_output.requestr   vllm.v1.utilsr   vllm.lora.requestr   vllm.v1.core.kv_cache_utilsr   r   r   IntEnumr=   r   STOPr   LENGTHr   ABORTr   r   ERRORr   r   r    r    r    r!   <module>   s:    o"