o
    .i-                     @   sF  d dl Z d dlZd dlmZ d dlmZmZ d dlmZ d dl	m
Z
 d dlmZmZmZ d dlZd dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZmZmZmZ d dlmZ d dlm Z  ernd dl!m"Z" d dl#m$Z$ eG dd dZ%G dd dZ&G dd de j'Z(e(j)ej*e(j+ej,e(j-ej.e(j/ej,e(j0ej1e(j2ej*iZ3dS )    N)deque)CallableMapping)	dataclass)partial)TYPE_CHECKINGAnyOptional)MultiModalFeatureSpec)PoolingParams)SamplingParams)&length_from_prompt_token_ids_or_embeds)EngineCoreEventEngineCoreEventTypeEngineCoreRequestFinishReason)StructuredOutputRequest)ConstantList)LoRARequest)	BlockHashc                   @   s\   e Zd ZU dZee dB ed< ee dB ed< eed< eed< e	dB ed< e
dddZdS )StreamingUpdatezLightweight data for streaming session continuation.

    Contains only the fields needed to update an existing streaming session
    with new input data.
    Nmm_featuresprompt_token_ids
max_tokensarrival_timesampling_paramsrequestRequestreturnStreamingUpdate | Nonec                 C   s&   |j sd S | |j|j|j|j|jdS )N)r   r   r   r   r   )	resumabler   r   r   r   r   )clsr    r"   L/home/ubuntu/veenaModal/venv/lib/python3.10/site-packages/vllm/v1/request.pyfrom_request.   s   zStreamingUpdate.from_request)r   r   r   r   )__name__
__module____qualname____doc__listr
   __annotations__intfloatr   classmethodr$   r"   r"   r"   r#   r       s   
 r   c                !   @   s  e Zd Z										d<dedee dB dedB dedB dedB d	ed
edB de	j
dB dee dB ded dedB dedeeef dB ded ged f dB deddf ddZededed ged f dB dd fddZdeee B ddfddZedefddZedefd d!Zedefd"d#Zedefd$d%Zedefd&d'Zedefd(d)Zdefd*d+Zdefd,d-ZdedB fd.d/Zd0edefd1d2Z 	d=d3e!d4edB ddfd5d6Z"dee# dB fd7d8Z$d9d defd:d;Z%dS )>r   r   NF
request_idr   r   pooling_paramseos_token_idclient_indexr   prompt_embedsr   lora_requestr   
cache_saltprioritytrace_headersblock_hasherr   r    r   c                 C   s  || _ || _|| _|| _|| _|| _|
| _t|| _	|d ur!|nt

 | _tj| _g | _d | _d | _|d ur;d| _n)|d ur`|jd usFJ |j| _| j	d urStj| _|jd ur_|jd| _ntd|| _|| _t||| _g | _| jd ur}| j ndg| j | _d| _d| _g | _ d| _!|| _"|	pg | _#t$| j| _%t$| j| _&|| _'d| _(d| _)d| _*d| _+g | _,d | _-|d urt.|| | _-| - | _,| / | _0|| _1d | _2d S )N   kv_transfer_paramsz6sampling_params and pooling_params can't both be unsetr   F)3r.   r1   r5   r   r/   r0   r3   r   from_sampling_paramsstructured_output_requesttimer   RequestStatusWAITINGstatuseventsstop_reasonr9   r   WAITING_FOR_FSM
extra_argsget
ValueErrorr   r2   r   num_prompt_tokens_output_token_idscopy_all_token_idsnum_output_placeholdersdiscard_latest_async_tokensspec_token_idsnum_computed_tokensr4   r   r   output_token_idsall_token_idsr6   num_cached_tokensnum_nans_in_logitsnum_preemptionsnum_external_computed_tokensblock_hashesget_hash_new_full_blocksr   get_skip_reading_prefix_cacheskip_reading_prefix_cacher    streaming_queue)selfr.   r   r   r/   r0   r1   r   r2   r   r3   r4   r5   r6   r7   r    r"   r"   r#   __init__<   sv   








zRequest.__init__r   c                 C   sB   | |j |j|j|j|j|j|j|j|j|j	|j
|j|j||jdS )N)r.   r1   r   r2   r   r   r/   r0   r   r3   r4   r5   r6   r7   r    )r.   r1   r   r2   r   r   r/   r0   r   r3   r4   r5   r6   r    )r!   r   r7   r"   r"   r#   from_engine_core_request   s"   z Request.from_engine_core_request	token_idsc                 C   s^   t |tr| j| | j| n| j| | j| | jd ur-| j|   d S d S N)
isinstancer+   rH   appendrJ   extendrV   rU   )rZ   r]   r"   r"   r#   append_output_token_ids   s   

zRequest.append_output_token_idsc                 C   s
   | j d uS r^   )r<   rZ   r"   r"   r#   use_structured_output      
zRequest.use_structured_outputc                 C   
   t | jS r^   )lenrJ   rc   r"   r"   r#   
num_tokens   re   zRequest.num_tokensc                 C   s   t | jt | j S r^   )rg   rJ   rM   rc   r"   r"   r#   num_tokens_with_spec   s   zRequest.num_tokens_with_specc                 C   rf   r^   )rg   rH   rc   r"   r"   r#   num_output_tokens   re   zRequest.num_output_tokensc                 C   rf   r^   )rg   r   rc   r"   r"   r#   num_encoder_inputs   re   zRequest.num_encoder_inputsc                 C   s
   | j dkS )Nr   )rk   rc   r"   r"   r#   has_encoder_inputs   re   zRequest.has_encoder_inputsc                 C   s@   | j d ur| j jd ur| j jS | jd ur| jjd ur| jjS dS )NF)r   rX   r/   rc   r"   r"   r#   rW      s   

z%Request.get_skip_reading_prefix_cachec                 C      t | jS r^   )r>   is_finishedr@   rc   r"   r"   r#   rn         zRequest.is_finishedc                 C   rm   r^   )r>   get_finished_reasonr@   rc   r"   r"   r#   rp      ro   zRequest.get_finished_reasoninput_idc                 C   s    |t | jk s	J | j| jjS r^   )rg   r   mm_positionget_num_embeds)rZ   rq   r"   r"   r#   get_num_encoder_embeds   s   zRequest.get_num_encoder_embeds
event_type	timestampc                 C   s   | j t|| d S r^   )rA   r`   r   	new_event)rZ   ru   rv   r"   r"   r#   record_event  s   zRequest.record_eventc                 C   s   | j sd S | j g }| _ |S r^   )rA   )rZ   rA   r"   r"   r#   take_events	  s   zRequest.take_eventsotherc                 C   sX   | j |j kr| j |j k S | j|jkr| j|jk S | j|jkr$| j|jk S t| t|k S )z|
        Compare two requests based on priority, arrival time, and request ID.
        Used in priority scheduling.
        )r5   r   r.   id)rZ   rz   r"   r"   r#   __lt__  s   zRequest.__lt__)
r   NNNNNr   NNFr^   )&r%   r&   r'   strr)   r+   r   r   r,   torchTensorr
   r	   r   r   boolr[   r-   r   r\   rb   propertyrd   rh   ri   rj   rk   rl   rW   rn   r   rp   rt   r   rx   r   ry   r|   r"   r"   r"   r#   r   ;   s    
	


q


r   c                   @   s   e Zd ZdZe Ze Ze Ze Z	e Z
e Ze Ze Ze Ze Ze ZdefddZedd defddZedd dedB fd	d
ZdS )r>   zStatus of a request.r   c                 C   s   | j S r^   )namerc   r"   r"   r#   __str__.  s   zRequestStatus.__str__r@   c                 C   s
   | t jkS r^   )r>   	PREEMPTEDr@   r"   r"   r#   rn   1  re   zRequestStatus.is_finishedNc                 C   s
   t | S r^   )_FINISHED_REASON_MAPrE   r   r"   r"   r#   rp   5  re   z!RequestStatus.get_finished_reason)r%   r&   r'   r(   enumautor?   rC   WAITING_FOR_REMOTE_KVSWAITING_FOR_STREAMING_REQRUNNINGr   FINISHED_STOPPEDFINISHED_LENGTH_CAPPEDFINISHED_ABORTEDFINISHED_IGNOREDFINISHED_ERRORr}   r   staticmethodr   rn   r   rp   r"   r"   r"   r#   r>     s$    r>   )4r   r=   collectionsr   collections.abcr   r   dataclassesr   	functoolsr   typingr   r   r	   r~   vllm.multimodal.inputsr
   vllm.pooling_paramsr   vllm.sampling_paramsr   
vllm.utilsr   vllm.v1.enginer   r   r   r   !vllm.v1.structured_output.requestr   vllm.v1.utilsr   vllm.lora.requestr   vllm.v1.core.kv_cache_utilsr   r   r   IntEnumr>   r   STOPr   LENGTHr   ABORTr   r   ERRORr   r   r"   r"   r"   r#   <module>   s<    c"