o
    .i                     @   sr  d dl Z d dlZd dlmZ d dlmZ d dlZd dlZd dl	Z	d dl
mZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZmZ d d
lmZ dZG dd de jZG dd dejddddZG dd de jZG dd dejZG dd dejddddZ G dd dejdddZ!G dd dejddddZ"G dd de j#Z$G d d! d!ejZ%G d"d# d#e jZ&dS )$    N)Mapping)Any)LoRARequest)MultiModalFeatureSpec)PoolingParams)SamplingParams)SchedulerStats)LogprobsListsLogprobsTensors)UtilityResult)stoplengthaborterrorc                   @   s(   e Zd ZdZdZdZdZdZdd ZdS )	FinishReasona  
    Reason a request finished - stop, length, abort, or error.

    Int rather than Str for more compact serialization.

    stop - a stop string was emitted
    length - max_tokens was consumed, or max_model_len was reached
    abort - aborted by client
    error - retryable request-level internal error (e.g., KV load failure).
            Invariant: always converted to 500 Internal Server Error.

    r            c                 C   s
   t | j S N)FINISH_REASON_STRINGSvalueself r   T/home/ubuntu/veenaModal/venv/lib/python3.10/site-packages/vllm/v1/engine/__init__.py__str__-   s   
zFinishReason.__str__N)	__name__
__module____qualname____doc__STOPLENGTHABORTERRORr   r   r   r   r   r      s    r   c                   @   s  e Zd ZU eed< ee dB ed< ee dB ed< edB ed< e	dB ed< edB ed< e
ed< edB ed	< edB ed
< edB ed< dZejdB ed< dZeed< dZeed< dZeed< dZeeef dB ed< dZeed< dZedB ed< edee	B fddZdS )EngineCoreRequest
request_idNprompt_token_idsmm_featuressampling_paramspooling_paramseos_token_idarrival_timelora_request
cache_saltdata_parallel_rankprompt_embedsr   client_indexcurrent_waveprioritytrace_headersF	resumableexternal_req_idreturnc                 C   s$   | j dur| j S | jdusJ | jS )z2Return the processed params (sampling or pooling).N)r(   r)   r   r   r   r   paramsV   s   
zEngineCoreRequest.params)r   r   r   str__annotations__listintr   r   r   floatr   r/   torchTensorr0   r1   r2   r3   r   r4   boolr5   propertyr7   r   r   r   r   r$   1   s(   
 r$   TF)
array_likeomit_defaultsgcc                   @   s   e Zd ZdZdZdZdZdS )EngineCoreEventTypez&The type of engine core request event.r   r   r   N)r   r   r   r   QUEUED	SCHEDULED	PREEMPTEDr   r   r   r   rD   _   s
    rD   c                   @   sD   e Zd ZU dZeed< eed< e	d	dededB dd fddZdS )
EngineCoreEventa  A timestamped engine core event associated with a request.

    The timestamp is a monotonic timestamps and is used for by the engine
    frontend to calculate intervals between engine core events. These
    timestamps should not be compared with timestamps from other processes.
    type	timestampN
event_typer6   c                 C   s   |d u rt  n|}| ||S r   )time	monotonic)clsrK   rJ   r   r   r   	new_eventr   s   
zEngineCoreEvent.new_eventr   )	r   r   r   r   rD   r9   r<   classmethodrO   r   r   r   r   rH   g   s   
 rH   c                   @   s   e Zd ZU eed< ee ed< dZedB ed< dZ	e
dB ed< dZejdB ed< dZedB ed< dZeeB dB ed< dZee dB ed	< dZeeef dB ed
< dZeeef dB ed< dZeed< dZejdB ed< dZeed< edefddZdS )EngineCoreOutputr%   new_token_idsNnew_logprobsnew_prompt_logprobs_tensorspooling_outputfinish_reasonstop_reasoneventskv_transfer_paramsr3   r   num_cached_tokensrouted_expertsnum_nans_in_logitsr6   c                 C   s
   | j d uS r   )rV   r   r   r   r   finished   s   
zEngineCoreOutput.finished) r   r   r   r8   r9   r:   r;   rS   r	   rT   r
   rU   r=   r>   rV   r   rW   rX   rH   rY   dictr   r3   r   rZ   r[   npndarrayr\   r@   r?   r]   r   r   r   r   rQ   z   s    
 rQ   c                   @   s6   e Zd ZU eed< dZedB ed< dZedB ed< dS )UtilityOutputcall_idNfailure_messageresult)	r   r   r   r;   r9   rc   r8   rd   r   r   r   r   r   ra      s   
 ra   )rA   rC   c                   @   s   e Zd ZU dZeed< g Zee ed< dZ	e
dB ed< dZeed< dZedB ed< dZee dB ed	< dZedB ed
< dZedB ed< dd ZdS )EngineCoreOutputsr   engine_indexoutputsNscheduler_stats        rJ   utility_outputfinished_requestswave_complete
start_wavec                 C   s   | j dkrt | _ d S d S )Nri   )rJ   rL   rM   r   r   r   r   __post_init__   s   
zEngineCoreOutputs.__post_init__)r   r   r   rf   r;   r9   rg   r:   rQ   rh   r   rJ   r<   rj   ra   rk   setr8   rl   rm   rn   r   r   r   r   re      s   
 	re   c                   @   s$   e Zd ZdZdZdZdZdZdZdS )EngineCoreRequestTypezw
    Request types defined as hex byte strings, so it can be sent over sockets
    without separate encoding step.
                    N)	r   r   r   r   ADDr"   START_DP_WAVEUTILITYEXECUTOR_FAILEDr   r   r   r   rp      s    rp   c                   @   s6   e Zd ZU eed< eed< eed< eed< eed< dS )ReconfigureDistributedRequestnew_data_parallel_sizenew_data_parallel_ranknew_data_parallel_rank_localnew_data_parallel_master_ipnew_data_parallel_master_portN)r   r   r   r;   r9   r8   r   r   r   r   rz      s   
 rz   c                   @   s   e Zd ZdZdZdZdS )ReconfigureRankTypez:
    Rank type for reconfiguring distributed request.
    N)r   r   r   r   KEEP_CURRENT_RANKSHUTDOWN_CURRENT_RANKr   r   r   r   r      s    r   )'enumrL   collections.abcr   typingr   msgspecnumpyr_   r=   vllm.lora.requestr   vllm.multimodal.inputsr   vllm.pooling_paramsr   vllm.sampling_paramsr   vllm.v1.metrics.statsr   vllm.v1.outputsr	   r
   vllm.v1.serial_utilsr   r   IntEnumr   Structr$   rD   rH   rQ   ra   re   Enumrp   rz   r   r   r   r   r   <module>   sX   
.

 
