o
    
۾i                     @   s~  d dl Z d dlZd dlmZ d dlmZmZ d dlZd dlZ	d dl
Z
d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZmZ d d
lmZ ed ZdZG dd de jZG dd dejddddZG dd de jZ G dd dejZ!G dd dejddddZ"G dd dejdddZ#G dd dejddddZ$G dd  d e j%Z&G d!d" d"ejZ'G d#d$ d$e jZ(dS )%    N)Mapping)AnyLiteral)LoRARequest)MultiModalFeatureSpec)PoolingParams)SamplingParams)SchedulerStats)LogprobsListsLogprobsTensors)UtilityResult)abortwaitkeep)stoplengthr   errorc                   @   s(   e Zd ZdZdZdZdZdZdd ZdS )	FinishReasona  
    Reason a request finished - stop, length, abort, or error.

    Int rather than Str for more compact serialization.

    stop - a stop string was emitted
    length - max_tokens was consumed, or max_model_len was reached
    abort - aborted by client
    error - retryable request-level internal error (e.g., KV load failure).
            Invariant: always converted to 500 Internal Server Error.

    r            c                 C   s
   t | j S N)FINISH_REASON_STRINGSvalueself r   K/home/ubuntu/.local/lib/python3.10/site-packages/vllm/v1/engine/__init__.py__str__3   s   
zFinishReason.__str__N)	__name__
__module____qualname____doc__STOPLENGTHABORTERRORr   r   r   r   r   r       s    r   c                   @   s  e Zd ZU eed< ee dB ed< ee dB ed< edB ed< e	dB ed< edB ed< e
ed< edB ed	< edB ed
< edB ed< dZejdB ed< dZeed< dZeed< dZeed< dZeeef dB ed< dZeed< dZedB ed< dZedB ed< edee	B fddZdS )EngineCoreRequest
request_idNprompt_token_idsmm_featuressampling_paramspooling_paramseos_token_idarrival_timelora_request
cache_saltdata_parallel_rankprompt_embedsr   client_indexcurrent_waveprioritytrace_headersF	resumableexternal_req_idreasoning_endedreturnc                 C   s$   | j dur| j S | jdusJ | jS )z2Return the processed params (sampling or pooling).N)r+   r,   r   r   r   r   params^   s   
zEngineCoreRequest.params)r   r    r!   str__annotations__listintr   r   r   floatr   r2   torchTensorr3   r4   r5   r6   r   r7   boolr8   r9   propertyr;   r   r   r   r   r'   7   s*   
 r'   TF)
array_likeomit_defaultsgcc                   @   s   e Zd ZdZdZdZdZdS )EngineCoreEventTypez&The type of engine core request event.r   r   r   N)r   r    r!   r"   QUEUED	SCHEDULED	PREEMPTEDr   r   r   r   rH   g   s
    rH   c                   @   sD   e Zd ZU dZeed< eed< e	d	dededB dd fddZdS )
EngineCoreEventa  A timestamped engine core event associated with a request.

    The timestamp is a monotonic timestamps and is used for by the engine
    frontend to calculate intervals between engine core events. These
    timestamps should not be compared with timestamps from other processes.
    type	timestampN
event_typer:   c                 C   s   |d u rt  n|}| ||S r   )time	monotonic)clsrO   rN   r   r   r   	new_eventz   s   
zEngineCoreEvent.new_eventr   )	r   r    r!   r"   rH   r=   r@   classmethodrS   r   r   r   r   rL   o   s   
 rL   c                   @   s  e Zd ZU eed< ee ed< dZedB ed< dZ	e
dB ed< dZejdB ed< dZedB ed< dZeeB dB ed< dZee dB ed	< dZeeef dB ed
< dZeeef dB ed< dZeed< dZeed< dZejdB ed< dZeed< edefddZ dS )EngineCoreOutputr(   new_token_idsNnew_logprobsnew_prompt_logprobs_tensorspooling_outputfinish_reasonstop_reasoneventskv_transfer_paramsr6   r   num_cached_tokensnum_external_computed_tokensrouted_expertsnum_nans_in_logitsr:   c                 C   s
   | j d uS r   )rZ   r   r   r   r   finished   s   
zEngineCoreOutput.finished)!r   r    r!   r<   r=   r>   r?   rW   r
   rX   r   rY   rA   rB   rZ   r   r[   r\   rL   r]   dictr   r6   r   r^   r_   r`   npndarrayra   rD   rC   rb   r   r   r   r   rU      s"   
 rU   c                   @   s6   e Zd ZU eed< dZedB ed< dZedB ed< dS )UtilityOutputcall_idNfailure_messageresult)	r   r    r!   r?   r=   rh   r<   ri   r   r   r   r   r   rf      s   
 rf   )rE   rG   c                   @   s   e Zd ZU dZeed< g Zee ed< dZ	e
dB ed< dZeed< dZedB ed< dZee dB ed	< dZedB ed
< dZedB ed< dd ZdS )EngineCoreOutputsr   engine_indexoutputsNscheduler_stats        rN   utility_outputfinished_requestswave_complete
start_wavec                 C   s   | j dkrt | _ d S d S )Nrn   )rN   rP   rQ   r   r   r   r   __post_init__   s   
zEngineCoreOutputs.__post_init__)r   r    r!   rk   r?   r=   rl   r>   rU   rm   r	   rN   r@   ro   rf   rp   setr<   rq   rr   rs   r   r   r   r   rj      s   
 	rj   c                   @   s$   e Zd ZdZdZdZdZdZdZdS )EngineCoreRequestTypezw
    Request types defined as hex byte strings, so it can be sent over sockets
    without separate encoding step.
                    N)	r   r    r!   r"   ADDr%   START_DP_WAVEUTILITYEXECUTOR_FAILEDr   r   r   r   ru      s    ru   c                   @   s6   e Zd ZU eed< eed< eed< eed< eed< dS )ReconfigureDistributedRequestnew_data_parallel_sizenew_data_parallel_ranknew_data_parallel_rank_localnew_data_parallel_master_ipnew_data_parallel_master_portN)r   r    r!   r?   r=   r<   r   r   r   r   r      s   
 r   c                   @   s   e Zd ZdZdZdZdS )ReconfigureRankTypez:
    Rank type for reconfiguring distributed request.
    N)r   r    r!   r"   KEEP_CURRENT_RANKSHUTDOWN_CURRENT_RANKr   r   r   r   r      s    r   ))enumrP   collections.abcr   typingr   r   msgspecnumpyrd   rA   vllm.lora.requestr   vllm.multimodal.inputsr   vllm.pooling_paramsr   vllm.sampling_paramsr   vllm.v1.metrics.statsr	   vllm.v1.outputsr
   r   vllm.v1.serial_utilsr   	PauseModer   IntEnumr   Structr'   rH   rL   rU   rf   rj   Enumru   r   r   r   r   r   r   <module>   sZ   
0

"
