o
    .i=                     @   s  d dl Z d dlmZmZ d dlmZ d dlmZmZ d dlm	Z	 d dl
mZ d dlmZ d dlmZmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZ d dlmZ d dlmZmZ d dl m!Z!m"Z" d dl#m$Z$ d dl%m&Z& d dl'm(Z( d dl)m*Z* d dl+m,Z, d dl-m.Z. d dl/m0Z0 d dl1m2Z2 d dl3m4Z4 d dl5m6Z6 d dl7m8Z8 d dl9m:Z: d dl;m<Z< d dl=m>Z> d dl?m@Z@mAZA d dlBmCZCmDZD d dlEmFZF d d lGmHZH d d!lImJZJ eeKZLed"ed#ZMG d$d% d%ZNdS )&    N)CallableMapping)copy)Anycast)TypeVar)ParallelConfig
VllmConfig)1stateless_destroy_torch_distributed_process_group)get_dp_group)
EngineArgs)
PromptType)init_logger)LoRARequest)MULTIMODAL_REGISTRYMultiModalRegistry)PoolingRequestOutputRequestOutput)get_io_processor)PoolingParams)RendererLike)SamplingParams)SupportedTask)TokenizerLike)init_tracer)UsageContext)EngineCoreRequest)EngineCoreClient)InputProcessor)OutputProcessor)ParentRequest)Executor)StatLoggerFactoryStatLoggerManager)Metricget_metrics_snapshot)IterationStats)record_function_or_nullcontext)
WorkerBase_R)defaultc                   @   s2  e Zd ZdZdejdeddfdedee	 de
de
ded	ee dB d
ede
de
ddfddZeejddfdeded	ee dB de
dd f
ddZeejddfdeded	ee dB de
dd f
ddZdefddZde
fddZde
de
fddZedd Zdeed f fd!d"Zdid#ee d$e
ddfd%d&Z					'	djd(ed)eeB d*ee B d+e!dB d,e"dB d-e#ee$f dB d.e%eef dB d/ed0edB ddfd1d2Z&dee'e(B  fd3d4Z)d5d6 Z*d7d8 Z+d9d: Z,	dkd;e
d<e
de
fd=d>Z-dld@efdAdBZ.dmdCee dB fdDdEZ/de
fdFdGZ0dee1 fdHdIZ2e3de4dB fdJdKZ5de4fdLdMZ6e3de7fdNdOZ8dndPdQZ9dndRdSZ:d,e"de
fdTdUZ;dVede
fdWdXZ<de=e fdYdZZ>dVede
fd[d\Z?		]	dod^ee@eAgeBf B d_e!dB d`edae#ee$f dB deeB f
dbdcZCdde@eDjEgeBf deeB fdedfZFdgdh ZGdS )p	LLMEnginez-Legacy LLMEngine for backwards compatibility.FNvllm_configexecutor_class	log_statsaggregate_engine_loggingusage_contextstat_loggersmm_registryuse_cached_outputsmultiprocess_modereturnc
                 C   s6  || _ |j| _|j| _|j| _|| _|j}
|
j}|
jdko |dk| _|	s2|
jdkr2| js2|
	 | _
nd | _
d| _t| j | _t| j | jj| _t| j| j| j jjd| _| jj}|d uretd|}|| j_tj|	d||| jd| _d | _| jrt||||d| _| j  |	s| jjj| _| jrt  j!| _
| "  d S )N   external_launcherF)r.   stream_intervalzvllm.llm_engine)r4   asyncio_moder,   r-   r.   )r,   custom_stat_loggersenable_default_loggersr/   )#r,   observability_configmodel_configcache_configr.   parallel_configdistributed_executor_backenddata_parallel_sizeexternal_launcher_dpstateless_init_dp_groupdp_groupshould_execute_dummy_batchr   input_processorr   io_processor_pluginio_processorr   	tokenizerscheduler_configr8   output_processorotlp_traces_endpointr   tracerr   make_clientengine_corelogger_managerr#   log_engine_initializedmodel_executorr   	cpu_groupreset_mm_cache)selfr,   r-   r.   r/   r0   r1   r2   r3   r4   r?   executor_backendendpointrM    rX   V/home/ubuntu/veenaModal/venv/lib/python3.10/site-packages/vllm/v1/engine/llm_engine.py__init__2   sj   




zLLMEngine.__init__disable_log_statsc                 C   s   | |t || ||tjdS )Nr,   r-   r.   r0   r1   r4   )r!   	get_classenvsVLLM_ENABLE_V1_MULTIPROCESSING)clsr,   r0   r1   r[   rX   rX   rY   from_vllm_config   s   zLLMEngine.from_vllm_configengine_argsenable_multiprocessingc                 C   s@   | |}t|}tjrtd d}| |||j |||dS )z0Creates an LLM engine from the engine arguments.z'Enabling multiprocessing for LLMEngine.Tr\   )create_engine_configr!   r]   r^   r_   loggerdebugr[   )r`   rb   r0   r1   rc   r,   r-   rX   rX   rY   from_engine_args   s   


zLLMEngine.from_engine_argsc                 C   
   | j  S N)rK   get_num_unfinished_requestsrU   rX   rX   rY   rj         
z%LLMEngine.get_num_unfinished_requestsc                 C   s,   | j  }| jd u r|p| j S | |S ri   )rK   has_unfinished_requestsrD   rO   dp_engines_runninghas_unfinished_requests_dp)rU   has_unfinishedrX   rX   rY   rm      s   


z!LLMEngine.has_unfinished_requestsrp   c                 C   s    t | j|}|s|rd| _|S NT)r   has_unfinished_dprD   rE   )rU   rp   aggregated_has_unfinishedrX   rX   rY   ro      s   z$LLMEngine.has_unfinished_requests_dpc                 C   s   |S ri   rX   )r`   outputsoutput_typerX   rX   rY   validate_outputs   s   zLLMEngine.validate_outputs.c                 C   rh   ri   )rO   get_supported_tasksrk   rX   rX   rY   rw      rl   zLLMEngine.get_supported_tasksrequest_idsinternalc                 C   s   | j ||}| j| dS )z3Remove request_ids from EngineCore and Detokenizer.N)rK   abort_requestsrO   )rU   rx   ry   rX   rX   rY   abort_request   s   zLLMEngine.abort_requestr   
request_idpromptparamsarrival_timelora_requesttokenization_kwargstrace_headerspriorityprompt_textc
              
   C   sN  t |tstdt| t |tr |}
||
jkrtd n*|	d u s&J | j	||||||||}
t |tr;|}	nt |t
rJttd B |d}	| j|
 |
j}t |tr[|jnd}|dkrr| j|
|	d d | j|
 d S t|
}t|D ]*}||\}}||d kr|
nt|
}||_||_| j||	|| | j| qzd S )Nz!request_id must be a string, got zAsyncLLM.add_request() was passed a request_id parameter that does not match the EngineCoreRequest.request_id attribute. The latter will be used, and the former will be ignored.r}   r6   r   )
isinstancestr	TypeErrortyper   r|   re   warning_oncerF   process_inputsr   r   getassign_request_idr~   r   nrK   add_requestrO   r    rangeget_child_infor   sampling_params)rU   r|   r}   r~   r   r   r   r   r   r   requestr   
parent_reqidxchild_paramschild_requestrX   rX   rY   r      sT   





zLLMEngine.add_requestc                 C   sj  | j rd| _ | j  g S td | j }W d    n1 s!w   Y  td" | jr1t nd }| jj|j	|j
|d}| j|j W d    n1 sOw   Y  td | j|j W d    n1 sjw   Y  td7 | jd ur|jd ur| jj|j|| j d |   W d    |jS W d    |jS W d    |jS 1 sw   Y  |jS )NFzllm_engine step: get_outputz llm_engine step: process_outputs)engine_core_timestampiteration_statszllm_engine step: abort_requestszllm_engine step: record_stats)scheduler_statsr   mm_cache_stats)rE   rO   execute_dummy_batchr'   
get_outputr.   r&   rK   process_outputsrt   	timestampupdate_scheduler_statsr   rz   reqs_to_abortrP   recordrF   stat_mm_cachedo_log_stats_with_intervalrequest_outputs)rU   rt   r   processed_outputsrX   rX   rY   step  sJ   







	
	
		zLLMEngine.stepc                 C      | j d d S rq   rO   profilerk   rX   rX   rY   start_profile>     zLLMEngine.start_profilec                 C   r   )NFr   rk   rX   rX   rY   stop_profileA  r   zLLMEngine.stop_profilec                 C   s   | j   | j  d S ri   )rF   clear_mm_cacherO   rT   rk   rX   rX   rY   rT   D  s   
zLLMEngine.reset_mm_cachereset_running_requestsreset_connectorc                 C   s   | j ||S ri   )rO   reset_prefix_cache)rU   r   r   rX   rX   rY   r   H  s   zLLMEngine.reset_prefix_cacher6   levelc                 C   s,   | j | | jd ur| jd| d S d S )Nr6   )rO   sleeprP   record_sleep_state)rU   r   rX   rX   rY   r   O     
zLLMEngine.sleeptagsc                 C   s,   | j | | jd ur| jdd d S d S )Nr   )rO   wake_uprP   r   )rU   r   rX   rX   rY   r   U  r   zLLMEngine.wake_upc                 C   rh   ri   )rO   is_sleepingrk   rX   rX   rY   r   [  rl   zLLMEngine.is_sleepingc                 C   s   | j sJ dt S )NzStat logging disabled)r.   r%   rk   rX   rX   rY   get_metrics^  s   zLLMEngine.get_metricsc                 C      | j jS ri   )rF   rI   rk   rX   rX   rY   rI   b     zLLMEngine.tokenizerc                 C   rh   ri   )rF   get_tokenizerrk   rX   rX   rY   r   f  rl   zLLMEngine.get_tokenizerc                 C   r   ri   )rF   rendererrk   rX   rX   rY   r   i  r   zLLMEngine.rendererc                 C   s   | j r
| j   dS dS )z Log stats if logging is enabled.N)rP   logrk   rX   rX   rY   do_log_statsm  s   zLLMEngine.do_log_statsc                 C   s>   t   }t| ds|| _|| j tjkr|   || _dS dS )z,Log stats when the time interval has passed._last_log_timeN)timehasattrr   r^   VLLM_LOG_STATS_INTERVALr   )rU   nowrX   rX   rY   r   r  s   

z$LLMEngine.do_log_stats_with_intervalc                 C      | j |S )z<Load a new LoRA adapter into the engine for future requests.)rO   add_lora)rU   r   rX   rX   rY   r   {     zLLMEngine.add_loralora_idc                 C   r   )z&Remove an already loaded LoRA adapter.)rO   remove_lorarU   r   rX   rX   rY   r     r   zLLMEngine.remove_lorac                 C   rh   )zList all registered adapters.)rO   
list_lorasrk   rX   rX   rY   r     s   
zLLMEngine.list_lorasc                 C   r   )z&Prevent an adapter from being evicted.)rO   pin_lorar   rX   rX   rY   r     r   zLLMEngine.pin_lorarX   methodtimeoutargskwargsc                 C   s   | j ||||S ri   )rO   collective_rpc)rU   r   r   r   r   rX   rX   rY   r     s   zLLMEngine.collective_rpcfuncc                 C   s   | j d|fdS )Napply_model)r   )r   )rU   r   rX   rX   rY   r     r   zLLMEngine.apply_modelc                 C   s.   t | dd }|d ur| jst| d S d S d S )NrD   )getattrrB   r
   )rU   rD   rX   rX   rY   __del__  s   zLLMEngine.__del__)F)NNNNr   N)FF)r6   ri   )r5   N)NrX   N)H__name__
__module____qualname____doc__r   ENGINE_CONTEXTr   r	   r   r!   boollistr"   r   rZ   classmethodra   r   rg   intrj   rm   ro   rv   tupler   rw   r   r{   r   r   r   r   floatr   dictr   r   r   r   r   r   r   r   rT   r   r   r   r   r$   r   propertyr   rI   r   r   r   r   r   r   r   setr   r   r   r(   r)   r   nnModuler   r   rX   rX   rX   rY   r+   /   s   
	

V


	

G$


	
"	r+   )Or   collections.abcr   r   r   typingr   r   torch.nnr   typing_extensionsr   	vllm.envsr^   vllm.configr   r	   vllm.distributedr
   vllm.distributed.parallel_stater   vllm.engine.arg_utilsr   vllm.inputsr   vllm.loggerr   vllm.lora.requestr   vllm.multimodalr   r   vllm.outputsr   r   vllm.plugins.io_processorsr   vllm.pooling_paramsr   vllm.renderersr   vllm.sampling_paramsr   
vllm.tasksr   vllm.tokenizersr   vllm.tracingr   vllm.usage.usage_libr   vllm.v1.enginer   vllm.v1.engine.core_clientr   vllm.v1.engine.input_processorr   vllm.v1.engine.output_processorr    vllm.v1.engine.parallel_samplingr    vllm.v1.executorr!   vllm.v1.metrics.loggersr"   r#   vllm.v1.metrics.readerr$   r%   vllm.v1.metrics.statsr&   vllm.v1.utilsr'   vllm.v1.worker.worker_baser(   r   re   r)   r+   rX   rX   rX   rY   <module>   sL   