o
    -i60                     @   s   d dl mZ d dlmZ d dlmZmZmZmZm	Z	 d dl
mZmZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ erNd dlmZ eeZed Zed ZeeG dd dZdS )    )Callable)InitVar)TYPE_CHECKINGAnyClassVarLiteralcast)Fieldfield_validator)	dataclass)Self)config)init_logger)	safe_hash)resolve_obj_by_qualname)SchedulerInterface)generatepoolingdraft)fcfspriorityc                   @   s  e Zd ZU dZee ed< 	 ee ed< 	 dZe	e ed< dZ
e	e ed< dZeed	< 	 eed
dZeed< 	 ee
d
dZeed< 	 ed
d
dZeed< 	 ed
d
dZeed< 	 dZeed< 	 dZeed< 	 dZeed< 	 eddZeed< 	 eddZeed< 	 dZeed< 	 dZeed< 	 eddZeee B ed< 	 dZedB ed< 	 eddZeed < 	 ed
d
dZ eed!< 	 e!d"d# Z"d$ed% fd&d'Z#d$efd(d)Z$e%dd d*d+e&d,e'd-e(d$e'fd.d/Z)deded$dfd0d1Z*ded$e+fd2d3Z,dS )4SchedulerConfigzScheduler configuration.max_model_lenis_encoder_decoderi   DEFAULT_MAX_NUM_BATCHED_TOKENS   DEFAULT_MAX_NUM_SEQSr   runner_type   )defaultgemax_num_batched_tokensmax_num_seqsmax_num_partial_prefillsmax_long_partial_prefillsr   long_prefill_token_thresholdTenable_chunked_prefillFis_multimodal_model)initmax_num_encoder_input_tokensencoder_cache_sizer   policydisable_chunked_mm_inputN)r   scheduler_clsdisable_hybrid_kv_cache_managerasync_schedulingstream_intervalc                  K   s.   d| vrd| d< d| vrd| d< t di | S )z`
        Factory method to create `SchedulerConfig` with default values for `InitVar`s.
        r   i    r   FN )r   )kwargsr1   r1   R/home/ubuntu/veenaModal/venv/lib/python3.10/site-packages/vllm/config/scheduler.pydefault_factory   s
   zSchedulerConfig.default_factoryreturnr   c                 C   sd   | j d u r| jrddlm} |S ddlm} |S td| j  t| j t	s-t
td | j S t| j S )Nr   )AsyncScheduler)	SchedulerzpUsing custom scheduler class %s. This scheduler interface is not public and compatibility may not be maintained.r   )r-   r/   "vllm.v1.core.sched.async_schedulerr6   vllm.v1.core.sched.schedulerr7   loggerwarning_once
isinstancestrr   typer   )selfr6   r7   r1   r1   r3   get_scheduler_cls   s   

z!SchedulerConfig.get_scheduler_clsc                 C   s,   g }| | j tt| dd }|S )a  
        WARNING: Whenever a new field is added to this config,
        ensure that it is included in the factors list if
        it affects the computation graph.

        Provide a hash that uniquely identifies all the configs
        that affect the structure of the computation
        graph from input ids/embeddings to the final hidden states,
        excluding anything before input ids/embeddings and after
        the final hidden states.
        F)usedforsecurity)appendr!   r   r=   encode	hexdigest)r?   factorshash_strr1   r1   r3   compute_hash   s   zSchedulerConfig.compute_hashwrap)modevaluehandlerc                 C   s   |du rdS ||S )zFSkip validation if the value is `None` when initialisation is delayed.Nr1   )clsrJ   rK   r1   r1   r3   _skip_none_validation   s   z%SchedulerConfig._skip_none_validationc                 C   s   |rd| _ d| _d| _td | j| _| j| _| jr"td| j | jdkr>| jdkr3t	|d | _td| j| j
| j | | d S )	NTFr   zYEncoder-decoder models do not support chunked prefill nor prefix caching; disabling both.z:Chunked prefill is enabled with max_num_batched_tokens=%d.r   g{Gz?zConcurrent partial prefills enabled with max_num_partial_prefills=%d, max_long_partial_prefills=%d, long_prefill_token_threshold=%d)r,   r&   r%   r:   infor!   r)   r*   r#   intr$   verify_max_model_len)r?   r   r   r1   r1   r3   __post_init__   s0   

	zSchedulerConfig.__post_init__c                 C   s   | j |k r| jstd| j  d| d| j | jk r'td| j  d| j d| j | j| kr:td| j | j|  | jdkrW| jsFtd| j|krWtd	| j d
| d| j| jkrjtd| jd| jd| S )Nzmax_num_batched_tokens (z!) is smaller than max_model_len (z). This effectively limits the maximum sequence length to max_num_batched_tokens and makes vLLM reject longer sequences. Please increase max_num_batched_tokens or decrease max_model_len.z1) must be greater than or equal to max_num_seqs (z).zlmax_num_batched_tokens (%d) exceeds max_num_seqs * max_model_len (%d). This may lead to unexpected behavior.r   zDChunked prefill must be enabled to set max_num_partial_prefills > 1.zlong_prefill_token_threshold (z,) cannot be greater than the max_model_len (zself.max_long_partial_prefills=z= must be less than or equal to self.max_num_partial_prefills=.)	r!   r&   
ValueErrorr"   r:   warningr#   r%   r$   )r?   r   r1   r1   r3   rP      sP   

	



z$SchedulerConfig.verify_max_model_len)-__name__
__module____qualname____doc__r   rO   __annotations__boolr   r   r   r   
RunnerTyper	   r!   r"   r#   r$   r%   r&   r'   r)   r*   r+   SchedulerPolicyr,   r-   r=   r>   objectr.   r/   r0   staticmethodr4   r@   rG   r
   classmethodr   r   rM   rQ   r   rP   r1   r1   r1   r3   r      sb   
 	

#r   N) collections.abcr   dataclassesr   typingr   r   r   r   r   pydanticr	   r
   pydantic.dataclassesr   typing_extensionsr   vllm.config.utilsr   vllm.loggerr   vllm.utils.hashingr   vllm.utils.import_utilsr   vllm.v1.core.sched.interfacer   rU   r:   r[   r\   r   r1   r1   r1   r3   <module>   s$   