o
    -io-                     @   s   d dl Z d dlmZ d dlmZmZmZ d dlmZm	Z	m
Z
 d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZmZ erGd d
lmZ neZeeZed Zed Zed Zed Zed Zed ZeeG dd dZ dS )    N)field)TYPE_CHECKINGAnyLiteral)FieldSkipValidationfield_validator)	dataclass)config)init_logger)	GiB_bytes)
format_gibget_cpu_memory)ParallelConfig)             @         )autobfloat16fp8fp8_e4m3fp8_e5m2fp8_inc
fp8_ds_mla)r   float32float16)allalignnone)sha256sha256_cborxxhashxxhash_cbor)nativelmcachec                   @   s  e Zd ZU dZdZee ed< 	 eddddZ	e
ed< 	 ed	dd
Ze
ed< 	 dZeed< 	 dZeed< 	 dZedB ed< 	 dZedB ed< 	 dZeed< 	 dZeed< 	 eddd
Ze
ed< 	 dZeed< 	 dZedB ed< 	 dZedB ed< 	 edddZedB ed< 	 dZeed< 	 dZeed< 	 dZeed< 	 e ddd Z!edB ed!< 	 e ddd Z"edB ed"< 	 dZ#eed#< 	 dZ$edB ed$< 	 dZ%e
dB ed%< 	 d&Z&e'ed'< 	 d(e(fd)d*Z)d+d, Z*e+dd-d.e,ded(efd/d0Z-d1e.d(dfd2d3Z/dS )4CacheConfigzConfiguration for the KV cache.N
block_sizeg?r   r   )defaultgtlegpu_memory_utilization   )r+   ge
swap_spacer   cache_dtypeFis_attention_freenum_gpu_blocks_overridesliding_windowTenable_prefix_cachingr#   prefix_caching_hash_algocpu_offload_gbcalculate_kv_scalescpu_kvcache_space_bytesmamba_page_size_padded)r+   r,   mamba_block_sizemamba_cache_dtypemamba_ssm_cache_dtyper"   mamba_cache_mode)r+   initnum_gpu_blocksnum_cpu_blockskv_sharing_fast_prefillkv_cache_memory_byteskv_offloading_sizer'   kv_offloading_backendreturnc                 C   s*   h d}ddl m}m} || |}||S )a  
        WARNING: Whenever a new field is added to this config,
        ensure that it is included in the factors list if
        it affects the computation graph.

        Provide a hash that uniquely identifies all the configs
        that affect the structure of the computation
        graph from input ids/embeddings to the final hidden states,
        excluding anything before input ids/embeddings and after
        the final hidden states.
        >   r1   rB   rA   r3   r6   r.   r;   r:   rC   r4   r7   r   )get_hash_factorshash_factors)vllm.config.utilsrH   rI   )selfignored_factorsrH   rI   factors rN   N/home/ubuntu/veenaModal/venv/lib/python3.10/site-packages/vllm/config/cache.pycompute_hash   s   
zCacheConfig.compute_hashc                 C   s   dd | j  D S )Nc                 S   s   i | ]	\}}|t |qS rN   )str).0keyvaluerN   rN   rO   
<dictcomp>   s    z,CacheConfig.metrics_info.<locals>.<dictcomp>)__dict__items)rK   rN   rN   rO   metrics_info   s   zCacheConfig.metrics_infoafter)modec                 C   s   | dr
td |S )Nr   zUsing fp8 data type to store kv cache. It reduces the GPU memory footprint and boosts the performance. Meanwhile, it may cause accuracy drop without a proper scaling factor.)
startswithloggerinfo)clsr2   rN   rN   rO   _validate_cache_dtype   s
   
z!CacheConfig._validate_cache_dtypeparallel_configc                 C   st   t | jt }t }|j}|| }t| dt| d}|d| kr*td| |d| kr8t	d| d S d S )Nz GiB out of the z6 GiB total CPU memory is allocated for the swap space.gffffff?zToo large swap space. g?z!Possibly too large swap space. %s)
mathceilr1   r   r   tensor_parallel_sizer   
ValueErrorr\   warning)rK   r`   swap_space_bytestotal_cpu_memorynum_gpus_per_nodecpu_memory_usagemsgrN   rN   rO   verify_with_parallel_config   s   
z'CacheConfig.verify_with_parallel_config)0__name__
__module____qualname____doc__r*   r   	BlockSize__annotations__r   r.   floatr1   r2   
CacheDTyper3   boolr4   intr5   r6   r7   PrefixCachingHashAlgor8   r9   r:   r;   r<   r=   
MambaDTyper>   r?   MambaCacheModer   rA   rB   rC   rD   rE   rF   KVOffloadingBackendrQ   rP   rX   r   classmethodr_   r   rk   rN   rN   rN   rO   r)   '   st   
 
	"

r)   )!ra   dataclassesr   typingr   r   r   pydanticr   r   r   pydantic.dataclassesr	   rJ   r
   vllm.loggerr   vllm.utils.mem_constantsr   vllm.utils.mem_utilsr   r   vllm.config.parallelr   rl   r\   rp   rs   rw   rx   rv   ry   r)   rN   rN   rN   rO   <module>   s0   	