o
    i>                     @   s   d dl Z d dlZd dlZd dlZd dlZd dlZd dlmZ d dlm	Z	 d dl
Z
d dlZd dlZd dlmZ d dlmZ d dlmZ d dlmZ dd	lmZmZmZ eeZe	red d
lmZ d dlmZ ndZdddZ eG dd dZ!G dd deZ"dS )    N)	dataclass)TYPE_CHECKING)envs)init_logger)is_quantized_kv_cache)AttentionBackendEnum   )CpuArchEnumPlatformPlatformEnum)
VllmConfig)AttentionSelectorConfigc                 C   s4   t tdrtt| S t dkrt S td)Nsched_getaffinityDarwinUnsupported OS)hasattroslenr   platformsystem	cpu_countNotImplementedError)pid r   H/home/ubuntu/vllm_env/lib/python3.10/site-packages/vllm/platforms/cpu.pyget_max_threads!   s
   
r   c                   @   sZ   e Zd ZU dZeed< dZeed< dZeed< ede	defddZ
ed	efd
dZdS )LogicalCPUInfoidphysical_core	numa_nodevaluereturnc                 C   s(   zt |}W |S  ty   d}Y |S w )Nr   )int	Exception)clsr!   	int_valuer   r   r   _int0   s   
zLogicalCPUInfo._intobj_dictc                 C   sZ   |  d}|  d}|  d}|d u s+|d u s+|d u s+tt|t|t|dS | S )Ncpucorenoder   r   r    )getr   r'   )r(   r   r   r    r   r   r   json_decoder8   s   


zLogicalCPUInfo.json_decoderN)__name__
__module____qualname__r   r#   __annotations__r   r    classmethodstrr'   staticmethoddictr.   r   r   r   r   r   *   s   
 r   c                   @   sr  e Zd ZU ejZdZeed< dZ	eed< dZ
eed< dZeed< dZed	eej fd
dZed/ded	efddZeddddd	efddZed/ded	efddZedejd	dfddZedd Zeded	dfdd Zed	eee ee f fd!d"Zed	efd#d$Z ed	efd%d&Z!ed	efd'd(Z"ed	efd)d*Z#ed	efd+d,Z$ed	efd-d.Z%dS )0CpuPlatformr)   device_namedevice_typeCPUdispatch_keygloodist_backendCPU_VISIBLE_MEMORY_NODESr"   c                 C   s   |   tjkrtjtjgS |   tjkr4tj	dr4t
jdgdd dkr.tjtjtjgS tjtjgS |   tjkr?tjgS tjtjtjgS )Ndarwinz#sysctl -n hw.optional.arm.FEAT_BF16T)shell   1)get_cpu_architecturer	   POWERPCtorchbfloat16float32ARMsysr   
startswith
subprocesscheck_outputstripfloat16RISCV)selfr   r   r   supported_dtypesP   s    zCpuPlatform.supported_dtypesr   	device_idc                 C      dS )Nr)   r   )r%   rQ   r   r   r   get_device_name{      zCpuPlatform.get_device_nameselected_backendr   attn_selector_configr   c                 C   s@   |r|t jkrtd| |jrtd|jrtdt j S )NzCannot use %s backend on CPU.zMLA is not supported on CPU.z)Sparse Attention is not supported on CPU.)r   CPU_ATTNloggerinfouse_mlar   
use_sparseget_path)r%   rU   rV   r   r   r   get_attn_backend_cls   s   
z CpuPlatform.get_attn_backend_clsc           
      C   s   ddl m} ddlm} tj}d}|d u rFtj|r%dd t	|D ng }t
|p,d}t j| }d}	t||	 }td	|| |S ||9 }|S )
Nr   )	GiB_bytes)
format_gibz/sys/devices/system/nodec                 S   s   g | ]	}| d r|qS )r+   )rI   ).0dr   r   r   
<listcomp>       z7CpuPlatform.get_device_total_memory.<locals>.<listcomp>r   g      ?z:VLLM_CPU_KVCACHE_SPACE not set. Using %s GiB for KV cache.)vllm.utils.mem_constantsr^   vllm.utils.mem_utilsr_   r   VLLM_CPU_KVCACHE_SPACEr   pathexistslistdirr   psutilvirtual_memorytotalr#   rX   warning_once)
r%   rQ   r^   r_   kv_cache_spacenode_dirnodesnum_numa_nodesfree_cpu_memoryDEFAULT_CPU_MEM_UTILIZATIONr   r   r   get_device_total_memory   s(   
z#CpuPlatform.get_device_total_memorydeviceNc                 C   s   t j| dS )z:
        Set the device for the current platform.
        N)rD   r)   
set_device)r%   ru   r   r   r   rv      s   zCpuPlatform.set_devicec                 C   s   t  S )N)rD   no_gradr%   r   r   r   inference_mode   s   zCpuPlatform.inference_modevllm_configc              	   C   s6  |j }|d ur
d|_|j}|jd u rd|_|jd dkr!td |j}d|_|js-|j	r6t
|jr6td|jdrDtd	 d
|_t |_|j}|jdkre|jd ure|jdkretd|j d|_|jd
krmd|_|jrxtd d|_ddlm} g |j_|j}|jj|jkrtjdddkrd}nd}|j|_||_ |j!"ddddd |j#d ur|j$|_|j%j&dksJ dtjd< t't( tjd< t)j*dkrt't+, tjd< nt-d dtjd< dtjd< t.d d!}	d"|	v rdtjd#< dtjd$< d%tjd&< d%tjd'< d%tjd(< t/0 d)krot12 t3j4t3j5fv rod*|	v sod+|	v sotj67t+j8}
tj67|
}tj69|d,tj69|
d-g}g }|D ]}|:t;;tj69|d. qG|ro|d }|	rf|	d/7 }	|	|7 }	|	tjd < t'|jj<tjd0< |d ur|j=rt-d1 d|j_t>|j j?|jj@|j_Ad S d S d S )2NT       r   z^CPU backend prefers block_size is multiples of 32, otherwise the performance is not optimized.FzXChunked-prefill and prefix-cache on the CPU backend is not compatible with FP8 KV cache.fp8zCCPU backend doesn't support KV cache quantization fallback to auto.autor   mpzH%s is not supported on CPU, fallback to mp distributed executor backend.z#vllm.v1.worker.cpu_worker.CPUWorkerz5Dual-Batch Overlap is not supported on CPU, disabled.)CompilationModeVLLM_CPU_CI_ENV0eagerinductor)dcesize_assertsnan_assertsepilogue_fusionr)   spawnVLLM_WORKER_MULTIPROC_METHODNUMEXPR_MAX_THREADSnobindOMP_NUM_THREADSz+Disabling binding processes to CPU cores...1TORCHINDUCTOR_COMPILE_THREADS"VLLM_DISABLE_SHARED_EXPERTS_STREAM
LD_PRELOAD zlibiomp5.soKMP_BLOCKTIME
KMP_TPAUSEz	dist,distKMP_FORKJOIN_BARRIER_PATTERNKMP_PLAIN_BARRIER_PATTERNKMP_REDUCTION_BARRIER_PATTERNLinuxlibomplibgompz
torch.libslibzlibgomp*.so*:LOCAL_WORLD_SIZEz`MLA is enabled on a non-GPU platform; forcing chunked prefill and prefix caching to be disabled.)Bmodel_configdisable_cascade_attncache_config
block_sizerX   warningscheduler_configasync_schedulingenable_chunked_prefillenable_prefix_cachingr   cache_dtypeRuntimeErrorrI   r7   rt   cpu_kvcache_space_bytesparallel_config
world_sizedistributed_executor_backend
worker_cls
enable_dbovllm.configr   compilation_configcudagraph_capture_sizesmodeVLLM_COMPILEr   environr-   DYNAMO_TRACE_ONCEbackendinductor_compile_configupdatelora_configNONEdevice_configr9   r4   r   r   VLLM_CPU_OMP_THREADS_BINDrD   get_num_threadsrY   getenvr   r   r
   rB   r	   rG   rC   rg   dirname__file__joinextendglobtensor_parallel_sizerZ   maxmax_model_lenDEFAULT_MAX_NUM_BATCHED_TOKENSmax_num_batched_tokens)r%   rz   r   r   r   r   r   r   r   ld_preload_str	torch_pkg	site_roottorch_libs_pathspytorch_libgomp_so_candidates
torch_libspytorch_libgomp_sor   r   r   check_and_update_config   s   







	











	

z#CpuPlatform.check_and_update_configc                    s  t  dksJ tjdddd}tdd|}tj|tj	dd }d	d
 |D }t
tdr3td ntd fdd
|D }t |D ]}|j qEt}tj}|tjv r}tj| dkr}dd
 tj| dD }fdd
ttt|D }||fS )Nr   zlscpu -J -e=CPU,CORE,NODET)r@   textz"node":\s*-\s*(,|\n)z"node": 0\1)object_hookcpusc                 S   s$   g | ]}d |j |j|jfvr|qS )r   r,   r`   xr   r   r   rb   r  s
    z>CpuPlatform.get_allowed_cpu_core_node_list.<locals>.<listcomp>r   r   r   c                    s   g | ]	}|j  v r|qS r   )r   r   )allowed_cpu_id_listr   r   rb   }  rc   r   c                 S   s   g | ]}t |qS r   )r#   )r`   sr   r   r   rb     s    ,c                    s   g | ]}| v r|qS r   r   r   )allowed_numa_nodesr   r   rb     s    )r   r   rJ   rK   resubjsonloadsr   r.   r   r   r   r   setaddr    sortedr7   device_control_env_varr   splitlist)r%   lscpu_outputlogical_cpu_listr   allowed_numa_nodes_listenv_keyvisible_nodesr   )r   r   r   get_allowed_cpu_core_node_listd  s8   

z*CpuPlatform.get_allowed_cpu_core_node_listc                 C   rR   )NFr   rx   r   r   r   is_pin_memory_available  rT   z#CpuPlatform.is_pin_memory_availablec                 C   rR   )Nz4vllm.lora.punica_wrapper.punica_cpu.PunicaWrapperCPUr   rx   r   r   r   get_punica_wrapper  rT   zCpuPlatform.get_punica_wrapperc                 C   rR   )zW
        Get device specific communicator class for distributed communication.
        zFvllm.distributed.device_communicators.cpu_communicator.CpuCommunicatorr   rx   r   r   r   get_device_communicator_cls  s   z'CpuPlatform.get_device_communicator_clsc                 C   rR   NTr   rx   r   r   r   supports_structured_output  rT   z&CpuPlatform.supports_structured_outputc                 C   rR   r   r   rx   r   r   r   opaque_attention_op  rT   zCpuPlatform.opaque_attention_opc                 C   rR   r   r   rx   r   r   r   support_hybrid_kv_cache  rT   z#CpuPlatform.support_hybrid_kv_cacher   )&r/   r0   r1   r   r:   _enumr8   r4   r2   r9   r;   r=   r   propertyr   rD   dtyperP   r3   r#   rS   r]   rt   ru   rv   ry   r   r   tupler   r   boolr   r   r   r   r   r   r   r   r   r   r7   H   sT   
 *
 2 )r7   r   )#r   r   r   r   rJ   rH   dataclassesr   typingr   rj   regexr   rD   vllmr   vllm.loggerr   vllm.v1.attention.backendr   #vllm.v1.attention.backends.registryr   	interfacer	   r
   r   r/   rX   r   r   vllm.v1.attention.selectorr   r   r   r7   r   r   r   r   <module>   s2   
	