o
    
۾ipX                     @   sJ  d dl Z d dlZd dlZd dlZd dlZd dlmZ d dlmZm	Z	m
Z
 d dlZd dlmZ d dlmZ ermd dlmZmZ d dlmZ d dlmZmZ d d	lmZ d d
lmZmZ d dlmZ d dlm Z  d dl!m"Z" ne#Z ee$Z%de&fddZ'G dd dej(Z)G dd dej(Z*G dd de
Z+G dd dZ,G dd de,Z-dS )    N)	timedelta)TYPE_CHECKINGAny
NamedTuple)init_logger)AttentionBackendEnum)PrefixStoreProcessGroup)
VllmConfig)ProcessorInputs
PromptType)PoolingParams)
DictPrompt	TokPrompt)SamplingParams)FlexibleArgumentParser)AttentionSelectorConfigreturnc                   C   s   dd t  v S )N	microsoft )joinplatformunamelower r   r   L/home/ubuntu/.local/lib/python3.10/site-packages/vllm/platforms/interface.pyin_wsl    s   r   c                   @   D   e Zd Ze Ze Ze Ze Ze Z	e Z
e ZdS )PlatformEnumN)__name__
__module____qualname__enumautoCUDAROCMTPUXPUCPUOOTUNSPECIFIEDr   r   r   r   r   %       r   c                   @   r   )CpuArchEnumN)r   r    r!   r"   r#   X86ARMPOWERPCS390XRISCVOTHERUNKNOWNr   r   r   r   r,   /   r+   r,   c                   @   s   e Zd ZU eed< eed< dedefddZdedefddZdedefd	d
Z	dedefddZ
dedefddZdefddZdefddZdS )DeviceCapabilitymajorminorotherr   c                 C   s&   t |tstS | j| jf|j|jfk S N
isinstancer4   NotImplementedr5   r6   selfr7   r   r   r   __lt__=      
zDeviceCapability.__lt__c                 C   s&   t |tstS | j| jf|j|jfkS r8   r9   r<   r   r   r   __le__B   r?   zDeviceCapability.__le__c                 C   s&   t |tstS | j| jf|j|jfkS r8   r9   r<   r   r   r   __eq__G   r?   zDeviceCapability.__eq__c                 C   s&   t |tstS | j| jf|j|jfkS r8   r9   r<   r   r   r   __ge__L   r?   zDeviceCapability.__ge__c                 C   s&   t |tstS | j| jf|j|jfkS r8   r9   r<   r   r   r   __gt__Q   r?   zDeviceCapability.__gt__c                 C   s   | j  d| j S )N.)r5   r6   r=   r   r   r   as_version_strV   s   zDeviceCapability.as_version_strc                 C   s.   d| j   krdk sJ  J | jd | j  S )z
        Express device capability as an integer `<major><minor>`.

        It is assumed that the minor version is always a single digit.
        r   
   )r6   r5   rE   r   r   r   to_intY   s   zDeviceCapability.to_intN)r   r    r!   int__annotations__r   boolr>   r@   rA   rB   rC   strrF   rH   r   r   r   r   r4   9   s   
 r4   c                   @   s  e Zd ZU eed< eed< eed< dZeed< dZeed< dZeed	< g Z	e
e ed
< dZeed< dZeed< g Ze
e ed< g Ze
e ed< dZedB ed< edefddZede
ej fddZdefddZdefddZdefddZdefddZdefdd Zdefd!d"Zdefd#d$Zd%edefd&d'Zdefd(d)Z defd*d+Z!e"defd,d-Z#e"defd.d/Z$e"d0efd1d2Z%e"dd3d4Z&e"d5d6d7d8defd9d:Z'e"de
d6 fd;d<Z(e"	dd=ed>ejd?d@dd6fdAdBZ)e"	Cdd0ede*dB fdDdEZ+e"	CddFe,eef eB d0edefdGdHZ-e"	CddFe,eef eB d0edefdIdJZ.e"	CddFed0edefdKdLZ/e"dd0edefdMdNZ0e"dd0edefdOdPZ1e"dd0edefdQdRZ2e"dSdT Z3e"dUej4ddfdVdWZ5e"	ddXe6dB ddfdYdZZ7e"dd]d^Z8e"d_eddfd`daZ9e"dbeddfdcddZ:e"de;fdedfZ<e"defdgdhZ=e"	ddUej>j?dB de@fdidjZAe"defdkdlZBe"d>ejde,e@e@f fdmdnZCe"defdodpZDe"defdqdrZEe"defdsdtZFe"defdudvZGe"defdwdxZHe"defdydzZIe"dejfd{d|ZJe"defd}d~ZKe"defddZLe"defddZMe"								dddZNdefddZOdefddZPe"defddZQe"d?edddededeRddfddZSe"d>ejfddZTe"defddZUe"defddZVe"defddZWe"dd ZXe"deYee,edf f fddZZe"dedB fddZ[e"dedefddZ\e"deYeef fddZ]dS )Platform_enumdevice_namedevice_typer(   dispatch_key ray_device_key'VLLM_DEVICE_CONTROL_ENV_VAR_PLACEHOLDERdevice_control_env_varray_noset_device_env_varsinductorsimple_compile_backenddist_backendsupported_quantizationadditional_env_varsN_global_graph_poolr   c                 C      dS )z3Inductor config key for the PassManager custom passpost_grad_custom_post_passr   rE   r   r   r   pass_key      zPlatform.pass_keyc                 C   s   t jt jt jgS )z6Returns the supported dtypes for the current platform.)torchbfloat16float16float32rE   r   r   r   supported_dtypes   s   zPlatform.supported_dtypesc                 C      | j tjkS r8   )rN   r   r$   rE   r   r   r   is_cuda      zPlatform.is_cudac                 C   rf   r8   )rN   r   r%   rE   r   r   r   is_rocm   rh   zPlatform.is_rocmc                 C   rf   r8   )rN   r   r&   rE   r   r   r   is_tpu   rh   zPlatform.is_tpuc                 C   rf   r8   )rN   r   r'   rE   r   r   r   is_xpu   rh   zPlatform.is_xpuc                 C   rf   r8   )rN   r   r(   rE   r   r   r   is_cpu   rh   zPlatform.is_cpuc                 C   rf   r8   )rN   r   r)   rE   r   r   r   is_out_of_tree   rh   zPlatform.is_out_of_treec                 C   rf   r8   )rN   r   r*   rE   r   r   r   is_unspecified   rh   zPlatform.is_unspecified
prompt_lenc                 C      t jS r8   )sysmaxsize)r=   ro   r   r   r   get_max_output_tokens   s   zPlatform.get_max_output_tokensc                 C      | j tjtjfv S )z1Stateless version of [torch.cuda.is_available][].rN   r   r$   r%   rE   r   r   r   is_cuda_alike   s   zPlatform.is_cuda_alikec                 C   rt   r8   ru   rE   r   r   r   is_sleep_mode_available   s   z Platform.is_sleep_mode_availablec                 C   r]   )z
        Get the pass manager class for this platform.
        It will be registered as a custom pass under the current_platform.pass_key.
        z8vllm.compilation.passes.pass_manager.PostGradPassManagerr   clsr   r   r   get_pass_manager_cls      zPlatform.get_pass_manager_clsc                 C   s   | j S )zF
        Get the custom compile backend for current platform.
        )rX   rx   r   r   r   get_compile_backend   s   zPlatform.get_compile_backend	device_idc                 C   sB   | j tjv rtj| j  dkrtj| j  d}|| }t|S |S )NrR   ,)rU   osenvironsplitrI   )ry   r}   
device_idsphysical_device_idr   r   r   device_id_to_physical_device_id   s   z(Platform.device_id_to_physical_device_idc              
   C   sv   zddl }W n ty } ztd| W Y d}~nd}~ww tt ddl}W d   dS 1 s4w   Y  dS )z'Import any platform-specific C kernels.r   Nz!Failed to import from vllm._C: %r)vllm._CImportErrorloggerwarning
contextlibsuppressvllm._moe_C)ry   vllmer   r   r   import_kernels   s   
"zPlatform.import_kernelsselected_backendr   attn_selector_configr   c                 C   r]   )z,Get the attention backend class of a device.rR   r   )ry   r   r   r   r   r   get_attn_backend_cls      zPlatform.get_attn_backend_clsc                 C   s   t jgS r8   )r   
TORCH_SDPArx   r   r   r   get_supported_vit_attn_backends   s   z(Platform.get_supported_vit_attn_backends	head_sizedtypebackendzAttentionBackendEnum | Nonec                 C   s\   |dur!||   v sJ d| d|    td| d |S tdtj d tjS )a  
        Get the vision attention backend class of a device.

        NOTE: ViT Attention should be checked and override in the platform-specific
        implementation. we should not override this in any other places, like
        the model_executor/models/<model_name>.py.

        We check if the backend is None or not:
            1. If not, check if the backend is supported by the platform.
            2. If None, continue to the default selection logic.
        NzBackend z; is not supported for vit attentionSupported backends are: zUsing backend z for vit attentionzUsing default backend )r   r   	info_oncer   r   )ry   r   r   r   r   r   r   get_vit_attn_backend   s   zPlatform.get_vit_attn_backendr   c                 C   r]   )z:Stateless version of [torch.cuda.get_device_capability][].Nr   ry   r}   r   r   r   get_device_capability  r{   zPlatform.get_device_capability
capabilityc                 C   s6   | j |d}|du rdS t|tr||kS | |kS )a.  
        Test whether this platform is compatible with a device capability.

        The `capability` argument can either be:

        - A tuple `(major, minor)`.
        - An integer `<major><minor>`. (See
        [`DeviceCapability.to_int`][vllm.platforms.interface.DeviceCapability.to_int])
        r}   NFr   r:   tuplerH   ry   r   r}   current_capabilityr   r   r   has_device_capability     
zPlatform.has_device_capabilityc                 C   s6   | j |d}|du rdS t|tr||kS | |kS )a3  
        Test whether this platform has exactly the specified device capability.

        The `capability` argument can either be:

        - A tuple `(major, minor)`.
        - An integer `<major><minor>`. (See
        [`DeviceCapability.to_int`][vllm.platforms.interface.DeviceCapability.to_int])
        r   NFr   r   r   r   r   is_device_capability3  r   zPlatform.is_device_capabilityc                 C   s,   | j |d}|du rdS | d |d kS )z
        Returns True if the device capability is any <major>.x.
        Mirrors CUDA 13 'family' architecture semantics (e.g. 10.x, 11.x, 12.x).
        r   NFrG   )r   rH   r   r   r   r   is_device_capability_familyK  s   
z$Platform.is_device_capability_familyc                 C      t )zGet the name of a device.NotImplementedErrorr   r   r   r   get_device_nameZ  r`   zPlatform.get_device_namec                 C   r   )z.Get the uuid of a device, e.g. the PCI bus ID.r   r   r   r   r   get_device_uuid_  r`   zPlatform.get_device_uuidc                 C   r   )z*Get the total memory of a device in bytes.r   r   r   r   r   get_device_total_memoryd  r`   z Platform.get_device_total_memoryc                 C   s   t jddS )a  A device-specific wrapper of `torch.inference_mode`.

        This wrapper is recommended because some hardware backends such as TPU
        do not support `torch.inference_mode`. In such a case, they will fall
        back to `torch.no_grad` by overriding this method.
        T)mode)ra   inference_moderx   r   r   r   r   i  s   zPlatform.inference_modedevicec                 C   r   )z:
        Set the device for the current platform.
        r   ry   r   r   r   r   
set_devices     zPlatform.set_deviceparserc                 C   r]   )a  
        Do some pre-registration or update action for the current platform.

        This function is called before global VllmConfig is initialized or cli
        arguments are parsed. It's used for out-of-tree platforms to register or
        update the configuration.

        For example, the out-of-tree quantization config can be imported and
        registered here dynamically.
        Nr   )ry   r   r   r   r   pre_register_and_updatez  s   z Platform.pre_register_and_updatevllm_configr
   c                 C   r]   )ac  
        Check and update the configuration for the current platform.

        It can raise an exception if the configuration is not compatible with
        the current platform, or it can update the configuration to make it
        compatible with the current platform.

        The config is passed by reference, so it can be modified in place.
        Nr   )ry   r   r   r   r   check_and_update_config  s   z Platform.check_and_update_config
model_archc                 C   r]   )a  
        Verify whether the current platform supports the specified model
        architecture.

        - This will raise an Error or Warning based on the model support on
        the current platform.
        - By default all models are considered supported.
        Nr   )ry   r   r   r   r   verify_model_arch  s   
zPlatform.verify_model_archquantc                 C   s.   | j r|| j vrt| d| j ddS dS )zW
        Verify whether the quantization is supported by the current platform.
        z, quantization is currently not supported in rD   N)rZ   
ValueErrorrO   )ry   r   r   r   r   verify_quantization  s
   zPlatform.verify_quantizationc                 C   sr   t   }|dv rtjS |ds|drtjS |dr"tjS |dkr)tjS |dr1tj	S |r6tj
S tjS )z
        Determine the CPU architecture of the current system.
        Returns CpuArchEnum indicating the architecture type.
        )x86_64amd64i386i686armaarchppcs390xriscv)r   machiner   r,   r-   
startswithr.   r/   r0   r1   r2   r3   )ry   r   r   r   r   get_cpu_architecture  s   

zPlatform.get_cpu_architecturec                 C   s   t  r
td dS dS )z?Checks whether pin memory is available on the current platform.zPUsing 'pin_memory=False' as WSL is detected. This may slow down the performance.FT)r   r   r   rx   r   r   r   is_pin_memory_available  s   z Platform.is_pin_memory_availablec                 C   r   )z3
        Return the memory usage in bytes.
        r   r   r   r   r   get_current_memory_usage  r   z!Platform.get_current_memory_usagec                 C   r   )zA
        Return the punica wrapper for current platform.
        r   rx   r   r   r   get_punica_wrapper  r   zPlatform.get_punica_wrapperc                 C   s   t dt dfS )zE
        Return the platform specific values for (-inf, inf)
        z-infinf)floatry   r   r   r   r   get_infinity_values  s   zPlatform.get_infinity_valuesc                 C   r]   )zF
        Checks if the platform allows inplace memory updates
        Tr   rx   r   r   r   can_update_inplace  r   zPlatform.can_update_inplacec                 C   r]   )zK
        Returns how much padding the LoRA logits need for kernels
           r   rx   r   r   r   get_lora_vocab_padding_size  r   z$Platform.get_lora_vocab_padding_sizec                 C   r]   )zW
        Get device specific communicator class for distributed communication.
        zUvllm.distributed.device_communicators.base_device_communicator.DeviceCommunicatorBaser   rx   r   r   r   get_device_communicator_cls  r   z$Platform.get_device_communicator_clsc                 C   r]   )zI
        Returns whether the current platform supports MX types.
        Fr   rx   r   r   r   supports_mx  r   zPlatform.supports_mxc                 C   r]   )zJ
        Returns whether the current platform supports FP8 types.
        Fr   rx   r   r   r   supports_fp8  r   zPlatform.supports_fp8c                 C   r]   )a  
        Returns whether the preferred FP8 type is FNUZ on the current platform.

        There are two representations of FP8, OCP FP8 and FNUZ FP8.
        The OCP specification can be found at https://tinyurl.com/b7jvwpft.
        The FNUZ specification can be found at https://tinyurl.com/5n6hwwu5.

        AMD's MI300 and MI325 have native hardware support for FNUZ. All other
        hardware has converged on the OCP FP8 standard.
        Fr   rx   r   r   r   is_fp8_fnuz	     zPlatform.is_fp8_fnuzc                 C   rp   )z
        Returns the preferred FP8 type on the current platform.

        See the documentation for is_fp8_fnuz for details.
        )ra   float8_e4m3fnrx   r   r   r   	fp8_dtype  s   zPlatform.fp8_dtypec                 C   r]   )zS
        Whether to use allgather in LogitsProcessor to gather the logits.
        Tr   rx   r   r   r   use_all_gather   r   zPlatform.use_all_gatherc                 C   r]   )zR
        Returns if custom allreduce is supported on the current platform
        Fr   rx   r   r   r   use_custom_allreduce'  r   zPlatform.use_custom_allreducec                 C   r]   )zu
        Returns True if we register attention as one giant opaque custom op
        on the current platform
        Fr   rx   r   r   r   opaque_attention_op.  r{   zPlatform.opaque_attention_opprompt#PromptType | DictPrompt | TokPromptparamsSamplingParams | PoolingParamsprocessed_inputsr   c                 C   r]   )z6Raises if this request is unsupported on this platformNr   )ry   r   r   r   r   r   r   validate_request6  s    zPlatform.validate_requestkeyc                 C   sJ   t t| jd }|d urt||rt ||}|d ur|S td| j| d S )Nz1Current platform %s does not have '%s' attribute.)getattrra   rP   hasattrr   r   )r=   r   r   attrr   r   r   __getattr__?  s   
zPlatform.__getattr__c                 C   s    | j }|jdu r|  |_|jS )zA
        Return the global graph pool for this platform.
        N)	__class__r\   graph_pool_handle)r=   ry   r   r   r   get_global_graph_poolO  s   

zPlatform.get_global_graph_poolc                 C   r]   )zB
        Get static graph wrapper class for static graph.
        z=vllm.compilation.base_static_graph.AbstractStaticGraphWrapperr   rx   r   r   r   get_static_graph_wrapper_clsX  r   z%Platform.get_static_graph_wrapper_clsprefix_storer   
group_rank
group_sizetimeoutr	   c                 C   r   )zI
        Init platform-specific torch distributed process group.
        r   )ry   r   r   r   r   r   r   r   r   #stateless_init_device_torch_dist_pg_  r   z,Platform.stateless_init_device_torch_dist_pgc                 C   r   )zJ
        Check if the dtype is supported by the current platform.
        r   r   r   r   r   check_if_supports_dtypem  r   z Platform.check_if_supports_dtypec                 C   r]   )zV
        Returns if the hybrid kv cache is supported by the current platform.
        Fr   rx   r   r   r   support_hybrid_kv_cachet  r   z Platform.support_hybrid_kv_cachec                 C   r]   )zQ
        Returns if the graph mode is supported by the current platform.
        Fr   rx   r   r   r   support_static_graph_mode{  r   z"Platform.support_static_graph_modec                 C   r]   )zN
        Returns if the current platform needs to sync weight loader.
        Fr   rx   r   r   r   use_sync_weight_loader  r   zPlatform.use_sync_weight_loaderc                    s   |   s S  fdd}|S )zD
        Wrap the original weight loader to make it synced.
        c                    s4    | g|R i |}| j t dkrt|  |S )Ncpu)r   ra   _sync)paramargskwargsoutoriginal_weight_loaderr   r   _synced_weight_loader  s   
zAPlatform.make_synced_weight_loader.<locals>._synced_weight_loader)r   )ry   r   r   r   r   r   make_synced_weight_loader  s   z"Platform.make_synced_weight_loader.c                 C      i S )zo
        Returns a mapping from device_type to a tuple of supported
        kv_buffer_device for nixl.
        r   rx   r   r   r   get_nixl_supported_devices  r{   z#Platform.get_nixl_supported_devicesc                 C   r]   )zH
        Returns the nixl memory type for the current platform.
        Nr   rx   r   r   r   get_nixl_memory_type  r   zPlatform.get_nixl_memory_typemax_model_lenc                 C   s   |S )z?
        Check max_model_len for the current platform.
        r   )ry   r   r   r   r   check_max_model_len  r   zPlatform.check_max_model_lenc                 O   r   )zX
        Set some additional forward context for the current platform if needs.
        r   )ry   r   r   r   r   r   set_additional_forward_context  r   z'Platform.set_additional_forward_context)r   Nr8   )r   )r   r
   r   N)r   r   r   r   r   r   r   N)^r   r    r!   r   rJ   rL   rQ   rS   rU   rV   listrX   rY   rZ   r[   r\   r   propertyr_   ra   r   re   rK   rg   ri   rj   rk   rl   rm   rn   rI   rs   rv   rw   classmethodrz   r|   r   r   r   r   r   r4   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r,   r   r   typesDevicer   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   dictr   r   r  r  r   r   r   r   rM   c   s  
 	
		
	
 rM   c                   @   s   e Zd ZejZdZdS )UnspecifiedPlatformrR   N)r   r    r!   r   r*   rN   rP   r   r   r   r   r	    s    r	  ).r   r"   r   r   rq   datetimer   typingr   r   r   ra   vllm.loggerr   #vllm.v1.attention.backends.registryr   torch.distributedr   r	   vllm.configr
   vllm.inputsr   r   vllm.pooling_paramsr   vllm.renderers.inputsr   r   vllm.sampling_paramsr   vllm.utils.argparse_utilsr   vllm.v1.attention.selectorr   objectr   r   rK   r   Enumr   r,   r4   rM   r	  r   r   r   r   <module>   s>   

*    X