o
    پi-                     @  s   d dl mZ d dlZd dlZd dlmZ d dlmZmZm	Z	 d dl
Zd dlZd dlmZ d dlmZ er:d dlmZ eeZG dd	 d	ejZG d
d dejZG dd dejZG dd de	ZG dd dZG dd deZdS )    )annotationsN)	lru_cache)TYPE_CHECKINGAny
NamedTuple)init_logger)resolve_obj_by_qualname)AttentionImplc                   @  s   e Zd Ze Ze Ze Ze Ze Z	e Z
e Ze Ze Ze Ze Ze Ze Zdd ZedddZdS )	AttentionBackendEnumc                 C  s
   | j  S N)namelowerself r   e/home/ubuntu/.local/lib/python3.10/site-packages/sglang/multimodal_gen/runtime/platforms/interface.py__str__)   s   
zAttentionBackendEnum.__str__returnboolc                 C  s    | t jt jt jt jt jt jhv S r   )r
   SLIDING_TILE_ATTNVIDEO_SPARSE_ATTNSPARSE_VIDEO_GEN_2_ATTN
VMOBA_ATTNSLA_ATTNSAGE_SLA_ATTNr   r   r   r   	is_sparse,   s   zAttentionBackendEnum.is_sparseNr   r   )__name__
__module____qualname__enumautoFA2FAr   
TORCH_SDPA	SAGE_ATTNSAGE_ATTN_3r   r   r   AITERr   r   NO_ATTENTIONr   propertyr   r   r   r   r   r
      s"    r
   c                   @  sT   e Zd Ze Ze Ze Ze Ze Z	e Z
e Ze Ze ZdS )PlatformEnumN)r   r   r   r    r!   CUDAROCMTPUCPUMPSNPUMUSAOOTUNSPECIFIEDr   r   r   r   r*   8   s    r*   c                   @  s$   e Zd Ze Ze Ze ZdS )CpuArchEnumN)r   r   r   r    r!   X86ARMr3   r   r   r   r   r4   D   s    r4   c                   @  s2   e Zd ZU ded< ded< dddZddd	Zd
S )DeviceCapabilityintmajorminorr   strc                 C  s   | j  d| j S )N.)r9   r:   r   r   r   r   as_version_strN   s   zDeviceCapability.as_version_strc                 C  s.   d| j   krdk sJ  J | jd | j  S )z
        Express device capability as an integer ``<major><minor>``.

        It is assumed that the minor version is always a single digit.
        r   
   )r:   r9   r   r   r   r   to_intQ   s   zDeviceCapability.to_intNr   r;   )r   r8   )r   r   r   __annotations__r=   r?   r   r   r   r   r7   J   s
   
 
r7   c                   @  s
  e Zd ZU ded< ded< ded< dZded< d	Zded
< dZded< g Zded< eddd~ddZ	eddd~ddZ
eddd~ddZeddd~ddZeddd~ddZeedddd Zeedddd  Zeeddd!d" Zed~d#d$Zed~d%d&Zeddd~d'd(Zeddd~d)d*Zeddd~d+dZ
d~d,d-Zeddd~d.d/Zeddd~d0d1Zeddd2d3 Zeddd~d4d5Zeeddd~d6d7Zedd9d:ZeddAdBZe	CdddFdGZe	CdddJdKZedddLdMZ edddNdOZ!eedddddPdQZ"eddddSdTZ#eddddUdVZ$eddYdZZ%ed[d\ Z&eddd`daZ'eddcddZ(eddfdgZ)e	dddjdkZ*e	C	l	m	dddrdsZ+eddtduZ,eddwdxZ-ed~dydzZ.dd|d}Z/dS )Platformr*   _enumr;   device_namedevice_typeNztorch.device | Nonedevicer.   dispatch_keyinductorsimple_compile_backendz	list[str]supported_quantization   )maxsizer   r   c                 C     |   S r   )is_cuda_staticr   r   r   r   is_cudao      zPlatform.is_cudac                 C     | j tjkS r   )rC   r*   r0   r   r   r   r   is_npus      zPlatform.is_npuc                 C  rM   r   )is_rocm_staticr   r   r   r   is_rocmw   rP   zPlatform.is_rocmc                 C  rQ   r   )rC   r*   r-   r   r   r   r   is_tpu{   rS   zPlatform.is_tpuc                 C  rQ   r   )rC   r*   r.   r   r   r   r   is_cpu   rS   zPlatform.is_cpuc                 C     |   sdS tj d dkS )NFr   r>   rN   torchcudaget_device_capabilityclsr   r   r   is_blackwell      zPlatform.is_blackwellc                 C  s   |   sdS tj dkS )NF)	   r   rY   r]   r   r   r   	is_hopper   s   zPlatform.is_hopperc                 C  rX   )NFr      rY   r]   r   r   r   is_sm120   r`   zPlatform.is_sm120c                 C     t | dd tjkS NrC   )getattrr*   r+   r]   r   r   r   rN         zPlatform.is_cuda_staticc                 C  re   rf   )rg   r*   r,   r]   r   r   r   rT      rh   zPlatform.is_rocm_staticc                 C     t tdo	tj S )Nhpu)hasattrrZ   rj   is_availabler   r   r   r   is_hpu      zPlatform.is_hpuc                 C  ri   )Nxpu)rk   rZ   ro   rl   r   r   r   r   is_xpu   rn   zPlatform.is_xpuc                 C  ri   )Nnpu)rk   rZ   rq   rl   r   r   r   r   rR      rn   c                 C  rQ   r   )rC   r*   r2   r   r   r   r   is_out_of_tree   s   zPlatform.is_out_of_treec                 C  s   | j tjtjtjfv S )z5Stateless version of :func:`torch.cuda.is_available`.)rC   r*   r+   r,   r1   r   r   r   r   is_cuda_alike   s   zPlatform.is_cuda_alikec                 C  rQ   r   )rC   r*   r/   r   r   r   r   is_mps   rS   zPlatform.is_mpsc                 C  s,   zt tdo
tj W S  ty   Y dS w )NmusaF)rk   rZ   ru   rl   ModuleNotFoundErrorr   r   r   r   is_musa   s
   zPlatform.is_musac                 C  rM   r   )rU   r   r   r   r   is_hip   rP   zPlatform.is_hipc                 C     dS )NTr   r]   r   r   r   is_amp_supported      zPlatform.is_amp_supportedtorch.devicec                 C     t r   NotImplementedErrorr]   r   r   r   get_local_torch_device   s   zPlatform.get_local_torch_deviceselected_backendAttentionBackendEnum | None	head_sizer8   dtypetorch.dtypec                 C  ry   )z,Get the attention backend class of a device. r   )r^   r   r   r   r   r   r   get_attn_backend_cls_str   s   z!Platform.get_attn_backend_cls_strr   	device_idDeviceCapability | Nonec                 C  ry   )z>Stateless version of :func:`torch.cuda.get_device_capability`.Nr   r^   r   r   r   r   r\      s   zPlatform.get_device_capability
capabilitytuple[int, int] | intc                 C  s6   | j |d}|du rdS t|tr||kS | |kS )z
        Test whether this platform is compatible with a device capability.

        The ``capability`` argument can either be:

        - A tuple ``(major, minor)``.
        - An integer ``<major><minor>``. (See :meth:`DeviceCapability.to_int`)
        )r   NF)r\   
isinstancetupler?   )r^   r   r   current_capabilityr   r   r   has_device_capability   s   
zPlatform.has_device_capabilityc                 C  r}   )zGet the name of a device.r~   r   r   r   r   get_device_name   r{   zPlatform.get_device_namec                 C  r}   )z.Get the uuid of a device, e.g. the PCI bus ID.r~   r   r   r   r   get_device_uuid   r{   zPlatform.get_device_uuidc                 C  r}   )z*Get the total memory of a device in bytes.r~   r   r   r   r   get_device_total_memory   s   z Platform.get_device_total_memory
local_rankc                 C  s`   |   s|  rtd|S |  rtd|S |  r"td|S |  r+tdS tdS )Nr[   rq   ru   mpscpu)rO   rU   rZ   rF   rR   rw   rt   )r   r   r   r   r   
get_device  s   

zPlatform.get_devicec                 C  s8   |   rdS |  rdS |  rdS |  rdS td)NncclhcclmcclgloozGNo Accelerators(AMD/NV/MTT GPU, AMD MI instinct accelerators) available)rs   rR   rw   rt   r   r   r   r   r   !get_torch_distributed_backend_str  s   z*Platform.get_torch_distributed_backend_strenforce_eagerbool | Nonec                 C  r}   )zF
        Check if the current platform supports async output.
        r~   )r^   r   r   r   r   is_async_output_supported!     z"Platform.is_async_output_supportedc                 C  s   t jddS )a  A device-specific wrapper of `torch.inference_mode`.

        This wrapper is recommended because some hardware backends such as TPU
        do not support `torch.inference_mode`. In such a case, they will fall
        back to `torch.no_grad` by overriding this method.
        T)mode)rZ   inference_moder]   r   r   r   r   (  s   zPlatform.inference_modeseed
int | NoneNonec                 C  s<   |durt | tj | t| tj| dS dS )z
        Set the seed of each random module.
        `torch.manual_seed` will set seed on all devices.

        Loosely based on: https://github.com/Lightning-AI/pytorch-lightning/blob/2.4.0/src/lightning/fabric/utilities/seed.py#L20
        N)randomr   nprZ   manual_seedr[   manual_seed_all)r^   r   r   r   r   seed_everything2  s   

zPlatform.seed_everything
model_archc                 C  ry   )a  
        Verify whether the current platform supports the specified model
        architecture.

        - This will raise an Error or Warning based on the model support on
        the current platform.
        - By default all models are considered supported.
        Nr   )r^   r   r   r   r   verify_model_arch@  s   
zPlatform.verify_model_archquantc                 C  s.   | j r|| j vrt| d| j ddS dS )zW
        Verify whether the quantization is supported by the current platform.
        z, quantization is currently not supported in r<   N)rJ   
ValueErrorrD   )r^   r   r   r   r   verify_quantizationL  s   zPlatform.verify_quantizationtorch.types.Device | Nonefloatc                 C  r}   )z3
        Return the memory usage in bytes.
        r~   )r^   rF   r   r   r   get_current_memory_usageW  s   z!Platform.get_current_memory_usageFTdistributedempty_cache	cpu_groupr   c                 C  r}   )z5
        Return the available memory in GiB.
        r~   )r^   r   r   r   r   r   r   r   get_available_gpu_memory`  s   z!Platform.get_available_gpu_memoryc                 C  ry   )zW
        Get device specific communicator class for distributed communication.
        znsglang.multimodal_gen.runtime.distributed.device_communicators.base_device_communicator.DeviceCommunicatorBaser   r]   r   r   r   get_device_communicator_clsm  r   z$Platform.get_device_communicator_clsr4   c                 C  s   t jS )z1Get the CPU architecture of the current platform.)r4   r3   r]   r   r   r   get_cpu_architecturet  s   zPlatform.get_cpu_architecturec                 C  ry   )zKWhether to enable DIT layerwise offload by default on the current platform.Tr   r]   r   r   r   /enable_dit_layerwise_offload_for_wan_by_defaulty  r{   z8Platform.enable_dit_layerwise_offload_for_wan_by_defaultr	   c                 O  s   | j |i |}t|S r   )r   r   )r   argskwargsattention_cls_strr   r   r   get_attn_backend~  s   zPlatform.get_attn_backendr   )r   r|   )r   r   r   r8   r   r   r   r;   )r   )r   r8   r   r   )r   r   r   r8   r   r   )r   r8   r   r;   )r   r8   r   r8   )r   r8   r   r|   r@   )r   r   r   r   r   )r   r   r   r   )r   r;   r   r   )r   r;   r   r   )rF   r   r   r   )r   FTN)
r   r8   r   r   r   r   r   r   r   r   )r   r4   )r   r	   )0r   r   r   rA   rF   rG   rI   rJ   r   rO   rR   rU   rV   rW   classmethodr_   rb   rd   rN   rT   rm   rp   rr   rs   rt   rw   rx   rz   r   r   r\   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rB   [   s   
 

	
	
rB   c                   @  s   e Zd ZejZdZdS )UnspecifiedPlatformr   N)r   r   r   r*   r3   rC   rE   r   r   r   r   r     s    r   )
__future__r   r    r   	functoolsr   typingr   r   r   numpyr   rZ   1sglang.multimodal_gen.runtime.utils.logging_utilsr   sglang.multimodal_gen.utilsr   Isglang.multimodal_gen.runtime.layers.attention.backends.attention_backendr	   r   loggerEnumr
   r*   r4   r7   rB   r   r   r   r   r   <module>   s(     *