o
    پi)                     @   s  d Z ddlZddlmZ ddlmZmZ ddlmZm	Z	 ddl
Z
ddlZddlZddlZddlmZ ddlmZ ddlmZmZmZmZ dd	lmZ eeZed
Ze	dZdedefddZdeeef deeef fddZG dd deZ G dd de Z!G dd de Z"dZ#dej$vrzze%  dZ#W n e&y   dZ#Y nw W e#re'  ne#re'  w w e#re!ne"Z(zddl)m*Z* e+ee*se(,  W n e-y   e(,  Y nw edkre.e(j e.e(/  e.e(0  e.e(1  e.e(2g d dS dS )z{
This file is a platform abstraction for MThreads (MUSA) GPUs,
adjusted to match the structure and interface of `cuda.py`.
    N)Callable)	lru_cachewraps)AnyTypeVar)	ParamSpec)envs)AttentionBackendEnumDeviceCapabilityPlatformPlatformEnum)init_logger_P_R	device_idreturnc                 C   sD   dt jv r t jd d}|dgkrd}t|||  }t|S | S )NMUSA_VISIBLE_DEVICES, a
  MUSA_VISIBLE_DEVICES is set to empty string, which means GPU support is disabled. If you are using ray, please unset the environment variable `MUSA_VISIBLE_DEVICES` inside the worker/actor. Check https://github.com/vllm-project/vllm/issues/8402 for more information.)osenvironsplitRuntimeErrorint)r   
device_idsmsgphysical_device_id r   `/home/ubuntu/.local/lib/python3.10/site-packages/sglang/multimodal_gen/runtime/platforms/musa.pydevice_id_to_physical_device_id$   s   

r   fnc                    s*   t  dtjdtjdtf fdd}|S )Nargskwargsr   c                     s,   t   z | i |W t   S t   w N)pymtmlnvmlInitnvmlShutdown)r!   r"   r    r   r   wrapper8   s   z"with_mtml_context.<locals>.wrapper)r   r   r!   r"   r   )r    r(   r   r'   r   with_mtml_context7   s    r)   c                   @   st  e Zd ZU ejZdZeed< dZ	eed< dZ
eed< dZeed< edejfd	d
Zed/dededB fddZed/dedefddZeeddd/dedefddZededB defddZedee defddZed0ddZe	d1dejjdB defdd Ze		!	"	d2ded#ed$ed%edef
d&d'Z ed(e!dB d)ed*ej"defd+d,Z#edefd-d.Z$dS )3MusaPlatformBasemusadevice_namedevice_typeMUSAdispatch_keyr   device_control_env_varr   c                 C   s   t dtj S )Nzmusa:)torchdevicer   
LOCAL_RANKclsr   r   r   get_local_torch_deviceJ   s   z'MusaPlatformBase.get_local_torch_devicer   r   Nc                 C      t r#   NotImplementedErrorr5   r   r   r   r   get_device_capabilityN      z&MusaPlatformBase.get_device_capabilityc                 C   r7   r#   r8   r:   r   r   r   get_device_nameR   r<   z MusaPlatformBase.get_device_name   maxsizec                 C   r7   r#   r8   r:   r   r   r   get_device_total_memoryV   s   z(MusaPlatformBase.get_device_total_memoryenforce_eagerc                 C   s   |r	t d dS dS )NzTo see benefits of async output processing, enable MUSA graph. Since, enforce-eager is enabled, async output processor cannot be usedFT)loggerwarning)r5   rB   r   r   r   is_async_output_supported[   s   z*MusaPlatformBase.is_async_output_supportedr   c                 C   r7   r#   r8   )r5   r   r   r   r   is_full_mtlinkf   r<   zMusaPlatformBase.is_full_mtlinkc                 C   s   d S r#   r   r4   r   r   r   log_warningsj   r<   zMusaPlatformBase.log_warningsr2   c                 C   s   t j| tt j|S r#   )r1   cudareset_peak_memory_statsfloatmax_memory_allocated)r5   r2   r   r   r   get_current_memory_usagen   s   z)MusaPlatformBase.get_current_memory_usageFTdistributedempty_cache	cpu_groupc           
      C   s   |rt j  t j rt j }t j|}|jr t	 j
}nt j|\}}|rIdd lm} t j|t jdd}	|j|	|jj|d t|	 }|d S )Nr   r+   )dtyper2   )opgroupi   @)r1   rH   rN   rM   is_initializedget_rankget_device_propertiesis_integratedpsutilvirtual_memory	availablemem_get_infotorch.distributedtensorfloat32
all_reduceReduceOpMINrJ   item)
r5   r   rM   rN   rO   device_propsfree_gpu_memory_distr\   r   r   r   get_available_gpu_memoryu   s   


z)MusaPlatformBase.get_available_gpu_memoryselected_backend	head_sizerP   c                 C   s   t d 	 dS )NzUsing Torch SDPA backend.zHsglang.multimodal_gen.runtime.layers.attention.backends.sdpa.SDPABackend)rC   info)r5   rg   rh   rP   r   r   r   get_attn_backend_cls_str   s   
z)MusaPlatformBase.get_attn_backend_cls_strc                 C   s   dS )Nzasglang.multimodal_gen.runtime.distributed.device_communicators.cuda_communicator.CudaCommunicatorr   r4   r   r   r   get_device_communicator_cls   r<   z,MusaPlatformBase.get_device_communicator_clsr   r   Nr#   )r   FTN)%__name__
__module____qualname__r   r.   _enumr,   str__annotations__r-   r/   r0   classmethodr1   r2   r6   r   r
   r;   r=   r   rA   boolrE   listrF   rG   typesDevicerJ   rL   r   rf   r	   rP   rj   rk   r   r   r   r   r*   C   sp   
 

r*   c                
       s0  e Zd ZeeddeddededB fddZeedde	dd	e	eef eB dede
f fd
dZeeddeddedefddZeeddeddedefddZeeddeddedefddZeedee de
fddZeddedefddZeedddZ  ZS )MtmlMusaPlatform   r?   r   r   r   Nc                 C   sD   zt |}t|}t|\}}t||dW S  ty!   Y d S w N)majorminor)r   r$   nvmlDeviceGetHandleByIndex"nvmlDeviceGetCudaComputeCapabilityr
   r   )r5   r   r   handler|   r}   r   r   r   r;      s   
z&MtmlMusaPlatform.get_device_capability
capabilityc                    s*   z
t t ||W S  ty   Y dS w )NF)ru   superhas_device_capabilityr   )r5   r   r   	__class__r   r   r      s
   z&MtmlMusaPlatform.has_device_capabilityc                 C   s   t |}| |S r#   )r   _get_physical_device_name)r5   r   r   r   r   r   r=      s   
z MtmlMusaPlatform.get_device_namec                 C   s    t |}t|}tt|S r#   )r   r$   r~   rr   nvmlDeviceGetUUIDr5   r   r   r   r   r   r   get_device_uuid   s   
z MtmlMusaPlatform.get_device_uuidc                 C   s"   t |}t|}tt|jS r#   )r   r$   r~   r   nvmlDeviceGetMemoryInfototalr   r   r   r   rA      s   
z(MtmlMusaPlatform.get_device_total_memoryphysical_device_idsc              
   C   s   dd |D }t |D ]8\}}t |D ]/\}}||k rBzt||tj}|tjkr.W   dS W q tjyA   td Y   dS w qqdS )zP
        query if the set of gpus are fully connected by mtlink (1 hop)
        c                 S   s   g | ]}t |qS r   )r$   r~   .0ir   r   r   
<listcomp>       z3MtmlMusaPlatform.is_full_mtlink.<locals>.<listcomp>FzOMTLink detection failed. This is normal if your machine has no MTLink equipped.T)	enumerater$   nvmlDeviceGetP2PStatusNVML_P2P_CAPS_INDEX_NVLINKNVML_P2P_STATUS_OK	NVMLErrorrC   	exception)r5   r   handlesr   r   jpeer_handle
p2p_statusr   r   r   rF      s,   


zMtmlMusaPlatform.is_full_mtlinkc                 C   s   t |}tt |S r#   )r$   r~   rr   nvmlDeviceGetName)r5   r   r   r   r   r   r      s   
z*MtmlMusaPlatform._get_physical_device_namec                    sh   t  }|dkr. fddt|D }tt|dkr0tjddkr2t	dd
| d S d S d S d S )Nr>   c                    s   g | ]}  |qS r   )r   r   r4   r   r   r      r   z1MtmlMusaPlatform.log_warnings.<locals>.<listcomp>MUSA_DEVICE_ORDER
PCI_BUS_IDzDetected different devices in the system: %s. Please make sure to set `MUSA_DEVICE_ORDER=PCI_BUS_ID` to avoid unexpected behavior.z, )r$   nvmlDeviceGetCountrangelensetr   r   getrC   rD   join)r5   r   device_namesr   r4   r   rG      s   zMtmlMusaPlatform.log_warningsrl   rm   )rn   ro   rp   rt   r   r)   r   r
   r;   tupleru   r   rr   r=   r   rA   rv   rF   r   rG   __classcell__r   r   r   r   ry      sH    	
ry   c                   @   sx   e Zd ZeddedefddZeddedefddZee	dd	ddedefd
dZ
edee defddZdS )NonMtmlMusaPlatformr   r   r   c                 C   s   t j|\}}t||dS r{   )r1   rH   r;   r
   )r5   r   r|   r}   r   r   r   r;   
  s   z)NonMtmlMusaPlatform.get_device_capabilityc                 C   s   t tj|S r#   )rr   r1   rH   r=   r:   r   r   r   r=     s   z#NonMtmlMusaPlatform.get_device_namer>   r?   c                 C   s   t j|}t|jS r#   )r1   rH   rU   r   total_memory)r5   r   rb   r   r   r   rA     s   
z+NonMtmlMusaPlatform.get_device_total_memoryr   c                 C   s   t d dS )Nz^MTLink detection not possible, as context support was not found. Assuming no MTLink available.F)rC   error)r5   r   r   r   r   rF     s   z"NonMtmlMusaPlatform.is_full_mtlinkNrl   )rn   ro   rp   rt   r   r
   r;   rr   r=   r   rA   rv   ru   rF   r   r   r   r   r   	  s    r   FMUSA_DISABLE_MTMLT)_MockModule__main__)r   r>                     )3__doc__r   collections.abcr   	functoolsr   r   typingr   r   rW   r$   r1   torchadatyping_extensionsr   sglang.multimodal_genr   1sglang.multimodal_gen.runtime.platforms.interfacer	   r
   r   r   1sglang.multimodal_gen.runtime.utils.logging_utilsr   rn   rC   r   r   r   r   r)   r*   ry   r   mtml_availabler   r%   	Exceptionr&   MusaPlatformsphinx.ext.autodoc.mockr   
isinstancerG   ModuleNotFoundErrorprintr=   r;   rA   rF   r   r   r   r   <module>   sj    "db



