o
    ٷiW                     @   sf   d dl Z d dlmZ d dlmZ d dlmZ d dlmZ d dl	m
Z
mZ eeZG dd de
eZdS )	    N)init_logger)CudaPlatformBase)DeviceCapability)DiffusionAttentionBackendEnum)OmniPlatformOmniPlatformEnumc                   @   s"  e Zd ZdZejZedefddZ	edefddZ
edefddZed	ed
B dedefddZedefddZed"ded
B dejfddZed#deded
B fddZedefddZeded
B fddZed$ddZed"dejd
B defddZed#dedefd d!Zd
S )%CudaOmniPlatformzCUDA/GPU implementation of OmniPlatform (default).

    Inherits all CUDA-specific implementations from vLLM's CudaPlatform,
    and adds Omni-specific interfaces from OmniPlatform.
    returnc                 C      dS )Nz*vllm_omni.worker.gpu_ar_worker.GPUARWorker clsr   r   U/home/ubuntu/.local/lib/python3.10/site-packages/vllm_omni/platforms/cuda/platform.pyget_omni_ar_worker_cls      z'CudaOmniPlatform.get_omni_ar_worker_clsc                 C   r
   )Nz:vllm_omni.worker.gpu_generation_worker.GPUGenerationWorkerr   r   r   r   r   get_omni_generation_worker_cls   r   z/CudaOmniPlatform.get_omni_generation_worker_clsc                 C   r
   )Nz&vllm_omni/model_executor/stage_configsr   r   r   r   r   get_default_stage_config_path    r   z.CudaOmniPlatform.get_default_stage_config_pathselected_backendN	head_sizec                 C   s   ddl m} |  }d}|d ur&|\}}|d | }d|  ko#dk n  }| }	|	dd}
|o3|
}|d uri| }|dkr[|s[|sJtd	 n|
sQtd
 td t	j
 S t	| }td| | S |rutd t	j S td t	j
 S )Nr   )PACKAGES_CHECKERF
   P   d   has_flash_attn
FLASH_ATTNzkFlash Attention requires GPU with compute capability >= 8.0 and < 10.0. Falling back to TORCH_SDPA backend.zKFlash Attention packages not available. Falling back to TORCH_SDPA backend.z.Defaulting to diffusion attention backend SDPAz&Using diffusion attention backend '%s'z4Defaulting to diffusion attention backend FLASH_ATTN)vllm_omni.diffusion.envsr   get_device_capabilityget_packages_infogetupperloggerwarninginfor   
TORCH_SDPAget_pathr   )r   r   r   r   compute_capabilitycompute_supportedmajorminor
capabilitypackages_infopackages_availableflash_attn_supportedbackend_upperbackendr   r   r   get_diffusion_attn_backend_cls$   s:   






z/CudaOmniPlatform.get_diffusion_attn_backend_clsc                 C   r
   )NTr   r   r   r   r   supports_torch_inductorS   r   z(CudaOmniPlatform.supports_torch_inductor
local_rankc                 C   s   |d u r	t dS t d|S )Ncuda)torchdevice)r   r1   r   r   r   get_torch_deviceW   s   
z!CudaOmniPlatform.get_torch_devicer   	device_idc                 C   s   t j|\}}t||dS )N)r'   r(   )r3   r2   r   r   )r   r6   r'   r(   r   r   r   r   ]   s   z&CudaOmniPlatform.get_device_capabilityc                 C   s
   t j S N)r3   r2   device_countr   r   r   r   get_device_countb   s   
z!CudaOmniPlatform.get_device_countc                 C   s   t jjS r7   )r3   versionr2   r   r   r   r   get_device_versionf   s   z#CudaOmniPlatform.get_device_versionc                 C   s   t j  d S r7   )r3   r2   synchronizer   r   r   r   r<   j   s   zCudaOmniPlatform.synchronizer4   c                 C   s   t j|\}}|S r7   )r3   r2   mem_get_info)r   r4   free_r   r   r   get_free_memoryn   s   z CudaOmniPlatform.get_free_memoryc                 C   s   t j|S r7   )r3   r2   get_device_name)r   r6   r   r   r   rA   s   s   z CudaOmniPlatform.get_device_namer7   )r   )r	   N)__name__
__module____qualname____doc__r   CUDA
_omni_enumclassmethodstrr   r   r   intr/   boolr0   r3   r4   r5   r   r   r9   r;   r<   r@   rA   r   r   r   r   r      sB    .r   )r3   vllm.loggerr   vllm.platforms.cudar   vllm.platforms.interfacer   /vllm_omni.diffusion.attention.backends.registryr   vllm_omni.platforms.interfacer   r   rB   r    r   r   r   r   r   <module>   s   