o
    zi                     @   s   d dl mZ d dlmZmZmZ d dlZd dlmZ d dl	m
Z
 d dlmZ d dlmZ G dd	 d	e
Zddedee fddZdee fddZdefddZdefddZddeej defddZeddejddfddZdddZdS )     )	lru_cache)ListOptionalUnionN)override)Accelerator)_AcceleratorRegistry)rank_zero_infoc                   @   s   e Zd ZdZedejddfddZedddZe	ed	e
eeee f deee  fd
dZe	ed	ee deej fddZe	edefddZe	edefddZeededdfddZdS )CUDAAcceleratorz$Accelerator for NVIDIA CUDA devices.devicereturnNc                 C   s2   |j dkrtd| dt| tj| dS )zm
        Raises:
            ValueError:
                If the selected device is not of type CUDA.
        cudazDevice should be CUDA, got z	 instead.N)type
ValueError_check_cuda_matmul_precisiontorchr   
set_device)selfr    r   V/home/ubuntu/.local/lib/python3.10/site-packages/lightning_fabric/accelerators/cuda.pysetup_device   s   
zCUDAAccelerator.setup_devicec                 C   s
   t   d S N)_clear_cuda_memory)r   r   r   r   teardown(      
zCUDAAccelerator.teardowndevicesc                 C   s   ddl m} || ddS )z!Accelerator device parsing logic.r   )_parse_gpu_idsT)include_cuda)(lightning_fabric.utilities.device_parserr   )r   r   r   r   r   parse_devices,   s   zCUDAAccelerator.parse_devicesc                 C   s   dd | D S )z*Gets parallel devices for the Accelerator.c                 S   s   g | ]}t d |qS )r   )r   r   ).0ir   r   r   
<listcomp>8   s    z8CUDAAccelerator.get_parallel_devices.<locals>.<listcomp>r   )r   r   r   r   get_parallel_devices4   s   z$CUDAAccelerator.get_parallel_devicesc                   C   s   t  S )z!Get the devices when set to auto.num_cuda_devicesr   r   r   r   auto_device_count:   s   z!CUDAAccelerator.auto_device_countc                   C   s
   t  dkS )Nr   r$   r   r   r   r   is_available@      
zCUDAAccelerator.is_availableaccelerator_registryc                 C   s   |j d| | jd d S )Nr   )description)register__name__)clsr)   r   r   r   register_acceleratorsE   s
   
z%CUDAAccelerator.register_acceleratorsr   N)r,   
__module____qualname____doc__r   r   r   r   r   staticmethodr   intstrr   r   r   r#   r&   boolr'   classmethodr   r.   r   r   r   r   r
      s*    , r
   num_devicesr   c              	   C   s   | dkrg S t  }|std|  d| t|kr&td|  dt| dg }g }|D ]+}ztjdtd|d W n tyI   || Y q,w || t|| krW nq,| dkrrt|| krrtd|  d	t| d
| d|S )a  Returns a list of all available and usable CUDA GPU devices.

    A GPU is considered usable if we can successfully move a tensor to the device, and this is what this function
    tests for each GPU on the system until the target number of usable devices is found.

    A subset of GPUs on the system might be used by other processes, and if the GPU is configured to operate in
    'exclusive' mode (configurable by the admin), then only one process is allowed to occupy it.

    Args:
        num_devices: The number of devices you want to request. By default, this function will return as many as there
            are usable CUDA GPU devices available.

    Warning:
        If multiple processes call this function at the same time, there can be race conditions in the case where
        both processes determine that the device is unoccupied, leading into one of them crashing later on.

    r   zYou requested to find z? devices but there are no visible CUDA devices on this machine.z# devices but this machine only has z GPUs.r   r   r8   z devices but only z& are currently available. The devices zA are occupied by other processes and can't be used at the moment.)_get_all_visible_cuda_devicesr   lenr   tensorr   RuntimeErrorappend)r9   visible_devicesavailable_devicesunavailable_devicesgpu_idxr   r   r   find_usable_cuda_devicesO   s>   


rD   c                   C   s   t tt S )a  Returns a list of all visible CUDA GPU devices.

    Devices masked by the environment variabale ``CUDA_VISIBLE_DEVICES`` won't be returned here. For example, assume you
    have 8 physical GPUs. If ``CUDA_VISIBLE_DEVICES="1,3,6"``, then this function will return the list ``[0, 1, 2]``
    because these are the three visible GPUs after applying the mask ``CUDA_VISIBLE_DEVICES``.

    )listranger%   r   r   r   r   r;      s   r;   c                   C   
   t j S )z-Returns the number of available CUDA devices.)r   r   device_countr   r   r   r   r%      r   r%   c                   C   rG   )z9Returns a bool indicating if CUDA is currently available.)r   r   r'   r   r   r   r   is_cuda_available   r(   rI   r   c                 C   s   t j| \}}|dkS )N   )r   r   get_device_capability)r   major_r   r   r   _is_ampere_or_later   s   rN      c                 C   sB   t j r	t| sd S t  dkrtdt j| d d S d S )NhighestzYou are using a CUDA device (a5  ) that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision)r   r   r'   rN   get_float32_matmul_precisionr	   get_device_namer:   r   r   r   r      s   r   c                   C   s$   t tjdrtj  tj  d S )N_cuda_clearCublasWorkspaces)hasattrr   _CrS   r   empty_cacher   r   r   r   r      s   
r   )r8   r   r/   )	functoolsr   typingr   r   r   r   typing_extensionsr   )lightning_fabric.accelerators.acceleratorr   &lightning_fabric.accelerators.registryr   $lightning_fabric.utilities.rank_zeror	   r
   r4   rD   r;   r%   r6   rI   r   rN   r   r   r   r   r   r   <module>   s    65