o
    3wi-                     @   s   d Z ddlZddlZddlZddlZddlZddlZddlmZ ddl	m
Z
mZmZmZmZmZmZmZmZ ddlmZ ddd	Zd
d ZdedefddZ	ddededefddZdefddZdS )z
A collection of utilities for ensuring that training can always occur. Heavily influenced by the
[toma](https://github.com/BlackHC/toma) library.
    N)version   )	is_cuda_availableis_hpu_availableis_ipex_availableis_mlu_availableis_mps_availableis_musa_availableis_npu_availableis_sdaa_availableis_xpu_available)compare_versionsFc                 C   s   | rt   t rtj  dS t rtj  dS t r$tj	  dS t
 r.tj  dS t r8tj  dS tddrDtj  dS t rNtj  dS t rT	 dS dS )z
    Clears the device cache by calling `torch.{backend}.empty_cache`. Can also run `gc.collect()`, but do note that
    this is a *considerable* slowdown and should be used sparingly.
    z2.0)min_versionN)gccollectr   torchxpuempty_cacher   mlur   sdaar	   musar
   npur   mpsr   cudar   garbage_collection r   T/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/accelerate/utils/memory.pyclear_device_cache+   s&   
r   c                  G   s:   t | ts	t| } tt| D ]}d| |< qtdd | S )aN  
    Releases memory from `objects` by setting them to `None` and calls `gc.collect()` and `torch.cuda.empty_cache()`.
    Returned objects should be reassigned to the same variables.

    Args:
        objects (`Iterable`):
            An iterable of objects
    Returns:
        A list of `None` objects to replace `objects`

    Example:

        ```python
        >>> import torch
        >>> from accelerate.utils import release_memory

        >>> a = torch.ones(1000, 1000).cuda()
        >>> b = torch.ones(1000, 1000).cuda()
        >>> a, b = release_memory(a, b)
        ```
    NTr   )
isinstancelistrangelenr   )objectsir   r   r   release_memoryF   s   


r%   	exceptionreturnc                    s:   g d}t  trt jdkrt fdd|D S dS )z
    Checks if `exception` relates to CUDA out-of-memory, XPU out-of-memory, CUDNN not supported, or CPU out-of-memory

    Args:
        exception (`Exception`):
            An exception
    )z out of memory.z(cuDNN error: CUDNN_STATUS_NOT_SUPPORTED.z*DefaultCPUAllocator: can't allocate memoryz1FATAL ERROR :: MODULE:PT_DEVMEM Allocation failedr   c                 3   s    | ]
}| j d  v V  qdS )r   N)args).0errr&   r   r   	<genexpr>s   s    z+should_reduce_batch_size.<locals>.<genexpr>F)r   RuntimeErrorr"   r(   any)r&   _statementsr   r+   r   should_reduce_batch_sized   s   r0      functionstarting_batch_sizereduce_batch_size_fnc                    sB   du rt jt|dS | du r fdd fdd}|S )a  
    A basic decorator that will try to execute `function`. If it fails from exceptions related to out-of-memory or
    CUDNN, the batch size is multiplied by 0.9 and passed to `function`

    `function` must take in a `batch_size` parameter as its first argument.

    Args:
        function (`callable`, *optional*):
            A function to wrap
        starting_batch_size (`int`, *optional*):
            The batch size to try and fit into memory

    Example:

    ```python
    >>> from accelerate.utils import find_executable_batch_size


    >>> @find_executable_batch_size(starting_batch_size=128)
    ... def train(batch_size, model, optimizer):
    ...     ...


    >>> train(model, optimizer)
    ```
    N)r3   c                      s   t  d   S )Ng?)intr   )
batch_sizer   r   r4      s   z8find_executable_batch_size.<locals>.reduce_batch_size_fnc               
      s   t dd ttj }t|t| d k r>ddd t|dd  | dd  D }t	dj
 dj
 d	| d
	  dkrGtdz g| R i |W S  tys } zt|rht dd   n W Y d }~nd }~ww q?)NTr   r   z, c                 S   s   g | ]\}}| d | qS )=r   )r)   argvaluer   r   r   
<listcomp>   s    zAfind_executable_batch_size.<locals>.decorator.<locals>.<listcomp>zBatch size was passed into `zS` as the first argument when called.Remove this as the decorator already does so: `(z)`r   z-No executable batch size found, reached zero.)r   r    inspect	signature
parameterskeysr"   joinzip	TypeError__name__r-   	Exceptionr0   )r(   kwargsparamsarg_strer6   r2   r4   r   r   	decorator   s2   
*

z-find_executable_batch_size.<locals>.decorator)	functoolspartialfind_executable_batch_size)r2   r3   r4   rJ   r   rI   r   rM   w   s   rM   device_indexc                 C   s   t tjjt djkr z	tj| d W S  ty   Y nw t r>t t	j
 d}t|ddr>ddlm} || d S td tj| S )Nz2.6r   intel_extension_for_pytorchz>=z2.5)mem_get_infozThe XPU `mem_get_info` API is available in IPEX version >=2.5 or PyTorch >=2.6. The current returned available memory is incorrect. Please consider upgrading your IPEX or PyTorch version.)r   parser   __version__releaser   rP   rD   r   	importlibmetadatar   intel_extension_for_pytorch.xpuwarningswarnmax_memory_allocated)rN   ipex_versionrP   r   r   r   get_xpu_available_memory   s   r[   )F)Nr1   N)__doc__rK   r   rT   r<   rW   r   	packagingr   importsr   r   r   r   r   r	   r
   r   r   versionsr   r   r%   rD   boolr0   callabler5   rM   r[   r   r   r   r   <module>   s,   ,

B