o
    ©Ì³iO  ã                	   @   s  U d dl Z d dlmZmZmZmZmZmZ d dlZd dl	m
Z
 d dlmZ e
ƒ ZejejejejdœZeeejf ed< dded	dfd
d„Zd	efdd„Z	ddee deej d	ejfdd„Ze jdejd	ed fdd„ƒZ	ddeeeejjf  dejdeee  d	dfdd„ZdS )é    N)ÚDictÚ	GeneratorÚIterableÚListÚOptionalÚTuple)Ú
get_logger)Úis_npu_available)Úfp16Úbf16Úfp32Úfp64ÚPRECISION_STR_TO_DTYPEÚhighÚ	precisionÚreturnc                 C   s@   t j ¡ s	ts	dS t  | ¡ | dkrdt jj_dS dt jj_dS )a¼  Sets the precision of float32 matrix multiplications and convolution operations.

    For more information, see the PyTorch docs:
    - https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html
    - https://pytorch.org/docs/stable/backends.html#torch.backends.cudnn.allow_tf32

    Args:
        precision (str): The setting to determine which datatypes to use for matrix multiplication and convolution operations.
    NÚhighestFT)ÚtorchÚcudaÚis_availabler	   Úset_float32_matmul_precisionÚbackendsÚcudnnÚ
allow_tf32)r   © r   úP/home/ubuntu/.local/lib/python3.10/site-packages/torchtune/training/precision.pyÚ_set_float32_precision   s   
r   c                  C   sx   t j ¡ ot j ¡ ot j ¡ ot jj ¡ dk} t jj	 ¡ o"t jj	 
¡ }to)t j ¡ }t j ¡ o3t j ¡ }| p;|p;|p;|S )aË  
    Check that bf16 is available on this hardware. Requirements:
        - CUDA is available and supports bf16
            - CUDA version >= 11
            - CUDA compute capability >= 8
        - NCCL is available and version >= 2.10
        - MPS is available and torch was built with MPS
        - NPU is available and supports bf16
        - XPU is available and supports bf16

    Returns:
        bool: True if bf16 is available, False otherwise.

    )é   é
   )r   r   r   Úis_bf16_supportedÚdistributedÚis_nccl_availableÚncclÚversionr   ÚmpsÚis_builtr	   ÚnpuÚxpu)Úcuda_supportÚmps_supportÚnpu_supportÚxpu_supportr   r   r   Úverify_bf16_support0   s   
ÿþür,   ÚdtypeÚdevicec              	   C   st   | du rt jS t | | ¡}|t ¡ vr%td|› dd tt ¡ ƒ¡› dƒ‚|t j	kr8|t  
d¡kr8tƒ s8tdƒ‚|S )aÕ  Get the torch.dtype corresponding to the given precision string. If no string is passed,
    we will default to torch.float32.

    Note:
        If bf16 precision is requested with a CUDA device, we verify whether the device indeed supports
        bf16 kernels. If not, a ``RuntimeError`` is raised.

    Args:
        dtype (Optional[str]): The precision dtype. Default: ``None``, in which we default to torch.float32
        device (Optional[torch.device]): Device in use for training. Only CUDA and CPU
            devices are supported. If a CUDA device is passed in, additional checking is done
            to ensure that the device supports the requested precision. Default: ``None``, in which case
            a CUDA device is assumed.
    Raises:
        ValueError: if precision isn't supported by the library
        RuntimeError: if bf16 precision is requested but not available on this hardware.

    Returns:
        torch.dtype: The corresponding torch.dtype.

    NzDtype z must be one of z, z for finetuning.Úcpuzcbf16 precision was requested but not available on this hardware. Please use fp32 precision instead.)r   Úfloat32r   ÚgetÚvaluesÚ
ValueErrorÚjoinÚlistÚkeysÚbfloat16r.   r,   ÚRuntimeError)r-   r.   Útorch_dtyper   r   r   Ú	get_dtypeK   s   ÿ
ÿÿr:   )NNNc              	   c   s8    t  ¡ }t  | ¡ zdV  W t  |¡ dS t  |¡ w )a…  
    Context manager to set torch's default dtype.

    Args:
        dtype (torch.dtype): The desired default dtype inside the context manager.

    Returns:
        ContextManager: context manager for setting default dtype.

    Example:
        >>> with set_default_dtype(torch.bfloat16):
        >>>     x = torch.tensor([1, 2, 3])
        >>>     x.dtype
        torch.bfloat16


    N)r   Úget_default_dtypeÚset_default_dtype)r-   Ú	old_dtyper   r   r   r<   }   s   €
r<   Únamed_paramsÚexclude_param_namesc                    sX   | D ]'\‰ }|durt ‡ fdd„|D ƒƒrq|j|kr)tdˆ › d|j› d|› ƒ‚qdS )a¦  
    Validates that all input parameters have the expected dtype.

    Args:
        named_params (Iterable[Tuple[str, torch.nn.Parameter]]): Iterable of named parameters.
        dtype (torch.dtype): Expected dtype.
        exclude_param_names (Optional[List[str]]): Optional list of parameter names to exclude from dtype checking

    Raises:
        ValueError: If any parameter has a different dtype than `dtype`.
    Nc                 3   s    | ]}|ˆ v V  qd S ©Nr   )Ú.0Ún©Únamer   r   Ú	<genexpr>ª   s   € z0validate_expected_param_dtype.<locals>.<genexpr>z
Parameter z has dtype z, but expected )Úanyr-   r3   )r>   r-   r?   Úparamr   rC   r   Úvalidate_expected_param_dtype˜   s   
ÿÿürH   )r   )NNr@   ) Ú
contextlibÚtypingr   r   r   r   r   r   r   Útorchtune.utilsr   Útorchtune.utils._devicer	   ÚlogÚfloat16r7   r0   Úfloat64r   Ústrr-   Ú__annotations__r   Úboolr,   r.   r:   Úcontextmanagerr<   ÚnnÚ	ParameterrH   r   r   r   r   Ú<module>   sB   
 üÿÿÿ
þ2ýÿþ
ýü