o
    oi                     @   s   d dl mZmZmZmZmZmZ d dlZd dlm	Z	 d dlm
Z
 d dlmZ d dlmZ d dlmZmZ d dlmZ d d	lmZ d d
lmZmZ d dlmZ er\d dlmZ d dlmZ ed Z G dd deZ!dS )    )TYPE_CHECKINGAnyContextManagerDictLiteralOptionalN)apply_to_collection)Tensor)Module)	Optimizer)get_argsoverride)_optimizer_handles_unscaling)	Precision)_convert_fp_tensor_DtypeContextManager)OptimizableMixedPrecisionShardedGradScaler)32-true16-true	bf16-true16-mixed
bf16-mixedc                       s6  e Zd ZdZd'deded ddfddZed(d
dZe	de
fddZe	de
fddZe	de
fddZe	dedefddZe	dedefddZe	dedee dededdf
 fddZe	dededef fddZe	deddfd d!Ze	deeef fd"d#Ze	d$eeef ddfd%d&Z  ZS ))FSDPPrecisiona  Precision plugin for training with Fully Sharded Data Parallel (FSDP).

    .. warning::  This is an :ref:`experimental <versioning:Experimental API>` feature.

    Args:
        precision: Full precision (32-true), half precision (16-true, bf16-true) or
            mixed precision (16-mixed, bf16-mixed).
        scaler: An optional :class:`torch.distributed.fsdp.sharded_grad_scaler.ShardedGradScaler` to use.

    Raises:
        ValueError:
            If unsupported ``precision`` is provided.

    N	precisionscalerr   returnc                 C   s   t t}||vrtd|d| dddlm} |d ur-| jdkr-td|d| d|d u r8|dkr8| nd | _|| _tjtjtj	tj
tjd}|| j | _d S )	Nz`precision=z9)` is not supported in FSDP. `precision` must be one of: .r   r   r   z` does not use a scaler, found )r   r   r   r   r   )r   _PRECISION_INPUT
ValueError*torch.distributed.fsdp.sharded_grad_scalerr   r   r   torchfloat32bfloat16float16_desired_input_dtype)selfr   r   supported_precisionr   precision_to_type r,   [/home/ubuntu/.local/lib/python3.10/site-packages/lightning/fabric/plugins/precision/fsdp.py__init__3   s&   zFSDPPrecision.__init__TorchMixedPrecisionc                 C   s   ddl m} | jdkrtj}tj }}n?| jdkr"tj}tj }}n1| jdkr/tj } }}n$| jdkr<tj } }}n| jdkrJtj}tj }}n	td| jd	||||d
S )Nr   r   r   r   r   r   r   z-Was unable to infer precision type, received r    )param_dtypereduce_dtypebuffer_dtype)2torch.distributed.fsdp.fully_sharded_data_parallelr   r   r$   r%   r'   r&   r"   )r)   r/   r0   r1   r2   r,   r,   r-   mixed_precision_configL   s(   




z$FSDPPrecision.mixed_precision_configc                 C   s
   t | jS N)r   r(   r)   r,   r,   r-   tensor_init_contextf   s   
z!FSDPPrecision.tensor_init_contextc                 C   s   t | jjptjS r5   )r   r4   r0   r$   r%   r6   r,   r,   r-   module_init_contextj      z!FSDPPrecision.module_init_contextc                 C   s6   d| j v rtjd| j dkrtjdS tjdS |  S )Nmixedcudar   )dtype)r   r$   autocastr&   r'   r7   r6   r,   r,   r-   forward_contextn   s   
$zFSDPPrecision.forward_contextdatac                 C   s   t |tt| jdS N)functionr<   dst_type)r   r   r	   r(   r)   r?   r,   r,   r-   convert_inputt   r9   zFSDPPrecision.convert_inputc                 C   s   t |ttt dS r@   )r   r   r	   r$   get_default_dtyperC   r,   r,   r-   convert_outputx   s   zFSDPPrecision.convert_outputtensormodelargskwargsc                    s6   | j d ur| j |}t j||g|R i | d S r5   )r   scalesuperbackward)r)   rG   rH   rI   rJ   	__class__r,   r-   rM   |   s   
 zFSDPPrecision.backward	optimizerc                    s@   | j d u rt j|fi |S | j j|fi |}| j   |S r5   )r   rL   optimizer_stepstepupdate)r)   rP   rJ   step_outputrN   r,   r-   rQ      s
   

zFSDPPrecision.optimizer_stepc                 C   s0   | j }|d urt|rtd|| d S d S )NzKGradient clipping is not implemented for optimizers handling the unscaling.)r   r   NotImplementedErrorunscale_)r)   rP   r   r,   r,   r-   unscale_gradients   s   zFSDPPrecision.unscale_gradientsc                 C   s   | j d ur
| j  S i S r5   )r   
state_dictr6   r,   r,   r-   rX      s   

zFSDPPrecision.state_dictrX   c                 C   s   | j d ur| j | d S d S r5   )r   load_state_dict)r)   rX   r,   r,   r-   rY      s   
zFSDPPrecision.load_state_dictr5   )r   r/   )__name__
__module____qualname____doc__r!   r   r.   propertyr4   r   r   r7   r8   r>   r   rD   rF   r	   r
   rM   r   rQ   r   rW   r   strrX   rY   __classcell__r,   r,   rN   r-   r   #   s>    ($r   )"typingr   r   r   r   r   r   r$   lightning_utilitiesr   r	   torch.nnr
   torch.optimr   typing_extensionsr   r   &lightning.fabric.plugins.precision.ampr   ,lightning.fabric.plugins.precision.precisionr   (lightning.fabric.plugins.precision.utilsr   r    lightning.fabric.utilities.typesr   r3   r   r/   r#   r   r!   r   r,   r,   r,   r-   <module>   s     