o
    oi2                     @   s   d dl mZmZ d dlmZ d dlmZmZmZm	Z	m
Z
mZ d dlZd dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZmZ d d
lmZ d dlmZ d dlmZ d dl m!Z! d dl"m#Z# d dl$m%Z% eG dd deZ&G dd dee& Z'ee(ef Z)G dd deZ*dS )    )	dataclassfield)partial)AnyCallableDictMappingOptionalOrderedDictN)Tensor)	Optimizer)override)_Loop)AbstractClosureOutputResult)_OptimizationProgress)_block_parallel_sync_behavior)call)MisconfigurationException)WarningCache)STEP_OUTPUTc                   @   s   e Zd ZU dZee ed< edddZee ed< ee	dZ
eeef ed< dd
dZdddZeddeded	d fddZed	eeef fddZdS )ClosureResultaU  A container to hold the result of a :class:`Closure` call.

    It is created from the output of :meth:`~lightning.pytorch.core.LightningModule.training_step`.

    Attributes:
        closure_loss: The loss with a graph attached.
        loss: A detached copy of the closure loss.
        extra: Any keys other than the loss returned.

    closure_lossFN)initdefaultloss)default_factoryextrareturnc                 C   s   |    d S N)_clone_lossself r#   b/home/ubuntu/.local/lib/python3.10/site-packages/lightning/pytorch/loops/optimization/automatic.py__post_init__3   s   zClosureResult.__post_init__c                 C   s"   | j d ur| j   | _d S d S r   )r   detachcloner   r!   r#   r#   r$   r    6   s   
zClosureResult._clone_loss   training_step_output	normalizec                 C   s~   d i }}t |tr!|d}|d u rtddd | D }nt |tr)|}n|d ur1td|d ur9|| }| ||dS )Nr   zbIn automatic_optimization, when `training_step` returns a dict, the 'loss' key needs to be presentc                 S   s   i | ]\}}|d kr||qS r   r#   ).0kvr#   r#   r$   
<dictcomp>E   s    z;ClosureResult.from_training_step_output.<locals>.<dictcomp>zrIn automatic optimization, `training_step` must return a Tensor, a dict, or None (where the step will be skipped).)r   )
isinstancer   getr   itemsr   )clsr)   r*   r   r   r#   r#   r$   from_training_step_output;   s"   



z'ClosureResult.from_training_step_outputc                 C   s   d| j i| jS )Nr   )r   r   r!   r#   r#   r$   asdictU   s   zClosureResult.asdictr   N)r(   )__name__
__module____qualname____doc__r	   r   __annotations__r   r   dictr   r   strr   r%   r    classmethodr   intr4   r   r5   r#   r#   r#   r$   r   "   s   
 

r   c                
       s   e Zd ZdZe Z		ddeg ef deee	gdf  deeg df  f fddZ
ee ded	ed
efddZeded	ed
ee	 fddZ  ZS )Closurea  An implementation of a :class:`AbstractClosure` for automatic optimization in Lightning that combines three
    elementary closures into one: ``training_step``, ``backward`` and ``zero_grad``.

    The Closure gets created by the training loop(s) and is then passed to the
    :meth:`torch.optim.Optimizer.step` method. An optimizer is responsible for calling the closure and optionally
    do something with the output.

    Args:
        step_fn: This is typically the :meth:`lightning.pytorch.core.module.LightningModule.training_step
            wrapped with processing for its outputs
        backward_fn: A function that takes a loss value as input, performs back-propagation and returns the loss value.
            Can be set to ``None`` to skip the backward operation.
        zero_grad_fn: A function that zeroes the gradients. Can be set to ``None`` to skip zero_grad, for example
            when accumulating gradients.

    Example:

        closure = Closure()
        optimizer = torch.optim.Adam(...)
        optimizer.step(closure)
    Nstep_fnbackward_fnzero_grad_fnc                    s    t    || _|| _|| _d S r   )super__init___step_fn_backward_fn_zero_grad_fn)r"   rA   rB   rC   	__class__r#   r$   rE   s   s   

zClosure.__init__argskwargsr   c                 O   sT   |   }|jd u r| jd | jd ur|   | jd ur(|jd ur(| |j |S )NzO`training_step` returned `None`. If this was on purpose, ignore this warning...)rF   r   warning_cachewarnrH   rG   )r"   rK   rL   step_outputr#   r#   r$   closure~   s   

zClosure.closurec                 O   s   | j |i || _| jjS r   )rP   _resultr   )r"   rK   rL   r#   r#   r$   __call__   s   zClosure.__call__)NN)r7   r8   r9   r:   r   rM   r   r   r	   r   rE   r   torchenable_gradr   rP   rR   __classcell__r#   r#   rI   r$   r@   Z   s"    
$r@   c                	       s  e Zd ZdZeZd fddZded	ed
e	de
fddZd
e	ded	edefddZd
e	deg ef fddZd	ededeeg df  fddZdedeeegdf  fddZd	edeg ee f ddfddZdejjddfddZd	edejjddfddZd
e	defddZ  ZS )_AutomaticOptimizationzNPerforms automatic optimization (forward, zero grad, backward, optimizer step)trainer
pl.Trainerr   Nc                    s   t  | t | _d| _d S )NF)rD   rE   r   optim_progress_skip_backward)r"   rW   rI   r#   r$   rE      s   
z_AutomaticOptimization.__init__	optimizer	batch_idxrL   c                 C   s   |  |||}| jjjs.| jj r.t| jjdd |  W d   n1 s(w   Y  n| || | }|j	du r?i S |
 S )zRuns closure (train step + backward) together with optimization if necessary.

        Args:
            kwargs: the kwargs passed down to the hooks
            batch_idx: the current batch index.
            optimizer: the optimizer

        T)blockN)_make_closurerW   strategyhandles_gradient_accumulationfit_loop_should_accumulater   _optimizer_stepconsume_resultr   r5   )r"   r[   r\   rL   rP   resultr#   r#   r$   run   s   	


z_AutomaticOptimization.runc                 C   s.   |  |}| |}| ||}t|||dS )zBuild a closure object that captures the given arguments and runs the `training_step` function and
        optionally other functions such as `backward` and `zero_grad`.)rA   rB   rC   )_make_step_fn_make_backward_fn_make_zero_grad_fnr@   )r"   rL   r[   r\   rA   rB   rC   r#   r#   r$   r^      s   

z$_AutomaticOptimization._make_closurec                 C   s   t | j|S )zOBuild the step function that runs the `training_step` and processes its output.)r   _training_step)r"   rL   r#   r#   r$   rg      s   z$_AutomaticOptimization._make_step_fnc                    s8   j rdS  jj dk}|sdS d fdd}|S )zBuild a `zero_grad` function that zeroes the gradients before back-propagation.

        Returns ``None`` in the case backward needs to be skipped.

        Nr   r   c                      s       d S r   )_on_before_zero_grad_optimizer_zero_gradr#   r\   r[   r"   r#   r$   rC      s   
z?_AutomaticOptimization._make_zero_grad_fn.<locals>.zero_grad_fnr6   )rZ   rW   accumulate_grad_batches)r"   r\   r[   is_first_batch_to_accumulaterC   r#   rm   r$   ri      s   z)_AutomaticOptimization._make_zero_grad_fnc                    s&   j rdS dtddf fdd}|S )zBuild a `backward` function that handles back-propagation through the output produced by the `training_step`
        function.

        Returns ``None`` in the case backward needs to be skipped.

        Nr   r   c                    s   t jd|   d S )Nbackward)r   _call_strategy_hookrW   r+   r[   r"   r#   r$   rB      s   z=_AutomaticOptimization._make_backward_fn.<locals>.backward_fn)rZ   r   )r"   r[   rB   r#   rr   r$   rh      s   z(_AutomaticOptimization._make_backward_fntrain_step_and_backward_closurec                 C   s^   | j }|jjd }|j }|s| jjj  t	
|d|j||| |s-| jjj  dS dS )a?  Performs the optimizer step and some sanity checking.

        Args:
            batch_idx: the index of the current batch
            train_step_and_backward_closure: the closure function performing the train step and computing the
                gradients. By default, called by the optimizer (if possible)

        r   optimizer_stepN)rW   r_   _lightning_optimizersra   rb   rY   r[   stepincrement_readyr   _call_lightning_module_hookcurrent_epochincrement_completed)r"   r\   rs   rW   r[   should_accumulater#   r#   r$   rc      s    
	z&_AutomaticOptimization._optimizer_stepc                 C   sB   | j }| jjj  t|d| t|d| | jjj  dS )zmCalls the ``on_before_zero_grad`` hook.

        Args:
            optimizer: the current optimizer

        on_before_zero_gradN)	rW   rY   r[   	zero_gradrw   r   _call_callback_hooksrx   increment_started)r"   r[   rW   r#   r#   r$   rk     s
   z+_AutomaticOptimization._on_before_zero_gradc                 C   s,   | j }t|d|j|| | jjj  dS )zZeroes out all gradients of parameters optimized by the current optimizer.

        Args:
            batch_idx: the index of the current batch
            optimizer: the current optimizer

        optimizer_zero_gradN)rW   r   rx   ry   rY   r[   r}   rz   )r"   r\   r[   rW   r#   r#   r$   rl   %  s   z+_AutomaticOptimization._optimizer_zero_gradc                 C   sT   | j }tj|dg| R  }| j j  |du r"|jdkr"td| j	||j
S )zPerforms the actual train step with the tied hooks.

        Args:
            kwargs: the kwargs passed down to the hooks.

        Returns:
            A ``ClosureResult`` containing the training step output.

        training_stepNr(   zSkipping the `training_step` by returning None in distributed training is not supported. It is recommended that you rewrite your training logic to avoid having to skip the step in the first place.)rW   r   rq   valuesr_   post_training_step
world_sizeRuntimeErroroutput_result_clsr4   rn   )r"   rL   rW   r)   r#   r#   r$   rj   1  s   
z%_AutomaticOptimization._training_step)rW   rX   r   N)r7   r8   r9   r:   r   r   rE   r   r?   r
   _OUTPUTS_TYPErf   r@   r^   r   rg   r	   ri   r   rh   rc   rS   optimrk   rl   rj   rU   r#   r#   rI   r$   rV      s&    $" 
%rV   )+dataclassesr   r   	functoolsr   typingr   r   r   r   r	   r
   rS   r   torch.optimr   typing_extensionsr   lightning.pytorchpytorchpllightning.pytorch.loops.loopr   ,lightning.pytorch.loops.optimization.closurer   r    lightning.pytorch.loops.progressr   !lightning.pytorch.loops.utilitiesr   lightning.pytorch.trainerr   &lightning.pytorch.utilities.exceptionsr   %lightning.pytorch.utilities.rank_zeror   !lightning.pytorch.utilities.typesr   r   r@   r=   r   rV   r#   r#   r#   r$   <module>   s*    7: