import math
from collections import OrderedDict
from typing import Any, Dict, Optional, Union

from typing_extensions import override

import pytorch_lightning as pl
from lightning_fabric.utilities.types import _Stateful
from lightning_fabric.utilities.warnings import PossibleUserWarning
from pytorch_lightning import loops
from pytorch_lightning.loops.fetchers import _DataFetcher, _DataLoaderIterDataFetcher
from pytorch_lightning.loops.optimization import _AutomaticOptimization, _ManualOptimization
from pytorch_lightning.loops.optimization.automatic import _OUTPUTS_TYPE as _OPTIMIZER_LOOP_OUTPUTS_TYPE
from pytorch_lightning.loops.optimization.manual import _OUTPUTS_TYPE as _MANUAL_LOOP_OUTPUTS_TYPE
from pytorch_lightning.loops.progress import _BatchProgress, _SchedulerProgress
from pytorch_lightning.loops.utilities import _is_max_limit_reached
from pytorch_lightning.trainer import call
from pytorch_lightning.trainer.connectors.logger_connector.result import _ResultCollection
from pytorch_lightning.trainer.states import RunningStage, TrainerFn
from pytorch_lightning.utilities.exceptions import MisconfigurationException, SIGTERMException
from pytorch_lightning.utilities.rank_zero import WarningCache, rank_zero_warn
from pytorch_lightning.utilities.signature_utils import is_param_in_hook_signature

_BATCH_OUTPUTS_TYPE = Optional[Union[_OPTIMIZER_LOOP_OUTPUTS_TYPE, _MANUAL_LOOP_OUTPUTS_TYPE]]


class _TrainingEpochLoop(loops._Loop):
    """Iterates over all batches in the dataloader (one epoch) that the user returns in their
    :meth:`~pytorch_lightning.core.LightningModule.train_dataloader` method.

    Its main responsibilities are calling the ``*_epoch_{start,end}`` hooks, accumulating outputs if the user requests
    them in one of these hooks, and running validation at the requested interval.

    The validation is carried out by yet another loop,
    :class:`~pytorch_lightning.loops._EvaluationLoop`.

    In the ``run()`` method, the training epoch loop could in theory simply call
    ``LightningModule.training_step`` itself and perform the optimization directly.
    However, Lightning has built-in support for automatic optimization with multiple optimizers.
    For this reason there are actually two more loops nested under
    :class:`~pytorch_lightning.loops._TrainingEpochLoop`.

    Args:
        min_steps: The minimum number of steps (batches) to process
        max_steps: The maximum number of steps (batches) to process
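
    Example:
        A minimal, illustrative ``LightningModule`` whose ``training_step`` this loop ends up calling
        (through the optimization sub-loops) might look like this; the model below is a sketch for
        documentation purposes only and is not defined anywhere in this module::

            import torch
            import pytorch_lightning as pl


            class BoringModel(pl.LightningModule):
                def __init__(self) -> None:
                    super().__init__()
                    self.layer = torch.nn.Linear(32, 2)

                def training_step(self, batch, batch_idx):
                    # called once per batch by the training epoch loop
                    loss = self.layer(batch).sum()
                    self.log("train_loss", loss)
                    return loss

                def configure_optimizers(self):
                    return torch.optim.SGD(self.parameters(), lr=0.1)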

    Ntrainerz
pl.Trainer	min_steps	max_stepsreturnc                    s   t  | |dk rtd| d|| _|| _t | _t | _t	|| _
t|| _tj|tjtjddd| _tdd| _t | _d| _d S )	Nr   zQ`max_steps` must be a non-negative integer or -1 (infinite steps). You passed in .F)verboseinference_modeT)trainingr   )super__init__r   r   r    r   batch_progressr   scheduler_progressr   automatic_optimizationr   manual_optimizationr
   _EvaluationLoopr   FITTINGr   
VALIDATINGval_loopr   _resultsr   _warning_cache_batches_that_stepped)selfr   r   r    	__class__ _/home/ubuntu/.local/lib/python3.10/site-packages/pytorch_lightning/loops/training_epoch_loop.pyr'   >   s"   



z_TrainingEpochLoop.__init__c                 C      | j jjd S )z/Returns the current batch index (across epochs)   )r(   totalreadyr3   r6   r6   r7   total_batch_idxU      z"_TrainingEpochLoop.total_batch_idxc                 C   r8   )z3Returns the current batch index (within this epoch)r9   )r(   currentr;   r<   r6   r6   r7   	batch_idx\   r>   z_TrainingEpochLoop.batch_idxc                 C   s,   | j j}|d u s|jr| jjjS | jjjjS N)	r   lightning_moduler*   optim_progressoptimizer_stepsr+   optim_step_progressr:   	completed)r3   rB   r6   r6   r7   global_stepc   s   
z_TrainingEpochLoop.global_stepc                 C   s   t | j| j}|p|  S rA   )r   rG   r    _num_ready_batches_reached)r3   max_steps_reachedr6   r6   r7   _is_training_donej   s   z$_TrainingEpochLoop._is_training_donec                 C   s   | j  p| jjS rA   )
restartingr/   _has_runr<   r6   r6   r7   _is_validation_doneo   s   z&_TrainingEpochLoop._is_validation_donec                 C   sT   | j r| jrdS | jjr(| jjj}| jjj}|s&| jd|d| j	d |S dS )z!Evaluates when to leave the loop.Tz:Trainer was signaled to stop but the required `min_epochs=z` or `min_steps=z-` has not been met. Training will continue...F)
rJ   rM   r   should_stopfit_loop
min_epochs_can_stop_earlyr1   infor   )r3   rP   can_stop_earlyr6   r6   r7   donet   s   

z_TrainingEpochLoop.donedata_fetcherc                 C   sZ   |    | | | js(z| | | | d| _W n	 ty$   Y nw | jrd| _d S )NF)reseton_run_startrT   advanceon_advance_end_restartingStopIterationr3   rU   r6   r6   r7   run   s   




z_TrainingEpochLoop.runc                 C   s   | j rQ| j  | j  | jj  | j}|jtdkrKt	
|j|j }|jj}|dus0J tdd |jD }| j| dkrM|sOtdtd dS dS dS dS | j  | j  | jj  | jjj  dS )z4Resets the internal state of the loop for a new run.infNc                 s   s    | ]}t |tV  qd S rA   )
isinstancer   ).0loaderr6   r6   r7   	<genexpr>   s    z+_TrainingEpochLoop.reset.<locals>.<genexpr>r   a4  You're resuming from a checkpoint that ended before the epoch ended and your dataloader is not resumable. This can cause unreliable results if further training is done. Consider using an end-of-epoch checkpoint or make your dataloader resumable by implementing the `state_dict` / `load_state_dict` interface.category)rK   r(   reset_on_restartr)   r*   rC   r   num_training_batchesfloatmathceilaccumulate_grad_batchesrO   _combined_loaderall	flattenedrG   r   r	   reset_on_runr/   r:   rV   )r3   r   expected_stepsra   is_resumable_loaderr6   r6   r7   rV      s*   



	
z_TrainingEpochLoop.resetc                 C   sB   | j jdkr| jst| | j| jjj7  _| j|_	| j
|_d S )Nr   )r   current_epochrK   iterfetchedr(   r?   r;   _on_before_fetch_start_profiler_on_after_fetch_stop_profilerr\   r6   r6   r7   rW      s
   z_TrainingEpochLoop.on_run_startc                 C      | j jd| jj d d S N[z].train_dataloader_next)r   profilerstartr5   __name__r<   r6   r6   r7   rt         z#_TrainingEpochLoop._on_before_fetchc                 C   rx   ry   )r   r{   stopr5   r}   r<   r6   r6   r7   rv      r~   z"_TrainingEpochLoop._on_after_fetchc                 C   s&  | j r
| |r
dS d| j_ t|t }r t|}|j}|j}nd}t|\}}}| jd }|j	| j
_| j}|sO|j|}|jj|dd}tj|d|dd}| j
  |j| d}	|du ri|si| jd nct|d|| t|d||}
t|d|| |
d	kr| j
  t| j
  |s| t ||nt|d
}|j d |jj!r| j!"|j#d ||}	n| j$"|}	W d   n1 sw   Y  | j
  | j%ddd | & r| j%ddd |r|j}|j}|j	| j
_t|d|	|| t|d|	|| |j'  | j
(  |j)  dS )zRuns a single training batch.

        Raises:
            StopIteration: When the epoch is canceled by the user returning -1

        """
        if self.restarting and self._should_check_val_fx(data_fetcher):
            # skip training and run validation in `on_advance_end`
            return
        # we are going to train first so the val loop does not need to restart
        self.val_loop.restarting = False

        if using_dataloader_iter := isinstance(data_fetcher, _DataLoaderIterDataFetcher):
            dataloader_iter = next(data_fetcher)
            # hook's batch_idx and dataloader_idx arguments correctness cannot be guaranteed in this setting
            batch = data_fetcher._batch
            batch_idx = data_fetcher._batch_idx
        else:
            dataloader_iter = None
            batch, _, __ = next(data_fetcher)
            # the fetcher state is not saved, so track the batch index locally to stay correct after restarts
            batch_idx = self.batch_idx + 1
        self.batch_progress.is_last_batch = data_fetcher.done

        trainer = self.trainer
        if not using_dataloader_iter:
            batch = trainer.precision_plugin.convert_input(batch)
            batch = trainer.lightning_module._on_before_batch_transfer(batch, dataloader_idx=0)
            batch = call._call_strategy_hook(trainer, "batch_to_device", batch, dataloader_idx=0)

        self.batch_progress.increment_ready()
        trainer._logger_connector.on_batch_start(batch)

        batch_output: _BATCH_OUTPUTS_TYPE = None
        if batch is None and not using_dataloader_iter:
            self._warning_cache.warn("train_dataloader yielded None. If this was on purpose, ignore this warning...")
        else:
            # hook
            call._call_callback_hooks(trainer, "on_train_batch_start", batch, batch_idx)
            response = call._call_lightning_module_hook(trainer, "on_train_batch_start", batch, batch_idx)
            call._call_strategy_hook(trainer, "on_train_batch_start", batch, batch_idx)
            if response == -1:
                self.batch_progress.increment_processed()
                raise StopIteration

            self.batch_progress.increment_started()

            kwargs = (
                self._build_kwargs(OrderedDict(), batch, batch_idx)
                if not using_dataloader_iter
                else OrderedDict(any=dataloader_iter)
            )
            with trainer.profiler.profile("run_training_batch"):
                if trainer.lightning_module.automatic_optimization:
                    # in automatic optimization, there can only be one optimizer
                    batch_output = self.automatic_optimization.run(trainer.optimizers[0], batch_idx, kwargs)
                else:
                    batch_output = self.manual_optimization.run(kwargs)

        self.batch_progress.increment_processed()

        # update non-plateau LR schedulers
        # update epoch-interval ones only when we are at the end of training epoch
        self.update_lr_schedulers("step", update_plateau_schedulers=False)
        if self._num_ready_batches_reached():
            self.update_lr_schedulers("epoch", update_plateau_schedulers=False)

        if using_dataloader_iter:
            # update the hook kwargs now that the step method might have consumed the iterator
            batch = data_fetcher._batch
            batch_idx = data_fetcher._batch_idx
            self.batch_progress.is_last_batch = data_fetcher.done

        call._call_callback_hooks(trainer, "on_train_batch_end", batch_output, batch, batch_idx)
        call._call_lightning_module_hook(trainer, "on_train_batch_end", batch_output, batch, batch_idx)
        self.batch_progress.increment_completed()

        # -----------------------------------------
        # SAVE METRICS TO LOGGERS AND PROGRESS_BAR
        # -----------------------------------------
        trainer._logger_connector.update_train_step_metrics()

    def on_advance_end(self, data_fetcher: _DataFetcher) -> None:
        # -----------------------------------------
        # VALIDATE IF NEEDED
        # -----------------------------------------
        should_check_val = self._should_check_val_fx(data_fetcher)
        if should_check_val:
            # switch the trainer's stage to validation for the duration of the val loop
            self.trainer.validating = True
            # save and restore this state in case validation runs inside the training loop (val_check_interval<1.0)
            first_loop_iter = self.trainer._logger_connector._first_loop_iter
            if not self._should_accumulate():
                # clear gradients to not leave any unused memory during validation
                call._call_lightning_module_hook(self.trainer, "on_validation_model_zero_grad")
            self.val_loop.run()
            self.trainer.training = True
            self.trainer._logger_connector._first_loop_iter = first_loop_iter

        # update plateau LR scheduler, which relies on the validation metrics just computed
        self.update_lr_schedulers("step", update_plateau_schedulers=True)

        if not self._should_accumulate():
            # this is increased once per batch disregarding multiple optimizers on purpose for loggers
            self._batches_that_stepped += 1
        # this will save based on the `batches_that_stepped` value
        self._save_loggers_on_train_batch_end()

        # if training finished, defer exit to the parent loop
        if not self._is_training_done and self.trainer.received_sigterm:
            raise SIGTERMException

    def teardown(self) -> None:
        self._results.cpu()
        self.val_loop.teardown()

    @override
    def on_save_checkpoint(self) -> Dict:
        state_dict = super().on_save_checkpoint()
        state_dict["_batches_that_stepped"] = self._batches_that_stepped
        return state_dict

    @override
    def on_load_checkpoint(self, state_dict: Dict) -> None:
        self._batches_that_stepped = state_dict.get("_batches_that_stepped", 0)

    def _accumulated_batches_reached(self) -> bool:
        """Determine if accumulation will be finished by the end of the current batch."""
        return self.batch_progress.current.ready % self.trainer.accumulate_grad_batches == 0

    def _num_ready_batches_reached(self) -> bool:
        """Checks if we are in the last batch or if there are more batches to follow."""
        epoch_finished_on_ready = self.batch_progress.current.ready == self.trainer.num_training_batches
        return epoch_finished_on_ready or self.batch_progress.is_last_batch

    def _should_accumulate(self) -> bool:
        """Checks if the optimizer step should be performed or gradients should be accumulated for the current
        step."""
        accumulation_done = self._accumulated_batches_reached()
        # Lightning steps on the final batch
        is_final_batch = self._num_ready_batches_reached()
        # but the strategy might not
        strategy_accumulates_on_final_batch = self.trainer.strategy.handles_gradient_accumulation or not is_final_batch
        return not accumulation_done and strategy_accumulates_on_final_batch

    def update_lr_schedulers(self, interval: str, update_plateau_schedulers: bool) -> None:
        """Updates the lr schedulers based on the given interval."""
        if interval == "step" and self._should_accumulate():
            return
        self._update_learning_rates(interval=interval, update_plateau_schedulers=update_plateau_schedulers)

    def _update_learning_rates(self, interval: str, update_plateau_schedulers: bool) -> None:
        """Update learning rates.

        Args:
            interval: either 'epoch' or 'step'.
            update_plateau_schedulers: control whether ``ReduceLROnPlateau`` or non-plateau schedulers get updated.
                This is used so non-plateau schedulers can be updated before running validation. Checkpoints are
                commonly saved during validation, however, on-plateau schedulers might monitor a validation metric
                so they have to be updated separately.

        """
        trainer = self.trainer

        if not trainer.lr_scheduler_configs or not trainer.lightning_module.automatic_optimization:
            return

        for config in trainer.lr_scheduler_configs:
            if update_plateau_schedulers ^ config.reduce_on_plateau:
                continue

            current_idx = self.batch_idx if interval == "step" else trainer.current_epoch
            current_idx += 1  # account for both batch and epoch starting from 0
            # take a step if the interval key matches and the current index is a multiple of the frequency
            if config.interval == interval and current_idx % config.frequency == 0:
                monitor_val = None
                if config.reduce_on_plateau:
                    # `ReduceLROnPlateau` schedulers require a monitored metric
                    monitor_key = config.monitor
                    assert monitor_key is not None
                    monitor_val = self._get_monitor_value(monitor_key)
                    if monitor_val is None:
                        if config.strict:
                            avail_metrics = list(trainer.callback_metrics)
                            raise MisconfigurationException(
                                f"ReduceLROnPlateau conditioned on metric {monitor_key} which is not available."
                                f" Available metrics are: {avail_metrics}. Condition can be set using `monitor` key"
                                " in lr scheduler dict"
                            )
                        rank_zero_warn(
                            f"ReduceLROnPlateau conditioned on metric {monitor_key} which is not available but"
                            " strict is set to `False`. Skipping learning rate update.",
                            category=RuntimeWarning,
                        )
                        continue

                self.scheduler_progress.increment_ready()

                # update LR
                call._call_lightning_module_hook(trainer, "lr_scheduler_step", config.scheduler, monitor_val)

                self.scheduler_progress.increment_completed()

    def _get_monitor_value(self, key: str) -> Optional[Any]:
        return self.trainer.callback_metrics.get(key)

    def _should_check_val_epoch(self) -> bool:
        return self.trainer.enable_validation and (
            self.trainer.check_val_every_n_epoch is None
            or (self.trainer.current_epoch + 1) % self.trainer.check_val_every_n_epoch == 0
        )

    def _should_check_val_fx(self, data_fetcher: _DataFetcher) -> bool:
        """Decide if we should run validation."""
        if not self._should_check_val_epoch():
            return False

        # val_check_batch is inf for iterable datasets with no length defined
        is_infinite_dataset = self.trainer.val_check_batch == float("inf")
        is_last_batch = self.batch_progress.is_last_batch
        if is_last_batch and (is_infinite_dataset or isinstance(data_fetcher, _DataLoaderIterDataFetcher)):
            return True

        if self.trainer.should_stop and self.trainer.fit_loop._can_stop_early:
            # allow validation when requesting to stop early through `Trainer.should_stop` (e.g. by early stopping)
            # and when the loop allows stopping (min_epochs/steps met)
            return True

        is_val_check_batch = is_last_batch
        if isinstance(self.trainer.limit_train_batches, int) and is_infinite_dataset:
            is_val_check_batch = (self.batch_idx + 1) % self.trainer.limit_train_batches == 0
        elif self.trainer.val_check_batch != float("inf"):
            # if `check_val_every_n_epoch` is `None`, run a validation loop every n training batches,
            # else condition it on the batch_idx of the current epoch
            current_iteration = (
                self.total_batch_idx if self.trainer.check_val_every_n_epoch is None else self.batch_idx
            )
            is_val_check_batch = (current_iteration + 1) % self.trainer.val_check_batch == 0

        return is_val_check_batch

    def _save_loggers_on_train_batch_end(self) -> None:
        """Flushes loggers to disk."""
        if self.trainer.should_stop:
            for logger in self.trainer.loggers:
                logger.save()

    def _build_kwargs(self, kwargs: OrderedDict, batch: Any, batch_idx: int) -> OrderedDict:
        """Helper method to build the arguments for the current step.

        Args:
            kwargs: The kwargs passed down to the hooks.
            batch: The current batch to run through the step.
            batch_idx: the index of the current batch.

        Returns:
            The kwargs passed down to the hooks.

        """
        kwargs["batch"] = batch
        training_step_fx = getattr(self.trainer.lightning_module, "training_step")
        if is_param_in_hook_signature(training_step_fx, "batch_idx", min_args=2):
            kwargs["batch_idx"] = batch_idx
        return kwargs