# Reconstructed from the recoverable structure of a compiled module; condensed bodies are marked below.
import logging
from typing import Any, Dict, List, Optional, Union

import torch
from typing_extensions import override

import lightning.pytorch as pl
from lightning.fabric.utilities.data import _set_sampler_epoch, sized_len
from lightning.fabric.utilities.warnings import PossibleUserWarning
from lightning.pytorch.loops import _Loop
from lightning.pytorch.loops.fetchers import _DataFetcher
from lightning.pytorch.loops.progress import _Progress
from lightning.pytorch.loops.training_epoch_loop import _TrainingEpochLoop
from lightning.pytorch.loops.utilities import _is_max_limit_reached, _select_data_fetcher
from lightning.pytorch.trainer import call
from lightning.pytorch.trainer.connectors.data_connector import (
    _check_dataloader_iterable, _DataLoaderSource, _parse_num_batches, _process_dataloader,
    _request_dataloader, _resolve_overfit_batches,
)
from lightning.pytorch.trainer.connectors.logger_connector.result import _ResultCollection
from lightning.pytorch.trainer.states import RunningStage, TrainerFn
from lightning.pytorch.utilities.combined_loader import _SUPPORTED_MODES, CombinedLoader
from lightning.pytorch.utilities.data import has_len_all_ranks
from lightning.pytorch.utilities.exceptions import MisconfigurationException, SIGTERMException
from lightning.pytorch.utilities.model_helpers import is_overridden
from lightning.pytorch.utilities.rank_zero import rank_zero_debug, rank_zero_info, rank_zero_warn

log = logging.getLogger(__name__)


class _FitLoop(_Loop):
    """This loop is the top-level loop where training starts.

    It simply counts the epochs and iterates from one to the next by calling ``TrainingEpochLoop.run()`` in its
    ``advance()`` method.

    Example::

        # FitLoop
        for epoch in range(max_epochs):
            # TrainingEpochLoop
            for batch_idx, batch in enumerate(train_dataloader):
                loss = lightning_module.training_step(batch, batch_idx)
                ...

                # ValidationEpochLoop
                for batch_idx, batch in enumerate(val_dataloader):
                    lightning_module.validation_step(batch, batch_idx)
                    ...
                ...
            ...

    Args:
        min_epochs: The minimum number of epochs
        max_epochs: The maximum number of epochs, can be set -1 to turn this limit off

    """

    def __init__(self, trainer: "pl.Trainer", min_epochs: Optional[int] = 0, max_epochs: Optional[int] = None) -> None:
        super().__init__(trainer)
        if isinstance(max_epochs, int) and max_epochs < -1:
            raise MisconfigurationException(
                f"`max_epochs` must be a non-negative integer or -1. You passed in {max_epochs}."
            )
        self.max_epochs = max_epochs
        self.min_epochs = min_epochs
        self.epoch_loop = _TrainingEpochLoop(trainer)
        self.epoch_progress = _Progress()
        self.max_batches: Union[int, float] = float("inf")
        self._data_source = _DataLoaderSource(None, "train_dataloader")
        self._combined_loader: Optional[CombinedLoader] = None
        self._combined_loader_states_to_load: List[Dict[str, Any]] = []
        self._data_fetcher: Optional[_DataFetcher] = None
        self._last_train_dl_reload_epoch = float("-inf")

    @property
    def total_batch_idx(self) -> int:
        """Returns the current batch index (across epochs)."""
        return self.epoch_loop.total_batch_idx

    @property
    def batch_idx(self) -> int:
        """Returns the current batch index (within this epoch)."""
        return self.epoch_loop.batch_idx

    @property
    def min_steps(self) -> Optional[int]:
        """Returns the minimum number of steps to run."""
        return self.epoch_loop.min_steps

    @property
    def max_steps(self) -> int:
        """Returns the maximum number of steps to run."""
        return self.epoch_loop.max_steps

    @_Loop.restarting.setter
    def restarting(self, restarting: bool) -> None:
        # if the last epoch completely finished, we are not actually restarting
        values = self.epoch_progress.current.ready, self.epoch_progress.current.started
        epoch_unfinished = any(v != self.epoch_progress.current.processed for v in values)
        restarting = restarting and epoch_unfinished or self._iteration_based_training()
        _Loop.restarting.fset(self, restarting)  # call the parent setter

    @property
    def _skip_backward(self) -> bool:
        """Determines whether the loop will skip backward during automatic optimization."""
        return self.epoch_loop.automatic_optimization._skip_backward

    @_skip_backward.setter
    def _skip_backward(self, value: bool) -> None:
        self.epoch_loop.automatic_optimization._skip_backward = value

    @property
    def _results(self) -> _ResultCollection:
        if self.trainer.training:
            return self.epoch_loop._results
        if self.trainer.validating:
            return self.epoch_loop.val_loop._results
        raise RuntimeError("`FitLoop._results` property isn't defined. Accessed outside of scope")

    @property
    def _can_stop_early(self) -> bool:
        met_min_epochs = self.epoch_progress.current.processed >= self.min_epochs if self.min_epochs else True
        met_min_steps = self.epoch_loop.global_step >= self.min_steps if self.min_steps else True
        return met_min_epochs and met_min_steps

    @property
    def _should_reload_train_dl(self) -> bool:
        """Check if train dataloader should be reloaded."""
        n_epochs = self.trainer.reload_dataloaders_every_n_epochs
        return n_epochs and self.trainer.current_epoch - self._last_train_dl_reload_epoch >= n_epochs

    @property
    def done(self) -> bool:
        """Evaluates when to leave the loop."""
        if self.max_batches == 0:
            rank_zero_info("`Trainer.fit` stopped: No training batches.")
            return True
        stop_steps = _is_max_limit_reached(self.epoch_loop.global_step, self.max_steps)
        if stop_steps:
            rank_zero_info(f"`Trainer.fit` stopped: `max_steps={self.max_steps!r}` reached.")
            return True
        assert isinstance(self.max_epochs, int)
        stop_epochs = _is_max_limit_reached(self.epoch_progress.current.processed, self.max_epochs)
        if stop_epochs:
            # override so `trainer.current_epoch` reports the expected value
            self.epoch_progress.current.completed = self.epoch_progress.current.processed
            rank_zero_info(f"`Trainer.fit` stopped: `max_epochs={self.max_epochs!r}` reached.")
            return True
        if self.trainer.should_stop and self._can_stop_early:
            rank_zero_debug("`Trainer.fit` stopped: `trainer.should_stop` was set.")
            return True
        return False

    @property
    def skip(self) -> bool:
        """Whether we should skip the training and immediately return from the call to :meth:`run`."""
        return self.done or self.trainer.limit_train_batches == 0

    def run(self) -> None:
        self.setup_data()
        if self.skip:
            return
        self.reset()
        self.on_run_start()
        while not self.done:
            try:
                self.on_advance_start()
                self.advance()
                self.on_advance_end()
                self._restarting = False
            except StopIteration:
                break
        self._restarting = False
        self.on_run_end()

    def setup_data(self) -> None:
        # Condensed sketch of the recovered behaviour: when a reload is due, request and validate
        # ``train_dataloader()``, wrap it in a ``CombinedLoader`` (resolving ``overfit_batches``),
        # set up the data fetcher, derive ``self.max_batches`` from ``limit_train_batches``,
        # compute ``trainer.val_check_batch`` from ``val_check_interval`` (raising if the interval
        # exceeds the number of training batches, or is a float other than 1.0 for an iterable
        # dataset), and warn with ``PossibleUserWarning`` when the number of training batches is
        # smaller than ``log_every_n_steps``.
        ...

    def reset(self) -> None:
        """Resets the internal state of this loop."""
        assert self.trainer.model is not None
        torch.set_grad_enabled(True)
        if self.restarting:
            self.epoch_progress.reset_on_restart()

    def on_run_start(self) -> None:
        """Calls the ``on_train_start`` hook."""
        # Condensed sketch: syncs ``epoch_progress`` after a checkpoint reload, sets up the
        # validation dataloaders when a validation run is due, and dispatches "on_train_start" to
        # the callbacks, the LightningModule and the strategy via ``call._call_*_hook``.
        ...

    def on_advance_start(self) -> None:
        """Prepares the dataloader for training and calls the hook ``on_train_epoch_start``."""
        # Condensed sketch: reloads the train dataloader when needed, calls ``_set_sampler_epoch``
        # on every flattened dataloader, updates ``epoch_progress`` and dispatches the hooks.
        ...

    def advance(self) -> None:
        """Runs one whole epoch."""
        # Condensed sketch: raises if the combined loader uses the unsupported
        # ``CombinedLoader(mode="sequential")`` mode, then runs
        # ``self.epoch_loop.run(self._data_fetcher)`` under the "run_training_epoch" profiler scope.
        ...

    def on_advance_end(self) -> None:
        # Condensed sketch: marks the epoch end for the logger connector, updates
        # ``epoch_progress``, dispatches the "on_train_epoch_end" hooks, steps plateau LR
        # schedulers, logs the epoch metrics and raises ``SIGTERMException`` when a SIGTERM
        # was received.
        ...

    def on_run_end(self) -> None:
        """Calls the ``on_train_end`` hook."""
        log.debug(f"{type(self).__name__}: train run ended")
        trainer = self.trainer
        call._call_callback_hooks(trainer, "on_train_end")
        call._call_lightning_module_hook(trainer, "on_train_end")
        call._call_strategy_hook(trainer, "on_train_end")

    def teardown(self) -> None:
        if self._data_fetcher is not None:
            self._data_fetcher.teardown()
            self._data_fetcher = None
        self.epoch_loop.teardown()

    @override
    def on_save_checkpoint(self) -> Dict:
        state_dict = super().on_save_checkpoint()
        if self._combined_loader is not None and (loader_states := self._combined_loader._state_dicts()):
            state_dict["combined_loader"] = loader_states
        return state_dict

    @override
    def on_load_checkpoint(self, state_dict: Dict) -> None:
        self._combined_loader_states_to_load = state_dict.get("combined_loader", [])
        super().on_load_checkpoint(state_dict)

    def _should_accumulate(self) -> bool:
        """Whether the gradients should be accumulated."""
        return self.epoch_loop._should_accumulate()

    def _iteration_based_training(self) -> bool:
        return self.trainer.max_steps != -1

    def _load_combined_loader_states(self) -> None:
        if not self.restarting or not self._combined_loader_states_to_load or self._combined_loader is None:
            return
        self._combined_loader._load_state_dicts(self._combined_loader_states_to_load)
        self._combined_loader_states_to_load = []
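

if __name__ == "__main__":
    # Usage sketch (added illustration, not part of the recovered module): the epoch budget that
    # `_FitLoop` enforces is configured through the public `Trainer` arguments. Assumes a standard
    # `lightning.pytorch` installation; no model is fitted here.
    trainer = pl.Trainer(min_epochs=1, max_epochs=3, logger=False, enable_checkpointing=False)
    print(type(trainer.fit_loop).__name__, trainer.fit_loop.max_epochs)  # -> _FitLoop 3
    # Passing max_epochs=-1 instead would turn the epoch limit off; `trainer.fit(model, ...)`
    # would then drive the loop via `_FitLoop.run()`.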