o
    8wiY                     @   sd  d dl Z d dlmZ d dlmZmZmZ d dlZd dlm	Z	 d dl
Zd dlmZmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZmZ d dlmZ d dlmZm Z m!Z!m"Z"m#Z#m$Z$ d dl%m&Z& d dl'm(Z(m)Z) d dl*m+Z+m,Z, d dl-m.Z. d dl/m0Z0m1Z1 d dl2m3Z3 d dl4m5Z5m6Z6m7Z7 e 8e9Z:eG dd dZ;G dd deZ<dS )    N)	dataclass)AnyOptionalUnion)override)_set_sampler_epoch	sized_len)PossibleUserWarning)_Loop)_DataFetcher)	_Progress)_TrainingEpochLoop)_is_max_limit_reached_select_data_fetcher)call)_check_dataloader_iterable_DataLoaderSource_parse_num_batches_process_dataloader_request_dataloader_resolve_overfit_batches)_ResultCollection)RunningStage	TrainerFn)_SUPPORTED_MODESCombinedLoader)has_len_all_ranks)MisconfigurationExceptionSIGTERMException)is_overridden)rank_zero_debugrank_zero_inforank_zero_warnc                   @   s    e Zd ZdZdZdZdZdZdS )RestartStagenonerestarted_on_epoch_startrestarted_mid_epochrestarted_on_epoch_endresumed_on_epoch_endN)__name__
__module____qualname__NONERESTARTED_ON_EPOCH_STARTRESTARTED_MID_EPOCHRESTARTED_ON_EPOCH_ENDRESUMED_ON_EPOCH_END r1   r1   ]/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/pytorch_lightning/loops/fit_loop.pyr#   1   s    r#   c                	       s:  e Zd ZdZ		dMdddee dee ddf fd	d
ZedefddZedefddZ	edee fddZ
edefddZejjededdfddZedefddZejdeddfddZedefddZedefddZedefddZedefd d!Zedefd"d#ZdNd$d%ZdNd&d'Zedefd(d)Zedefd*d+Zedefd,d-Zedefd.d/ZdNd0d1ZdNd2d3ZdNd4d5Z dNd6d7Z!dNd8d9Z"dNd:d;Z#dNd<d=Z$dNd>d?Z%dNd@dAZ&ede'f fdBdCZ(edDe'ddf fdEdFZ)defdGdHZ*defdIdJZ+dNdKdLZ,  Z-S )O_FitLoopaN  This loop is the top-level loop where training starts.

    It simply counts the epochs and iterates from one to the next by calling ``TrainingEpochLoop.run()`` in its
    ``advance()`` method.

    Example::

        # FitLoop
        for epoch in range(max_epochs):
            # TrainingEpochLoop
            for batch_idx, batch in enumerate(train_dataloader):
                loss = lightning_module.training_step(batch, batch_idx)
                ...

                # ValidationEpochLoop
                for batch_idx, batch in enumerate(val_dataloader):
                    lightning_module.validation_step(batch, batch_idx)
                    ...
                ...
            ...

    Args:
        min_epochs: The minimum number of epochs
        max_epochs: The maximum number of epochs, can be set -1 to turn this limit off

    r   Ntrainerz
pl.Trainer
min_epochs
max_epochsreturnc                    s   t  | t|tr|dk rtd| d|| _|| _t|| _t	 | _
td| _td d| _d | _g | _d | _td| _tj| _d S )NzA`max_epochs` must be a non-negative integer or -1. You passed in .inftrain_dataloaderz-inf)super__init__
isinstanceintr   r6   r5   r   
epoch_loopr   epoch_progressfloatmax_batchesr   _data_source_combined_loader_combined_loader_states_to_load_data_fetcher_last_train_dl_reload_epochr#   r,   _restart_stage)selfr4   r5   r6   	__class__r1   r2   r=   V   s    



z_FitLoop.__init__c                 C      | j jS )z/Returns the current batch index (across epochs))r@   total_batch_idxrJ   r1   r1   r2   rN   p      z_FitLoop.total_batch_idxc                 C   rM   )z3Returns the current batch index (within this epoch))r@   	batch_idxrO   r1   r1   r2   rQ   u   rP   z_FitLoop.batch_idxc                 C   rM   )z+Returns the minimum number of steps to run.)r@   	min_stepsrO   r1   r1   r2   rR   z   rP   z_FitLoop.min_stepsc                 C   rM   )z+Returns the maximum number of steps to run.)r@   	max_stepsrO   r1   r1   r2   rS      rP   z_FitLoop.max_steps
restartingc                    sL    j jj j jjf}t fdd|D }|r|p  }tj | d S )Nc                 3   s    | ]
}| j jjkV  qd S N)rA   current	processed).0vrO   r1   r2   	<genexpr>   s    z&_FitLoop.restarting.<locals>.<genexpr>)	rA   rV   readystartedany_iteration_based_trainingr
   rT   fset)rJ   rT   valuesepoch_unfinishedr1   rO   r2   rT      s   z_FitLoop.restartingc                 C   s
   | j jjS )MDetermines whether the loop will skip backward during automatic optimization.r@   automatic_optimization_skip_backwardrO   r1   r1   r2   re      s   
z_FitLoop._skip_backwardvaluec                 C   s   || j j_dS )rb   Nrc   )rJ   rf   r1   r1   r2   re      s   c                 C   s*   | j jr| jjS | j jr| jjjS td)NzD`FitLoop._results` property isn't defined. Accessed outside of scope)r4   trainingr@   _results
validatingval_loopRuntimeErrorrO   r1   r1   r2   rh      s
   
z_FitLoop._resultsc                 C   s:   | j r| jjj| j knd}| jr| jj| jknd}|o|S )NT)r5   rA   rV   rW   rR   r@   global_step)rJ   met_min_epochsmet_min_stepsr1   r1   r2   _can_stop_early   s   z_FitLoop._can_stop_earlyc                 C   s   | j j}|o| j j| j |kS )z-Check if train dataloader should be reloaded.)r4   !reload_dataloaders_every_n_epochscurrent_epochrH   )rJ   n_epochsr1   r1   r2   _should_reload_train_dl   s   z _FitLoop._should_reload_train_dlc                 C   s   | j dkrtd dS t| jj| j}|r td| jd dS t| jts(J t| j	j
j| j}|rF| j	j
j| j	j
_td| jd dS | jjrS| jrStd dS dS )	z!Evaluates when to leave the loop.r   z+`Trainer.fit` stopped: No training batches.Tz"`Trainer.fit` stopped: `max_steps=z
` reached.z#`Trainer.fit` stopped: `max_epochs=z5`Trainer.fit` stopped: `trainer.should_stop` was set.F)rC   r!   r   r@   rl   rS   r>   r6   r?   rA   rV   rW   	completedr4   should_stopro   r    )rJ   
stop_stepsstop_epochsr1   r1   r2   done   s"   
z_FitLoop.donec                 C   s   | j p| jjdkS )zXWhether we should skip the training and immediately return from the call to :meth:`run`.r   )rx   r4   limit_train_batchesrO   r1   r1   r2   skip   s   z_FitLoop.skipc                 C   s   |    | jr	d S |   |   | js@z#z|   |   |   W n ty1   Y W | 	  nw W | 	  n| 	  w | jrd| _
|   d S )NF)
setup_datarz   reseton_run_startrx   on_advance_startadvanceon_advance_endStopIterationon_iteration_done_restarting
on_run_endrO   r1   r1   r2   run   s&   
	z_FitLoop.runc                 C   s  | j d ur
| js
d S | j}|j}|jdkstd|sd S t| jj	 d | j
}t|}|jd t|ts>t|d}n|}|jdkrLt|tjd tj}tj}g }|jD ]}	t|	|| t||||	}	||	 qW||_|| _ |j}
|jd ur|
|jjO }
g }|jD ]}	t|	|j|
rt|	ntd}t|||j}|| q||_ | !  t"|tj| _#| j#$| t%| j# t&|}|d ur|ntd| _'t||j|
}| j'dkrd S |j(| _)t|j*t+r|j*|_,|j,| j'kr|j-d urt.d|j* d	| j' d
n#|s|j*dkrtd|_,nt/dt+| j'|j* |_,t0d|j,|_,|j1rF| j'|j2k rH|j3sJt4d| j' d|j2 dt5d d S d S d S d S )Nr   training_stepz: resetting train dataloaderztrain_dataloader()max_size_cycle)moder:   z `val_check_interval` (zD) must be less than or equal to the number of the training batches (z). If you want to disable validation set `limit_val_batches` to 0.0 instead. If you want to validate based on the total training batches, set `check_val_every_n_epoch=None`.g      ?zWhen using an IterableDataset for `train_dataloader`, `Trainer(val_check_interval)` must be `1.0` or an int. An int k specifies checking validation every k training batches.   z The number of training batches (zA) is smaller than the logging interval Trainer(log_every_n_steps=zZ). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.)category)6rE   rs   r4   lightning_modulery   r   logdebugrL   r)   rD   r   strategybarrierr>   r   overfit_batchesr   r   TRAININGr   FITTING	flattenedr   r   append2allow_zero_length_dataloader_with_multiple_devices
datamoduler   lenrB   r   limits_load_combined_loader_statesr   rG   setupiterr   rC   rq   rH   val_check_intervalr?   val_check_batchcheck_val_every_n_epoch
ValueErrorr   maxloggerslog_every_n_stepsfast_dev_runr"   r	   )rJ   r4   	pl_modulesourcer;   combined_loader
trainer_fnstagedataloadersdlallow_zero_lengthr   lengthnum_batchesrC   has_len_all_ranks_r1   r1   r2   r{      s   









z_FitLoop.setup_datac                 C      | j tjkS rU   )rI   r#   r-   rO   r1   r1   r2   r%   =     z!_FitLoop.restarted_on_epoch_startc                 C   r   rU   )rI   r#   r.   rO   r1   r1   r2   r&   A  r   z_FitLoop.restarted_mid_epochc                 C   r   rU   )rI   r#   r/   rO   r1   r1   r2   r'   E  r   z_FitLoop.restarted_on_epoch_endc                 C   r   rU   )rI   r#   r0   rO   r1   r1   r2   r(   I  s   z_FitLoop.resumed_on_epoch_endc                 C   sV  | j r(| jjj| jjjd kr(| jjj| jjjkr(| jjj| jjjkr(tj| _	n|| j rP| jjj| jjjkrP| jjj| jjjd krP| jjj| jjjkrPtj
| _	nT| j rx| jjj| jjjkrx| jjj| jjjkrx| jjj| jjjd krxtj| _	n,| jr| jjj| jjjkr| jjj| jjjkr| jjj| jjjd krtj| _	ntj| _	| j  d S )Nr   )rT   rA   totalr\   r[   rW   rt   r#   r-   rI   r.   r/   _loaded_from_state_dictr0   r,   r@   update_restart_stagerO   r1   r1   r2   r   O  s4   



z_FitLoop.update_restart_stagec                 C   s   t j| _d S rU   )r#   r,   rI   rO   r1   r1   r2   reset_restart_stageq     z_FitLoop.reset_restart_stagec                 C   s   | j jdusJ td |   | jr| j  | jr!| j	  | j
jr7| jr7| j
jjr7| j  | j	  | j
jrP| j
jjrR| jsT| j
jjjsV| j	  dS dS dS dS dS )z'Resets the internal state of this loop.NT)r4   modeltorchset_grad_enabledr   r%   rA   reset_on_restartr(   increment_completedr@   restarted_on_train_batch_endr&   batch_progressis_last_batchincrement_processedrj   rO   r1   r1   r2   r|   t  s2   





z_FitLoop.resetc                 C   sr   |   s| jjj| jj_| j}| j r%|jdu r%d|_	| jj
  d|_t|d t|d t|d dS )z"Calls the ``on_train_start`` hook.NTon_train_start)r^   rA   rV   rW   rt   r4   r@   _should_check_val_epochval_dataloadersri   rj   r{   rg   r   _call_callback_hooks_call_lightning_module_hook_call_strategy_hookrJ   r4   r1   r1   r2   r}     s   z_FitLoop.on_run_startc                 C   s   | j }|   | jdusJ t| jjD ]\}}t|| jjj q| j	sB| j
sD| js/| j  t|d t|d | j  dS dS dS )zPPrepares the dataloader for training and calls the hook ``on_train_epoch_start``Non_train_epoch_start)r4   r{   rE   	enumerater   r   rA   rV   rW   r&   r'   r%   increment_readyr   r   r   increment_started)rJ   r4   ir   r1   r1   r2   r~     s   
z_FitLoop.on_advance_startc                 C   s   t t| j d | j}|dusJ |jdkr+tdt| j ddd tD  | jj	
d | jdus:J | j| j W d   dS 1 sLw   Y  dS )	zRuns one whole epoch.z: advancing loopN
sequential`zZ` does not support the `CombinedLoader(mode="sequential")` mode. The available modes are: c                 S   s   g | ]}|d kr|qS )r   r1   )rX   mr1   r1   r2   
<listcomp>  s    z$_FitLoop.advance.<locals>.<listcomp>run_training_epoch)r   r   typer)   rE   _moder   r   r4   profilerprofilerG   r@   r   )rJ   r   r1   r1   r2   r     s   
"z_FitLoop.advancec                 C   s   | j }|j  | j  tj|ddd t|d tj|ddd |j  | j	s:| j
 r:| j
jd| j	 d | j
 jd8  _|j  | j
 jd7  _| j  |jrYtd S )Non_train_epoch_endF)monitoring_callbacksTepoch)update_plateau_schedulersr   )r4   _logger_connectorepoch_end_reachedrA   r   r   r   r   on_epoch_endrT   r@   _num_ready_batches_reachedupdate_lr_schedulers_batches_that_steppedupdate_train_epoch_metricsr   received_sigtermr   r   r1   r1   r2   r     s    




z_FitLoop.on_advance_endc                 C   sB   t | jj d | j}t|d t|d t|d dS )z Calls the ``on_train_end`` hook.z: train run endedon_train_endN)	r   r   rL   r)   r4   r   r   r   r   r   r1   r1   r2   r     s
   z_FitLoop.on_run_endc                 C   s(   | j d ur| j   d | _ | j  d S rU   )rG   teardownr@   rO   r1   r1   r2   r     s   

z_FitLoop.teardownc                    s.   t   }| jd ur| j  }r||d< |S Nr   )r<   on_save_checkpointrE   _state_dicts)rJ   
state_dictloader_statesrK   r1   r2   r     s   
z_FitLoop.on_save_checkpointr   c                    s   | dg | _t | d S r   )getrF   r<   on_load_checkpoint)rJ   r   rK   r1   r2   r     s   z_FitLoop.on_load_checkpointc                 C   s
   | j  S )z,Whether the gradients should be accumulated.)r@   _should_accumulaterO   r1   r1   r2   r     s   
z_FitLoop._should_accumulatec                 C   s   | j jdkS )Nr8   )r4   rS   rO   r1   r1   r2   r^   
  r   z"_FitLoop._iteration_based_trainingc                 C   s2   | j r| jr| jd u rd S | j| j g | _d S rU   )rT   rF   rE   _load_state_dictsrO   r1   r1   r2   r     s   
z%_FitLoop._load_combined_loader_states)r   N)r7   N).r)   r*   r+   __doc__r   r?   r=   propertyrN   rQ   rR   rS   r
   rT   setterr   boolre   r   rh   ro   rs   rx   rz   r   r{   r%   r&   r'   r(   r   r   r|   r}   r~   r   r   r   r   dictr   r   r   r^   r   __classcell__r1   r1   rK   r2   r3   :   s    

\

"





"
	r3   )=loggingdataclassesr   typingr   r   r   r   typing_extensionsr   pytorch_lightningpllightning_fabric.utilities.datar   r   #lightning_fabric.utilities.warningsr	   pytorch_lightning.loopsr
    pytorch_lightning.loops.fetchersr    pytorch_lightning.loops.progressr   +pytorch_lightning.loops.training_epoch_loopr   !pytorch_lightning.loops.utilitiesr   r   pytorch_lightning.trainerr   3pytorch_lightning.trainer.connectors.data_connectorr   r   r   r   r   r   <pytorch_lightning.trainer.connectors.logger_connector.resultr    pytorch_lightning.trainer.statesr   r   +pytorch_lightning.utilities.combined_loaderr   r    pytorch_lightning.utilities.datar   &pytorch_lightning.utilities.exceptionsr   r   )pytorch_lightning.utilities.model_helpersr   %pytorch_lightning.utilities.rank_zeror    r!   r"   	getLoggerr)   r   r#   r3   r1   r1   r1   r2   <module>   s4    
