o
    }oi                     @   sB   d dl Z d dlmZ d dlmZ d dlmZ G dd dejZ	dS )    N)Any)loggingc                       sz   e Zd ZdZd fddZdejdejdejj	j
d	ed
eddfddZdejdejdejj	j
d	ed
eddfddZ  ZS )GarbageCollectionCallbacka  Callback for synchronized manual Garbage Collection. This is required for distributed training
    as all processes on different rank need to synchronize to garbage collect at the same time, without which
    one process might hog or straggle all the rest of the processes.

    Migration from NeMo 1.0:
        When mitrating from NeMo1,
            - gc_interval = 0 implied no GC, simply do not add this callback to the trainer
            - gc_interval > 0, this config is maps => gc_interval_train

            - env-var:NEMO_MANUAL_GC_IN_VALIDATION=0 or doesn't exist => Set gc_interval_val to a very high value that it does not practically run.
            - env-var:NEMO_MANUAL_GC_IN_VALIDATION=1 => Set gc_interval_val to the same value as gc_interval

        Moving from boolean flag (NEMO_MANUAL_GC_IN_VALIDATION) to integer is to allow user to set a specific value based on the size of the
        validation datasets.

    Note: This callback does not run gc at the start or the end of training or validation.
    returnNc                    sH   |dksJ d|dksJ dt    || _|| _t  d| _dS )a  _summary_

        Args:
            gc_interval (int, mandatory): Number of global train steps at which garbage collection is done.
            gc_interval_val (int, mandatory): Number of global validation steps at which garbage collection is done.
        r   z;gc_interval_train should be an integer value larger than 0.z9gc_interval_val should be an integer value larger than 0.N)super__init__gc_interval_traingc_interval_valgcdisablevalidation_global_step)selfr   r	   	__class__ g/home/ubuntu/.local/lib/python3.10/site-packages/nemo/lightning/pytorch/callbacks/garbage_collection.pyr   )   s   

z"GarbageCollectionCallback.__init__trainer	pl_moduleoutputsbatch	batch_idxc                 C   s2   |j | j dkrtd|j   t  d S d S )Nr   z1Running garbage collection at train global_step: )global_stepr   r   infor
   collectr   r   r   r   r   r   r   r   r   on_train_batch_end;   s   z,GarbageCollectionCallback.on_train_batch_endc                 C   s@   |  j d7  _ | j | j dkrtd| j   t  d S d S )N   r   z/Running garbage collection at validation step: )r   r	   r   r   r
   r   r   r   r   r   on_validation_batch_endG   s
   z1GarbageCollectionCallback.on_validation_batch_end)r   N)__name__
__module____qualname____doc__r   plTrainerLightningModule	utilitiestypesSTEP_OUTPUTr   intr   r   __classcell__r   r   r   r   r      s:    
r   )
r
   typingr   lightning.pytorchpytorchr"   
nemo.utilsr   Callbackr   r   r   r   r   <module>   s
   