o
    ziJ                     @   s   d Z ddlZddlmZmZmZmZmZmZm	Z	m
Z
 ddlZddlmZmZ ddlmZ ddlmZ ddlmZ ddlZddlmZ dd	lmZ dd
lmZ eeZde de!fddZ"G dd deZ#G dd de#Z$dS )z_
Finetuning Callback
^^^^^^^^^^^^^^^^^^^^

Freeze and unfreeze models for finetuning purposes.
    N)AnyCallableDict	GeneratorIterableListOptionalUnion)Module
ModuleDict)
_BatchNorm)	Optimizer)override)Callback)MisconfigurationException)rank_zero_warnepochreturnc                 C   s   dS )Ng       @ )r   r   r   Z/home/ubuntu/.local/lib/python3.10/site-packages/pytorch_lightning/callbacks/finetuning.pymultiplicative&   s   r   c                   @   s@  e Zd ZdZd<ddZedeeef fddZ	edeeef ddfd	d
Z
ed=ddZedeeeeeef  f dee fddZe	d>deeeeeef  f dededefddZedeeeeeef  f ddfddZededdfddZed?deeeeeef  f deddfddZed ed!edefd"d#Ze		$	d@deeeeeef  f d ed%ee d&ededdfd'd(Zeddddd)eddfd*d+Zed,eeeef  d-edeeeef  fd.d/Zddd0e d1e d2eeeef  ddf
d3d4Z!ed=d5d6Z"ddd7e d eddfd8d9Z#dAd:d;Z$dS )BBaseFinetuninga  This class implements the base logic for writing your own Finetuning Callback.

    .. warning::  This is an :ref:`experimental <versioning:Experimental API>` feature.

    Override ``freeze_before_training`` and ``finetune_function`` methods with your own logic.

    ``freeze_before_training``: This method is called before ``configure_optimizers``
        and should be used to freeze any modules parameters.

    ``finetune_function``: This method is called on every train epoch start and should be used to
        ``unfreeze`` any parameters. Those parameters need to be added in a new ``param_group``
        within the optimizer.

    .. note:: Make sure to filter the parameters based on ``requires_grad``.

    Example::

        >>> from torch.optim import Adam
        >>> class MyModel(pl.LightningModule):
        ...     def configure_optimizer(self):
        ...         # Make sure to filter the parameters based on `requires_grad`
        ...         return Adam(filter(lambda p: p.requires_grad, self.parameters()))
        ...
        >>> class FeatureExtractorFreezeUnfreeze(BaseFinetuning):
        ...     def __init__(self, unfreeze_at_epoch=10):
        ...         super().__init__()
        ...         self._unfreeze_at_epoch = unfreeze_at_epoch
        ...
        ...     def freeze_before_training(self, pl_module):
        ...         # freeze any module you want
        ...         # Here, we are freezing `feature_extractor`
        ...         self.freeze(pl_module.feature_extractor)
        ...
        ...     def finetune_function(self, pl_module, current_epoch, optimizer):
        ...         # When `current_epoch` is 10, feature_extractor will start training.
        ...         if current_epoch == self._unfreeze_at_epoch:
        ...             self.unfreeze_and_add_param_group(
        ...                 modules=pl_module.feature_extractor,
        ...                 optimizer=optimizer,
        ...                 train_bn=True,
        ...             )

    r   Nc                 C   s   i | _ d| _d S NF)_internal_optimizer_metadata_restartingselfr   r   r   __init__W   s   
zBaseFinetuning.__init__c                 C   s
   d| j iS )Ninternal_optimizer_metadata)r   r   r   r   r   
state_dict[   s   zBaseFinetuning.state_dictr   c                 C   s&   d| _ d|v r|d | _d S || _d S )NTr   )r   r   r   r   r   r   r   load_state_dicta   s   
zBaseFinetuning.load_state_dicttrainer
pl.Trainer	pl_modulepl.LightningModulec                 C   sL   | j r$t| }t|jD ]\}}| | j| |}||_qd| _ d S d S r   )r   dictnamed_parameters	enumerate
optimizers_apply_mapping_to_param_groupsr   param_groups)r   r"   r$   r'   opt_idx	optimizerr+   r   r   r   on_fit_startj   s   

zBaseFinetuning.on_fit_startmodulesc                 C   sZ   t | tr	|  } t | tr"g }| D ]
}|t| qt|}n|  }dd |D S )aH  This function is used to flatten a module or an iterable of modules into a list of its leaf modules (modules
        with no children) and parent modules that have parameters directly themselves.

        Args:
            modules: A given module or an iterable of modules

        Returns:
            List of modules

        c                 S   s"   g | ]}t | r|jr|qS r   )listchildren_parameters).0mr   r   r   
<listcomp>   s   " z2BaseFinetuning.flatten_modules.<locals>.<listcomp>)	
isinstancer   valuesr   extendr   flatten_modulesiterr/   )r/   _flatten_modulesr4   _modulesr   r   r   r9   v   s   


zBaseFinetuning.flatten_modulesTtrain_bnrequires_gradc                 c   sL    t | } | D ]}t|tr|sq|jddD ]
}|j|kr"|V  qqdS )am  Yields the `requires_grad` parameters of a given module or list of modules.

        Args:
            modules: A given module or an iterable of modules
            train_bn: Whether not to train the BatchNorm module
            requires_grad: Whether to create a generator for trainable or non-trainable parameters.
        Returns:
            Generator

        FrecurseN)r   r9   r6   r   
parametersr>   )r/   r=   r>   modparamr   r   r   filter_params   s   

zBaseFinetuning.filter_paramsc                 C   s@   t | } | D ]}t|trd|_|jddD ]}d|_qqdS )zUnfreezes the parameters of the provided modules.

        Args:
            modules: A given module or an iterable of modules

        TFr?   N)r   r9   r6   r   track_running_statsrA   r>   )r/   modulerC   r   r   r   make_trainable   s   

zBaseFinetuning.make_trainablerF   c                 C   s,   t | trd| _| jddD ]}d|_qdS )zjFreezes the parameters of the provided module.

        Args:
            module: A given module

        Fr?   N)r6   r   rE   rA   r>   )rF   rC   r   r   r   freeze_module   s
   
zBaseFinetuning.freeze_modulec                 C   s<   t | } | D ]}t|tr|rt | qt | qdS )zFreezes the parameters of the provided modules.

        Args:
            modules: A given module or an iterable of modules
            train_bn: If True, leave the BatchNorm layers in training mode

        Returns:
            None

        N)r   r9   r6   r   rG   rH   )r/   r=   rB   r   r   r   freeze   s   
zBaseFinetuning.freezer-   paramsc                    s\   g }g }|D ] t  fdd| jD s|  q|  q|r,tdt|  d |S )ac  This function is used to exclude any parameter which already exists in this optimizer.

        Args:
            optimizer: Optimizer used for parameter exclusion
            params: Iterable of parameters used to check against the provided optimizer

        Returns:
            List of parameters not contained in this optimizer param groups

        c                 3   s*    | ]}|d  D ]	}t | V  qqdS )rJ   N)torchequal)r3   groupprC   r   r   	<genexpr>   s   ( z5BaseFinetuning.filter_on_optimizer.<locals>.<genexpr>zThe provided params to be frozen already exist within another group of this optimizer. Those parameters will be skipped.
HINT: Did you init your optimizer in `configure_optimizer` as such:
 z<(filter(lambda p: p.requires_grad, self.parameters()), ...) )anyr+   appendr   type)r-   rJ   
out_paramsremoved_paramsr   rO   r   filter_on_optimizer   s   z"BaseFinetuning.filter_on_optimizer      $@lrinitial_denom_lrc                 C   st   t |  |du r|jd d nt|}|du r|nd}t j| |dd}t ||}|r8|||| d dS dS )a  Unfreezes a module and adds its parameters to an optimizer.

        Args:
            modules: A module or iterable of modules to unfreeze.
                Their parameters will be added to an optimizer as a new param group.
            optimizer: The provided optimizer will receive new parameters and will add them to
                `add_param_group`
            lr: Learning rate for the new param group.
            initial_denom_lr: If no lr is provided, the learning from the first param group will be used
                and divided by `initial_denom_lr`.
            train_bn: Whether to train the BatchNormalization layers.

        Nr   rX   g      ?T)r=   r>   )rJ   rX   )r   rG   r+   floatrD   rV   add_param_group)r/   r-   rX   rY   r=   	params_lrdenom_lrrJ   r   r   r   unfreeze_and_add_param_group   s   
z+BaseFinetuning.unfreeze_and_add_param_groupstagec                 C   s.   |  | ddlm} t|j|rtdd S )Nr   )DeepSpeedStrategyzThe Finetuning callback does not support running with the DeepSpeed strategy. Choose a different strategy or disable the callback.)freeze_before_trainingpytorch_lightning.strategiesr`   r6   strategyNotImplementedError)r   r"   r$   r_   r`   r   r   r   setup  s   
zBaseFinetuning.setupr+   mappingc                    sH   g }| D ]}dd |  D } fdd|d D |d< || q|S )Nc                 S   s   i | ]\}}|d kr||qS )rJ   r   )r3   kvr   r   r   
<dictcomp>$  s    zABaseFinetuning._apply_mapping_to_param_groups.<locals>.<dictcomp>c                    s   g | ]} | qS r   r   )r3   rN   rf   r   r   r5   %  s    zABaseFinetuning._apply_mapping_to_param_groups.<locals>.<listcomp>rJ   )itemsrR   )r+   rf   outputggroup_stater   rj   r   r*     s   z-BaseFinetuning._apply_mapping_to_param_groupsr,   num_param_groupscurrent_param_groupsc                 C   sf   dd |  D }|| jvr| ||| j|< d S |t|kr1| j| | ||d  | d S d S )Nc                 S   s   i | ]\}}||qS r   r   )r3   nrN   r   r   r   ri   0  s    z)BaseFinetuning._store.<locals>.<dictcomp>)r'   r   r*   lenr8   )r   r$   r,   ro   rp   rf   r   r   r   _store)  s   

zBaseFinetuning._storec                 C   sH   t |jD ]\}}t|j}| ||j| |j}| |||| qdS )Called when the epoch begins.N)r(   r)   rr   r+   finetune_functioncurrent_epochrs   )r   r"   r$   r,   r-   ro   rp   r   r   r   on_train_epoch_start;  s   
z#BaseFinetuning.on_train_epoch_startr   c                 C      t )z$Override to add your unfreeze logic.rd   )r   r$   r   r-   r   r   r   ru   D     z BaseFinetuning.finetune_functionc                 C   rx   )z"Override to add your freeze logic.ry   r   r$   r   r   r   ra   H  rz   z%BaseFinetuning.freeze_before_training)r   Nr"   r#   r$   r%   r   N)TT)T)NrW   Tr$   r%   r   N)%__name__
__module____qualname____doc__r   r   r   strr   r   r!   r.   staticmethodr	   r
   r   r   r9   boolr   rD   rG   rH   rI   r   rV   r   rZ   r^   re   r&   r*   intrs   rw   ru   ra   r   r   r   r   r   *   s    
,,(.0	
r   c                       s   e Zd ZdZdedddddddf	d	ed
ededee de	dede	de	deddf fddZ
edeeef fddZedeeef ddf fddZed& fddZed'd d!Zeddd"ed#eddfd$d%Z  ZS )(BackboneFinetuninga  Finetune a backbone model based on a learning rate user-defined scheduling.

    When the backbone learning rate reaches the current model learning rate
    and ``should_align`` is set to True, it will align with it for the rest of the training.

    Args:
        unfreeze_backbone_at_epoch: Epoch at which the backbone will be unfreezed.
        lambda_func: Scheduling function for increasing backbone learning rate.
        backbone_initial_ratio_lr:
            Used to scale down the backbone learning rate compared to rest of model
        backbone_initial_lr: Optional, Initial learning rate for the backbone.
            By default, we will use ``current_learning /  backbone_initial_ratio_lr``
        should_align: Whether to align with current learning rate when backbone learning
            reaches it.
        initial_denom_lr: When unfreezing the backbone, the initial learning rate will
            ``current_learning_rate /  initial_denom_lr``.
        train_bn: Whether to make Batch Normalization trainable.
        verbose: Display current learning rate for model and backbone
        rounding: Precision for displaying learning rate

    Example::

        >>> from pytorch_lightning import Trainer
        >>> from pytorch_lightning.callbacks import BackboneFinetuning
        >>> multiplicative = lambda epoch: 1.5
        >>> backbone_finetuning = BackboneFinetuning(200, multiplicative)
        >>> trainer = Trainer(callbacks=[backbone_finetuning])

    
   g?NTrW   F   unfreeze_backbone_at_epochlambda_funcbackbone_initial_ratio_lrbackbone_initial_lrshould_alignrY   r=   verboseroundingr   c
           
         sJ   t    || _|| _|| _|| _|| _|| _|| _|| _	|	| _
d | _d S N)superr   r   r   r   r   r   rY   r=   r   r   previous_backbone_lr)
r   r   r   r   r   r   rY   r=   r   r   	__class__r   r   r   l  s   

zBackboneFinetuning.__init__c                 C   s   | j | jdS )N)r   r   )r   r   r   r   r   r   r     s   zBackboneFinetuning.state_dictr   c                    s   |d | _ t | d S )Nr   )r   r   r!   r    r   r   r   r!     s   
z"BackboneFinetuning.load_state_dictr"   r#   r$   r%   c                    s,   t |drt|jtrt ||S td)z
        Raises:
            MisconfigurationException:
                If LightningModule has no nn.Module `backbone` attribute.
        backbonez@The LightningModule should have a nn.Module `backbone` attribute)hasattrr6   r   r
   r   r.   r   )r   r"   r$   r   r   r   r.     s   zBackboneFinetuning.on_fit_startc                 C   s   |  |j d S r   )rI   r   r{   r   r   r   ra     s   z)BackboneFinetuning.freeze_before_trainingr   r-   c                 C   s  || j krC|jd d }| jdur| jn|| j }|| _| j|j||| j| jd | j	rAt
dt|| j dt|| j  dS dS || j kr|jd d }| |d | j }| jrb||krb|n|}||jd d< || _| j	rt
dt|| j dt|| j  dS dS dS )	rt   r   rX   N)r=   rY   zCurrent lr: z, Backbone lr:    )r   r+   r   r   r   r^   r   r=   rY   r   loginforoundr   r   r   )r   r$   r   r-   
current_lrinitial_backbone_lrnext_current_backbone_lrr   r   r   ru     sP   





z$BackboneFinetuning.finetune_functionr|   r}   )r~   r   r   r   r   r   r   rZ   r   r   r   r   r   r   r   r   r!   r.   ra   r   ru   __classcell__r   r   r   r   r   M  sT     	
 
$r   )%r   loggingtypingr   r   r   r   r   r   r   r	   rK   torch.nnr
   r   torch.nn.modules.batchnormr   torch.optim.optimizerr   typing_extensionsr   pytorch_lightningpl$pytorch_lightning.callbacks.callbackr   &pytorch_lightning.utilities.exceptionsr   %pytorch_lightning.utilities.rank_zeror   	getLoggerr~   r   r   rZ   r   r   r   r   r   r   r   <module>   s$   (
  %