o
    8wiJ                     @   s   d Z ddlZddlmZmZ ddlmZmZmZm	Z	 ddl
Z
ddlmZmZ ddlmZ ddlmZ ddlmZ ddlZdd	lmZ dd
lmZ ddlmZ eeZdede fddZ!G dd deZ"G dd de"Z#dS )z_
Finetuning Callback
^^^^^^^^^^^^^^^^^^^^

Freeze and unfreeze models for finetuning purposes.
    N)	GeneratorIterable)AnyCallableOptionalUnion)Module
ModuleDict)
_BatchNorm)	Optimizer)override)Callback)MisconfigurationException)rank_zero_warnepochreturnc                 C   s   dS )Ng       @ )r   r   r   c/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/pytorch_lightning/callbacks/finetuning.pymultiplicative'   s   r   c                   @   s@  e Zd ZdZd<ddZedeeef fddZ	edeeef ddfd	d
Z
ed=ddZedeeeeeef  f dee fddZe	d>deeeeeef  f dededefddZedeeeeeef  f ddfddZededdfddZed?deeeeeef  f deddfddZed ed!edefd"d#Ze		$	d@deeeeeef  f d ed%ee d&ededdfd'd(Zeddddd)eddfd*d+Zed,eeeef  d-edeeeef  fd.d/Zddd0ed1ed2eeeef  ddf
d3d4Z ed=d5d6Z!ddd7ed eddfd8d9Z"dAd:d;Z#dS )BBaseFinetuninga  This class implements the base logic for writing your own Finetuning Callback.

    .. warning::  This is an :ref:`experimental <versioning:Experimental API>` feature.

    Override ``freeze_before_training`` and ``finetune_function`` methods with your own logic.

    ``freeze_before_training``: This method is called before ``configure_optimizers``
        and should be used to freeze any modules parameters.

    ``finetune_function``: This method is called on every train epoch start and should be used to
        ``unfreeze`` any parameters. Those parameters need to be added in a new ``param_group``
        within the optimizer.

    .. note:: Make sure to filter the parameters based on ``requires_grad``.

    Example::

        >>> from torch.optim import Adam
        >>> class MyModel(pl.LightningModule):
        ...     def configure_optimizer(self):
        ...         # Make sure to filter the parameters based on `requires_grad`
        ...         return Adam(filter(lambda p: p.requires_grad, self.parameters()))
        ...
        >>> class FeatureExtractorFreezeUnfreeze(BaseFinetuning):
        ...     def __init__(self, unfreeze_at_epoch=10):
        ...         super().__init__()
        ...         self._unfreeze_at_epoch = unfreeze_at_epoch
        ...
        ...     def freeze_before_training(self, pl_module):
        ...         # freeze any module you want
        ...         # Here, we are freezing `feature_extractor`
        ...         self.freeze(pl_module.feature_extractor)
        ...
        ...     def finetune_function(self, pl_module, current_epoch, optimizer):
        ...         # When `current_epoch` is 10, feature_extractor will start training.
        ...         if current_epoch == self._unfreeze_at_epoch:
        ...             self.unfreeze_and_add_param_group(
        ...                 modules=pl_module.feature_extractor,
        ...                 optimizer=optimizer,
        ...                 train_bn=True,
        ...             )

    r   Nc                 C   s   i | _ d| _d S NF)_internal_optimizer_metadata_restartingselfr   r   r   __init__X   s   
zBaseFinetuning.__init__c                 C   s
   d| j iS )Ninternal_optimizer_metadata)r   r   r   r   r   
state_dict\   s   zBaseFinetuning.state_dictr   c                 C   s&   d| _ d|v r|d | _d S || _d S )NTr   )r   r   r   r   r   r   r   load_state_dictb   s   
zBaseFinetuning.load_state_dicttrainer
pl.Trainer	pl_modulepl.LightningModulec                 C   sL   | j r$t| }t|jD ]\}}| | j| |}||_qd| _ d S d S r   )r   dictnamed_parameters	enumerate
optimizers_apply_mapping_to_param_groupsr   param_groups)r   r    r"   r%   opt_idx	optimizerr)   r   r   r   on_fit_startk   s   

zBaseFinetuning.on_fit_startmodulesc                 C   sZ   t | tr	|  } t | tr"g }| D ]
}|t| qt|}n|  }dd |D S )aH  This function is used to flatten a module or an iterable of modules into a list of its leaf modules (modules
        with no children) and parent modules that have parameters directly themselves.

        Args:
            modules: A given module or an iterable of modules

        Returns:
            List of modules

        c                 S   s"   g | ]}t | r|jr|qS r   )listchildren_parameters).0mr   r   r   
<listcomp>   s   " z2BaseFinetuning.flatten_modules.<locals>.<listcomp>)	
isinstancer	   valuesr   extendr   flatten_modulesiterr-   )r-   _flatten_modulesr2   _modulesr   r   r   r7   w   s   


zBaseFinetuning.flatten_modulesTtrain_bnrequires_gradc                 c   sL    t | } | D ]}t|tr|sq|jddD ]
}|j|kr"|V  qqdS )am  Yields the `requires_grad` parameters of a given module or list of modules.

        Args:
            modules: A given module or an iterable of modules
            train_bn: Whether not to train the BatchNorm module
            requires_grad: Whether to create a generator for trainable or non-trainable parameters.
        Returns:
            Generator

        FrecurseN)r   r7   r4   r
   
parametersr<   )r-   r;   r<   modparamr   r   r   filter_params   s   

zBaseFinetuning.filter_paramsc                 C   s@   t | } | D ]}t|trd|_|jddD ]}d|_qqdS )zUnfreezes the parameters of the provided modules.

        Args:
            modules: A given module or an iterable of modules

        TFr=   N)r   r7   r4   r
   track_running_statsr?   r<   )r-   modulerA   r   r   r   make_trainable   s   

zBaseFinetuning.make_trainablerD   c                 C   s,   t | trd| _| jddD ]}d|_qdS )zjFreezes the parameters of the provided module.

        Args:
            module: A given module

        Fr=   N)r4   r
   rC   r?   r<   )rD   rA   r   r   r   freeze_module   s
   
zBaseFinetuning.freeze_modulec                 C   s<   t | } | D ]}t|tr|rt | qt | qdS )zFreezes the parameters of the provided modules.

        Args:
            modules: A given module or an iterable of modules
            train_bn: If True, leave the BatchNorm layers in training mode

        Returns:
            None

        N)r   r7   r4   r
   rE   rF   )r-   r;   r@   r   r   r   freeze   s   
zBaseFinetuning.freezer+   paramsc                    s\   g }g }|D ] t  fdd| jD s|  q|  q|r,tdt|  d |S )ac  This function is used to exclude any parameter which already exists in this optimizer.

        Args:
            optimizer: Optimizer used for parameter exclusion
            params: Iterable of parameters used to check against the provided optimizer

        Returns:
            List of parameters not contained in this optimizer param groups

        c                 3   s*    | ]}|d  D ]	}t | V  qqdS )rH   N)torchequal)r1   groupprA   r   r   	<genexpr>   s   ( z5BaseFinetuning.filter_on_optimizer.<locals>.<genexpr>zThe provided params to be frozen already exist within another group of this optimizer. Those parameters will be skipped.
HINT: Did you init your optimizer in `configure_optimizer` as such:
 z<(filter(lambda p: p.requires_grad, self.parameters()), ...) )anyr)   appendr   type)r+   rH   
out_paramsremoved_paramsr   rM   r   filter_on_optimizer   s   z"BaseFinetuning.filter_on_optimizer      $@lrinitial_denom_lrc                 C   st   t |  |du r|jd d nt|}|du r|nd}t j| |dd}t ||}|r8|||| d dS dS )a  Unfreezes a module and adds its parameters to an optimizer.

        Args:
            modules: A module or iterable of modules to unfreeze.
                Their parameters will be added to an optimizer as a new param group.
            optimizer: The provided optimizer will receive new parameters and will add them to
                `add_param_group`
            lr: Learning rate for the new param group.
            initial_denom_lr: If no lr is provided, the learning from the first param group will be used
                and divided by `initial_denom_lr`.
            train_bn: Whether to train the BatchNormalization layers.

        Nr   rV   g      ?T)r;   r<   )rH   rV   )r   rE   r)   floatrB   rT   add_param_group)r-   r+   rV   rW   r;   	params_lrdenom_lrrH   r   r   r   unfreeze_and_add_param_group   s   
z+BaseFinetuning.unfreeze_and_add_param_groupstagec                 C   s.   |  | ddlm} t|j|rtdd S )Nr   )DeepSpeedStrategyzThe Finetuning callback does not support running with the DeepSpeed strategy. Choose a different strategy or disable the callback.)freeze_before_trainingpytorch_lightning.strategiesr^   r4   strategyNotImplementedError)r   r    r"   r]   r^   r   r   r   setup  s   
zBaseFinetuning.setupr)   mappingc                    sH   g }| D ]}dd |  D } fdd|d D |d< || q|S )Nc                 S   s   i | ]\}}|d kr||qS )rH   r   )r1   kvr   r   r   
<dictcomp>%  s    zABaseFinetuning._apply_mapping_to_param_groups.<locals>.<dictcomp>c                    s   g | ]} | qS r   r   )r1   rL   rd   r   r   r3   &  s    zABaseFinetuning._apply_mapping_to_param_groups.<locals>.<listcomp>rH   )itemsrP   )r)   rd   outputggroup_stater   rh   r   r(      s   z-BaseFinetuning._apply_mapping_to_param_groupsr*   num_param_groupscurrent_param_groupsc                 C   sf   dd |  D }|| jvr| ||| j|< d S |t|kr1| j| | ||d  | d S d S )Nc                 S   s   i | ]\}}||qS r   r   )r1   nrL   r   r   r   rg   1  s    z)BaseFinetuning._store.<locals>.<dictcomp>)r%   r   r(   lenr6   )r   r"   r*   rm   rn   rd   r   r   r   _store*  s   

zBaseFinetuning._storec                 C   sH   t |jD ]\}}t|j}| ||j| |j}| |||| qdS )Called when the epoch begins.N)r&   r'   rp   r)   finetune_functioncurrent_epochrq   )r   r    r"   r*   r+   rm   rn   r   r   r   on_train_epoch_start<  s   
z#BaseFinetuning.on_train_epoch_startr   c                 C      t )z$Override to add your unfreeze logic.rb   )r   r"   r   r+   r   r   r   rs   E     z BaseFinetuning.finetune_functionc                 C   rv   )z"Override to add your freeze logic.rw   r   r"   r   r   r   r_   I  rx   z%BaseFinetuning.freeze_before_training)r   Nr    r!   r"   r#   r   N)TT)T)NrU   Tr"   r#   r   N)$__name__
__module____qualname____doc__r   r   r$   strr   r   r   r,   staticmethodr   r   r   r.   r7   boolr   rB   rE   rF   rG   r   rT   r   rX   r\   rc   r(   intrq   ru   rs   r_   r   r   r   r   r   +   s    
,,(.0	
r   c                       s   e Zd ZdZdedddddddf	d	ed
ededee de	dede	de	deddf fddZ
edeeef fddZedeeef ddf fddZed& fddZed'd d!Zeddd"ed#eddfd$d%Z  ZS )(BackboneFinetuninga  Finetune a backbone model based on a learning rate user-defined scheduling.

    When the backbone learning rate reaches the current model learning rate
    and ``should_align`` is set to True, it will align with it for the rest of the training.

    Args:
        unfreeze_backbone_at_epoch: Epoch at which the backbone will be unfreezed.
        lambda_func: Scheduling function for increasing backbone learning rate.
        backbone_initial_ratio_lr:
            Used to scale down the backbone learning rate compared to rest of model
        backbone_initial_lr: Optional, Initial learning rate for the backbone.
            By default, we will use ``current_learning /  backbone_initial_ratio_lr``
        should_align: Whether to align with current learning rate when backbone learning
            reaches it.
        initial_denom_lr: When unfreezing the backbone, the initial learning rate will
            ``current_learning_rate /  initial_denom_lr``.
        train_bn: Whether to make Batch Normalization trainable.
        verbose: Display current learning rate for model and backbone
        rounding: Precision for displaying learning rate

    Example::

        >>> from pytorch_lightning import Trainer
        >>> from pytorch_lightning.callbacks import BackboneFinetuning
        >>> multiplicative = lambda epoch: 1.5
        >>> backbone_finetuning = BackboneFinetuning(200, multiplicative)
        >>> trainer = Trainer(callbacks=[backbone_finetuning])

    
   g?NTrU   F   unfreeze_backbone_at_epochlambda_funcbackbone_initial_ratio_lrbackbone_initial_lrshould_alignrW   r;   verboseroundingr   c
           
         sJ   t    || _|| _|| _|| _|| _|| _|| _|| _	|	| _
d | _d S N)superr   r   r   r   r   r   rW   r;   r   r   previous_backbone_lr)
r   r   r   r   r   r   rW   r;   r   r   	__class__r   r   r   m  s   

zBackboneFinetuning.__init__c                 C   s   | j | jdS )N)r   r   )r   r   r   r   r   r   r     s   zBackboneFinetuning.state_dictr   c                    s   |d | _ t | d S )Nr   )r   r   r   r   r   r   r   r     s   
z"BackboneFinetuning.load_state_dictr    r!   r"   r#   c                    s,   t |drt|jtrt ||S td)z
        Raises:
            MisconfigurationException:
                If LightningModule has no nn.Module `backbone` attribute.
        backbonez@The LightningModule should have a nn.Module `backbone` attribute)hasattrr4   r   r   r   r,   r   )r   r    r"   r   r   r   r,     s   zBackboneFinetuning.on_fit_startc                 C   s   |  |j d S r   )rG   r   ry   r   r   r   r_     s   z)BackboneFinetuning.freeze_before_trainingr   r+   c                 C   s  || j krC|jd d }| jdur| jn|| j }|| _| j|j||| j| jd | j	rAt
dt|| j dt|| j  dS dS || j kr|jd d }| |d | j }| jrb||krb|n|}||jd d< || _| j	rt
dt|| j dt|| j  dS dS dS )	rr   r   rV   N)r;   rW   zCurrent lr: z, Backbone lr:    )r   r)   r   r   r   r\   r   r;   rW   r   loginforoundr   r   r   )r   r"   r   r+   
current_lrinitial_backbone_lrnext_current_backbone_lrr   r   r   rs     sP   





z$BackboneFinetuning.finetune_functionrz   r{   )r|   r}   r~   r   r   r   r   rX   r   r   r   r   r$   r   r   r   r   r,   r_   r   rs   __classcell__r   r   r   r   r   N  sT     	
 
$r   )$r   loggingcollections.abcr   r   typingr   r   r   r   rI   torch.nnr   r	   torch.nn.modules.batchnormr
   torch.optim.optimizerr   typing_extensionsr   pytorch_lightningpl$pytorch_lightning.callbacks.callbackr   &pytorch_lightning.utilities.exceptionsr   %pytorch_lightning.utilities.rank_zeror   	getLoggerr|   r   r   rX   r   r   r   r   r   r   r   <module>   s&   
  %