o
    wi                     @   s   d dl mZmZmZ d dlmZmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZ d dlmZmZ G dd	 d	ejZG d
d dejeZeG dd dZdS )    )	dataclassfieldis_dataclass)AnyOptional)instantiate)	OmegaConf)nn)activation_registry)access_mixinsadapter_mixin_strategiesc                   @   s6   e Zd ZdZdeej fddZdddZd	d
 Z	dS )AdapterModuleUtilz_
    Base class of Adapter Modules, providing common functionality to all Adapter Modules.
    adapter_strategyc                 C   st   |du r|   }t|rt|}t|d t|ts!t|r(t|| _	dS t|t
jr3|| _	dS td| )aY  
        Setup adapter strategy of this class, enabling dynamic change in the way the adapter output is
        merged with the input.

        When called successfully, will assign the variable `adapter_strategy` to the module.

        Args:
            adapter_strategy: Can be a None or an implementation of AbstractAdapterStrategy.
        NFz)`adapter_strategy` provided is invalid : )get_default_strategy_configr   r   
structured
set_struct
isinstancedict	is_configr   r   r   AbstractAdapterStrategyAttributeError)selfr    r   j/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/nemo/collections/common/parts/adapter_modules.pysetup_adapter_strategy   s   

z(AdapterModuleUtil.setup_adapter_strategyreturnr   c                 C      t  S )z<
        Returns a default adapter module strategy.
        r    ResidualAddAdapterStrategyConfigr   r   r   r   r   :   s   z-AdapterModuleUtil.get_default_strategy_configc                 C   s   |   D ]}|d qdS )z
        Sets the requires grad for all parameters in the adapter to True.
        This method should be overridden for any custom unfreeze behavior that is required.
        For example, if not all params of the adapter should be unfrozen.
        TN)
parametersrequires_grad_)r   paramr   r   r   adapter_unfreeze@   s   z"AdapterModuleUtil.adapter_unfreezeN)r   r   )
__name__
__module____qualname____doc__r   r   r   r   r   r#   r   r   r   r   r      s
    
r   c                       sV   e Zd ZdZ				ddededed	ed
edejf fddZ	dd Z
dd Z  ZS )LinearAdaptera  
    Simple Linear Feedforward Adapter module with LayerNorm and singe hidden layer with activation function.
    Note: The adapter explicitly initializes its final layer with all zeros in order to avoid affecting the
    original model when all adapters are disabled.

    Args:
        in_features: Input dimension of the module. Note that for adapters, input_dim == output_dim.
        dim: Hidden dimension of the feed forward network.
        activation: Str name for an activation function.
        norm_position: Str, can be `pre` or `post`. Defaults to `pre`. Determines whether the normalization
            will occur in the first layer or the last layer. Certain architectures may prefer one over the other.
        dropout: float value, whether to perform dropout on the output of the last layer of the adapter.
        adapter_strategy: By default, ResidualAddAdapterStrategyConfig. An adapter composition function object.
    swishpre        Nin_featuresdim
activationnorm_positiondropoutr   c              
      s   t    t|  }t|drd|_|dv sJ || _|dkr8tt|tj	||dd|tj	||dd| _
n|dkrTttj	||dd|tj	||ddt|| _
|dkr_t|| _nd | _| | |   d S )	NinplaceT)r*   postr*   F)biasr2   r+   )super__init__r
   hasattrr1   r/   r	   
Sequential	LayerNormLinearmoduleDropoutr0   r   reset_parameters)r   r,   r-   r.   r/   r0   r   	__class__r   r   r5   [   s2   
	


zLinearAdapter.__init__c                 C   sb   | j dkr| jd j jd9  _d S | j dkr/| jd j jd9  _| jd j jd9  _d S d S )Nr*   r   r2   )r/   r:   weightdatar3   r   r   r   r   r<      s   

zLinearAdapter.reset_parametersc                 C   s"   |  |}| jd ur| |}|S N)r:   r0   )r   xr   r   r   forward   s   


zLinearAdapter.forward)r)   r*   r+   N)r$   r%   r&   r'   intstrfloatr   r   r5   r<   rD   __classcell__r   r   r=   r   r(   J   s*    .	r(   c                   @   st   e Zd ZU eed< eed< dZeed< dZeed< dZe	ed< e
d	d
 dZee ed< dejej Zeed< dS )LinearAdapterConfigr,   r-   r)   r.   r*   r/   r+   r0   c                   C   r   rB   r   r   r   r   r   <lambda>   s    zLinearAdapterConfig.<lambda>)default_factoryr   z{0}.{1}_target_N)r$   r%   r&   rE   __annotations__r.   rF   r/   r0   rG   r   r   r   r   formatr(   rL   r   r   r   r   rI      s   
 rI   N)dataclassesr   r   r   typingr   r   hydra.utilsr   	omegaconfr   torchr	   #nemo.collections.common.parts.utilsr
   nemo.core.classes.mixinsr   r   AccessMixinr   Moduler(   rI   r   r   r   r   <module>   s   0R