o
    NÆÏi¸(  ã                   @  s„   d dl mZ d dlZd dlZd dlmZmZ d dlZd dlm	Z	 d dl
mZ d dlmZmZ G dd„ deƒZG dd	„ d	e	jeƒZdS )
é    )ÚannotationsN)ÚAnyÚOptional)Ú
BufferDict)ÚBaseTunerLayerÚcheck_adapters_to_mergec                   @  sV   e Zd ZdZdZd&dd„Zed'dd„ƒZd(dd„Z		d)d*d d!„Z			"d+d,d#d$„Z
d%S )-ÚDeloraLayer)Údelora_AÚdelora_BÚdelora_lambda)ÚrÚdelora_dropoutÚdelora_w_normÚ
base_layerú	nn.ModuleÚreturnÚNonec                 K  s˜   || _ i | _t i ¡| _t i ¡| _t i ¡| _t i ¡| _t	i dd| _
d| _g | _|| _|  ¡ }t|tjƒrC|j|j| _| _d S tdt|ƒ› ƒ‚)NT)Ú
persistentFzUnsupported layer type )r   r   ÚnnÚ
ModuleDictr   ÚParameterDictr	   r
   r   r   r   Ú_disable_adaptersÚmerged_adaptersÚkwargsÚget_base_layerÚ
isinstanceÚLinearÚin_featuresÚout_featuresÚ
ValueErrorÚtype)Úselfr   r   Úbase_layer_mod© r#   úL/home/ubuntu/.local/lib/python3.10/site-packages/peft/tuners/delora/layer.pyÚ__init__)   s   zDeloraLayer.__init__ÚAútorch.TensorÚBr   r   ÚintÚw_normc           	      C  s`   t j| jdddd}t j|jdddd}t  || ||  ¡}|| |  }|| d¡ }|S )zpCompute delta = B @ diag(delora_lambda/r / (||A_i||*||B^j||)) @ A, scaled by provided w_norm (per-input channel)é   ©Údimç-Cëâ6?©Úminr   )ÚtorchÚclampÚnormÚ
diag_embedÚ	unsqueeze)	r&   r(   r   r   r*   ÚAnÚBnÚdiagÚdeltar#   r#   r$   Ú_compute_delta=   s   zDeloraLayer._compute_deltaÚadapterÚstrc                 C  sX   || j vs
|| jvrtd|› dƒ‚|  | j | | j| | j| | j| | j| ¡}|S )NzAdapter z not found.)r	   r
   r   r:   r   r   r   )r!   r;   r9   r#   r#   r$   Úget_delta_weightI   s   ûzDeloraLayer.get_delta_weightTFÚadapter_nameÚfloatÚmodule_dropoutÚinit_weightsÚboolÚinference_moder   r   c           	      K  sÊ   |dkrt d|› ƒ‚|| j|< t t || j¡¡| j|< t t | j|¡¡| j	|< t t d¡¡| j
|< |dkr@tj|d}nt ¡ }| j t ||i¡¡ |  |||¡ |  |¡ | j| j|d dS )a¡  Internal function to create delora adapter

        Args:
            adapter_name (`str`): Name for the adapter to add.
            r (`int`): Rank for the added adapter.
            delora_lambda (`float`): Boundary for the adapter's norm.
            module_dropout (`float`): The dropout probability for disabling adapter during training.
            init_weights (`bool`): Whether to initialize weights.
        r   z?`r` should be a positive integer value but the value passed is r+   g        )Úp)rC   N)r   r   r   Ú	Parameterr1   Úemptyr   r	   r   r
   r   ÚDropoutÚIdentityr   Úupdater   Úreset_delora_parametersÚ%_move_adapter_to_device_of_base_layerÚset_adapterÚactive_adapters)	r!   r>   r   r   r@   rA   rC   r   Úmodule_dropout_layerr#   r#   r$   Úupdate_layerV   s   

zDeloraLayer.update_layerç      .@c                 C  s  || j  ¡ vr	d S |du r%tjj| j | t d¡d tj | j| ¡ ntjj| j | t d¡d tjj| j| t d¡d | j	| j
 t|ƒ¡ t ¡ / |  ¡ j}|jjdkrgtj|j
dd ¡ }ntj|jd |jd}|| j|< W d   ƒ d S 1 s‚w   Y  d S )	NTé   )ÚaÚmetar   r,   r+   )Údevice)r	   Úkeysr   ÚinitÚkaiming_uniform_ÚmathÚsqrtÚzeros_r
   r   ÚdataÚfill_r?   r1   Úno_gradr   ÚweightrT   r    r3   ÚdetachÚonesÚshaper   )r!   r>   rA   r   Úwr*   r#   r#   r$   rJ   }   s   

"ùz#DeloraLayer.reset_delora_parametersN)r   r   r   r   )r&   r'   r(   r'   r   r'   r   r)   r*   r'   r   r'   )r;   r<   r   r'   )TF)r>   r<   r   r)   r   r?   r@   r?   rA   rB   rC   rB   r   r   r   r   )TrP   )r>   r<   rA   rB   r   r?   r   r   )Ú__name__Ú
__module__Ú__qualname__Úadapter_layer_namesÚother_param_namesr%   Ústaticmethodr:   r=   rO   rJ   r#   r#   r#   r$   r      s    

ù*ür   c                      sP   e Zd Z	d!d"‡ fdd„Zd#d$dd„Zd%dd„Zd&dd„Zd'‡ fdd „Z‡  ZS )(ÚDeloraLinearTr>   r<   r   r)   r   r?   r@   rA   rB   r   r   c                   s:   t ƒ  ¡  tj| |fi |¤Ž || _|  |||||¡ d S )N)Úsuperr%   r   Ú_active_adapterrO   )r!   r   r>   r   r   r@   rA   r   ©Ú	__class__r#   r$   r%   œ   s   

zDeloraLinear.__init__FNÚ
safe_mergeÚadapter_namesúOptional[list[str]]c              	   C  sØ   t | |ƒ}|s	dS |D ]^}|| j ¡ v ri|  ¡ }|  |¡ ¡ j|jj|jj	d}t
 ¡ / |rM|jj ¡ }|| }t
 |¡ ¡ sHtd|› dƒ‚||j_n|jj |¡ W d  ƒ n1 s^w   Y  | j |¡ qdS )a^  
        Merge the active adapter weights into the base weights

        Args:
            safe_merge (`bool`, *optional*):
                If True, the merge operation will be performed in a copy of the original weights and check for NaNs
                before merging the weights. This is useful if you want to check if the merge operation will produce
                NaNs. Defaults to `False`.
            adapter_names (`list[str]`, *optional*):
                The list of adapter names that should be merged. If None, all active adapters will be merged. Defaults
                to `None`.
        N)ÚdtyperT   z1NaNs detected in the merged weights. The adapter z seems to be broken)r   r	   rU   r   r=   r_   Útor^   rq   rT   r1   r]   r[   ÚcloneÚisfiniteÚallr   Úadd_r   Úappend)r!   rn   ro   Úactive_adapterr   Údelta_weightÚorig_weightsr#   r#   r$   Úmerge«   s4   
þÿ

ÿ
€ô€êzDeloraLinear.mergec                 C  sj   | j s
t d¡ dS t| jƒdkr3| j ¡ }|| j ¡ v r*|  ¡ j	 j
|  |¡8  _
t| jƒdksdS dS )zJ
        Unmerge all merged adapter layers from the base weights.
        z Already unmerged. Nothing to do.Nr   )ÚmergedÚwarningsÚwarnÚlenr   Úpopr	   rU   r   r^   r[   r=   )r!   rx   r#   r#   r$   ÚunmergeÔ   s   

ýzDeloraLinear.unmergeÚxr'   Úargsr   r   c                 O  st  |j }| jr| jr|  ¡  | j|g|¢R i |¤Ž}n™| jr*| j|g|¢R i |¤Ž}n‰| js<| j|g|¢R i |¤Ž |¡S | j|g|¢R i |¤Ž}t |¡}| jD ]Z}|| j	vrXqP| j
| |ƒ}	tj |	| j|  | j	| ¡}
tj| j	| jdddd}tj| j| jdddd}| j| | j|  ||  }|
| }
tj |
| j| ¡}
||
7 }qP|| |j ¡ }| |¡}|S )Nr+   r,   r.   r/   r   )rq   Údisable_adaptersr|   r   r   rM   rr   r1   Ú
zeros_liker	   r   r   Ú
functionalÚlinearr   r2   r3   r
   r   r   )r!   r‚   rƒ   r   Úprevious_dtypeÚresultÚbase_outÚadd_outr;   Úx_dÚhr6   r7   Úscalingr#   r#   r$   Úforwardà   s2   




zDeloraLinear.forwardc                   s   t ƒ  ¡ }d| S )Nzdelora.)rj   Ú__repr__)r!   Úreprl   r#   r$   r     s   
zDeloraLinear.__repr__)T)r>   r<   r   r)   r   r?   r@   r?   rA   rB   r   r   )FN)rn   rB   ro   rp   r   r   )r   r   )r‚   r'   rƒ   r   r   r   r   r'   )r   r<   )	rc   rd   re   r%   r{   r   r   r   Ú__classcell__r#   r#   rl   r$   ri   š   s    	ù
)
+ri   )Ú
__future__r   rX   r}   Útypingr   r   r1   Útorch.nnr   Úpeft.tuners._buffer_dictr   Úpeft.tuners.tuners_utilsr   r   r   ÚModuleri   r#   r#   r#   r$   Ú<module>   s   