o
    8wih/                     @   s   d dl Z d dlmZ d dlZd dlmZ d dlm  mZ d dl	m
Z
 d dlmZmZ d dlmZ ddlmZ G dd	 d	eZG d
d dejeZdS )    N)Optional)Conv1D)BaseTunerLayercheck_adapters_to_merge)	transpose   )
BufferDictc                   @   s`   e Zd ZdZdZdejfddZede	fddZ
		dd
ededefddZddefddZdS )	VeraLayer)vera_lambda_bvera_lambda_d)vera_Avera_B
base_layerc                 K   s   || _ i | _ti | _ti | _ti | _d | _d | _	d| _
g | _|  }t|tjr6|j|j}}nt|trKt|jdrE|jjn|jj\}}|| _|| _|| _d S )NFds_shape)r   rnn
ModuleDictvera_dropoutParameterDictr
   r   r   r   _disable_adaptersmerged_adaptersget_base_layer
isinstanceLinearin_featuresout_featuresr   hasattrweightr   shapekwargs)selfr   r   r   r    r!   S/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/peft/tuners/vera/layer.py__init__"   s$   

zVeraLayer.__init__returnc                 C   s
   t | jS N)boolr   )r    r!   r!   r"   merged@   s   
zVeraLayer.merged皙?r   r   	d_initialc                 C   s  |dkrt d| || j|< |dkrtj|d}nt }| jt||i tjt	
| jdd| j|< tjt	|dd| j|< || _|| _||vrt| jdk rZt dt| j d }	t| j d }
d	}|	jd | jk rt |d
|	jd | j|
jd | jk rt |d|
jd | jd}|	jd | j| k rt |d
|	jd | j| |
jd | j| k rt |d|
jd | j| |	| j|< |
| j|< |r| j||d | | | | j d S )Nr   z?`r` should be a positive integer value but the value passed is         )pT)requires_grad   z^The `vera_A` and `vera_B` buffers are empty. This should not happen. Please report this issue.z{} has a size of {} but {} or greater is required; this probably happened because an additional VeRA adapter was added after the first one with incompatible shapes.r   r   z{} has a size of {} but {} or greater is required; this probably happened because an additional VeRA adapter with a lower rank was added after the first one; loading the adapters in reverse order may solve this.r)   )
ValueErrorr   r   DropoutIdentityr   updater   	Parametertorchonesr   r
   randnr   r   r   lenlistvaluesr   r   formatreset_vera_parameters%_move_adapter_to_device_of_base_layerset_adapteractive_adapters)r    adapter_namer   r   r   r   init_weightsr)   vera_dropout_layervera_A_paramvera_B_param
error_tmplr!   r!   r"   update_layerD   sH   




zVeraLayer.update_layerc                 C   sj   || j  v r3t  tj| j | | tj| j|  W d    d S 1 s,w   Y  d S d S r%   )	r   keysr4   no_gradr   initzeros_fill_r
   )r    r?   r)   r!   r!   r"   r;      s   
"zVeraLayer.reset_vera_parametersN)r(   )__name__
__module____qualname__adapter_layer_namesother_param_namesr   Moduler#   propertyr&   r'   r   floatrE   r;   r!   r!   r!   r"   r	      s    
Br	   c                       s   e Zd Z						d dededed	ed
edededededdf fddZd!dede	e
e  ddfddZd"ddZdejfddZdejdejfddZdef fddZ  ZS )#r   r   r*   FTr(   r   r   r?   r   r   fan_in_fan_outis_target_conv_1d_layerr@   r)   r$   Nc              	      sR   t tj|   tj| |fi | || _|| _| j||||||	|
d || _d S )Nr.   )	superr   r   r#   r	   rS   _active_adapterrE   rT   )r    r   r   r   r?   r   r   rS   rT   r@   r)   r   	__class__r!   r"   r#      s   
zLinear.__init__
safe_mergeadapter_namesc                 C   s   t | |}|s	dS |D ]A}|| j v rL|  }|r;|jj }|| |7 }t	|
 s6td| d||j_n|j j| |7  _| j| qdS )a^  
        Merge the active adapter weights into the base weights

        Args:
            safe_merge (`bool`, *optional*):
                If True, the merge operation will be performed in a copy of the original weights and check for NaNs
                before merging the weights. This is useful if you want to check if the merge operation will produce
                NaNs. Defaults to `False`.
            adapter_names (`List[str]`, *optional*):
                The list of adapter names that should be merged. If None, all active adapters will be merged. Defaults
                to `None`.
        Nz1NaNs detected in the merged weights. The adapter z seems to be broken)r   r   rF   r   r   datacloneget_delta_weightr4   isfiniteallr/   r   append)r    rY   rZ   active_adapterr   orig_weightsr!   r!   r"   merge   s$   


zLinear.mergec                 C   sj   | j s
td d S t| jdkr3| j }|| j v r*|  j	 j
| |8  _
t| jdksd S d S )Nz Already unmerged. Nothing to do.r   )r'   warningswarnr7   r   popr   rF   r   r   r[   r]   )r    ra   r!   r!   r"   unmerge   s   

zLinear.unmergec                 C   s   | j | }| j| }|j}|j}|jdko|tjkp|tjk}| j| }| j	| }|r;|
 }|
 }|
 }|
 }|ddd| jf |j}	|d| jddf |j}
|d}|d}t||
 ||	  | j}|rw|j|d}|S )z
        Compute the delta weight for the given adapter.

        Args:
            adapter (str):
                The name of the adapter for which the delta weight should be computed.
        cpuN)dtype)r   r   devicerj   typer4   float16bfloat16r   r
   rR   r   tor   	unsqueezer   rS   )r    adapterr   r   rk   rj   cast_to_fp32lambda_dlambda_bsliced_Asliced_Boutput_tensorr!   r!   r"   r]      s(   





zLinear.get_delta_weightxc              
   O   s4  |j }| jr| jr|   | j|g|R i |}ny| jr*| j|g|R i |}ni| j|g|R i |}| jD ]Y}|| j vrCq9| j| }| j| }| j	| }	| j
| }
|	d d d | jf |j}|
d | jd d f |j}| j| }||j }||t|t||| |  }q9||}|S r%   )rj   disable_adaptersr'   rg   r   r>   r   rF   r
   r   r   r   ro   rk   r   r   Flinear)r    rx   argsr   previous_dtyperesultra   rs   rt   r   r   ru   rv   dropoutr!   r!   r"   forward   s,   





&
zLinear.forwardc                    s   t   }d| S )Nzvera.)rU   __repr__)r    reprW   r!   r"   r     s   
zLinear.__repr__)r   r*   FFTr(   )FN)r$   N)rK   rL   rM   r   strintrR   r&   r#   r   r8   rc   rg   r4   Tensorr]   r   r   __classcell__r!   r!   rW   r"   r      sB    	
 
&
'"r   )rd   typingr   r4   torch.nnr   torch.nn.functional
functionalrz   transformers.pytorch_utilsr   peft.tuners.tuners_utilsr   r   peft.utils.otherr   _buffer_dictr   r	   r   r!   r!   r!   r"   <module>   s   p