o
    8wi*                     @   s   d dl Z d dlmZ d dlZd dlmZ d dlm  mZ d dl	m
Z
 d dlmZmZ d dlmZ G dd deZG dd	 d	ejeZdS )
    N)Optional)Conv1D)BaseTunerLayercheck_adapters_to_merge)	transposec                   @   sf   e Zd ZdZdejfddZedefddZ				dd
e
dedededededefddZdd ZdS )VBLoRALayer)vblora_logits_Avblora_logits_Bvblora_vector_bank
base_layerc                 K   s   || _ i | _i | _ti | _ti | _ti | _d| _	g | _
|  }t|tjr3|j|j}}nt|trHt|jdrB|jjn|jj\}}|| _|| _|| _d S )NFds_shape)r   rtopknn
ModuleDictvblora_dropoutParameterDictr   r	   _disable_adaptersmerged_adaptersget_base_layer
isinstanceLinearin_featuresout_featuresr   hasattrweightr   shapekwargs)selfr   r   r   r    r   U/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/peft/tuners/vblora/layer.py__init__   s"   

zVBLoRALayer.__init__returnc                 C   s
   t | jS N)boolr   )r   r   r   r    merged9   s   
zVBLoRALayer.merged        {Gz?adapter_namer   r   num_vectorsvector_lengthr   init_logits_stdc	           
      C   s4  |dkrt d| d|dkrt d| d| j| dkr*t d| j d| | j| dkr<t d| j d| || j|< || j|< |dkrQtj|d	}	nt }	| j	t
||	i tjt|| j| |d
d| j|< tjt| j| ||d
d| j|< || _| || | | | | j d S )Nr   z`r` z# should be a positive integer valuez`topk` z`in_features` z& must be divisible by `vector_length` z`out_features` r&   )pT)requires_grad)
ValueErrorr   r   r   r   r   DropoutIdentityr   updater   	Parametertorchzerosr   r	   r
   reset_vblora_logits%_move_adapter_to_device_of_base_layerset_adapteractive_adapters)
r   r(   r
   r   r   r)   r*   r   r+   vblora_dropout_layerr   r   r    update_layer=   s4   


zVBLoRALayer.update_layerc                 C   sl   || j  v r4t  tj| j | d| tj| j| d| W d    d S 1 s-w   Y  d S d S )Nr   )r   keysr3   no_gradr   initnormal_r	   )r   r(   r+   r   r   r    r5   f   s   
"zVBLoRALayer.reset_vblora_logitsN)r&   r'   )__name__
__module____qualname__adapter_layer_namesr   Moduler!   propertyr$   r%   strintfloatr:   r5   r   r   r   r    r      s.    	
)r   c                       s   e Zd Z					d"dedededed	ed
ededededdf fddZd#dedee	e  ddfddZ
d$ddZdejdejfddZd%deejejf fddZdejfddZdejdejfd d!Z  ZS )&r      r&   r'   Fr(   r   r)   r*   r   r   r+   fan_in_fan_outis_target_conv_1d_layerr"   Nc              
      sR   t tj|   tj| |fi | |
| _|| _| ||||||||	 || _d S r#   )	superr   r   r!   r   rI   _active_adapterr:   rJ   )r   r   r
   r(   r   r)   r*   r   r   r+   rI   rJ   r   	__class__r   r    r!   o   s   
zLinear.__init__
safe_mergeadapter_namesc                 C   s   t | |}|s	dS |D ]A}|| j v rL|  }|r;|jj }|| |7 }t	|
 s6td| d||j_n|j j| |7  _| j| qdS )a^  
        Merge the active adapter weights into the base weights

        Args:
            safe_merge (`bool`, *optional*):
                If True, the merge operation will be performed in a copy of the original weights and check for NaNs
                before merging the weights. This is useful if you want to check if the merge operation will produce
                NaNs. Defaults to `False`.
            adapter_names (`List[str]`, *optional*):
                The list of adapter names that should be merged. If None, all active adapters will be merged. Defaults
                to `None`.
        Nz1NaNs detected in the merged weights. The adapter z seems to be broken)r   r   r;   r   r   datacloneget_delta_weightr3   isfiniteallr.   r   append)r   rO   rP   active_adapterr   orig_weightsr   r   r    merge   s$   


zLinear.mergec                 C   sj   | j s
td d S t| jdkr3| j }|| j v r*|  j	 j
| |8  _
t| jdksd S d S )Nz Already unmerged. Nothing to do.r   )r%   warningswarnlenr   popr   r;   r   r   rQ   rS   )r   rW   r   r   r    unmerge   s   

zLinear.unmergelogitsc                 C   s8   |j |dd\}}tj|dd}|d||  dS )N)dim)r   Fsoftmax	unsqueezesum)r   r_   r
   r   top_k_logitsindicestopk_weightsr   r   r    _get_low_rank_matrix   s   zLinear._get_low_rank_matrixc           	      C   s   | j | }| j| }| jr|d   rtd| j| |j}| j	| }|r5|
 }|
 }|
 }| ||||jd d}| |||ddd|jd }||fS )N)r   r   zoFound infinity values in VB-LoRA logits. Ensure training was not resumed from a `save_only_topk_weights` model.r   r`      rH   )r   r	   trainingisinfanyRuntimeErrorr
   todevicer   rG   rj   reshaper   r   )	r   adaptercast_to_fp32r   r	   r
   r   ABr   r   r    _get_lora_matrices   s$   


zLinear._get_lora_matricesc                 C   sP   | j | j}| j | j}|jdko|tjk}| ||\}}t|| | j}|S )z
        Compute the delta weight for the given adapter.

        Args:
            adapter (str):
                The name of the adapter for which the delta weight should be computed.
        cpu)	r   rq   dtypetyper3   float16rw   r   rI   )r   rs   rq   ry   rt   ru   rv   output_tensorr   r   r    rS      s   zLinear.get_delta_weightxc           
      O   s   |j }| jr| jr|   | j|g|R i |}nM| jr*| j|g|R i |}n=| j|g|R i |}| jD ]-}|| j vrCq9| |\}}|	| j
| j }| j| }	|tt|	||| }q9|	|}|S r#   )ry   disable_adaptersr%   r^   r   r8   r   r;   rw   rp   r
   r   rc   linear)
r   r}   argsr   previous_dtyperesultrW   ru   rv   dropoutr   r   r    forward   s"   


zLinear.forward)rH   r&   r'   FF)FN)r"   N)F)r?   r@   rA   rE   rF   rG   r$   r!   r   listrY   r^   r3   tensorTensorrj   tuplerw   rS   r   __classcell__r   r   rM   r    r   m   sB    
	
 
#
r   )rZ   typingr   r3   torch.nnr   torch.nn.functional
functionalrc   transformers.pytorch_utilsr   peft.tuners.tuners_utilsr   r   peft.utils.otherr   r   r   r   r   r   r    <module>   s   R