o
    Ni)<                     @   s   d dl Z d dlmZ d dlZd dlmZ d dlm  mZ d dl	m
Z
 d dlmZmZ d dlmZ ddlmZ G dd	 d	ejjZG d
d deZG dd dejeZdS )    N)Optional)Conv1D)BaseTunerLayercheck_adapters_to_merge)	transpose   )
BufferDictc                   @   s$   e Zd Zedd Zedd ZdS )UniqueBaseGradc                 C   s4   |d d d d d f | |d  }|  ||| |S )NN)save_for_backward)ctx
randlora_Arandlora_lambdarandlora_gammaout r   N/home/ubuntu/.local/lib/python3.10/site-packages/peft/tuners/randlora/layer.pyforward   s   "zUniqueBaseGrad.forwardc                 C   s^   | j \}}}||j||j||j}}}td|||}td|||}d ||fS )Nzkbj,kvj,bj->kbzkbj,kvj,kb->bj)saved_tensorstodtypetorcheinsum)r   grad_outputr   r   r   grad_randlora_lambdagrad_randlora_gammar   r   r   backward%   s   




zUniqueBaseGrad.backwardN)__name__
__module____qualname__staticmethodr   r   r   r   r   r   r	      s
    
r	   c                   @   sX   e Zd ZdZdZdejfddZede	fddZ
		dd
edede	fddZdd ZdS )RandLoraLayer)r   r   )r   
randlora_B
base_layerc                 K   s   || _ i | _i | _ti | _ti | _ti | _d | _	d | _
d| _g | _d| _|  }t|tjr<|j|j}}nt|trQt|jdrK|jjn|jj\}}|| _|| _|| _d S )NFTds_shape)r#   rscalingnn
ModuleDictrandlora_dropoutParameterDictr   r   r   r"   _disable_adaptersmerged_adapterscast_input_dtype_enabledget_base_layer
isinstanceLinearin_featuresout_featuresr   hasattrweightr$   shapekwargs)selfr#   r6   r1   r2   r   r   r   __init__7   s(   

zRandLoraLayer.__init__returnc                 C   s
   t | jS r
   )boolr,   )r7   r   r   r   mergedY   s   
zRandLoraLayer.mergedFr   r"   inference_modec	                 K   sP  |dkrt d| || j|< |dkrtj|d}
nt }
| jt||
i t| j	| j
| }| r;t|nt|d | _tjt|| jdd| j|< tjt| jt| j
| j	t| j
| j	 dd| j|< || | j|< || _|| _||vrt| jdk rt dt| j d }t| j d }d	}t| j	| j
t| j	| j
}}|jd |k rt |d
|jd ||jd |k rt |d|jd |d}|jd | j| k rt |d|jd | j| |jd | j| k rt |d
|jd | j| || j|< || j|< |r| | | | | j| j |d d S )Nr   z?`r` should be a positive integer value but the value passed is         )p   T)requires_gradzfThe `randlora_A` and `randlora_B` buffers are empty. This should not happen. Please report this issue.z{} has a size of {} but {} or greater is required; this probably happened because an additional RandLora adapter was added after the first one with incompatible shapes.r"   r   z{} has a size of {} but {} or greater is required; this probably happened because an additional RandLora adapter with a lower rank was added after the first one; loading the adapters in reverse order may solve this.)r<   )!
ValueErrorr%   r'   DropoutIdentityr)   updater(   minr1   r2   
is_integerint	num_bases	Parameterr   randnr   onesmaxr   r&   r   r"   lenlistvaluesr5   formatreset_randlora_parameters%_move_adapter_to_device_of_base_layerset_adapteractive_adapters)r7   adapter_namer   r"   r%   randlora_alphar)   init_weightsr<   r6   randlora_dropout_layerrI   randlora_A_paramrandlora_B_param
error_tmplmax_dimmin_dimr   r   r   update_layer]   sZ   





zRandLoraLayer.update_layerc                 C   sv   || j  v r9t $ tj| j |  tj| j| dt	| j| j
  W d    d S 1 s2w   Y  d S d S )Nr?   )r   keysr   no_gradr'   initzeros_	constant_r   rM   r5   )r7   rV   r   r   r   rR      s   
&"z'RandLoraLayer.reset_randlora_parametersN)F)r   r   r   adapter_layer_namesother_param_namesr'   Moduler8   propertyr:   r;   r   r_   rR   r   r   r   r   r!   2   s    "	
Pr!   c                       s   e Zd Z						d!dedededed	ed
ededededdf fddZd"dede	e
e  ddfddZd#ddZd$deejejf fddZdejfddZdejdejfddZdef fdd Z  ZS )%r0   r   r=   FTr   r"   rV   r%   rW   r)   fan_in_fan_outis_target_conv_1d_layerrX   r9   Nc              	      sP   t tj|   tj| |fi | || _|| _| |||||||
 |	| _d S r
   )	superr'   r0   r8   r!   ri   _active_adapterr_   rj   )r7   r#   r   r"   rV   r%   rW   r)   ri   rj   rX   r6   	__class__r   r   r8      s   
zLinear.__init__
safe_mergeadapter_namesc                 C   s   t | |}|s	dS |D ]M}|| j v rX|  }|jj}|rB|jj }|| |7 }t	
| s:td| d|||j_n| |}|j j||7  _| j| qdS )a^  
        Merge the active adapter weights into the base weights

        Args:
            safe_merge (`bool`, *optional*):
                If True, the merge operation will be performed in a copy of the original weights and check for NaNs
                before merging the weights. This is useful if you want to check if the merge operation will produce
                NaNs. Defaults to `False`.
            adapter_names (`list[str]`, *optional*):
                The list of adapter names that should be merged. If None, all active adapters will be merged. Defaults
                to `None`.
        Nz1NaNs detected in the merged weights. The adapter z seems to be broken)r   r   r`   r.   r4   r   datacloneget_delta_weightr   isfiniteallrB   r   r,   append)r7   ro   rp   active_adapterr#   
orig_dtypeorig_weightsdelta_weightr   r   r   merge   s(   


zLinear.mergec                 C   s   | j s
td dS t| jdkr>|  }|jj}| j }|| j	
 v r5| |}|j j||8  _t| jdksdS dS )zW
        This method unmerges all merged adapter layers from the base weights.
        z Already unmerged. Nothing to do.Nr   )r;   warningswarnrN   r,   r.   r4   r   popr   r`   rs   rq   r   )r7   r#   rx   rw   rz   r   r   r   unmerge   s   


zLinear.unmergec                 C   s,  | j | }| j| }|du r|j}|j}|jdko"|tjkp"|tjk}| j| 	|}| j
| 	|}|rE| }| }| }| }t| j| jt| j| j}	}
|ddd| jd|	f 	|}|d|
d| jddf 	|}|jdd}t|||jdd}|	| jkr||fS |j|jfS )a4  
        Performs scaling on the smallest random base (randlora_A) and returns randlora_A and randlora_B in the correct
        order to fit the target layers' dimensions

        Args:
            adapter (str):
                The name of the adapter for which the delta weight should be computed.
        Ncpur?   )	start_dim)end_dim)r   r"   devicer   typer   float16bfloat16r   r   r   floatrF   r2   r1   rM   rI   flattenr	   applyT)r7   adapterr   r   r"   r   cast_to_fp32r   r   r^   r]   sliced_Asliced_Bupdate_Bupdate_Ar   r   r   get_scaled_bases  s*   


""
zLinear.get_scaled_basesc                 C   s:   |  |\}}|j|j j}t|| j}| j| }|| S )z
        Compute the delta weight for the given adapter.

        Args:
            adapter (str):
                The name of the adapter for which the delta weight should be computed.
        )r   r   r   ri   r&   )r7   r   r   r   rE   output_tensorr&   r   r   r   rs   6  s
   	
zLinear.get_delta_weightxc                 O   s   |j }| jr| jr|   | j|g|R i |}nT| jr*| j|g|R i |}nD| j|g|R i |}| jD ]4}|| j vrCq9| j| }| j	||j
d\}}	||	j }| j| }
|tt||||	|
  }q9||}|S )N)r   )r   disable_adaptersr;   r   r#   rU   r   r`   r)   r   r   r   r&   Flinear)r7   r   argsr6   previous_dtyperesultrw   dropoutr   r   r&   r   r   r   r   G  s$   


"
zLinear.forwardc                    s   t   }d| S )Nz	randlora.)rk   __repr__)r7   reprm   r   r   r   \  s   
zLinear.__repr__)r   r   r=   FFT)FN)r9   Nr
   )r   r   r   r   strrH   r   r:   r8   r   rO   r{   r   tupler   Tensorr   rs   r   r   __classcell__r   r   rm   r   r0      sD    	
 
*0r0   )r|   typingr   r   torch.nnr'   torch.nn.functional
functionalr   transformers.pytorch_utilsr   peft.tuners.tuners_utilsr   r   peft.utils.otherr   _buffer_dictr   autogradFunctionr	   r!   r0   r   r   r   r   <module>   s    