o
    8wi;                     @   s   d dl Z d dlmZ d dlZd dlmZ d dlm  mZ d dl	m
Z
 d dlmZmZ d dlmZ ddlmZ G dd	 d	ejjZG d
d deZG dd dejeZdS )    N)Optional)Conv1D)BaseTunerLayercheck_adapters_to_merge)	transpose   )
BufferDictc                   @   s$   e Zd Zedd Zedd ZdS )UniqueBaseGradc                 C   s4   |d d d d d f | |d  }|  ||| |S )NN)save_for_backward)ctx
randlora_Arandlora_lambdarandlora_gammaout r   W/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/peft/tuners/randlora/layer.pyforward   s   "zUniqueBaseGrad.forwardc                 C   s^   | j \}}}||j||j||j}}}td|||}td|||}d ||fS )Nzkbj,kvj,bj->kbzkbj,kvj,kb->bj)saved_tensorstodtypetorcheinsum)r   grad_outputr   r   r   grad_randlora_lambdagrad_randlora_gammar   r   r   backward%   s   




zUniqueBaseGrad.backwardN)__name__
__module____qualname__staticmethodr   r   r   r   r   r   r	      s
    
r	   c                   @   sP   e Zd ZdZdZdejfddZede	fddZ
d	ed
efddZdd ZdS )RandLoraLayer)r   r   )r   
randlora_B
base_layerc                 K   s   || _ i | _i | _ti | _ti | _ti | _d | _	d | _
d| _g | _d| _|  }t|tjr<|j|j}}nt|trQt|jdrK|jjn|jj\}}|| _|| _|| _d S )NFTds_shape)r#   rscalingnn
ModuleDictrandlora_dropoutParameterDictr   r   r   r"   _disable_adaptersmerged_adapterscast_input_dtype_enabledget_base_layer
isinstanceLinearin_featuresout_featuresr   hasattrweightr$   shapekwargs)selfr#   r6   r1   r2   r   r   r   __init__7   s(   

zRandLoraLayer.__init__returnc                 C   s
   t | jS r
   )boolr,   )r7   r   r   r   mergedY   s   
zRandLoraLayer.mergedr   r"   c                 C   sL  |dkrt d| || j|< |dkrtj|d}nt }| jt||i t| j	| j
| }	|	 r;t|	nt|	d | _tjt|| jdd| j|< tjt| jt| j
| j	t| j
| j	 dd| j|< || | j|< || _|| _||vrt| jdk rt dt| j d }
t| j d }d	}t| j	| j
t| j	| j
}}|jd |k rt |d
|jd ||
jd |k rt |d|
jd |d}|
jd | j| k rt |d|
jd | j| |jd | j| k rt |d
|jd | j| |
| j|< || j|< |r| | | | | | j  d S )Nr   z?`r` should be a positive integer value but the value passed is         )p   T)requires_gradzfThe `randlora_A` and `randlora_B` buffers are empty. This should not happen. Please report this issue.z{} has a size of {} but {} or greater is required; this probably happened because an additional RandLora adapter was added after the first one with incompatible shapes.r"   r   z{} has a size of {} but {} or greater is required; this probably happened because an additional RandLora adapter with a lower rank was added after the first one; loading the adapters in reverse order may solve this.)!
ValueErrorr%   r'   DropoutIdentityr)   updater(   minr1   r2   
is_integerint	num_bases	Parameterr   randnr   onesmaxr   r&   r   r"   lenlistvaluesr5   formatreset_randlora_parameters%_move_adapter_to_device_of_base_layerset_adapteractive_adapters)r7   adapter_namer   r"   r%   randlora_alphar)   init_weightsrandlora_dropout_layerrH   randlora_A_paramrandlora_B_param
error_tmplmax_dimmin_dimr   r   r   update_layer]   sZ   






zRandLoraLayer.update_layerc                 C   sv   || j  v r9t $ tj| j |  tj| j| dt	| j| j
  W d    d S 1 s2w   Y  d S d S )Nr>   )r   keysr   no_gradr'   initzeros_	constant_r   rL   r5   )r7   rU   r   r   r   rQ      s   
&"z'RandLoraLayer.reset_randlora_parametersN)r   r   r   adapter_layer_namesother_param_namesr'   Moduler8   propertyr:   r;   r   r^   rQ   r   r   r   r   r!   2   s    "
Nr!   c                       s   e Zd Z						d!dedededed	ed
ededededdf fddZd"dede	e
e  ddfddZd#ddZd$deejejf fddZdejfddZdejdejfddZdef fdd Z  ZS )%r0   r   r<   FTr   r"   rU   r%   rV   r)   fan_in_fan_outis_target_conv_1d_layerrW   r9   Nc              	      sP   t tj|   tj| |fi | || _|| _| |||||||
 |	| _d S r
   )	superr'   r0   r8   r!   rh   _active_adapterr^   ri   )r7   r#   r   r"   rU   r%   rV   r)   rh   ri   rW   r6   	__class__r   r   r8      s   
zLinear.__init__
safe_mergeadapter_namesc                 C   s   t | |}|s	dS |D ]M}|| j v rX|  }|jj}|rB|jj }|| |7 }t	
| s:td| d|||j_n| |}|j j||7  _| j| qdS )a^  
        Merge the active adapter weights into the base weights

        Args:
            safe_merge (`bool`, *optional*):
                If True, the merge operation will be performed in a copy of the original weights and check for NaNs
                before merging the weights. This is useful if you want to check if the merge operation will produce
                NaNs. Defaults to `False`.
            adapter_names (`list[str]`, *optional*):
                The list of adapter names that should be merged. If None, all active adapters will be merged. Defaults
                to `None`.
        Nz1NaNs detected in the merged weights. The adapter z seems to be broken)r   r   r_   r.   r4   r   datacloneget_delta_weightr   isfiniteallrA   r   r,   append)r7   rn   ro   active_adapterr#   
orig_dtypeorig_weightsdelta_weightr   r   r   merge   s(   


zLinear.mergec                 C   s   | j s
td dS t| jdkr>|  }|jj}| j }|| j	
 v r5| |}|j j||8  _t| jdksdS dS )zW
        This method unmerges all merged adapter layers from the base weights.
        z Already unmerged. Nothing to do.Nr   )r;   warningswarnrM   r,   r.   r4   r   popr   r_   rr   rp   r   )r7   r#   rw   rv   ry   r   r   r   unmerge   s   


zLinear.unmergec                 C   s,  | j | }| j| }|du r|j}|j}|jdko"|tjkp"|tjk}| j| 	|}| j
| 	|}|rE| }| }| }| }t| j| jt| j| j}	}
|ddd| jd|	f 	|}|d|
d| jddf 	|}|jdd}t|||jdd}|	| jkr||fS |j|jfS )a4  
        Performs scaling on the smallest random base (randlora_A) and returns randlora_A and randlora_B in the correct
        order to fit the target layers' dimensions

        Args:
            adapter (str):
                The name of the adapter for which the delta weight should be computed.
        Ncpur>   )	start_dim)end_dim)r   r"   devicer   typer   float16bfloat16r   r   r   floatrE   r2   r1   rL   rH   flattenr	   applyT)r7   adapterr   r   r"   r   cast_to_fp32r   r   r]   r\   sliced_Asliced_Bupdate_Bupdate_Ar   r   r   get_scaled_bases  s*   


""
zLinear.get_scaled_basesc                 C   s:   |  |\}}|j|j j}t|| j}| j| }|| S )z
        Compute the delta weight for the given adapter.

        Args:
            adapter (str):
                The name of the adapter for which the delta weight should be computed.
        )r   r   r   rh   r&   )r7   r   r   r   rD   output_tensorr&   r   r   r   rr   4  s
   	
zLinear.get_delta_weightxc                 O   s   |j }| jr| jr|   | j|g|R i |}nT| jr*| j|g|R i |}nD| j|g|R i |}| jD ]4}|| j vrCq9| j| }| j	||j
d\}}	||	j }| j| }
|tt||||	|
  }q9||}|S )N)r   )r   disable_adaptersr;   r~   r#   rT   r   r_   r)   r   r   r   r&   Flinear)r7   r   argsr6   previous_dtyperesultrv   dropoutr   r   r&   r   r   r   r   E  s$   


"
zLinear.forwardc                    s   t   }d| S )Nz	randlora.)rj   __repr__)r7   reprl   r   r   r   Z  s   
zLinear.__repr__)r   r   r<   FFT)FN)r9   Nr
   )r   r   r   r   strrG   r   r:   r8   r   rN   rz   r~   tupler   Tensorr   rr   r   r   __classcell__r   r   rl   r   r0      sD    	
 
*0r0   )r{   typingr   r   torch.nnr'   torch.nn.functional
functionalr   transformers.pytorch_utilsr   peft.tuners.tuners_utilsr   r   peft.utils.otherr   _buffer_dictr   autogradFunctionr	   r!   r0   r   r   r   r   <module>   s    