o
    Ni@:                     @   s   d dl Z d dlmZmZ d dlZd dlZd dlZd dlmZ d dlm	Z	 d dl
mZ d dlmZ ejejejdkrDd dlmZ nd dlmZ G d	d
 d
e	ZG dd dejeZG dd dZdS )    N)AnyOptional)nn)	LoraLayer)check_adapters_to_merge)	transposez4.33.0)deepspeed_configc                       sJ   e Zd ZdZdZdejddf fddZ	dd	efd
dZ	dd Z
  ZS )AdaLoraLayer)lora_Alora_Blora_Elora_embedding_Alora_embedding_B)r
lora_alphascalinglora_dropoutranknum
base_layerreturnNc                    s@   t  | ti | _ti | _ti | _ti | _d S N)super__init__r   ParameterDictr   r
   r   r   )selfr   	__class__ M/home/ubuntu/.local/lib/python3.10/site-packages/peft/tuners/adalora/layer.pyr   )   s
   zAdaLoraLayer.__init__Finference_modec           	      K   s   |dk rt d| || j|< || j|< |dkr tj|d}nt }|| j|< tt	|| j
| j|< tt	|d| j|< tt	| j|| j|< tjt	ddd| j|< | j| jt| d| j| _|dkrs|nt|| j|< |r| | | | | j| j|d d S )	Nr   z?`r` should be a positive integer or 0, but the value passed is         )p   F)requires_grad)r   )
ValueErrorr   r   r   DropoutIdentityr   	Parametertorchrandnin_featuresr
   r   out_featuresr   r   datafill_floatr#   r   reset_lora_parameters%_move_adapter_to_device_of_base_layerset_adapteractive_adapters)	r   adapter_namer   r   r   init_lora_weightsr   kwargslora_dropout_layerr   r   r   update_layer0   s&   




zAdaLoraLayer.update_layerc                 C   sX   || j  v r*tj| j|  tjj| j | ddd tjj| j| ddd d S d S )Nr    g{Gz?)meanstd)r
   keysr   initzeros_r   normal_r   )r   r3   r   r   r   r/   Q   s
   z"AdaLoraLayer.reset_lora_parametersF)__name__
__module____qualname__adapter_layer_namesother_param_namesr   Moduler   boolr7   r/   __classcell__r   r   r   r   r	   "   s    
!r	   c                       s   e Zd Z					d dejdeded	ed
edededdf fddZ	d!dede
ee  ddfddZd"ddZdejfddZdejdededejfddZdef fddZ  ZS )#	SVDLinearr   r"   r    FTr   r3   r   r   r   fan_in_fan_outr4   r   Nc           	         sD   t    t| | d|  j_|| _|| _| ||||| d S )NF)	r   r   r	   get_base_layerweightr#   rH   _active_adapterr7   )	r   r   r3   r   r   r   rH   r4   r5   r   r   r   r   Z   s   
zSVDLinear.__init__
safe_mergeadapter_namesc                 C   s   t | |}|s	dS |D ]A}|  }|| j v rL|r;|jj }|| |7 }t	|
 s6td| d||j_n|j j| |7  _| j| qdS )a^  
        Merge the active adapter weights into the base weights

        Args:
            safe_merge (`bool`, *optional*):
                If True, the merge operation will be performed in a copy of the original weights and check for NaNs
                before merging the weights. This is useful if you want to check if the merge operation will produce
                NaNs. Defaults to `False`.
            adapter_names (`List[str]`, *optional*):
                The list of adapter names that should be merged. If None, all active adapters will be merged. Defaults
                to `None`.
        Nz1NaNs detected in the merged weights. The adapter z seems to be broken)r   rI   r
   r:   rJ   r,   cloneget_delta_weightr(   isfiniteallr$   merged_adaptersappend)r   rL   rM   active_adapterr   orig_weightsr   r   r   mergen   s$   


zSVDLinear.mergec                 C   sj   | j s
td dS t| jdkr3| j }|| j v r*|  j	 j
| |8  _
t| jdksdS dS )zW
        This method unmerges all merged adapter layers from the base weights.
        z Already unmerged. Nothing to do.Nr   )mergedwarningswarnlenrR   popr
   r:   rI   rJ   r,   rO   )r   rT   r   r   r   unmerge   s   

zSVDLinear.unmergec                 C   s>   t | j| | j| | j|   | j| j|  | j| d  S Ngh㈵>)r   r   r
   r   rH   r   r   )r   adapterr   r   r   rO      s   $zSVDLinear.get_delta_weightxargsr5   c                 O   s   | j r| jr
|   | j|g|R i |}|S | jr)| j|g|R i |}|S | j|g|R i |}| jD ]C}|| j vrBq8| j| }| j| }| j| }| j	| }	| j
| }
| j| d }| ||j}||	||| j |j |
 | 7 }q8|S r]   )disable_adaptersrW   r\   r   r2   r
   r:   r   r   r   r   r   _cast_input_dtypedtypeT)r   r_   r`   r5   resultrT   r
   r   r   dropoutr   r   r   r   r   forward   s*   





&zSVDLinear.forwardc                    s   t   }d| S )Nzadalora.)r   __repr__)r   repr   r   r   rh      s   
zSVDLinear.__repr__)r   r"   r    FT)FN)r   N)r?   r@   rA   r   rD   strintr.   rE   r   r   listrV   r\   r(   TensorrO   r   rg   rh   rF   r   r   r   r   rG   X   s8    
 
%rG   c                   @   sp   e Zd ZdZdd Zdd Zdd Zdd	 Zd
efddZ	dd Z
dd Zdd Zdd ZdddZdd ZdS )RankAllocatorz
    The RankAllocator for AdaLoraModel. Paper: https://openreview.net/pdf?id=lq62uWRJjiY

    Args:
        config ([`AdaLoraConfig`]): The configuration of the AdaLora model.
        model: the model that we apply AdaLoRA to.

    c                 C   sb   || _ || _|j| _|j| _| jdkr| jdk sJ | jdkr$| jdk s&J |   | | d S )Nr   r"   )peft_configr3   beta1beta2	reset_ipt_set_budget_scheduler)r   modelro   r3   r   r   r   r      s   zRankAllocator.__init__c                 C   s   || j _d S r   )ro   
total_step)r   ru   r   r   r   set_total_step   s   zRankAllocator.set_total_stepc                 C   s   i | _ i | _i | _d S r   )iptexp_avg_iptexp_avg_unc)r   r   r   r   rr      s   
zRankAllocator.reset_iptc                 C   s|   d| _ t | _| D ] \}}d| j |v r+|  j |d7  _ | j|dd qt| j| _| j	j
t| j | _d S )Nr   lora_A.r
   %s)init_bgtsetname_setnamed_parametersr3   sizeaddreplacesortedro   target_rrZ   
target_bgt)r   rt   nr!   r   r   r   rs      s   z#RankAllocator._set_budget_schedulerstepc                 C   s   | j j}| j j}| j j}||kr| j}d}||fS ||| kr(| j}d}||fS d|| || |   }t| j| j |d  | j }|| j j dkrMdnd}||fS )NFTr"      r   )ro   tinittfinalru   r|   r   rk   deltaT)r   r   r   r   ru   budgetmask_ind	mul_coeffr   r   r   budget_schedule   s   
zRankAllocator.budget_schedulec              	   C   s:  |  D ]\}}d|v r| j|v r|| jvr.t|| j|< t|| j|< t|| j|< t ` t d urNdd l	}|j
|}||   | j|< n||j   | j|< | j| j|  d| j | j|   | j|< | j| j|  d| j | j| | j|     | j|< W d    n1 sw   Y  qd S )Nlora_r   r"   )r   r3   rw   r(   
zeros_likerx   ry   no_gradr   	deepspeedutilssafe_get_full_gradabsdetachgradrp   rq   )r   rt   r   r!   r   r   r   r   r   
update_ipt   s$   


*0
zRankAllocator.update_iptc                 C   s   | j | | j|  S r   )rx   ry   )r   r   r   r   r   _element_score  s   zRankAllocator._element_scorec                 C   s&   |j ddd}|d|d }|S )Nr"   Fdimkeepdim)sumview)r   ipt_Eipt_ABsum_iptr   r   r   _combine_ipt  s   zRankAllocator._combine_iptc                 C   s  i }i }i }|  D ]w\}}d| j |v r:| |}tj|ddd}	|dd}
|
|vr3|	g||
< n||
 |	 d| j |v rj| |}tj|dd	dd
d}	|dd}
|
|vrc|	g||
< n||
 |	 d| j |v r| |}|dd}
|||
< q
g }|D ])}
||
 }tj||
 dd}| 	||}|
d }|d
d||< ||d
 qtj
t|| j| dd  }i }t 2 |  D ]$\}}d| j |v r||| |kd || |k d
 ||< qW d    |S 1 sw   Y  |S )Nrz   r"   Tr   r
   r{   zlora_B.r   Fr   r   lora_E.r   )r   )kr    )r   r3   r   r(   r8   r   rS   r   catr   kthvaluer|   itemr   masked_fill_tolist)r   rt   r   	value_ipt
vector_ipttriplet_iptr   r!   	entry_iptcomb_iptname_m	all_scorer   r   r   name_Emask_thresholdrank_patternr   r   r   mask_to_budget  sf   




zRankAllocator.mask_to_budgetFc                 C   sT   || j j| j j k r| | | |\}}|s|r$| ||}||fS d }||fS r   )ro   ru   r   r   r   r   )r   rt   global_step
force_maskr   r   r   r   r   r   update_and_allocateQ  s   
z!RankAllocator.update_and_allocatec                 C   s   d}| j tt| vrd}t ? | D ]1\}}d| j  |v rI|s(|n	|d| j  d}t|| 	d
|j}||  d qW d    d S 1 sUw   Y  d S )NFTr   . r   r    )r3   nextiterr:   r(   r   r   r   rm   	unsqueezetodevicer   rE   )r   rt   r   is_adapter_name_truncatedr   r!   keymaskr   r   r   mask_using_rank_pattern]  s   
"z%RankAllocator.mask_using_rank_patternNr>   )r?   r@   rA   __doc__r   rv   rr   rs   rk   r   r   r   r   r   r   r   r   r   r   r   rn      s    	
4rn   )rX   typingr   r   	packagingr(   transformersr   peft.tuners.lorar   peft.tuners.tuners_utilsr   
peft.utilsr   versionparse__version__transformers.integrationsr   transformers.deepspeedr	   rD   rG   rn   r   r   r   r   <module>   s   6k