o
    8wÖië9  ã                   @   sÊ   d dl Z d dlmZmZ d dlZd dlZd dlZd dlmZ d dlm	Z	 d dl
mZ d dlmZ ej ej¡ej d¡krDd dlmZ nd dlmZ G d	d
„ d
e	ƒZG dd„ dejeƒZG dd„ dƒZdS )é    N)ÚAnyÚOptional)Únn)Ú	LoraLayer)Úcheck_adapters_to_merge)Ú	transposez4.33.0)Údeepspeed_configc                       s@   e Zd ZdZdZdejddf‡ fdd„Zdd	„ Zd
d„ Z	‡  Z
S )ÚAdaLoraLayer)Úlora_AÚlora_BÚlora_EÚlora_embedding_AÚlora_embedding_B)ÚrÚ
lora_alphaÚscalingÚlora_dropoutÚranknumÚ
base_layerÚreturnNc                    s@   t ƒ  |¡ t i ¡| _t i ¡| _t i ¡| _t i ¡| _d S ©N)ÚsuperÚ__init__r   ÚParameterDictr   r
   r   r   )Úselfr   ©Ú	__class__© úV/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/peft/tuners/adalora/layer.pyr   )   s
   zAdaLoraLayer.__init__c                 C   s  |dk rt d|› ƒ‚|| j|< || j|< |dkr tj|d}nt ¡ }|| j|< t t 	|| j
¡¡| j|< t t 	|d¡¡| j|< t t 	| j|¡¡| j|< tjt 	d¡dd| j|< | j| j t|ƒ¡ d| j| _|dkrs|nt|ƒ| j|< |r|  |¡ |  |¡ |  | j¡ d S )Nr   z?`r` should be a positive integer or 0, but the value passed is ç        )Úpé   F)Úrequires_grad)Ú
ValueErrorr   r   r   ÚDropoutÚIdentityr   Ú	ParameterÚtorchÚrandnÚin_featuresr
   r   Úout_featuresr   r   ÚdataÚfill_Úfloatr"   r   Úreset_lora_parametersÚ%_move_adapter_to_device_of_base_layerÚset_adapterÚactive_adapters)r   Úadapter_namer   r   r   Úinit_lora_weightsÚlora_dropout_layerr   r   r   Úupdate_layer0   s&   




zAdaLoraLayer.update_layerc                 C   sX   || j  ¡ v r*tj | j| ¡ tjj| j | ddd tjj| j| ddd d S d S )Nr   g{®Gáz”?)ÚmeanÚstd)r
   Úkeysr   ÚinitÚzeros_r   Únormal_r   )r   r2   r   r   r   r.   O   s
   ýz"AdaLoraLayer.reset_lora_parameters)Ú__name__Ú
__module__Ú__qualname__Úadapter_layer_namesÚother_param_namesr   ÚModuler   r5   r.   Ú__classcell__r   r   r   r   r	   "   s    r	   c                       s¶   e Zd Z					d dejdeded	ed
edededdf‡ fdd„Z	d!dede
ee  ddfdd„Zd"dd„Zdejfdd„Zdejdededejfdd„Zdef‡ fdd„Z‡  ZS )#Ú	SVDLinearr   r!   r   FTr   r2   r   r   r   Úfan_in_fan_outr3   r   Nc           	         sD   t ƒ  ¡  t | |¡ d|  ¡ j_|| _|| _|  |||||¡ d S )NF)	r   r   r	   Úget_base_layerÚweightr"   rD   Ú_active_adapterr5   )	r   r   r2   r   r   r   rD   r3   Úkwargsr   r   r   r   X   s   
zSVDLinear.__init__Ú
safe_mergeÚadapter_namesc                 C   sž   t | |ƒ}|s	dS |D ]A}|  ¡ }|| j ¡ v rL|r;|jj ¡ }||  |¡7 }t 	|¡ 
¡ s6td|› dƒ‚||j_n|j j|  |¡7  _| j |¡ qdS )a^  
        Merge the active adapter weights into the base weights

        Args:
            safe_merge (`bool`, *optional*):
                If True, the merge operation will be performed in a copy of the original weights and check for NaNs
                before merging the weights. This is useful if you want to check if the merge operation will produce
                NaNs. Defaults to `False`.
            adapter_names (`List[str]`, *optional*):
                The list of adapter names that should be merged. If None, all active adapters will be merged. Defaults
                to `None`.
        Nz1NaNs detected in the merged weights. The adapter z seems to be broken)r   rE   r
   r8   rF   r+   ÚcloneÚget_delta_weightr'   ÚisfiniteÚallr#   Úmerged_adaptersÚappend)r   rI   rJ   Úactive_adapterr   Úorig_weightsr   r   r   Úmergel   s$   

ÿ
€ïzSVDLinear.mergec                 C   sj   | j s
t d¡ dS t| jƒdkr3| j ¡ }|| j ¡ v r*|  ¡ j	 j
|  |¡8  _
t| jƒdksdS dS )zW
        This method unmerges all merged adapter layers from the base weights.
        z Already unmerged. Nothing to do.Nr   )ÚmergedÚwarningsÚwarnÚlenrO   Úpopr
   r8   rE   rF   r+   rL   )r   rQ   r   r   r   Úunmerge‘   s   

ýzSVDLinear.unmergec                 C   s>   t | j| | j| | j|   | jƒ| j|  | j| d  S ©Ngñhãˆµøä>)r   r   r
   r   rD   r   r   )r   Úadapterr   r   r   rL      s   $ÿþÿzSVDLinear.get_delta_weightÚxÚargsrH   c                 O   sü   | j r| jr
|  ¡  | j|g|¢R i |¤Ž}|S | jr)| j|g|¢R i |¤Ž}|S | j|g|¢R i |¤Ž}| jD ]C}|| j ¡ vrBq8| j| }| j| }| j| }| j	| }	| j
| }
| j| d }|  ||j¡}||	|ƒ|| j |j |
 | 7 }q8|S rZ   )Údisable_adaptersrT   rY   r   r1   r
   r8   r   r   r   r   r   Ú_cast_input_dtypeÚdtypeÚT)r   r\   r]   rH   ÚresultrQ   r
   r   r   Údropoutr   r   r   r   r   Úforward¤   s*   ïò





&zSVDLinear.forwardc                    s   t ƒ  ¡ }d| S )Nzadalora.)r   Ú__repr__)r   Úrepr   r   r   re   ¼   s   
zSVDLinear.__repr__)r   r!   r   FT)FN)r   N)r<   r=   r>   r   rA   ÚstrÚintr-   Úboolr   r   ÚlistrS   rY   r'   ÚTensorrL   r   rd   re   rB   r   r   r   r   rC   V   s8    øþýüûúùø
ö 
%rC   c                   @   sp   e Zd ZdZdd„ Zdd„ Zdd„ Zdd	„ Zd
efdd„Z	dd„ Z
dd„ Zdd„ Zdd„ Zddd„Zdd„ ZdS )ÚRankAllocatorzé
    The RankAllocator for AdaLoraModel. Paper: https://openreview.net/pdf?id=lq62uWRJjiY

    Args:
        config ([`AdaLoraConfig`]): The configuration of the AdaLora model.
        model: the model that we apply AdaLoRA to.

    c                 C   sb   || _ || _|j| _|j| _| jdkr| jdk sJ ‚| jdkr$| jdk s&J ‚|  ¡  |  |¡ d S )Nr   r!   )Úpeft_configr2   Úbeta1Úbeta2Ú	reset_iptÚ_set_budget_scheduler)r   Úmodelrm   r2   r   r   r   r   Ë   s   zRankAllocator.__init__c                 C   s   || j _d S r   )rm   Ú
total_step)r   rs   r   r   r   Úset_total_stepÖ   s   zRankAllocator.set_total_stepc                 C   s   i | _ i | _i | _d S r   )ÚiptÚexp_avg_iptÚexp_avg_unc)r   r   r   r   rp   Ù   s   
zRankAllocator.reset_iptc                 C   s|   d| _ tƒ | _| ¡ D ] \}}d| j› |v r+|  j | d¡7  _ | j | dd¡¡ qt| jƒ| _| j	j
t| jƒ | _d S )Nr   úlora_A.r
   ú%s)Úinit_bgtÚsetÚname_setÚnamed_parametersr2   ÚsizeÚaddÚreplaceÚsortedrm   Útarget_rrW   Ú
target_bgt)r   rr   Únr    r   r   r   rq   Þ   s   €z#RankAllocator._set_budget_schedulerÚstepc                 C   s¦   | j j}| j j}| j j}||kr| j}d}||fS ||| kr(| j}d}||fS d|| || |   }t| j| j |d  | j ƒ}|| j j dkrMdnd}||fS )NFTr!   é   r   )rm   ÚtinitÚtfinalrs   rz   rƒ   rh   ÚdeltaT)r   r…   r‡   rˆ   rs   ÚbudgetÚmask_indÚ	mul_coeffr   r   r   Úbudget_scheduleé   s   
øýzRankAllocator.budget_schedulec              	   C   s:  |  ¡ D ]–\}}d|v rš| j|v rš|| jvr.t |¡| j|< t |¡| j|< t |¡| j|< t ¡ ` tƒ d urNdd l	}|j
 |¡}||  ¡  ¡ | j|< n||j  ¡  ¡ | j|< | j| j|  d| j | j|   | j|< | j| j|  d| j | j| | j|   ¡   | j|< W d   ƒ n1 s•w   Y  qd S )NÚlora_r   r!   )r}   r2   ru   r'   Ú
zeros_likerv   rw   Úno_gradr   Ú	deepspeedÚutilsÚsafe_get_full_gradÚabsÚdetachÚgradrn   ro   )r   rr   r„   r    r‘   r–   r   r   r   Ú
update_iptü   s$   


*0
ÿõ€úzRankAllocator.update_iptc                 C   s   | j | | j|  S r   )rv   rw   )r   r„   r   r   r   Ú_element_score  s   zRankAllocator._element_scorec                 C   s&   |j ddd}| d¡| d¡ }|S )Nr!   F©ÚdimÚkeepdiméÿÿÿÿ)ÚsumÚview)r   Úipt_EÚipt_ABÚsum_iptr   r   r   Ú_combine_ipt  s   zRankAllocator._combine_iptc                 C   s  i }i }i }|  ¡ D ]w\}}d| j› |v r:|  |¡}tj|ddd}	| dd¡}
|
|vr3|	g||
< n||
  |	¡ d| j› |v rj|  |¡}tj|dd	d d
d¡}	| dd¡}
|
|vrc|	g||
< n||
  |	¡ d| j› |v r|  |¡}| dd¡}
|||
< q
g }|D ])}
||
 }tj||
 dd}|  	||¡}|
d }| d
d¡||< | | d
¡¡ q†tj
t |¡| j| dd  ¡ }i }t ¡ 2 |  ¡ D ]$\}}d| j› |v rð| || |kd¡ || |k  d
¡ ¡ ||< qÌW d   ƒ |S 1 süw   Y  |S )Nrx   r!   Tr™   r
   ry   zlora_B.r   Frœ   r   úlora_E.r   )rš   )Úkr   )r}   r2   r˜   r'   r6   r€   rP   rž   Úcatr¢   Úkthvaluerz   Úitemr   Úmasked_fill_Útolist)r   rr   rŠ   Ú	value_iptÚ
vector_iptÚtriplet_iptr„   r    Ú	entry_iptÚcomb_iptÚname_mÚ	all_scorerŸ   r    r¡   Úname_EÚmask_thresholdÚrank_patternr   r   r   Úmask_to_budget  sf   


€þýý
€ý
ÿûzRankAllocator.mask_to_budgetFc                 C   sT   || j j| j j k r|  |¡ |  |¡\}}|s|r$|  ||¡}||fS d }||fS r   )rm   rs   rˆ   r—   r   r´   )r   rr   Úglobal_stepÚ
force_maskrŠ   r‹   r³   r   r   r   Úupdate_and_allocateO  s   
ÿz!RankAllocator.update_and_allocatec                 C   s¸   d}| j tt| ¡ ƒƒvrd}t ¡ ? | ¡ D ]1\}}d| j › |v rI|s(|n	| d| j › d¡}t || ¡ 	d¡ 
|j¡}| | ¡  d¡ qW d   ƒ d S 1 sUw   Y  d S )NFTr£   Ú.Ú rœ   r   )r2   ÚnextÚiterr8   r'   r   r}   r€   rk   Ú	unsqueezeÚtoÚdevicer¨   ri   )r   rr   r³   Úis_adapter_name_truncatedr„   r    ÚkeyÚmaskr   r   r   Úmask_using_rank_pattern[  s   
€ü"ÿz%RankAllocator.mask_using_rank_patternN)F)r<   r=   r>   Ú__doc__r   rt   rp   rq   rh   r   r—   r˜   r¢   r´   r·   rÂ   r   r   r   r   rl   Á   s    	
4rl   )rU   Útypingr   r   Ú	packagingr'   Útransformersr   Úpeft.tuners.lorar   Úpeft.tuners.tuners_utilsr   Ú
peft.utilsr   ÚversionÚparseÚ__version__Útransformers.integrationsr   Útransformers.deepspeedr	   rA   rC   rl   r   r   r   r   Ú<module>   s   4k