o
    Ni<                     @   s   d dl Z d dlZd dlmZ d dlmZmZmZ d dlm	Z	m
Z
 d dlmZ d dlmZmZmZmZmZmZ d dlmZ dd	lmZ dd
lmZmZmZ G dd de
ZdS )    N)Conv1D)is_bnb_4bit_availableis_bnb_availableis_gptqmodel_available)
LoraConfig	LoraModel)BaseTunerLayer)5TRANSFORMERS_MODELS_TO_ADALORA_TARGET_MODULES_MAPPING_freeze_adapter_get_submodulesget_auto_gptq_quant_linearget_gptqmodel_quant_linearget_quantization_config)gather_params_ctx   )SVDQuantLinear)AdaLoraLayerRankAllocator	SVDLinearc                       sx   e Zd ZdZeZ fddZdeddf fddZd	d
 Z	e
dddZdd Zdd Zdd Zdd Zdd Z  ZS )AdaLoraModela  
    Creates AdaLoRA (Adaptive LoRA) model from a pretrained transformers model. Paper:
    https://openreview.net/forum?id=lq62uWRJjiY

    Args:
        model ([`transformers.PreTrainedModel`]): The model to be adapted.
        config ([`AdaLoraConfig`]): The configuration of the AdaLora model.
        adapter_name (`str`): The name of the adapter, defaults to `"default"`.
        low_cpu_mem_usage (`bool`, `optional`, defaults to `False`):
            Create empty adapter weights on meta device. Useful to speed up the loading process.

    Returns:
        `torch.nn.Module`: The AdaLora model.

    Example::

        >>> from transformers import AutoModelForSeq2SeqLM >>> from peft import LoraConfig, AdaLoraModel, AdaLoraConfig
        >>> config = AdaLoraConfig(
                peft_type="ADALORA", task_type="SEQ_2_SEQ_LM", init_r=12, lora_alpha=32, target_modules=["q", "v"],
                lora_dropout=0.01,
            )
        >>> model = AutoModelForSeq2SeqLM.from_pretrained("t5-base") >>> model = AdaLoraModel(model, config, "default")

    **Attributes**:
        - **model** ([`transformers.PreTrainedModel`]) -- The model to be adapted.
        - **peft_config** ([`AdaLoraConfig`]): The configuration of the AdaLora model.
    c                    s   t  j|||fi | d}| j D ]	}|js|d7 }q|dkr%td| j| jr3t| j| d S || _t	| j| j| | j| _
d S )Nr   r   zAdaLoraModel supports only 1 trainable adapter. When using multiple adapters, set inference_mode to True for all adapters except the one you want to train.)super__init__peft_configvaluesinference_mode
ValueErrorr
   modeltrainable_adapter_namer   rankallocator)selfr   configadapter_namekwargstraininable_mode_counter	__class__ M/home/ubuntu/.local/lib/python3.10/site-packages/peft/tuners/adalora/model.pyr   E   s   zAdaLoraModel.__init__r    returnNc                    sL   t  | d}| j D ]	}|js|d7 }q|dkr$t| jj ddS )z
        A helper method to check the config when a new adapter is being added.

        Raise a ValueError if there is something wrong with the config or if it conflicts with existing adapters.

        r   r   z supports only 1 trainable adapter. When using multiple adapters, set inference_mode to True for all adapters except the one you want to train.N)r   _check_new_adapter_configr   r   r   r   r%   __name__)r   r    r#   config_r$   r&   r'   r)   Y   s   z&AdaLoraModel._check_new_adapter_configc              
   C   s   |j |j|j|j|jt| jddt| jddd}|d s!|d r(t s(tdt	| jdd	}|d ur7||d
< t
|tsit| jdrF| jjnd }	| j|||fd|	i|}
|| jvr_|
d | |||
| d S |||j |j|j|j d S )Nis_loaded_in_8bitFis_loaded_in_4bit)r
lora_alphalora_dropoutfan_in_fan_outinit_lora_weightsloaded_in_8bitloaded_in_4bitr3   r4   zTo use AdaLora with 8-bit quantization, please install the `bitsandbytes` package. You can install it with `pip install bitsandbytes`.gptq)methodgptq_quantization_confighf_device_map
device_map)init_rr/   r0   r1   r2   getattrr   r   ImportErrorr   
isinstancer   hasattrr8   _create_new_moduleactive_adaptersrequires_grad__replace_moduleupdate_layer)r   lora_configr!   targettarget_nameparentcurrent_keyr"   quantization_configr9   
new_moduler&   r&   r'   _create_and_replacen   s8   
	


z AdaLoraModel._create_and_replacec                 K   s  t  rdd l}ddlm} t rddlm} |dd }t r&t||d}	nt	|}	|
dd}
|
d	d}t|tr@| }n|}|
rdt||jjrd||jj|jj|jd
 |||fi |}|S |rt rt||jjr| }||j|jj|jjd |||fi |}|S |	d urt||	rt||fi |}|S t|tjjr|d rtd d |d< | _ nt|t!r|d std d |d< | _ nt"d| dt#||fi |}|S )Nr   r   )SVDLinear8bitLt)SVDLinear4bitr7   )r9   r3   Fr4   )has_fp16_weights	thresholdindex)compute_dtypecompress_statistics
quant_typer1   zjfan_in_fan_out is set to True but the target module is `torch.nn.Linear`. Setting fan_in_fan_out to False.zafan_in_fan_out is set to False but the target module is `Conv1D`. Setting fan_in_fan_out to True.TzTarget module zP is not supported. Currently, only `torch.nn.Linear` and `Conv1D` are supported.)$r   bitsandbytesbnbrL   r   rM   getr   r   r   popr=   r   get_base_layernnLinear8bitLtupdatestaterN   rO   rP   
Linear4bitcopyrQ   weightrR   rS   r   torchLinearwarningswarnr1   r   r   r   )rD   r!   rE   r9   r"   rU   rL   rM   r7   QuantLinearr3   r4   target_base_layerrJ   fourbit_kwargsr&   r&   r'   r?      sn   

#

zAdaLoraModel._create_new_modulec              	   O   sf  | j j|i |}t|dd d urt|jtjr| j| j j	}|dkr't
dd}d}| j  D ]l\}}d|v s<d|v r| j|v r|jtdgkrot|| d d|v rZ||j n|j| }	W d    n1 siw   Y  nd|v rx||j n|j| }	tj|	 dt|	i}
d|
_|d	7 }|tj|	|
 d
d7 }q0|dkr|| }nd}| j|| 7  _|S )Nlossr   z*orth_reg_weight should be greater than 0. lora_Alora_B)
fwd_moduleoutFr   fro)p)r   forwardr;   r=   rg   r`   Tensorr   r   orth_reg_weightr   named_parametersshapeSizer   Teyesize
empty_likerequires_gradnorm)r   argsr"   outputsrp   	regu_loss	num_paramnrm   para_covIr&   r&   r'   rn      s2   
zAdaLoraModel.forwardc              	   C   sn  | j | }| D ]\}}t|trt|}nt|tjr)|d}|  }nt	d||v r=d
|ddd nd
|ddd }t| j|\}}	}|	j| | }
|	j| | }|	j| d d |f }|	j| }|	|||j|j|j t , |dkr|	j| |
 |	j| | |	j| | |	j| | W d    n1 sw   Y  q	d S )NzUnexpected type of rank_idx.r   )r   itemsr=   listsumr`   ro   viewitemr   joinsplitr   r   lora_Erh   ri   ranknumrC   r/   r0   r2   no_gradcopy_)r   rank_patternr!   rD   namerank_idxrankkey_rE   lora_E_weightslora_A_weightslora_B_weightsr   r&   r&   r'   resize_modules_by_rank_pattern   s>   



8

z+AdaLoraModel.resize_modules_by_rank_patternc           
      C   s   |  D ]g\}}t|}||v rd|ddd nd|ddd }dD ]@}d| d| d| }	|dkrQ|||	 jd krJ||	 | n||	 ||	< q*|||	 jd krd||	 d d |f n||	 ||	< q*q|S )	Nr   r   r   r   )r   rh   ri   zbase_model.model.ri   r   )r   r   r   r   rr   )
r   r   
state_dictr!   r   r   r   prefixlayerr   r&   r&   r'   !resize_state_dict_by_rank_pattern  s   8$,
z.AdaLoraModel.resize_state_dict_by_rank_patternc                 C   s   | j | j }||j|j k r!| j| j|\}}|r||_dS dS ||j|j kr?| jj| j|dd\}}||_| j  dS ||j|j krR| j	| j|j dS dS )aM  
        This method updates Adalora budget and mask.

        This should be called in every training step after `loss.backward()` and before `zero_grad()`.

        `tinit`, `tfinal` and `deltaT` are handled with in the method.

        Args:
            global_step (`int`): The current training step, it is used to calculate adalora budget.

        Example:

        ```python
        >>> loss = model(**input).loss
        >>> loss.backward()
        >>> optimizer.step()
        >>> model.base_model.update_and_allocate(i_step)
        >>> optimizer.zero_grad()
        ```
        T)
force_maskN)
r   r   
total_steptfinalr   update_and_allocater   r   	reset_iptmask_using_rank_pattern)r   global_steprD   r   r   r&   r&   r'   r   .  s   
z AdaLoraModel.update_and_allocatec                 O   s   t | jj d)z;This method is not supported for AdaLoRA, use LoRA instead.z. does not support add_weighted_adapter method.)	TypeErrorr%   r*   )r   rz   r"   r&   r&   r'   add_weighted_adapterX  s   z!AdaLoraModel.add_weighted_adapter)N)r*   
__module____qualname____doc__r	   target_module_mappingr   r   r)   rK   staticmethodr?   rn   r   r   r   r   __classcell__r&   r&   r$   r'   r   %   s    -E*r   )rb   r`   transformers.pytorch_utilsr   peft.import_utilsr   r   r   peft.tuners.lorar   r   peft.tuners.tuners_utilsr   
peft.utilsr	   r
   r   r   r   r   peft.utils.integrationsr   r5   r   r   r   r   r   r   r&   r&   r&   r'   <module>   s    