import warnings

import torch
from transformers.pytorch_utils import Conv1D

from peft.import_utils import is_bnb_4bit_available, is_bnb_available, is_gptqmodel_available
from peft.tuners.lora import LoraConfig, LoraModel
from peft.tuners.tuners_utils import BaseTunerLayer
from peft.utils import (
    TRANSFORMERS_MODELS_TO_ADALORA_TARGET_MODULES_MAPPING,
    _freeze_adapter,
    _get_submodules,
    get_auto_gptq_quant_linear,
    get_gptqmodel_quant_linear,
    get_quantization_config,
)
from peft.utils.integrations import gather_params_ctx

from .gptq import SVDQuantLinear
from .layer import AdaLoraLayer, RankAllocator, SVDLinear


class AdaLoraModel(LoraModel):
    """
    Creates AdaLoRA (Adaptive LoRA) model from a pretrained transformers model. Paper:
    https://openreview.net/forum?id=lq62uWRJjiY

    Args:
        model ([`transformers.PreTrainedModel`]): The model to be adapted.
        config ([`AdaLoraConfig`]): The configuration of the AdaLora model.
        adapter_name (`str`): The name of the adapter, defaults to `"default"`.
        low_cpu_mem_usage (`bool`, `optional`, defaults to `False`):
            Create empty adapter weights on meta device. Useful to speed up the loading process.

    Returns:
        `torch.nn.Module`: The AdaLora model.

    Example::

        >>> from transformers import AutoModelForSeq2SeqLM
        >>> from peft import LoraConfig, AdaLoraModel, AdaLoraConfig
        >>> config = AdaLoraConfig(
                peft_type="ADALORA", task_type="SEQ_2_SEQ_LM", init_r=12, lora_alpha=32, target_modules=["q", "v"],
                lora_dropout=0.01,
            )
        >>> model = AutoModelForSeq2SeqLM.from_pretrained("t5-base")
        >>> model = AdaLoraModel(model, config, "default")

    **Attributes**:
        - **model** ([`transformers.PreTrainedModel`]) -- The model to be adapted.
        - **peft_config** ([`AdaLoraConfig`]): The configuration of the AdaLora model.
    """

    def __init__(self, model, config, adapter_name):
        super().__init__(model, config, adapter_name)

        traininable_mode_counter = 0
        for config in self.peft_config.values():
            if not config.inference_mode:
                traininable_mode_counter += 1

        if traininable_mode_counter > 1:
            raise ValueError(
                "AdaLoraModel supports only 1 trainable adapter. When using multiple adapters, set inference_mode "
                "to True for all adapters except the one you want to train."
            )

        if self.peft_config[adapter_name].inference_mode:
            _freeze_adapter(self.model, adapter_name)
        else:
            self.trainable_adapter_name = adapter_name
            self.rankallocator = RankAllocator(self.model, self.peft_config[adapter_name], self.trainable_adapter_name)

    def _check_new_adapter_config(self, config: LoraConfig) -> None:
        """
        A helper method to check the config when a new adapter is being added.

        Raise a ValueError if there is something wrong with the config or if it conflicts with existing adapters.

        """
        super()._check_new_adapter_config(config)

        traininable_mode_counter = 0
        for config_ in self.peft_config.values():
            if not config_.inference_mode:
                traininable_mode_counter += 1

        if traininable_mode_counter > 1:
            raise ValueError(
                f"{self.__class__.__name__} supports only 1 trainable adapter. When using multiple adapters, set "
                "inference_mode to True for all adapters except the one you want to train."
            )
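    # Illustrative sketch only (not part of the class): the check above means a
    # hypothetical second AdaLora adapter can only be added in inference mode,
    # e.g. via `PeftModel.add_adapter`:
    #
    #     peft_model.add_adapter("frozen", AdaLoraConfig(inference_mode=True, ...))   # ok
    #     peft_model.add_adapter("second", AdaLoraConfig(inference_mode=False, ...))  # ValueError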
    def _create_and_replace(
        self,
        lora_config,
        adapter_name,
        target,
        target_name,
        parent,
        current_key,
    ):
        kwargs = {
            "r": lora_config.init_r,
            "lora_alpha": lora_config.lora_alpha,
            "lora_dropout": lora_config.lora_dropout,
            "fan_in_fan_out": lora_config.fan_in_fan_out,
            "init_lora_weights": lora_config.init_lora_weights,
            "loaded_in_8bit": getattr(self.model, "is_loaded_in_8bit", False),
            "loaded_in_4bit": getattr(self.model, "is_loaded_in_4bit", False),
        }
        if (kwargs["loaded_in_8bit"] or kwargs["loaded_in_4bit"]) and not is_bnb_available():
            raise ImportError(
                "To use AdaLora with 8-bit quantization, please install the `bitsandbytes` package. "
                "You can install it with `pip install bitsandbytes`."
            )

        quantization_config = get_quantization_config(self.model, method="gptq")
        if quantization_config is not None:
            kwargs["gptq_quantization_config"] = quantization_config

        # If it is not an AdaLoraLayer, create a new module, else update it with new adapters
        if not isinstance(target, AdaLoraLayer):
            device_map = self.model.hf_device_map if hasattr(self.model, "hf_device_map") else None
            new_module = self._create_new_module(lora_config, adapter_name, target, device_map=device_map, **kwargs)
            if adapter_name not in self.active_adapters:
                # adding an additional adapter: it is not automatically trainable
                new_module.requires_grad_(False)
            self._replace_module(parent, target_name, new_module, target)
        else:
            target.update_layer(
                adapter_name,
                lora_config.init_r,
                lora_config.lora_alpha,
                lora_config.lora_dropout,
                lora_config.init_lora_weights,
            )
    @staticmethod
    def _create_new_module(lora_config, adapter_name, target, device_map, **kwargs):
        # avoid eager bnb import
        if is_bnb_available():
            import bitsandbytes as bnb

            from .bnb import SVDLinear8bitLt
        if is_bnb_4bit_available():
            from .bnb import SVDLinear4bit

        gptq_quantization_config = kwargs.get("gptq_quantization_config", None)
        if is_gptqmodel_available():
            QuantLinear = get_gptqmodel_quant_linear(gptq_quantization_config, device_map=device_map)
        else:
            QuantLinear = get_auto_gptq_quant_linear(gptq_quantization_config)

        loaded_in_8bit = kwargs.pop("loaded_in_8bit", False)
        loaded_in_4bit = kwargs.pop("loaded_in_4bit", False)

        if isinstance(target, BaseTunerLayer):
            target_base_layer = target.get_base_layer()
        else:
            target_base_layer = target

        if loaded_in_8bit and isinstance(target_base_layer, bnb.nn.Linear8bitLt):
            kwargs.update(
                {
                    "has_fp16_weights": target_base_layer.state.has_fp16_weights,
                    "threshold": target_base_layer.state.threshold,
                    "index": target_base_layer.index,
                }
            )
            new_module = SVDLinear8bitLt(target, adapter_name, **kwargs)
        elif loaded_in_4bit and is_bnb_4bit_available() and isinstance(target_base_layer, bnb.nn.Linear4bit):
            fourbit_kwargs = kwargs.copy()
            fourbit_kwargs.update(
                {
                    "compute_dtype": target_base_layer.compute_dtype,
                    "compress_statistics": target_base_layer.weight.compress_statistics,
                    "quant_type": target_base_layer.weight.quant_type,
                }
            )
            new_module = SVDLinear4bit(target, adapter_name, **fourbit_kwargs)
        elif QuantLinear is not None and isinstance(target, QuantLinear):
            new_module = SVDQuantLinear(target, adapter_name, **kwargs)
        else:
            if isinstance(target_base_layer, torch.nn.Linear):
                if kwargs["fan_in_fan_out"]:
                    warnings.warn(
                        "fan_in_fan_out is set to True but the target module is `torch.nn.Linear`. "
                        "Setting fan_in_fan_out to False."
                    )
                    kwargs["fan_in_fan_out"] = lora_config.fan_in_fan_out = False
            elif isinstance(target_base_layer, Conv1D):
                if not kwargs["fan_in_fan_out"]:
                    warnings.warn(
                        "fan_in_fan_out is set to False but the target module is `Conv1D`. "
                        "Setting fan_in_fan_out to True."
                    )
                    kwargs["fan_in_fan_out"] = lora_config.fan_in_fan_out = True
            else:
                raise ValueError(
                    f"Target module {target} is not supported. "
                    f"Currently, only `torch.nn.Linear` and `Conv1D` are supported."
                )
            new_module = SVDLinear(target, adapter_name, **kwargs)

        return new_module

    @staticmethod
    def _prepare_adapter_config(peft_config, model_config):
        if peft_config.target_modules is None:
            if model_config["model_type"] not in TRANSFORMERS_MODELS_TO_ADALORA_TARGET_MODULES_MAPPING:
                raise ValueError("Please specify `target_modules` in `peft_config`")
            peft_config.target_modules = TRANSFORMERS_MODELS_TO_ADALORA_TARGET_MODULES_MAPPING[
                model_config["model_type"]
            ]
        return peft_config
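    # Illustrative note on `_create_new_module` above: dispatch is, in order,
    # bnb 8-bit -> bnb 4-bit -> GPTQ quant linear -> plain `nn.Linear`/`Conv1D`.
    # For an unquantized `nn.Linear`, the replacement is morally equivalent to
    # this sketch (hypothetical shapes):
    #
    #     base = torch.nn.Linear(768, 768)         # the module being adapted
    #     svd = SVDLinear(base, "default", r=12)   # wraps `base`, does not copy it
    #     assert svd.get_base_layer() is base
    #
    # so the pretrained weight stays inside the wrapper and only the SVD factors
    # (lora_A, lora_B, lora_E) are newly allocated.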
    def __getattr__(self, name: str):
        """Forward missing attributes to the wrapped module."""
        try:
            return super().__getattr__(name)  # defer to nn.Module's logic
        except AttributeError:
            if name == "model":  # prevent infinite recursion if class is not initialized
                raise
            return getattr(self.model, name)

    def forward(self, *args, **kwargs):
        outputs = self.model.forward(*args, **kwargs)

        if (getattr(outputs, "loss", None) is not None) and isinstance(outputs.loss, torch.Tensor):
            # Calculate the orthogonal regularization
            orth_reg_weight = self.peft_config[self.trainable_adapter_name].orth_reg_weight

            if orth_reg_weight <= 0:
                raise ValueError("orth_reg_weight should be greater than 0. ")

            regu_loss = 0
            num_param = 0
            for n, p in self.model.named_parameters():
                if ("lora_A" in n or "lora_B" in n) and self.trainable_adapter_name in n:
                    if p.shape == torch.Size([0]):
                        with gather_params_ctx(p, fwd_module=self):
                            para_cov = p @ p.T if "lora_A" in n else p.T @ p
                    else:
                        para_cov = p @ p.T if "lora_A" in n else p.T @ p
                    I = torch.eye(*para_cov.size(), out=torch.empty_like(para_cov))
                    I.requires_grad = False
                    num_param += 1
                    regu_loss += torch.norm(para_cov - I, p="fro")
            if num_param > 0:
                regu_loss = regu_loss / num_param
            else:
                regu_loss = 0
            outputs.loss += orth_reg_weight * regu_loss
        return outputs
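    # A minimal sketch of the orthogonal regularization computed in `forward`
    # above, for a single (r, d)-shaped `lora_A` (`lora_B` is treated
    # symmetrically via p.T @ p):
    #
    #     para_cov = p @ p.T                             # (r, r) Gram matrix
    #     I = torch.eye(*para_cov.size())                # identity target
    #     regu_loss = torch.norm(para_cov - I, p="fro")  # Frobenius distance
    #
    # i.e. the rows of lora_A (and columns of lora_B) are pushed toward
    # orthonormality, as proposed in the AdaLoRA paper.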
zAdaLoraModel.forwardc              	   C   sn  | j | }| D ]\}}t|trt|}nt|tjr)|d}|  }nt	d||v r=d
|ddd nd
|ddd }t| j|\}}	}|	j| | }
|	j| | }|	j| d d |f }|	j| }|	|||j|j|j t , |dkr|	j| |
 |	j| | |	j| | |	j| | W d    n1 sw   Y  q	d S )NzUnexpected type of rank_idx.r   )r   itemsr<   listsumr`   rv   viewitemr   joinsplitr   r   lora_Ero   rp   ranknumrB   r.   r/   r1   no_gradcopy_)r   rank_patternr!   rC   rk   rank_idxrankkey_rD   lora_E_weightslora_A_weightslora_B_weightsr   r%   r%   r&   resize_modules_by_rank_pattern  s>   



8

z+AdaLoraModel.resize_modules_by_rank_patternc           
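    # Illustrative sketch of what `rank_pattern` holds (hypothetical values):
    # each entry maps a lora_E parameter name to a boolean mask over the
    # initial rank dimension, e.g.
    #
    #     {"model.encoder.block.0....lora_E.default": [True, False, True, ...]}
    #
    # so the fancy indexing above (`[rank_idx]` / `[:, rank_idx]`) keeps only
    # the triplets whose singular values survived pruning, shrinking the rank
    # from `init_r` to `sum(rank_idx)`.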
    def resize_state_dict_by_rank_pattern(self, rank_pattern, state_dict, adapter_name):
        for name, rank_idx in rank_pattern.items():
            rank = sum(rank_idx)
            prefix = ".".join(name.split(".")[0:-2]) if adapter_name in name else ".".join(name.split(".")[0:-1])
            for layer in ["lora_E", "lora_A", "lora_B"]:
                key = f"base_model.model.{prefix}.{layer}.{adapter_name}"
                if layer != "lora_B":
                    state_dict[key] = (
                        state_dict[key][rank_idx] if rank != state_dict[key].shape[0] else state_dict[key]
                    )
                else:
                    state_dict[key] = (
                        state_dict[key][:, rank_idx] if rank != state_dict[key].shape[1] else state_dict[key]
                    )
        return state_dict

    def update_and_allocate(self, global_step):
        """
        This method updates Adalora budget and mask.

        This should be called in every training step after `loss.backward()` and before `zero_grad()`.

        `tinit`, `tfinal` and `deltaT` are handled within the method.

        Args:
            global_step (`int`): The current training step, it is used to calculate adalora budget.

        Example:

        ```python
        >>> loss = model(**input).loss
        >>> loss.backward()
        >>> optimizer.step()
        >>> model.base_model.update_and_allocate(i_step)
        >>> optimizer.zero_grad()
        ```
        """
        lora_config = self.peft_config[self.trainable_adapter_name]
        # Update the importance score and allocate the budget
        if global_step < lora_config.total_step - lora_config.tfinal:
            _, rank_pattern = self.rankallocator.update_and_allocate(self.model, global_step)
            if rank_pattern:
                lora_config.rank_pattern = rank_pattern
        # Finalize the budget allocation
        elif global_step == lora_config.total_step - lora_config.tfinal:
            _, rank_pattern = self.rankallocator.update_and_allocate(self.model, global_step, force_mask=True)
            lora_config.rank_pattern = rank_pattern
            self.rankallocator.reset_ipt()
        # Mask the unimportant weights using the rank pattern for the remaining
        # steps (see https://github.com/huggingface/peft/issues/432)
        elif global_step > lora_config.total_step - lora_config.tfinal:
            self.rankallocator.mask_using_rank_pattern(self.model, lora_config.rank_pattern)

    def add_weighted_adapter(self, *args, **kwargs):
        """This method is not supported for AdaLoRA, use LoRA instead."""
        raise TypeError(f"{self.__class__.__name__} does not support add_weighted_adapter method.")
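
# Illustrative end-to-end training sketch (comment only, not executed on
# import); `base_model`, `dataloader`, `optimizer` and `num_steps` are
# placeholders:
#
#     config = AdaLoraConfig(total_step=num_steps, target_modules=["q", "v"])
#     model = AdaLoraModel(base_model, config, "default")
#     for step, batch in enumerate(dataloader):
#         loss = model(**batch).loss       # includes the orthogonal penalty
#         loss.backward()
#         optimizer.step()
#         model.update_and_allocate(step)  # budget schedule via tinit/tfinal/deltaT
#         optimizer.zero_grad()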