o
    8wiP                     @  s  d dl mZ d dlZd dlZd dlmZ d dlmZ d dlm	Z	m
Z
 d dlZd dlmZ d dlmZ d dlmZ d dlmZ d d	lmZmZ d d
lmZmZmZ d dlmZmZmZ ddlmZ ddl m!Z! ddl"m#Z# ddl$m%Z%m&Z& dddZ'G dd deZ(dS )    )annotationsN)asdict)Enum)OptionalUnion)_calculate_correct_fan)tqdm)Conv1D)is_bnb_4bit_availableis_bnb_available)	BaseTunerBaseTunerLayercheck_target_module_exists)2TRANSFORMERS_MODELS_TO_VERA_TARGET_MODULES_MAPPINGModulesToSaveWrapper_get_submodules   )
BufferDict) _maybe_include_all_linear_layers   )
VeraConfig)Linear	VeraLayertensor_or_shape$Union[torch.Tensor, tuple[int, ...]]	generatortorch.Generatorreturntorch.Tensorc                 C  s   t | trt| }n| }t|d}td}|t| }td| }t  |j| ||dW  d   S 1 s=w   Y  dS )a  
    Kaiming Uniform Initialisation adapted to accept a `torch.Generator` object for PRNG.

    Args:
        tensor_or_shape (`Union[torch.Tensor, tuple[int, ...]]`):
            Tensor to initialise, or shape of new tensor to create and then initialise.
        generator: (`torch.Generator`):
            Generator object that manages the state of the PRNG algorithm in use.

    Returns:
        `torch.Tensor`: The initialised tensor.
    fan_inr   g      @r   N)	
isinstancetupletorchemptyr   mathsqrtno_graduniform_)r   r   tensorfangainstdbound r.   S/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/peft/tuners/vera/model.py_kaiming_init+   s   



$r0   c                      s  e Zd ZU dZdZded< dBdC fd
dZdDddZdEddZdFddZ	dGddZ
edd Zdd Zedd ZdHd d!Zed"d# ZdI fd%d&ZdBdJd(d)ZdKd+d,Zd-d. Zd/d0 Zd1d2 Zed3d4 Z	*			5dLdMd:d;ZdNd<d=Z	5dOdMd>d?Zd@dA Z  ZS )P	VeraModela=  
    Creates Vector-based Random Matrix Adaptation (Vera) model from a pretrained transformers model.

    Args:
        model ([`~transformers.PreTrainedModel`]): The model to be adapted.
        config ([`VeraConfig`]): The configuration of the Vera model.
        adapter_name (`str`): The name of the adapter, defaults to `"default"`.
        low_cpu_mem_usage (`bool`, `optional`, defaults to `False`):
            Create empty adapter weights on meta device. Useful to speed up the loading process.

    Returns:
        `torch.nn.Module`: The Vera model.

    Example:

        ```py
        >>> from transformers import AutoModelForCausalLM
        >>> from peft import VeraConfig, get_peft_model

        >>> base_model = AutoModelForCausalLM.from_pretrained("facebook/opt-125m")
        >>> config = VeraConfig(r=128)
        >>> model = get_peft_model(base_model, config)
        ```

    **Attributes**:
        - **model** ([`~transformers.PreTrainedModel`]) -- The model to be adapted.
        - **peft_config** ([`VeraConfig`]): The configuration of the Vera model.
    vera_lambda_strprefixFlow_cpu_mem_usageboolr   Nonec                   s   t  j||||d d S )N)r5   )super__init__)selfmodelconfigadapter_namer5   	__class__r.   r/   r9   h   s   zVeraModel.__init__tuple[int, int]c           	      C  s   |  | j}| ||}t|| j}d}| j D ]K\}}| ||s$qt|tjr1|j	|j
f}nt|trLt|jdr@|jjn|jj}|ddd }nq|du rT|}q||krdtdd t||D }q|du rod}t||S )z
        Finds the largest input and output dimensions across linear layers that have been wrapped with VeRA.

        This will be used for determining the size of the shared vera_A and vera_B matrices.
        Nds_shapec                 s  s    | ]
\}}t ||V  qd S N)max).0abr.   r.   r/   	<genexpr>   s    z&VeraModel._find_dim.<locals>.<genexpr>z[No layers types compatible with VeRA were found. Please check `peft_config.target_modules`.)get_model_configr;   _prepare_adapter_configr   named_modules_check_target_module_existsr!   nnr   out_featuresin_featuresr	   hasattrweightrA   shaper"   zip
ValueError)	r:   r<   model_configpeft_configlargest_shapekeymodulemodule_shapemsgr.   r.   r/   	_find_dimk   s.   
zVeraModel._find_dimr<   r   r=   c                 C  s~   |  |\}}ti |jd| _ti |jd| _tjdd|j}t	|j
|f|d}t	||j
f|d}|| j|< || j|< d S )N)
persistentcpudevicer    )r\   r   save_projectionvera_Avera_Br#   	Generatormanual_seedprojection_prng_keyr0   r)r:   r<   r=   linear_out_dimlinear_in_dimr   rb   rc   r.   r.   r/   _init_vera_A_vera_B   s   
zVeraModel._init_vera_A_vera_Br;   	nn.Modulec                 C  s   |  || d S rC   )rj   )r:   r;   r<   r=   r.   r.   r/   _pre_injection_hook      zVeraModel._pre_injection_hookc                 C  s   t | jdkr|jdkrt| jj d| j D ]}||u r!q|j|jkr4td|jd|j dqtdd | j D }t |dkrNtd	| d
S )z
        A helper method to check the config when a new adapter is being added.

        Raise a ValueError if there is something wrong with the config or if it conflicts with existing adapters.

        r   nonezf supports only 1 adapter with bias. When using multiple adapters, set bias to 'none' for all adapters.z_Vera PRNG initialisation key must be the same for all adapters. Got config.projection_prng_key=z but previous config had .c                 S  s   h | ]}|j qS r.   )ra   )rE   r<   r.   r.   r/   	<setcomp>   s    z6VeraModel._check_new_adapter_config.<locals>.<setcomp>zcVeRA projection weights must be saved for all adapters or none, but got multiple different values: N)	lenrV   biasrT   r?   __name__valuesrf   sorted)r:   r<   existing_configsave_project_unique_valuesr.   r.   r/   _check_new_adapter_config   s,   

z#VeraModel._check_new_adapter_configc                 C  s
   t | |S rC   )r   )vera_configrX   r.   r.   r/   rL      s   
z%VeraModel._check_target_module_existsc              	   K  s   |d u rt d|j}t|do|jd u}	||j|j|jt| jddt| jddd}
|	|
d< t	|t
rG|j|| j| j||j|j|jd d S | j|| j| j||fi |
}|| jvr`|d | |||| d S )NzCurrent Key shouldn't be `None`rr   is_loaded_in_8bitFis_loaded_in_4bit)rg   vera_dropoutfan_in_fan_outinit_weightsloaded_in_8bitloaded_in_4bit)	d_initial)rT   rg   rP   rr   r|   r}   r~   getattrr;   r!   r   update_layerrb   rc   r   _create_new_moduleactive_adapterrequires_grad__replace_module)r:   ry   r=   targettarget_nameparentcurrent_keyoptional_kwargsrg   rr   kwargs
new_moduler.   r.   r/   _create_and_replace   s4   





zVeraModel._create_and_replacec                   s   t | || t|dr|j}t|ds |j|_t|dr |j|_t|dd d ur>t|dr3|j|j_n|j|_||jj t	d |
 D ]\}}d|v rct fdd| D sc||jj qGd S )N
base_layerrr   statemetavera_c                 3  s    | ]}|j  kV  qd S rC   r_   )rE   pr   r.   r/   rH     s    z,VeraModel._replace_module.<locals>.<genexpr>)setattrrP   r   rQ   rr   r   r   tor`   r#   rK   any
parameters)r   
child_namer   childnamerY   r.   r   r/   r      s&   




zVeraModel._replace_modulec                 C  s   |  D ]\}}| j|vrd|_q| jD ]H}| j| j}|dkr!q|dkr6|  D ]\}}d|v r4d|_q)q|dkrU| D ]}t|trSt	|drS|jd urSd|j_q>qt
d| dd S )	NFrn   allrr   T	vera_onlyzRequested bias: z, is not implemented.)named_parametersr4   requires_gradactive_adaptersrV   rr   modulesr!   r   rP   NotImplementedError)r:   r;   nr   r   rr   mr.   r.   r/    _mark_only_adapters_as_trainable  s,   

z*VeraModel._mark_only_adapters_as_trainablec                 K  s  t  rdd l}ddlm} t rddlm} |dd}	|dd}
|dd}t|t	r2|
 }n|}|
rZt||jjrZ| }||jj|jj|jd	 |||||fi |S |rt||jjr| }||j|jj|jjd
 |||||fi |S t|tjjr|d rtd d |d< | _n"t|trd|d< |d std d |d< | _ntd| dt||||f|	| jd|}|S )Nr   r   )Linear8bitLt)
Linear4bitrr   Fr   r   )has_fp16_weights	thresholdindex)compute_dtypecompress_statistics
quant_typer}   zjfan_in_fan_out is set to True but the target module is `torch.nn.Linear`. Setting fan_in_fan_out to False.Tis_target_conv_1d_layerzafan_in_fan_out is set to False but the target module is `Conv1D`. Setting fan_in_fan_out to True.zTarget module z is not supported. Currently, only the following modules are supported: `torch.nn.Linear`, `transformers.pytorch_utils.Conv1D`.)rr   r   )r   bitsandbytesbnbr   r
   r   popgetr!   r   get_base_layerrM   copyupdater   r   r   r   r   rQ   r   r   r#   r   warningswarnr}   r	   rT   r   )ry   rb   rc   r=   r   r   r   r   r   rr   r   r   target_base_layereightbit_kwargsfourbit_kwargsr   r.   r.   r/   r   $  sv   




zVeraModel._create_new_moduler   c                   s8   zt  |W S  ty   |dkr t| j| Y S w )z1Forward missing attributes to the wrapped module.r;   )r8   __getattr__AttributeErrorr   r;   )r:   r   r>   r.   r/   r   k  s   zVeraModel.__getattr__	inferencec                 C  sF   i }| j  D ]\}}dd t| D }|rd|d< q|||< |S )Nc                 S  s&   i | ]\}}|t |tr|jn|qS r.   )r!   r   value)rE   kvr.   r.   r/   
<dictcomp>w  s   & z5VeraModel.get_peft_config_as_dict.<locals>.<dictcomp>Tinference_mode)rV   itemsr   )r:   r   config_dictrX   r   r<   r.   r.   r/   get_peft_config_as_dictt  s   z!VeraModel.get_peft_config_as_dictTc                 C  s,   | j  D ]}t|ttfr|| qd S rC   )r;   r   r!   r   r   enable_adapters)r:   enabledrY   r.   r.   r/   _set_adapter_layers}  s
   
zVeraModel._set_adapter_layersc                 C  s   | j dd d S )NTr   )r   r:   r.   r.   r/   enable_adapter_layers  rm   zVeraModel.enable_adapter_layersc                 C  sF   | j D ]}| j| j}|dkrd| d}t| q| jdd d S )Nrn   z>Careful, disabling adapter layers with bias configured to be 'zL' does not produce the same output as the base model would without adaption.Fr   )r   rV   rr   r   r   r   )r:   r   valr[   r.   r.   r/   disable_adapter_layers  s   


z VeraModel.disable_adapter_layersc                 C  sF   | j  D ]}t|tr|jrtd |  || q|| _	d S )NzJAdapter cannot be set when the model is merged. Unmerging the model first.)
r;   r   r!   r   mergedr   r   unmergeset_adapterr   )r:   r=   rY   r.   r.   r/   r     s   



zVeraModel.set_adapterc                 C  s4   | j d u r|d tvrtdtt|d  | _ | S )N
model_typez0Please specify `target_modules` in `peft_config`)target_modulesr   rT   set)rV   rU   r.   r.   r/   rJ     s   

z!VeraModel._prepare_adapter_configNprogressbar
safe_mergeadapter_namesOptional[list[str]]c              	   C  s   dd | j  D }d|rdnd d }t|| |dD ]?}zt| j |\}}	}
W n	 ty2   Y qw t|	drL|rA|	j||d	 | ||
|	 |	 qt	|	t
r[t||
|	j|	j  q| j S )
Nc                 S     g | ]
\}}d |vr|qS verar.   rE   rX   _r.   r.   r/   
<listcomp>      z:VeraModel._unload_and_optionally_merge.<locals>.<listcomp>z
Unloading zand merging  r;   )disabledescr   )r   r   )r;   rK   r   r   r   rP   merger   r   r!   r   r   modules_to_saver   )r:   r   r   r   r   key_listr   rX   r   r   r   r.   r.   r/   _unload_and_optionally_merge  s    

z&VeraModel._unload_and_optionally_mergec                 C  s   |t | j vrtd| d| j|= dd | j D }d}|D ] }t| j|\}}}t|trC|	| |du rC|j
dd }q#|pGg | _
| j||d dS )z
        Deletes an existing adapter.

        Args:
            adapter_name (str): Name of the adapter to be deleted.
        zAdapter z does not existc                 S  r   r   r.   r   r.   r.   r/   r     r   z,VeraModel.delete_adapter.<locals>.<listcomp>N)new_active_adapters)listrV   keysrT   r;   rK   r   r!   r   delete_adapterr   _delete_auxiliary_adapter)r:   r=   r   new_adapterrX   r   r   r.   r.   r/   r     s   


zVeraModel.delete_adapterc                 C  s   | j |||dS )aH  
        This method merges the Vera layers into the base model. This is needed if someone wants to use the base model
        as a standalone model.

        Args:
            progressbar (`bool`):
                whether to show a progressbar indicating the unload and merge process
            safe_merge (`bool`):
                whether to activate the safe merging check to check if there is any potential Nan in the adapter
                weights
            adapter_names (`list[str]`, *optional*):
                The list of adapter names that should be merged. If None, all active adapters will be merged. Defaults
                to `None`.

        Example:

        ```py
        >>> from transformers import AutoModelForCausalLM
        >>> from peft import PeftModel

        >>> base_model = AutoModelForCausalLM.from_pretrained("tiiuae/falcon-40b")
        >>> peft_model_id = "smangrul/falcon-40B-int4-peft-lora-sfttrainer-sample"
        >>> model = PeftModel.from_pretrained(base_model, peft_model_id)
        >>> merged_model = model.merge_and_unload()
        ```
        )r   r   r   r   )r:   r   r   r   r.   r.   r/   merge_and_unload  s   zVeraModel.merge_and_unloadc                 C  s   | j ddS )z
        Gets back the base model by removing all the Vera modules without merging. This gives back the original base
        model.
        F)r   r   r   r.   r.   r/   unload  s   zVeraModel.unload)F)r5   r6   r   r7   )r   r@   )r<   r   r=   r3   r   r7   )r;   rk   r<   r   r=   r3   r   r7   )r<   r   r   r7   )r;   rk   r   r7   )r   r3   )r   r6   )T)TFFN)r   r6   r   r6   r   r   )r=   r3   )FFN)rs   
__module____qualname____doc__r4   __annotations__r9   r\   rj   rl   rx   staticmethodrL   r   r   r   r   r   r   r   r   r   r   rJ   r   r   r   r   __classcell__r.   r.   r>   r/   r1   H   sB   
 

%

"
*


F	
		

!r1   )r   r   r   r   r   r   ))
__future__r   r%   r   dataclassesr   enumr   typingr   r   r#   torch.nnrM   torch.nn.initr   r   transformers.pytorch_utilsr	   peft.import_utilsr
   r   peft.tuners.tuners_utilsr   r   r   
peft.utilsr   r   r   _buffer_dictr   tuners_utilsr   r<   r   layerr   r   r0   r1   r.   r.   r.   r/   <module>   s(   
