o
    Ni                    @  s  d dl mZ d dlZd dlZd dlmZ d dlmZmZm	Z	 d dl
Z
d dlmZ d dlm  mZ d dl
mZ d dlmZ d dlmZ d dlmZmZmZ d d	lmZmZmZmZ d d
lmZ d dl m!Z! ddl"m#Z#m$Z$ dgZ%G dd dZ&G dd deZ'G dd dej(e'Z)G dd dej(e'Z*G dd dej(e'Z+G dd de+Z,G dd de+Z-G dd de+Z.G dd  d ej(e'Z/G d!d" d"ej(Z0d#d$ Z1G d%d& d&ej(e'Z2	d3d4d1d2Z3dS )5    )annotationsN)contextmanager)AnyOptionalUnion)svd_lowrank)Conv1D)
BufferDict)BaseTunerLayer_get_in_out_featurescheck_adapters_to_merge)dequantize_module_weightgather_params_ctxget_bnb_param_typeskip_init_on_device)	transpose)PeftWarning   )ArrowConfig
LoraConfigalora_offsetsc                   @  sV   e Zd ZdZeddd	ZedddZedddZedddZedddZ	dS )LoraVarianta{  
    Base class for LoRA variants, e.g. DoRA.

    This class should be subclassed and the methods below should be implemented accordingly. The methods should be
    implemented as static methods, this makes it easier to combine variants.

    Note for developers: These methods are prone to change and should thus considered to be "private". Use at your own
    discretion.
    module	LoraLayeradapter_namestrreturnNonec                 C     t )zKInitialization code for the LoRA variant, it's called within `update_layer`NotImplementedError)r   r    r!   J/home/ubuntu/.local/lib/python3.10/site-packages/peft/tuners/lora/layer.pyinit7      zLoraVariant.initactive_adapterorig_weighttorch.Tensorc                 C  r   )zZSafe merging of the weights from `merge(..., safe_merge=True)`, should return a new tensorr   r   r%   r&   r!   r!   r"   
merge_safe<   r$   zLoraVariant.merge_safec                 C     dS )zdUnsafe merging of the weights from `merge(..., safe_merge=False)`, should modify the weight in-placeNr!   r(   r!   r!   r"   merge_unsafeA       zLoraVariant.merge_unsafec                 C  r*   )zFRemove the adapter weights from the original weights, then return themNr!   r(   r!   r!   r"   unmergeE   r,   zLoraVariant.unmergexresultc                 K  r   )a  
        The forward pass of the LoRA variant, should return the overall result (not just the diff)

        Args:
            module (LoraLayer): The module on which the forward pass is called
            active_adapter (str): The name of the active adapter
            x (torch.Tensor): The input to the forward call
            result (torch.Tensor): The result from the base model
            **kwargs: Additional arguments passed from [`LoraLayer.forward`].
        r   )r   r%   r.   r/   kwargsr!   r!   r"   forwardI   s   zLoraVariant.forwardN)r   r   r   r   r   r   )r   r   r%   r   r&   r'   r   r'   )r   r   r%   r   r&   r'   r   r   )
r   r   r%   r   r.   r'   r/   r'   r   r'   )
__name__
__module____qualname____doc__staticmethodr#   r)   r+   r-   r1   r!   r!   r!   r"   r   ,   s    
r   c                   @  s   e Zd ZU dZded< dZded< dGdHddZdIddZ							dJdKddZdd  Z	d!d" Z
d#d$ Zd%d& Zd'd( Ze d)d* ZdLd/d0ZdMd1d2ZdNd6d7ZdOd8d9ZdPdQd;d<Zd=d> ZdRdEdFZdS )Sr   )lora_Alora_Blora_embedding_Alora_embedding_Bztuple[str, ...]adapter_layer_names)r
lora_alphascalinglora_dropoutother_param_namesF
base_layer	nn.Moduleephemeral_gpu_offloadboolr   r   c                 K  s   || _ i | _i | _i | _ti | _ti | _ti | _t	i | _
t	i | _d| _g | _i | _i | _i | _tj | _i | _|| _d| _i | _|| _|  }t|\}}|| _|| _d S )NFT)rA   r<   r=   r>   nn
ModuleDictr?   r7   r8   ParameterDictr9   r:   _disable_adaptersmerged_adaptersuse_dora
use_rslora	lora_biastorchlora_magnitude_vector_cachesrC   cast_input_dtype_enabledlora_variantr0   get_base_layerr   in_featuresout_features)selfrA   rC   r0   rS   rT   r!   r!   r"   __init__d   s0   
zLoraLayer.__init__rJ   Optional[LoraVariant]c                K  r*   )a  Return a matching LoRA variant for this layer type.

        Given the init arguments of this layer, return the correct LoRA variant, if any. E.g., if `use_dora=True`, this
        method should return the DoRA variant for the given layer. If `use_alora=True`, same for aLoRA.

        If there is no fitting variant, return None.

        Note: If this layer type does not support the LoRA variant at all, please raise an error during __init__ as is
        convention, and not here.

        Nr!   )rU   rJ   r0   r!   r!   r"   resolve_lora_variant   s   zLoraLayer.resolve_lora_variantN    	use_alora
use_qalorarL   arrow_configr   qalora_group_sizeintinference_modec                 K  sv  t   }|d= |dkrtd| |
r.t|  dd d u r.tdt|  j dt	 | j
|||	||d}|d urA|| j|< || j|< || j|< |dkrVtj|d	}nt }| jt||i tj| j|d
d| j|< tj|| j|
d| j|< |
| j|< |r|t| | j|< n|| | j|< || j|< || j|< t|t r|!drt"|  j# | $|| W d    n1 sw   Y  nt|t r|!drt"|  j# | %|| W d    n1 sw   Y  nt|t r|& dkrt"|  j# | '| W d    n	1 sw   Y  n_|dkr>t"|  j# | (| W d    n	1 s8w   Y  n<|dkrNtj)*| j| j# n,|dkrqt"|  j# | +| W d    n	1 skw   Y  n	|rz| ,|| | -| || jv r| j| j)| fi | | j.| j/|d t0| dr| jD ]}|| j1v r| j1| 2| j| j qd S d S )NrU   r   ?`r` should be a positive integer value but the value passed is bias;`lora_bias=True` was passed but the targeted layer of type E has no bias. This means that merging LoRA weights won't be possible.)rJ   rZ   r[   r]   r\           pFra   pissacordaoloraloftqeva
orthogonalr_   
lora_arrow)3localscopy
ValueErrorgetattrrR   warningswarntyper2   r   rX   rQ   r<   r=   rE   DropoutIdentityr?   updaterF   LinearrS   r7   rT   r8   rL   mathsqrtr>   rK   rJ   
isinstancer   
startswithr   weight
pissa_init
corda_initlower
olora_init
loftq_initr#   zeros_orthogonal_initreset_lora_parameters%_move_adapter_to_device_of_base_layerset_adapteractive_adaptershasattrro   on_adapter_change)rU   r   r<   r=   r?   init_lora_weightsrK   rJ   rZ   r[   rL   r\   r]   r_   r0   rQ   lora_dropout_layeradapterr!   r!   r"   update_layer   s   











zLoraLayer.update_layerc                 C  s  |du rd S || j  v rY|du r!tjj| j | jtdd n| dkr9tjj	| j | jd| j
|  d ntd|tj| j| j | j| rYtj| j| j || j v rtj| j|  tj	| j|  | j| rtj| j| j d S d S d S )	NFT   )agaussianr   )stdz)Unknown initialization init_lora_weights=)r7   keysrE   r#   kaiming_uniform_r   r{   r|   r   normal_r<   rr   r   r8   rL   ra   r9   r:   )rU   r   r   r!   r!   r"   r      s$    $

zLoraLayer.reset_lora_parametersc                 C  sl  |   }|j}t|}|j}|rt|}n|tjtjtjfv r"|}nt	d| d| j
| }| j| }|tj}tj|j\}	}
|	d d d |f |
d | }}| | j| j_| | j| j_| j|| j| j | j| j 8  _|dkr|j||j|j|j|jd|j}||_d S |dkr|j||j|jd|j}||_d S ||}||j_d S )Nz.Unsupported data type for the base layer. Got .4bit)
quant_typequant_storagecompress_statisticsr   8bit)requires_gradhas_fp16_weights)rR   r   r   dtyper   rM   float32float16bfloat16	TypeErrorr>   r<   tolinalgqrdata
contiguousr7   r8   	__class__r   r   r   r   devicer   r   )rU   r   rA   r&   bnb_param_typer   weight_tensorscale_factorr<   QRQrRrr!   r!   r"   r     sN   


"&


zLoraLayer.olora_initc                 C  s  |   j}|j}|tjtjtjfvrtdt|	tj| j
}|dkrWtjj|jdd\}}}|d d d | j| f }|d | j|  }	|	| j|  }	|d | j|  }
n2t|ddkrt|j| j| t|dd d\}}	}|	| j|  }	| }
ntd	| d
tt|	|
 }|tt|	 }|| j| j_|| j| j_|j| j| | |  }t|	|| j
}||   j_d S )NzPlease initialize PiSSA under float32, float16, or bfloat16. Subsequently, re-quantize the residual model to help minimize quantization errors.rh   F)full_matrices_niter_   )niterzLinit_lora_weights should be 'pissa' or 'pissa_niter_[number of iters]', got 	 instead.)rR   r   r   rM   r   r   r   r   r   r   fan_in_fan_outr   svdr   r<   r>   lensplitr   r^   trr   diagr|   r7   r8   )rU   r   r   r   r   VSUhVrSrUhrUrr7   r8   r!   r!   r"   r   >  s:   


zLoraLayer.pissa_initc                 C  s$  |   }|j}|j}|tjtjtjfvrtd|tj}|j	
d}|j	
d}t|ds3td|j}|j}	|j}
|j}| j| }t|
 sRt|
 rVtdt|	 sdt|	 rhtdt| svt| rztd|	
d|ks|	
d|krtd	|	
  d
| d| d|

d|krtd|

  d
| d|
d|ks|
d|krtd|
  d
| d| d|
| j|  }
| |
 dd }|	|
  }|| j| j_	|| j| j_	|j	| j| | |  }||}||   j_	|`d S )NzPlease initialize CorDA under float32, float16, or bfloat16. Subsequently, re-quantize the residual model to help minimize quantization errors.r   r   eigensz`eigens` attribute not found for layer, please run `preprocess_corda` first. More information can be found at examples/corda_finetuning/README.md.zdInvalid value found in matrix S. Please file an issue at https://github.com/huggingface/peft/issues.zdInvalid value found in matrix U. Please file an issue at https://github.com/huggingface/peft/issues.zdInvalid value found in matrix V. Please file an issue at https://github.com/huggingface/peft/issues.zMatrix U size mismatch: z vs. (z, z). Please make sure the `lora_config` and `model` argument of `preprocess_corda` is consistent with `get_peft_model`. If you're using cache in `preprocess_corda`, please make sure the cache is built with the same model and LoRA rank.zMatrix S size mismatch: z,). Please make sure the `lora_config` and `model` argument of `preprocess_corda` is consistent with `get_peft_model`. If you're using cache in `preprocess_corda`, please make sure the cache is built with the same model and LoRA rank.zMatrix V size mismatch: r   )rR   r   r   rM   r   r   r   r   r   r   sizer   rr   r   U_WCS_WCV_WCr<   isnananyisinfr>   r   mulr|   viewr   r7   r8   )rU   r   r   linearr   r   out_dimin_dimr   Ur   r   r<   r7   r8   r!   r!   r"   r   a  sh   


zLoraLayer.corda_initc                 C  s   ddl m} |  j}| jdd| j| | jddd}||fi |\}}}|| j v r>|| j| j_	|| j
| j_	|| j v rS|| j| j_	|| j| j_	||  j_	d S )Nr   )r   
loftq_bits   
loftq_iterr   )num_bitsreduced_ranknum_iter)peft.utils.loftq_utilsr   rR   r   r0   getr<   r7   r   r   r8   r9   r:   )rU   r   r   r   r0   qweightr7   r8   r!   r!   r"   r     s   
zLoraLayer.loftq_initc                 C  s   | j | }|d dkrtd| dt||}tj|\}}|dd dd d f }|dd dd d f }|  jj}t| j	|d 
|jd }	t|d | jj
|d }
t|	 || j| _t|
 || j| _d S )Nr   r   zAOrthogonal initialization requires the LoRA rank to be even, got r   r   g      $@)r<   rr   rM   randnr   r   rR   r   r   rS   mmTrT   rE   	Parameterr   r   r7   r8   )rU   r   rankXr   _q_oddq_evenr   r7   r8   r!   r!   r"   r     s   
 zLoraLayer.orthogonal_initkeyr   valuer   c                 C  s   || j |< d S N)rO   rU   r   r   r!   r!   r"   _cache_store  s   zLoraLayer._cache_storec                 C  s   | j |}|S r   )rO   popr   r!   r!   r"   
_cache_pop  s   zLoraLayer._cache_popr   scalefloat | intc                 C  sf   || j vrdS | j|dr"|| j|  t| j|  | j |< dS || j|  | j|  | j |< dS )zSet the scale of the given adapter to the initial scale multiplied by the provided factor

        The initial scale is determined by the configured `r` (rank) and `lora_alpha`.
        NF)r>   rK   r   r=   r{   r|   r<   )rU   r   r   r!   r!   r"   	set_scale  s
   
("zLoraLayer.set_scalec                 C  s>   |dkrdS | j D ]}|| j vrq	| j|  |9  < q	dS )zHMultiply the current scale of all active adapters by the provided factorr   N)r   r7   r   r>   rU   r   r%   r!   r!   r"   scale_layer  s   
zLoraLayer.scale_layerOptional[float | int]c                 C  s   | j D ]>}|| j vrq|du r7| j|dr)| j| t| j|  | j	|< q| j| | j|  | j	|< q| j	| | | j	|< qdS )zDivide the current scale of all active adapters by the provided factor. If `scale=None` is passed, reset to
        initial scale

        The initial scale is determined by the configured `r` (rank) and `lora_alpha`.

        NF)
r   r7   r   rK   r   r=   r{   r|   r<   r>   r   r!   r!   r"   unscale_layer  s   
"zLoraLayer.unscale_layerc                 O  s   | dd}|du rdS t|t|kr%dt| dt| d}t|| jr.d}t|dd |D }|D ]}| j |d	rFd
}t|q7dS )MCheck if the arguments are compatible with the configs and state of the modeladapter_namesNzNLength of `adapter_names` should be the same as the number of inputs, but got z and z respectively.z`Cannot pass `adapter_names` when there are merged adapters, please call `unmerge_adapter` first.c                 S  s   h | ]}|d kr|qS )__base__r!   ).0namer!   r!   r"   	<setcomp>  s    z0LoraLayer._check_forward_args.<locals>.<setcomp>Fz1Cannot pass `adapter_names` when DoRA is enabled.)r   r   rr   mergedrJ   )rU   r.   argsr0   r   msgunique_adaptersr   r!   r!   r"   _check_forward_args  s*   zLoraLayer._check_forward_argsr.   r'   r   r   	list[str]r0   c                  sv  fddt D }| j|g|R i }|j}t|}g }	|D ] |	 fddt|D  q |dd t|D ]}\}
}|dkrDq;|| j vrLq;| j| }| j	| }| j
| }| j| }||	|
  |jj}|| jvr||||| }||	|
   ||7  < q;d urfdd|	|
 D |d< | j| j| f||||	|
  d|}||||	|
 < q;|S )	Nc                      i | ]	}|  |d qS r   r   r   kr0   r!   r"   
<dictcomp>#      z2LoraLayer._mixed_batch_forward.<locals>.<dictcomp>c                      g | ]
\}}| kr|qS r!   r!   r   indexitemr   r!   r"   
<listcomp>*      z2LoraLayer._mixed_batch_forward.<locals>.<listcomp>r   r   c                   s   g | ]} | qS r!   r!   )r   j)r   r!   r"   r  ?  s    r%   r.   r/   )VARIANT_KWARG_KEYSrA   r   setappend	enumerater   r7   r   r8   r?   r>   r   r   rQ   r1   )rU   r.   r   r   r0   variant_kwargsr/   torch_result_dtyper   sub_batch_indices_listir%   r7   r8   dropoutr>   	sub_batchlora_outputr!   )r   r   r0   r"   _mixed_batch_forward  sH   






zLoraLayer._mixed_batch_forward)F)rA   rB   rC   rD   r   r   rJ   rD   r   rW   )FFFFNrY   F)rJ   rD   rZ   rD   r[   rD   rL   rD   r\   r   r]   r^   r_   rD   )r   r   r   r   r   r   )r   r   r   r   )r   r   r   r   r   r   )r   r   r   r   r   )r   r   r   r   
r.   r'   r   r   r   r   r0   r   r   r'   )r2   r3   r4   r;   __annotations__r@   rV   rX   r   r   r   r   r   r   rM   no_gradr   r   r   r   r   r   r   r  r!   r!   r!   r"   r   ^   s6   
 
h-#J




r   c                      sx   e Zd Z											d2d3 fddZd4ddZd5d6d#d$Zd7d%d&Zd8d(d)Zd9d.d/Zd: fd0d1Z	  Z
S );rz   r   r   rd   FTNr   r   r<   r^   r=   r?   floatr   rD   is_target_conv_1d_layerr   Union[bool, str]rK   rJ   rZ   r\   r   rL   r   r   c                   sR   t    tj| |fi | || _|| _| j||||||	|
|||d
 || _d S )N)r=   r?   r   rK   rJ   rZ   rL   r\   )superrV   r   r   _active_adapterr   r!  )rU   rA   r   r<   r=   r?   r   r!  r   rK   rJ   rZ   r\   rL   r0   r   r!   r"   rV   Y  s"   

zLinear.__init__rW   c                K  sF   |d urddl m} | S |s|sd S ddl m}m} |r | S | S )Nr   )ArrowLinearVariant)ALoraLinearVariantDoraLinearVariant)variantsr&  r'  r(  )rU   r\   rJ   rZ   r0   r&  r'  r(  r!   r!   r"   rX   }  s   zLinear.resolve_lora_variant
safe_merger   Optional[list[str]]c           	      C  s  t | |}|s	dS |D ]}|| j v r|  }|r|jj }|j}|| jvr5| 	|}||
|7 }n
| j| | ||}t| sNtd| d||j_| j| rt|dddu rctd|j| j| j| j|   }t| std| d|
||j_n@|| jvr| 	|}|j j|7  _n| j| | ||j | j| rt|dddu rtd|j j| j| j| j|  7  _| j| qdS )^  
        Merge the active adapter weights into the base weights

        Args:
            safe_merge (`bool`, *optional*):
                If True, the merge operation will be performed in a copy of the original weights and check for NaNs
                before merging the weights. This is useful if you want to check if the merge operation will produce
                NaNs. Defaults to `False`.
            adapter_names (`list[str]`, *optional*):
                The list of adapter names that should be merged. If None, all active adapters will be merged. Defaults
                to `None`.
        N1NaNs detected in the merged weights. The adapter  seems to be brokenra   RImpossible to merge LoRA with `lora_bias=True` because the base layer has no bias.)r   r7   r   rR   r   r   cloner   rQ   get_delta_weightr   r)   rM   isfiniteallrr   rL   rs   RuntimeErrorra   r8   r>   r+   rI   r  )	rU   r*  r   r%   rA   r&   
orig_dtypedelta_weightnew_biasr!   r!   r"   merge  sV   








"zLinear.mergec                 C     | j s
td dS t| jdkrh| j }|| j v r_|  j	}|| j
vr:|j}| |}| j||8  _n| j
| | ||}||_| j| r_|  j j| j| j| j|  8  _t| jdksdS dS W
        This method unmerges all merged adapter layers from the base weights.
         Already unmerged. Nothing to do.Nr   r   rt   ru   r   rI   r   r7   r   rR   r   rQ   r   r1  r   r   r-   rL   ra   r8   r>   rU   r%   r   r5  r6  unmergedr!   r!   r"   r-     s    





&zLinear.unmerger'   c                 C  s   | j | jj}| j | jj}|jdko|tjkp|tjk}| j| j}| j | j}|r3|	 }|	 }t
|| | j| j|  }|r\|j|d}||| j| j_||| j | j_|S 
        Compute the delta weight for the given adapter.

        Args:
            adapter (str):
                The name of the adapter for which the delta weight should be computed.
        cpur   )r8   r   r   r   rv   rM   r   r   r7   r   r   r   r>   r   r   rU   r   r   r   cast_to_fp32weight_Aweight_Boutput_tensorr!   r!   r"   r1    s   zLinear.get_delta_weightr.   r   r   r0   c                   s  | j |g|R i    dd } fddtD }| jr3| jr%|   | j|g|R i  }|S |d urI| j|g|R d|i| }|S | jrZ| j|g|R i  }|S | j|g|R i  }|j}| j	
 }| jD ]G}	|	|vrxqq| j	|	 }
| j|	 }| j|	 }| j|	 }| ||
jj}|	| jvr|||
|||  }qq| j|	 j| f|	||d| }qq||}|S )Nr   c                   r   r   r  r  r  r!   r"   r    r  z"Linear.forward.<locals>.<dictcomp>r  )r   r   r  disable_adaptersr   r-   rA   r  r   r7   r   r   r8   r?   r>   _cast_input_dtyper   rQ   r1   r   )rU   r.   r   r0   r   r  r/   r  lora_A_keysr%   r7   r8   r  r>   r!   r  r"   r1     sP   ! 








	zLinear.forwardc                      t   }d| S Nlora.r#  __repr__rU   repr%  r!   r"   rP  6     
zLinear.__repr__)r   r   rd   FFTFFFNF)r   r   r<   r^   r=   r^   r?   r   r   rD   r!  rD   r   r"  rK   rD   rJ   rD   rZ   rD   r\   r   rL   rD   r   r   )r\   r   rJ   rD   rZ   rD   r   rW   FNr*  rD   r   r+  r   r   r   r   r   r'   r.   r'   r   r   r0   r   r   r'   r   r   )r2   r3   r4   rV   rX   r8  r-   r1  r1   rP  __classcell__r!   r!   r%  r"   rz   W  s&    
$
C

"+rz   c                      s   e Zd Z									d<d= fddZd>ddZ		d?d@d!d"ZdAdBd&d'ZdCd(d)ZdDd+d,ZdEd2d3Z	dFd6d7Z
dGd8d9ZdH fd:d;Z  ZS )I	Embeddingr   r   rd   FTNrA   rB   r   r   r<   r^   r=   r?   r   r   rD   r   r"  rK   rJ   r\   r   rL   r   r   c                   s`   |rt d| d| jj dt   t| | || _|| _| j|||||||	||
d	 d S )Nz
lora_bias=z is not supported for r   r=   r?   r   rK   rJ   rL   r\   )	rr   r   r2   r#  rV   r   r   r$  r   )rU   rA   r   r<   r=   r?   r   r   rK   rJ   r\   rL   r0   r%  r!   r"   rV   =  s"   

zEmbedding.__init__rW   c                K     |sd S ddl m} | S )Nr   )DoraEmbeddingVariant)r)  r^  )rU   rJ   r0   r^  r!   r!   r"   rX   a     zEmbedding.resolve_lora_variantr_   c                 K  sl  t   }|d= |dkrtd| | j||	d}|d ur#|| j|< || j|< || j|< |dkr8tj|d}nt	 }|| j
|< t|| jf}t| j|f}t|| j|< t|| j|< || j|< |rs|t| | j|< n|| | j|< || j|< || j|< |dkr| | n|r| || | | || jv r| j| j| fi | | j| j|
d d S )	NrU   r   r`   rJ   r\   rd   re   rk   rn   )rp   rq   rr   rX   rQ   r<   r=   rE   rw   rx   r?   rM   r   rS   rT   r   r9   r:   rL   r{   r|   r>   rK   rJ   r   r   r   r#   r   r   )rU   r   r<   r=   r?   r   rK   rJ   rL   r\   r_   r0   rQ   r   rF  rG  r!   r!   r"   r   i  s>   









zEmbedding.update_layerr*  r   r+  c                 C  s   t | |}|s	dS |D ]l}|| j v rw|  }|jj}|rR|jj }|| jvr4|| 	|
|7 }n
| j| | ||}t| sMtd| d||j_n|| jvrf|j j| 	|
|7  _n| j| | ||j | j| qdS )r,  Nr-  r.  )r   r9   r   rR   r   r   r   r0  rQ   r1  r   r)   rM   r2  r3  rr   r+   rI   r  )rU   r*  r   r%   rA   r5  r&   r!   r!   r"   r8    s.   




zEmbedding.mergec                 C  s   | j s
td dS t| jdkrQ| j }|  jj}|| j	
 v rH|  j}|| jvr;| j| ||8  _n| j| | ||}||_t| jdksdS dS r:  )r   rt   ru   r   rI   r   rR   r   r   r9   r   rQ   r   r1  r   r-   )rU   r%   r5  r   r?  r!   r!   r"   r-     s   



zEmbedding.unmerger'   c                 C  s   | j | j}| j| j}|jdko|tjkp|tjk}| j| }| j | }|r/| }| }t	|| d| j
|  }|rS|j|d}||| j|< ||| j |< |S )rA  rB  TrC  )r:   r   r9   r   rv   rM   r   r   r   r   r>   r   rD  r!   r!   r"   r1    s   

zEmbedding.get_delta_weightr.   r   r   r   r0   c                  s   | j |g|R i |}|  }t|}g }|D ] | fddt|D  qt|D ]J\}	}
|
dkr6q-|
| j vr>q-| j|
 j}| j|
 j}| j	|
 }|||	  }| 
||}|| | }|d urm|||j }|||	   |7  < q-|S )Nc                   r  r!   r!   r  r  r!   r"   r    r  z2Embedding._mixed_batch_forward.<locals>.<listcomp>r   )rA   _get_embed_scaler  r  r  r9   r   r   r:   r>   _embedr   r   )rU   r.   r   r   r0   r/   embed_scaler   r  r  r%   embedding_Aembedding_Br>   r  after_Aadapter_outputr!   r  r"   r    s*   
zEmbedding._mixed_batch_forwardinputr   c              	   C  s*   |   }tj|||j|j|j|j|jdS )N)padding_idxmax_norm	norm_typescale_grad_by_freqsparse)rR   F	embeddingri  rj  rk  rl  rm  )rU   rh  r   rA   r!   r!   r"   rb  /  s   zEmbedding._embedc                   s  | j |g|R i    dd } fddtD }| jr3| jr%|   | j|g|R i  }|S |d urG| j|g|R d|i }|S | jrX| j|g|R i  }|S | j|g|R i  }|j}| 	 }| j
D ]M}	|	| jvrvqn|	| jvr| j|	 j}
| j|	 j}| j|	 }| ||
}|| | }|d ur|||j }|| }qn| j|	 j| f|	||d| }qn||}|S )Nr   c                   r   r   r  r  r  r!   r"   r  ?  r  z%Embedding.forward.<locals>.<dictcomp>r  )r   r   r  rI  r   r-   rA   r  r   ra  r   r9   rQ   r   r:   r>   rb  r   r1   )rU   r.   r   r0   r   r  r/   r  rc  r%   rd  re  r>   rf  rg  r!   r  r"   r1   ;  sT   (&$






zEmbedding.forwardc                   rL  rM  rO  rQ  r%  r!   r"   rP  m  rS  zEmbedding.__repr__)	r   r   rd   FTFFNF)rA   rB   r   r   r<   r^   r=   r^   r?   r   r   rD   r   r"  rK   rD   rJ   rD   r\   r   rL   rD   r   r   r  NFr\   r   r_   rD   rT  rU  rV  rW  r  )rh  r'   r   r'   r   r'   rX  rY  )r2   r3   r4   rV   rX   r   r8  r-   r1  r  rb  r1   rP  rZ  r!   r!   r%  r"   r[  ;  s,    
$>
,

"
(
2r[  c                      s   e Zd Z								d0d1 fddZ		d2d3ddZdd  Zd4d5d$d%Zd6d&d'Zd7d)d*Zd8d,d-Z	d9 fd.d/Z
  ZS ):_ConvNdr   r   rd   TFNrA   rB   r   r   r<   r^   r=   r?   r   r   r"  rK   rD   rJ   r\   r   rL   r   r   c                   s   t    t| | |ddrtd|jdkrtd ||j dkr7td|jj	 d|j d	| d
|| _
|j | _| j||||||||
|	d	 d S )NrZ   Fz,aLoRA does not support adapting conv layers.r   zMLoRA adapter added to ConvNd layer with groups > 1. Merging is not supported.r   zTargeting a z with groups=z
 and rank z. Currently, support is limited to conv layers where the rank is divisible by groups. Either choose a different rank or do not target this specific layer.r\  )r#  rV   r   r   rr   groupsrt   ru   r   r2   r$  r   dim_kernel_dimr   )rU   rA   r   r<   r=   r?   r   rK   rJ   r\   rL   r0   r%  r!   r"   rV   t  s.   



z_ConvNd.__init__r_   c                 K  s  t   }|d= |dkrtd| |r.t|  dd d u r.tdt|  j dt	 | j
||	d}|d ur>|| j|< || j|< || j|< |dkrStj|d	}nt }|| j|< |  }|j}|j}|j}t|}d
| jd   }}|| j||||dd| j|< ||| j|||j|d| j|< || j|< |r|t| | j|< n|| | j|< || j|< || j |< |dkr| !| n|r| "|| | #| || jv r| j| j$| fi | | j%| j&|
d d S )NrU   r   r`   ra   rb   rc   r`  rd   re   r   r   Frg   )rs  ra   rk   rn   )'rp   rq   rr   rs   rR   rt   ru   rv   r2   r   rX   rQ   r<   r=   rE   rw   rx   r?   kernel_sizestridepaddingru  rS   r7   rT   rs  r8   rL   r{   r|   r>   rK   rJ   r   r   r   r#   r   r   )rU   r   r<   r=   r?   r   rK   rJ   rL   r\   r_   r0   rQ   r   rA   rw  rx  ry  
conv_layer
out_kernel
out_strider!   r!   r"   r     sT   









z_ConvNd.update_layerc                 C  s   dd| j d   S )N)r   rv  r   )ru  rU   r!   r!   r"   _get_dora_factor_view  s   z_ConvNd._get_dora_factor_viewr*  r   r+  c           	      C  s  t | |}|s	dS |D ]}|| j v r|  }|jj}|jdkr%td|r|jj	 }|| j
vr?| |}|||7 }n
| j
| | ||}t| sXtd| d||j_| j| rt|dddu rmtd|j| j| j| j|   }t| std| d|||j_nC|| j
vr| |}|j j||7  _n| j
| | ||j | j| rt|dddu rtd|j j| j| j| j|  7  _| j| qdS )a`  
        Merge the active adapter weights inside the base weights

        Args:
            safe_merge (`bool`, *optional*):
                If True, the merge operation will be performed in a copy of the original weights and check for NaNs
                before merging the weights. This is useful if you want to check if the merge operation will produce
                NaNs. Defaults to `False`.
            adapter_names (`list[str]`, *optional*):
                The list of adapter names that should be merged. If None, all active adapters will be merged. Defaults
                to `None`.
        Nr   z<Merging is not supported for _ConvNd layers with groups > 1!r-  r.  ra   r/  )r   r7   r   rR   r   r   rs  r    r   r0  rQ   r1  r   r)   rM   r2  r3  rr   rL   rs   r4  ra   r8   r>   r+   rI   r  )	rU   r*  r   r%   rA   r5  r&   r6  r7  r!   r!   r"   r8    sZ   









"z_ConvNd.mergec                 C  r9  r:  r=  r>  r!   r!   r"   r-   5  s    





&z_ConvNd.unmerger'   c                 C  s<  | j | jj}| j| jj}|jdko|tjkp|tjk}| j| j}| j | j}|r3|	 }|	 }| 
 j dd dkr\|dd|dd dd| j|  }n$| |dd|}| 
 jdkru|| j|  }n|dd| j|  }|r|j|d}||| j| j_||| j | j_|S )	rA  rB  r   r   )r   r      r   r   rC  )r8   r   r   r7   r   rv   rM   r   r   r   rR   r   squeeze	unsqueezer>   conv_fnr   rs  r   r   rD  r!   r!   r"   r1  K  s.   &z_ConvNd.get_delta_weightr.   c                   sx  | j |g|R i    dd } fddtD }| jr3| jr%|   | j|g|R i  }|S |d urG| j|g|R d|i }|S | jrX| j|g|R i  }|S | j|g|R i  }|j}| j	D ]J}|| j
 vrtqj| j
| }	| j| }
| j| }| j| }| ||	jj}|| jvr||
|	|||  }qj| j| j| f|||d| }qj||}|S )Nr   c                   r   r   r  r  r  r!   r"   r  |  r  z#_ConvNd.forward.<locals>.<dictcomp>r  )r   r   r  rI  r   r-   rA   r  r   r   r7   r   r8   r?   r>   rJ  r   rQ   r1   r   )rU   r.   r   r0   r   r  r/   r  r%   r7   r8   r  r>   r!   r  r"   r1   y  sN    







	z_ConvNd.forwardc                   rL  rM  rO  rQ  r%  r!   r"   rP    rS  z_ConvNd.__repr__)r   r   rd   TFFNF)rA   rB   r   r   r<   r^   r=   r^   r?   r   r   r"  rK   rD   rJ   rD   r\   r   rL   rD   r   r   rp  rq  rT  rU  rV  rW  )r.   r'   r   r'   rY  )r2   r3   r4   rV   r   r~  r8  r-   r1  r1   rP  rZ  r!   r!   r%  r"   rr  r  s&    5K
H

.)rr  c                      &   e Zd Z fddZd	ddZ  ZS )
Conv2dc                   8   t  j|i | | jdkstd| j tj| _d S )Nr   z0Conv2d layer kernel must have 4 dimensions, not )r#  rV   ru  rr   rn  conv2dr  rU   r   r0   r%  r!   r"   rV        
zConv2d.__init__rJ   rD   r   rW   c                K  r]  )Nr   )DoraConv2dVariant)r)  r  )rU   rJ   r0   r  r!   r!   r"   rX     r_  zConv2d.resolve_lora_variantr  r2   r3   r4   rV   rX   rZ  r!   r!   r%  r"   r        r  c                      r  )
Conv1dc                   r  )Nr  z0Conv1d layer kernel must have 3 dimensions, not )r#  rV   ru  rr   rn  conv1dr  r  r%  r!   r"   rV     r  zConv1d.__init__rJ   rD   r   rW   c                K  r]  )Nr   )DoraConv1dVariant)r)  r  )rU   rJ   r0   r  r!   r!   r"   rX     r_  zConv1d.resolve_lora_variantr  r  r!   r!   r%  r"   r    r  r  c                      r  )
Conv3dc                   r  )Nr   z0Conv3d layer kernel must have 5 dimensions, not )r#  rV   ru  rr   rn  conv3dr  r  r%  r!   r"   rV     r  zConv3d.__init__rJ   rD   r   rW   c                K  r]  )Nr   )DoraConv3dVariant)r)  r  )rU   rJ   r0   r  r!   r!   r"   rX     r_  zConv3d.resolve_lora_variantr  r  r!   r!   r%  r"   r    r  r  c                      s~  e Zd ZdZ						d[d\ fddZed]ddZed^ddZed^ddZed_ddZ	ed]d d!Z
ed`d"d#Zed_d$d%Zed]d&d'Zedad)d*Zedad+d,Zedbd.d/Zedcd1d2Zedcd3d4Zddd6d7Zed_d8d9Zde fd:d;Zdfdgd@dAZdedBdCZdhdFdGZdidIdJZ fdKdLZdjdQdRZedSdT Z fdUdVZ fdWdXZdk fdYdZZ   Z!S )lMultiheadAttentiona  LoRA implemented in a multihead attention layer

    This is currently only implemented for the case of `_qkv_same_embed_dim = True`, i.e. query, key, and value having
    the same dimension.

    Note: LoRA is applied to both the in_proj (query/key/value) and out_proj. There is currently no way to specify only
    one of them. Don't try to apply LoRA to the out_proj of MultiheadAttention by targeting that layer specifically,
    since the forward method of that layer is not being used, hence the LoRA adapter would be ignored.

    This is a little bit hacky because of the way that MultiheadAttention is implemented in PyTorch: There are no
    `nn.Linear` layers which we can hook onto or, in case of output projection, `.forward` is not used. This
    implementation works around these problems by merging the weights before the forward call and unmerging them after
    the forward call.
    r   r   rd   TFr   r   r<   r^   r=   r?   r   r   r"  rK   rD   rJ   r   r   c	           
   	     s   t |ddstd| jj d|rt| jj d|	ddr*t| jj dt   tj| |fi |	 t|j	t
jrTt|j	|f||||||d	|	| j_	n
td
| jj d|| _| |||||| d S )N_qkv_same_embed_dimTz?Only same embed for query/key/value is supported as of now for r   z: does not support DoRA (yet), please set use_dora to FalserZ   Fz< does not support aLoRA (yet), please set use_alora to False)r<   r=   r?   r   rK   rJ   z.out_proj must be an instance of nn.Linear for )rs   rr   r   r2   r   r#  rV   r   r}   out_projrE   rz   rA   r$  r   )
rU   rA   r   r<   r=   r?   r   rK   rJ   r0   r%  r!   r"   rV     s6   
	zMultiheadAttention.__init__c                 C  
   |   jS r   )rR   	embed_dimr}  r!   r!   r"   r       
zMultiheadAttention.embed_dimOptional[int]c                 C  r  r   )rR   kdimr}  r!   r!   r"   r    r  zMultiheadAttention.kdimc                 C  r  r   )rR   vdimr}  r!   r!   r"   r     r  zMultiheadAttention.vdimc                 C  r  r   )rR   r  r}  r!   r!   r"   r  $  r  z&MultiheadAttention._qkv_same_embed_dimc                 C  r  r   )rR   	num_headsr}  r!   r!   r"   r  (  r  zMultiheadAttention.num_headsc                 C  r  r   )rR   r  r}  r!   r!   r"   r  ,  r  zMultiheadAttention.dropoutc                 C  r  r   )rR   batch_firstr}  r!   r!   r"   r  0  r  zMultiheadAttention.batch_firstc                 C  r  r   )rR   head_dimr}  r!   r!   r"   r  4  r  zMultiheadAttention.head_dimnn.Parameterc                 C  r  r   )rR   in_proj_weightr}  r!   r!   r"   r  8  r  z!MultiheadAttention.in_proj_weightc                 C  r  r   )rR   in_proj_biasr}  r!   r!   r"   r  <  r  zMultiheadAttention.in_proj_biasrB   c                 C  s   |   j  S r   )rR   r  r}  r!   r!   r"   r  @  s   zMultiheadAttention.out_projOptional[nn.Parameter]c                 C  r  r   )rR   bias_kr}  r!   r!   r"   r  D  r  zMultiheadAttention.bias_kc                 C  r  r   )rR   bias_vr}  r!   r!   r"   r  H  r  zMultiheadAttention.bias_v,tuple[Optional[torch.Tensor], Optional[int]]c                 O  s   |   j|i |S r   )rR   merge_masksr  r!   r!   r"   r  L  s   zMultiheadAttention.merge_masksc                 C  r  r   )rR   add_zero_attnr}  r!   r!   r"   r  O  r  z MultiheadAttention.add_zero_attnc                   s*   t  j|i | | jjj|i | d S r   )r#  r   rA   r  r  r%  r!   r"   r   S  s   zMultiheadAttention.update_layerNr*  r   r+  c           
      C  s~  t | |}|s	dS |D ]}|| j v r|  }|jjj}|r||jj	 
 }|| ||7 }t| s@td| d|jjj	 
 }||j||7 }t| sctd| d|`||_|j `||j _|jj|gd n:| ||}|jj	 | }	|`|	|_|j||}|jjj	 | }	|j `|	|j _|jj|gd | j| qdS )a^  
        Merge the active adapter weights into the base weights

        Args:
            safe_merge (`bool`, *optional*):
                If True, the merge operation will be performed in a copy of the original weights and check for NaNs
                before merging the weights. This is useful if you want to check if the merge operation will produce
                NaNs. Defaults to `False`.
            adapter_names (`List[str]`, *optional*):
                The list of adapter names that should be merged. If None, all active adapters will be merged. Defaults
                to `None`.
        Nr-  r.  r   )r   r7   r   rR   r  r   r   r  r   detachr0  r1  r   rM   r2  r3  rr   r8  rI   r  )
rU   r*  r   r%   rA   r5  orig_weight_inorig_weight_outr6  weight_mergedr!   r!   r"   r8  X  sJ   





zMultiheadAttention.mergec                 C  s   | j s
td dS |  }|jjjj}t| j	dkrk| j	
 }|| j v rd| ||}|jj| }|`|dtj|dd |j||}|jjjj| }|jj`|jjdtj|dd t| j	dks|  j  dS )r;  r<  Nr   r  Fr   r   )r   rt   ru   rR   r  rA   r   r   r   rI   r   r7   r   r1  r   r  r   register_parameterrE   r   r-   )rU   rA   r5  r%   r6  
old_weightr!   r!   r"   r-     s(   

zMultiheadAttention.unmerger8  nn.MultiheadAttentionc                 C  sv   |r	| j ||d |  }|j}|`|dtj|j|jd |j }|j	}|`	|dtj|j|jd ||_|S )z
        Merging and unloading of the MultiheadAttention module

        This requires an extra step for MultiheadAttention, which is why there is this special method instead of
        relying on the normal merge_and_unload code path.
        r*  r   r  r  r   )
r8  rR   r  r  rE   r   r   r   r  r   )rU   r8  r*  r   rA   r   out_proj_layerr!   r!   r"   "unload_and_optionally_merge_module  s   	
z5MultiheadAttention.unload_and_optionally_merge_moduler'   c                 C  s   | j | jj}| j | jj}|jdko|tjk}| j| j}| j | j}|r.| }| }|| | j	|  }|rS|j
|d}|
|| j| j_|
|| j | j_|S r@  )r8   r   r   r   rv   rM   r   r7   r   r>   r   r   rD  r!   r!   r"   r1    s   z#MultiheadAttention.get_delta_weightc                   s:   d|v rt d| jj dt j|g|R i | d S )Nr   rN  z( does not support mixed adapter batches.)r   r   r2   r#  r   rU   r.   r   r0   r%  r!   r"   r     s   z&MultiheadAttention._check_forward_argsqueryr   r   r0   c           	        s.  |j } j|g|R i |  jr& jr    j|g|R i |}nT jr6 j|g|R i |}nD  j}|j jkrR  j	j
}td| d| d fdd jD }z j|d  j|g|R i |}W    n   w |d ||d d ur|d |f}|S |d f}|S )	NzThe out_proj layer of z has merged layers but zJ itself doesn't; please ensure that either both or none have merged layersc                   s   g | ]	}| j v r|qS r!   r7   )r   r   r}  r!   r"   r     r  z.MultiheadAttention.forward.<locals>.<listcomp>r  r   r   )r   r   rI  r   r-   rA   rR   r  r   r   r2   rr   r8  r   )	rU   r  r   r0   previous_dtyper/   r  cls_namer   r!   r}  r"   r1     s.   
(
zMultiheadAttention.forwardc                 C  s^   |   }|j}|`|dtj|j|jd |j  }|j}|`|dtj|j|jd d S )Nr  r  r   )	rR   r  r  rE   r   r   r   r  r   )rU   rA   r   r!   r!   r"   _restore_weights/  s   

z#MultiheadAttention._restore_weightsc                      |    t j|i |S r   )r  r#  
state_dictr  r%  r!   r"   r  D  s   zMultiheadAttention.state_dictc                   r  r   )r  r#  named_modulesr  r%  r!   r"   r  H  s   z MultiheadAttention.named_modulesc                   rL  rM  rO  rQ  r%  r!   r"   rP  M  rS  zMultiheadAttention.__repr__)r   r   rd   TFF)r   r   r<   r^   r=   r^   r?   r   r   r"  rK   rD   rJ   rD   r   r   )r   r^   )r   r  )r   rD   )r   r   )r   r  )r   rB   )r   r  )r   r  rV  rT  rU  )r8  rD   r*  rD   r   r+  r   r  rW  )r  r'   r   r   r0   r   r   r'   rY  )"r2   r3   r4   r5   rV   propertyr  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r   r8  r-   r  r1  r   r1   r   r  r  r  rP  rZ  r!   r!   r%  r"   r    sd    .

G
!
"
,
r  c                      s(   e Zd ZdZ fddZdd Z  ZS )_LoraParameterProxyzThis proxies an `nn.Parameter` that is targeted with LoRA.
    Intended to be used in conjunction with `nn.utils.parametrize`, see `ParamWrapper`.
    c                   s   t    || _d S r   )r#  rV   r6  )rU   r6  r%  r!   r"   rV   W  s   

z_LoraParameterProxy.__init__c                 C  s:   t jj  || j W  d    S 1 sw   Y  d S r   )rE   utilsparametrizecachedr6  )rU   Wr!   r!   r"   r1   [  s   $z_LoraParameterProxy.forward)r2   r3   r4   r5   rV   r1   rZ  r!   r!   r%  r"   r  R  s    r  c                 C  s,   t |tjr| || d S | || d S r   )r}   rE   r   r  register_buffer)r   r   r   r!   r!   r"   _register_parameter_or_bufferb  s   r  c                      s   e Zd ZdZ									dEdF fddZ					dGdHdd ZdIdJd$d%Zd&d' Zd(d) Ze	dKd,d-Z
d.d/ ZdLdMd3d4ZdNd5d6Z fd7d8ZdOd:d;ZdPdAdBZdQ fdCdDZ  ZS )RParamWrappera  A LoRA wrapper for `nn.Parameter`. This layer is dispatched if users target a parameter directly with
    `lora_config.target_parameters`
        Note:
        - When accessing the wrapped nn.Parameter directly, e.g. via `module.weight`, the LoRA weights are *not*
          applied.
        - It is currently not implemented to target multiple parameters on the same module. To achieve this, it is
          currently required to create a separate LoRA adapter (with another adapter name) and activate both at the
          same time.
    r   r   rd   FTr   r   parameter_namer<   r^   r=   r?   r   r   rD   r!  r   r"  rK   rJ   rL   r   r   c              
     s4  t    tj| |fi | || _|  }|jdkr%|j\| _| _| _	nd|jd |jd | _| _| _	|jdvrIt
d| jj d|j d|rUt
d| jj d|rat
d| jj d	|rmt
d| jj d
|ryt
d| jj d|rt
d| jj d|| _|| _| j|||||	|
||d d S )Nr  r   r   )r   r  rN  z was initialized with z9 dimensional Parameter, but only 2d and 3d are supported.& does not work with lora_dropout != 0.z# does not work with fan_in_fan_out.z# does not work with lora_bias=True.z" does not work with use_dora=True.z1 does not work with is_target_conv_1d_layer=True.)r=   r?   r   rK   rJ   rL   )r#  rV   r   r  	get_paramndimshapenum_expertsrS   rT   rr   r   r2   r   r$  r   )rU   rA   r   r  r<   r=   r?   r   r!  r   rK   rJ   rL   r0   paramr%  r!   r"   rV   t  sB   

"

zParamWrapper.__init__rY   r[   r]   r_   c                 K  s  t   }|d= |dkrtd| | j|||
d}|d ur)td| jj d|| j|< || j|< |dkrAtd| jj dt	 }| j
t||i tj| j|| j d	d
| j|< tj|| j | j|	d
| j|< |	| j|< |r|t| | j|< n|| | j|< || j|< || j|< t|tr|drt|  j | || W d    n1 sw   Y  nt|tr|drt|  j |  || W d    n1 sw   Y  nt|tr|! dkrt|  j | "| W d    n	1 sw   Y  n_|dkr/t|  j | #| W d    n	1 s)w   Y  n<|dkr?tj$%| j| j n,|dkrbt|  j | &| W d    n	1 s\w   Y  n	|rk| '|| | (| || j)v r| j)| j$| fi | | j*| j+|d d S )NrU   r   r`   )rJ   r[   r]   rN  z, does not work with LoRA variants like DoRA.rd   r  Frg   rh   ri   rj   rk   rl   rm   rn   ),rp   rq   rr   rX   r   r2   r<   r=   rE   rx   r?   ry   rF   rz   rS   r  r7   rT   r8   rL   r{   r|   r>   rK   rJ   r}   r   r~   r   rR   r   r   r   r   r   r   r#   r   r   r   r   rQ   r   r   )rU   r   r<   r=   r?   r   rK   rJ   r[   rL   r]   r_   r0   rQ   r   r!   r!   r"   r     sp   









zParamWrapper.update_layerNr   Optional[torch.device]c                   s   |   j}td |   }| j| j D ]D}t| |d}t|tjtj	t
fs'q||vr,qt fdd| D r:q|jjsB|jjrO|| j||jd||< q|| |||< qdS )z}
        Move the adapter of the given name to the device of the base layer. Needs special handling for nn.Parameter
        metaNc                 3  s    | ]}|j  kV  qd S r   )r   )r   rf   r  r!   r"   	<genexpr>      zEParamWrapper._move_adapter_to_device_of_base_layer.<locals>.<genexpr>rC  )r  r   rM   r;   r@   rs   r}   rE   rF   rG   r	   r   
parametersr   is_floating_point
is_complexr   )rU   r   r   r  adapter_layer_nameadapter_layerr!   r  r"   r      s   

z2ParamWrapper._move_adapter_to_device_of_base_layerc                 C  s   t |  | j}|S r   )rs   rR   r  )rU   r  r!   r!   r"   r    s   zParamWrapper.get_paramc           	      O  s   | j dkrtj| |g|R i |}n.| j| j}| j| j}|| j d|jd }||jd d| j }t	d||| j
|  }|  }|  }||j|j}|S )Nr   r   r   zo r e, e r i -> e i o)r  rz   r1  r7   r   r8   reshaper  rM   einsumr>   rR   r  r   r   r   )	rU   r   r   r0   r6  rF  rG  rA   r  r!   r!   r"   r1    s   
zParamWrapper.get_delta_weightr   r   c                 #  s    |rt  fdd|D sd V  d S d }|D ]}| jvrq|d u r) |}q| | }q  }  j}tjj	| j
t| |j j
 j| z
d V  W    d S    w )Nc                 3  s    | ]}| j v V  qd S r   r  )r   r   r}  r!   r"   r  .  r  z.ParamWrapper._activate_lora.<locals>.<genexpr>)r   r7   r1  rR   r  r   rE   r  r  register_parametrizationr  r  parametrizationsoriginalrequires_grad__remove_parametrizations)rU   r   r6  r%   rA   requires_grad_beforer!   r}  r"   _activate_lora,  s(   

zParamWrapper._activate_lorac                 C  s   |   }| j}||jvrtd|j| }t|dkr'tjjj||dd d S t	t
t|}|D ]}|| }t|trB||=  d S q1td|  d d S )NzbSomething went wrong, please report this issue on PEFT: https://github.com/huggingface/peft/issuesr   F)leave_parametrizedz+Could not find any LoRA parametrization on z], please open an issue on https://github.com/huggingface/peft/issues and report this warning.)rR   r  r  rr   r   rE   r  r  remove_parametrizationsreversedranger}   r  rt   ru   )rU   rA   r  
param_listreversed_indicesr  r   r!   r!   r"   r  H  s(   



z%ParamWrapper._remove_parametrizationsr*  r   r+  c           	      C  s   t | |}|s	d S |D ]N}|| j v rY|  }t|| j}|rG|j }|j}| 	|}||
|7 }t| sCtd| d||_n| 	|}| j|7  _| j| qd S )Nr-  r.  )r   r7   r   rR   rs   r  r   r0  r   r1  r   rM   r2  r3  rr   rI   r  )	rU   r*  r   r%   rA   r  r&   r5  r6  r!   r!   r"   r8  h  s,   




zParamWrapper.mergec                 C  s   | j s
td d S t| jdkr@| j }|| j v r7t| 	 | j
}|j}| |}| j||8  _t| jdksd S d S )Nr<  r   )r   rt   ru   r   rI   r   r7   r   rs   rR   r  r   r1  r   r   )rU   r%   r  r5  r6  r!   r!   r"   r-     s   


zParamWrapper.unmergec                   s>   | ddrtd| jj dt j|g|R i | dS )r   r   NrN  z, does not support mixed adapter batches yet.)r   rr   r   r2   r#  r   r  r%  r!   r"   r     s   z ParamWrapper._check_forward_argsr8  c                 C  sP   | j }|r"| j||d t|tr |j||d |j }t|ts|S |  }|S )Nr  )rA   r8  r}   r  rR   )rU   r8  r*  r   rA   r!   r!   r"   r    s   

z/ParamWrapper.unload_and_optionally_merge_moduler.   r'   r   r   r0   c                 O  s   | j |g|R i | |dd }| jr*| jr|   | j|g|R i |}|S |d ur8td| jj d| jrI| j|g|R i |}|S | 	| j
 | j|g|R i |}W d    |S 1 sgw   Y  |S )Nr   rN  z' does not support mixed batch inference)r   r   rI  r   r-   rA   rr   r   r2   r  r   )rU   r.   r   r0   r   r/   r!   r!   r"   r1     s$   
zParamWrapper.forwardc                   sF   t   }|dd }|d |  d| j d||d   }d| S )N(r   z
  parameter_name='z',rN  )r#  rP  findr  )rU   rR  idxr%  r!   r"   rP    s   
&zParamWrapper.__repr__)	r   r   rd   FFTFFF)r   r   r  r   r<   r^   r=   r^   r?   r   r   rD   r!  rD   r   r"  rK   rD   rJ   rD   rL   rD   r   r   )FFFrY   F)
rJ   rD   r[   rD   rL   rD   r]   r^   r_   rD   r   )r   r   r   r  r   r   )r   r   rT  rU  rV  )r8  rD   r*  rD   r   r+  rX  rY  )r2   r3   r4   r5   rV   r   r   r  r1  r   r  r  r8  r-   r   r  r1   rP  rZ  r!   r!   r%  r"   r  i  s<    @T 
 

r  targettorch.nn.Moduler   r   lora_configr   r  Optional[str]r   Optional[torch.nn.Module]c                 K  s  d }t | tr|  }n| }|d urt| |fd|i|}|S t |tjjrA| }|dd  |	|j
 t| |fi |}|S t |tjjrY|	|j
 t| |fi |}|S t |tjjrq|	|j
 t| |fi |}|S t |tjr|	|j
 t| |fi |}|S t |tjjr|	|j
 t| |fi |}|S t |tjjr|d rtd d |d< |_|	|j
 t| |fi |}|S t |tr|d std d |d< |_|	|j
 t| |fddi|}|S )Nr  r   zjfan_in_fan_out is set to True but the target module is `torch.nn.Linear`. Setting fan_in_fan_out to False.Fzafan_in_fan_out is set to False but the target module is `Conv1D`. Setting fan_in_fan_out to True.Tr!  )r}   r
   rR   r  rM   rE   r[  rq   r   ry   loftq_configr  r  r  r  rz   rt   ru   r   r   )r  r   r  r  r0   
new_moduletarget_base_layerembedding_kwargsr!   r!   r"   dispatch_default  s^   

$

r  r   )
r  r  r   r   r  r   r  r  r   r  )4
__future__r   r{   rt   
contextlibr   typingr   r   r   rM   torch.nnrE   torch.nn.functional
functionalrn  r   transformers.pytorch_utilsr   peft.tuners._buffer_dictr	   peft.tuners.tuners_utilsr
   r   r   peft.utils.integrationsr   r   r   r   peft.utils.otherr   peft.utils.warningr   configr   r   r  r   r   Modulerz   r[  rr  r  r  r  r  r  r  r  r  r!   r!   r!   r"   <module>   sV   2   | e  9  7  z  ^