o
    Ni7                     @  s   d dl mZ d dlZd dlZd dlZd dlmZmZmZ d dl	Z	d dl
mZ d dlm  mZ d dlmZmZ d dlmZ d dlmZ ddlmZ G d	d
 d
ejeZdddZdS )    )annotationsN)AnyOptionalUnion)BaseTunerLayercheck_adapters_to_merge)	transpose)gather_params_ctx   )	LoraLayerc                      s   e Zd ZdZ									d0d1 fddZdejdddfd2ddZd3d d!Zd4d5d(d)Z	d6d*d+Z
d7d,d-Zd8 fd.d/Z  ZS )9LoraParallelLineara  
    When the target layer parallel_linear is RowParallelLinear, in order to keep the input and output shapes
    consistent, we need to split the lora matrix A into rows, and the lora_B at this time should be a complete linear
    layer; In the same way, when the target layer is ColumnParallelLinear, we perform column segmentation on lora_B,
    while lora_A is still a complete linear layer.
    r   r
           FTadapter_namestrrint
lora_alphalora_dropoutfloatfan_in_fan_outboolis_target_conv_1d_layerinit_lora_weightsUnion[bool, str]
use_rslorause_dora	lora_biasc                   s   |rt | jj dt   tj| fd|i| |r&t | jj d|| _t||j| _	|| _
|| _|d }d|i}tj}t|drI|j}d}d}| j	rT|j}n|j}| j||f|||	|
||||d| |rut | jj d	d| _d S )
Nz0 does not support lora_bias yet, set it to False
base_layerz2 does not support DoRA yet, please set it to Falsemegatron_configinit_methodTF)r   r   r   r   r   r   input_is_parallelgather_outputzB does not support target_conv_1d_layer yet, please set it to False)
ValueError	__class____name__super__init__r   backend
isinstanceRowParallelLinearis_parallel_ar   _active_adapterinitxavier_normal_hasattrr   r    r!   update_layerr   )selfr   r   r'   r   r   r   r   r   r   r   r   r   kwargsr   parallel_linear_kwargsr   r    r!   r#    M/home/ubuntu/.local/lib/python3.10/site-packages/peft/tuners/lora/tp_layer.pyr&   (   sP   


zLoraParallelLinear.__init__inference_modec              	   K  s  t   }|d= |dkrtd| || j|< || j|< |dkr(tj|d}nt }|| j|< |d }t	j
|_| jrV| jj| j|d|	d||d	}tj|| jdt	j
d
}ntj| j|dt	j
d
}| jj|| jd|
||d}|| j|< || j|< |r|t| | j|< n|| | j|< || j|< t|tr|drt|  j | || W d    n1 sw   Y  nt|tr|drt|  j |  || W d    n1 sw   Y  nWt|tr|! dkrt|  j | "| W d    n	1 sw   Y  n,|dkr/t|  j | #| W d    n	1 s)w   Y  n	|r8| $|| | %| || j&v rO| j&| j'| fi | | j(| j)|d d S )Nr0   r   z?`r` should be a positive integer value but the value passed is r   )pr   FT)
input_sizeoutput_sizebiasr    skip_bias_addr   config)in_featuresout_featuresr:   dtype)r8   r9   r:   r!   r   r<   pissacordaoloraloftq)r6   )*localscopyr"   r   r   nnDropoutIdentityr   torchfloat32params_dtyper*   r'   r)   r=   Linearr>   ColumnParallelLinearlora_Alora_Bmathsqrtscalingr   r(   r   
startswithr	   get_base_layerweight
pissa_init
corda_initlower
olora_init
loftq_initreset_lora_parameters%_move_adapter_to_device_of_base_layerlora_variantr,   set_adapteractive_adapters)r0   r   r   r   r   r   r   r   r   r    r!   r6   r2   r1   lora_dropout_layerr   lora_alora_br4   r4   r5   r/   e   s   



	




zLoraParallelLinear.update_layerxtorch.Tensorargsr   r1   c                 O  s@  | j |g|R i | |dd }| jr.| jr|   | j|g|R i |\}}||fS |d ur;t| jj d| jrP| j|g|R i |\}}||fS | j|g|R i |\}}|j	}| j
D ]2}|| j vrnqd| j| }	| j| }
| j| }| j| }| ||	jj	}||
|	|||  }qd||}||fS )Nadapter_namesz* does not support mixed_batch_forward yet.)_check_forward_argspopdisable_adaptersmergedunmerger   r"   r#   r$   r?   r_   rN   keysrO   r   rR   _cast_input_dtyperU   to)r0   rc   re   r1   rf   resultr:   torch_result_dtypeactive_adapterrN   rO   dropoutrR   r4   r4   r5   forward   s2   





zLoraParallelLinear.forwardN
safe_mergerf   Optional[list[str]]returnNonec                 C  s   t | |}|s	dS |D ]E}|| j v rP|  }|r=|jj }| |}|| }t	|
 s8td| d||j_n| |}|jj| |j_| j| qdS )a^  
        Merge the active adapter weights into the base weights

        Args:
            safe_merge (`bool`, *optional*):
                If True, the merge operation will be performed in a copy of the original weights and check for NaNs
                before merging the weights. This is useful if you want to check if the merge operation will produce
                NaNs. Defaults to `False`.
            adapter_names (`list[str]`, *optional*):
                The list of adapter names that should be merged. If None, all active adapters will be merged. Defaults
                to `None`.
        Nz1NaNs detected in the merged weights. The adapter z seems to be broken)r   rN   rl   rT   rU   datacloneget_delta_weightrI   isfiniteallr"   merged_adaptersappend)r0   rt   rf   rq   r   orig_weightsdelta_weightr4   r4   r5   merge   s(   




zLoraParallelLinear.mergec                 C  sr   | j s
td dS t| jdkr7| j }|| j v r.|  j	}| 
|}| j|8  _t| jdksdS dS )zW
        This method unmerges all merged adapter layers from the base weights.
        z Already unmerged. Nothing to do.Nr   )rj   warningswarnlenr}   rh   rN   rl   rT   rU   rz   rx   )r0   rq   rU   r   r4   r4   r5   rk     s   



zLoraParallelLinear.unmergec                 C  s   | j | jj}| j | jj}|jdko|tjkp|tjk}| j| j}| j | j}|r3|	 }|	 }t
|| | j| j|  }|r\|j|d}||| j| j_||| j | j_|S )z
        Compute the delta weight for the given adapter.

        Args:
            adapter (str):
                The name of the adapter for which the delta weight should be computed.
        cpu)r?   )rO   rU   devicer?   typerI   float16bfloat16rN   r   r   r   rR   rn   rx   )r0   adapterr   r?   cast_to_fp32weight_Aweight_Boutput_tensorr4   r4   r5   rz     s   z#LoraParallelLinear.get_delta_weightc                   s   t   }d| S )Nzlora.)r%   __repr__)r0   repr3   r4   r5   r   2  s   
zLoraParallelLinear.__repr__)	r   r
   r   FFTFFF)r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   )r6   r   )rc   rd   re   r   r1   r   )FN)rt   r   rf   ru   rv   rw   )rv   rw   )rv   rd   )rv   r   )r$   
__module____qualname____doc__r&   r,   r-   r/   rs   r   rk   rz   r   __classcell__r4   r4   r3   r5   r       s.    E
W
(
"r   targettorch.nn.Moduler   r   r1   r   rv   Optional[torch.nn.Module]c           
      K  s   d }t | tr|  }n| }|jrt|j}nd }|r`t ||jj|jj	fr`|
 }|j}t |tr@|jjj}	|	di |j}||d< |d rTtd d |d< |_td| ||jd|}|S )Nr   r   zfan_in_fan_out is set to True but the target module is `ColumnParallelLinear` or `RowParallelLinear`. Setting fan_in_fan_out to False.F)r   r   r'   r4   )r(   r   rT   r   	importlibimport_modulemegatron_coretensor_parallelrM   r)   rE   dicttransformertransformer_configTransformerConfigr   r   r   r   )
r   r   lora_configr1   
new_moduletarget_base_layerr   megatron_kwargsr   transformer_config_classr4   r4   r5   dispatch_megatron7  s8   



r   )r   r   r   r   r1   r   rv   r   )
__future__r   r   rP   r   typingr   r   r   rI   torch.nnrF   torch.nn.initr,   peft.tuners.tuners_utilsr   r   
peft.utilsr   peft.utils.integrationsr	   layerr   Moduler   r   r4   r4   r4   r5   <module>   s      