o
    ̳i                     @   s   d dl Z d dlmZmZmZ d dlZd dlm  mZ	 d dlmZ d dl
mZmZ d dlmZ d dlmZ G dd dejeZd	ejd
dfddZd	ejd
dfddZdS )    N)ListOptionalUnion)nn)
linear_nf4to_nf4)_register_nf4_dispatch_ops)AdapterModulec                       s   e Zd ZdZ			ddededededed	ed
ef fddZdddee	e
ejef  defddZdd Ze dd Zdd Zdee
 fddZdejdejfddZ  ZS ) 
DoRALinearaQ  DoRA linear layer as introduced in
    `DoRA: Weight-Decomposed Low-Rank Adaptation of Large Language Models <https://arxiv.org/abs/2402.09353>`_.

    DoRA (Weight-Decomposed Low-Rank Adaptation) fine-tunes a layer by decomposing the pre-trained weights
    into two components: magnitude and direction. The magnitude component is a learnable scalar vector
    that scales each output channel, while the direction component, modified via LoRA, adjusts the orientation
    of weights. By scaling the LoRA update component :math:`BAx` with the `magnitude` vector, DoRA allows the model
    to apply distinct scaling adjustments across different output dimensions.

    Args:
        in_dim (int): input dimension
        out_dim (int): output dimension
        rank (int): rank of the low-rank approximation
        alpha (float): scaling factor for the low-rank approximation
        dropout (float): dropout probability. Default: 0.0
        use_bias (bool): whether to include bias in the original linear layer.
            Default: False
        quantize_base (bool): Whether to quantize base linear weight or not.
            Default: False
        **quantization_kwargs: Keyword arguments to pass to `to_nf4` when quantizing the base linear weight.
            Examples of valid arguments are `block_size` and `scaler_block_size`, which control the granularity of
            weight quantization and scaler quantization respectively. This is only used if `quantize_base` is True.
            Default None

    Raises:
        ValueError: If ``quantize_base`` is False, but quantization kwargs are provided.

            Fin_dimout_dimrankalphadropoutuse_biasquantize_basec                    s,  t    || _|| _|| | _|| _|| _| js+tdd | D r+t	d| t
j||| jd}	| js:|	jnt|	jfi |}
| jrI|	jnd }d| _| dt
|
 | d|d urct
|nd  |dkrpt
j|d	nt
 | _t
j||dd| _t
j||dd| _t
t|| _|   d S )
Nc                 S   s   g | ]}|qS  r   ).0vr   r   O/home/ubuntu/.local/lib/python3.10/site-packages/torchtune/modules/peft/dora.py
<listcomp>D   s    z'DoRALinear.__init__.<locals>.<listcomp>zO``quantize_base`` is False, but received the following quantization arguments: )in_featuresout_featuresbiasFweightr   r   )p)super__init__r   r   scalingr   _quantize_baseanyvalues
ValueErrorr   Linearr   r   r   disabledregister_parameter	ParameterDropoutIdentityr   lora_alora_btorchempty	magnitudeinitialize_parameters)selfr   r   r   r   r   r   r   quantization_kwargslinearr   r   	__class__r   r   r   2   s4   

zDoRALinear.__init__T)recursedevicer5   c                C   sR   | j j||d | jj||d tjtj| j|d| jjd}tj	
| j| | S )N)r6   r5   )r6   )requires_grad)r*   to_emptyr+   r   r'   r,   
empty_liker.   r7   utilsswap_tensors)r0   r6   r5   r.   r   r   r   r8   `   s   zDoRALinear.to_emptyc                 C   s   t | j t| j d S )N)_lora_a_init_paramsr*   _lora_b_init_paramsr+   )r0   r   r   r   r/   m   s   
z DoRALinear.initialize_parametersc                 C   s`   t | jj| jjj| jjjgrtd| j| jjj}| jj| jj }| j	| 
|| dS )a=  
        DoRA initializes the magnitude vector such that its outputs are initially
        identical to standard LoRA's outputs.

        This must be called after loading/initializing base model and LoRA params.

        Raises:
            RuntimeError: If base or LoRA parameters are still on meta device.
        zUCannot initialize DoRA magnitude if base or LoRA parameters are still on meta device.N)r!   r   is_metar*   r+   RuntimeErrortodtyper.   copy__get_weight_norm)r0   base_weightlora_weightr   r   r   initialize_dora_magnitudes   s   z$DoRALinear.initialize_dora_magnitudec                 C   s*   || j |  }tjj|dd|j}|S )N   )dim)r   r,   linalgnormr@   rA   )r0   r   rE   weight_normr   r   r   rC      s   zDoRALinear._get_weight_normreturnc                 C   s   g d}|S )z
        Return a list of strings corresponding to the names of the ``nn.Parameter`` s in
        the model coming from the adapter.

        For DoRA this means lora_a.weight, lora_b.weight, and magnitude.
        )zlora_a.weightzlora_b.weightr.   r   )r0   adapter_paramsr   r   r   rM      s   zDoRALinear.adapter_paramsxc                 C   s   | j rt|| jd}| jr|| j }n	t|| j| j}| jr!|S | |}| 	| 
|}tj| j
jjd | j
jj|jd}| 	| 
|j}| j}| j|j}| || }| }|| dd}	|	d | |	| | j  }
|
| S )z
        Args:
            x (torch.Tensor): input tensor with shape ``(..., in_dim)``

        Returns:
            Tensor: output tensor with shape ``(..., out_dim)``
        )inputr   rG   )r6   rA   )r    r   r   r   r   Fr2   r%   r   r+   r*   r,   eyeshaper6   rA   Tr.   r@   rC   detachviewr   )r0   rN   base_outlora_outx_eyerE   r.   r   rK   mag_norm_scaledora_outr   r   r   forward   s2   

zDoRALinear.forward)r   FF)__name__
__module____qualname____doc__intfloatboolr   r   r   strr,   r6   r8   r/   no_gradrF   rC   r   rM   Tensorr\   __classcell__r   r   r3   r   r
      s@    #/


r
   rN   rL   c                 C   s   t jj| jtdd dS )z6
    Initialize LoRA A weight to Kaiming uniform.
       )aN)r   initkaiming_uniform_r   mathsqrtrN   r   r   r   r<      s   r<   c                 C   s   t j| j dS )z,
    Initialize LoRA B weight to zeros.
    N)r   rj   zeros_r   rn   r   r   r   r=      s   r=   )rl   typingr   r   r   r,   torch.nn.functionalr   
functionalrQ   torchao.dtypes.nf4tensorr   r   torchtune.modules.low_precisionr   torchtune.modules.peftr	   Moduler
   r$   r<   r=   r   r   r   r   <module>   s    /