o
    ̳i                     @   sF   d dl mZ d dlZd dlmZ d dlmZmZ G dd dejZ	dS )    )OptionalN)
linear_nf4to_nf4c                       s`   e Zd ZdZ			ddedededeej deej	 f
 fd	d
Z
dejdejfddZ  ZS )FrozenNF4LinearaN  
    A linear layer similar to ``torch.nn.Linear`` but uses a quantized
    NF4Tensor as its weight. This class also freezes its ``weight`` parameter
    and is meant to be used as the base Linear layer for modeling
    use cases such as QLoRA where base model parameters are frozen.

    Args:
        in_dim (int): input dimension
        out_dim (int): output dimension
        bias (bool): whether to include bias in the linear layer. Default: False
        device (Optional[torch.device]): device to use for the underlying weight. If ``None``, uses the default
            device given by `torch.get_default_device()`.
        dtype (Optional[torch.dtype]): dtype to use for the underlying weight. If ``None``, uses the default
        **quantization_kwargs: Keyword arguments to pass to `to_nf4` when quantizing the base linear weight.
            Examples of valid arguments are `block_size` and `scaler_block_size`, which control the granularity of
            weight quantization and scaler quantization respectively. This is only used if `quantize_base` is True.
            Default None
    FNin_dimout_dimbiasdevicedtypec                    sj   t  j|||||d | jd | jd ur| jd t| jfi |}tj| jtj	j
|dd d S )N)r   r	   r
   F)requires_grad)super__init__weightrequires_grad_r   r   torchutilsswap_tensorsnn	Parameter)selfr   r   r   r	   r
   quantization_kwargs
nf4_weight	__class__ ^/home/ubuntu/.local/lib/python3.10/site-packages/torchtune/modules/low_precision/nf4_linear.pyr   #   s   	
zFrozenNF4Linear.__init__inputreturnc                 C   s&   t || jd}| jdur|| j }|S )ak  
        Runs linear operation with input tensor as given by `input`. Computation happens in higher
        precision, though only the nf4 weight is saved for backward for gradient computation to ensure
        additional memory is not used.
        Args:
            input (torch.Tensor): input tensor

        Returns:
            Tensor: output tensor
        )r   r   N)r   r   r   )r   r   outr   r   r   forward7   s   

zFrozenNF4Linear.forward)FNN)__name__
__module____qualname____doc__intboolr   r   r	   r
   r   Tensorr   __classcell__r   r   r   r   r      s"    r   )
typingr   r   torch.nnr   torchao.dtypes.nf4tensorr   r   Linearr   r   r   r   r   <module>   s
   