o
    Ti                     @   sL   d dl mZmZ d dlmZ d dlZeG dd dZeG dd dZdS )    )	dataclassfield)ListNc                   @   st   e Zd ZU dZdZeed< dZeed< dZ	eed< dZ
eed	< d
Zeed< dZeed< edd dZee ed< dS )
LoRAConfigaN  
    Configuration settings for LoRAOptimizedLinear.

    Attributes:
        lora_r (int): LoRA attention dimension, also known as the rank. Defaults is 64.
        lora_alpha (float): LoRA scaling factor, default is 16.
        base_weight_sharding (int): The degree to which the base weights are sharded,
            should typically be set to the data-parallel world size to maximize the memory
            reduction benefits. Defaults to 1, which means this feature is disabled.
        offload (bool): offload frozen parameters to cpu when not in use
        offload_ratio (float): ratio of parameters to offload to cpu when not in use
        delay_lora_init (bool): initialize lora parameters at time of model init or allow manual init later
        target_mods (str): target module names to apply LoRA to, defaults to llama-3.1 arch
    @   lora_rg      0@
lora_alpha   base_weight_shardingFoffloadg        offload_ratiodelay_lora_initc                   C   s   g dS )N)q_projk_projv_projo_proj	gate_projup_proj	down_proj r   r   r   K/home/ubuntu/.local/lib/python3.10/site-packages/deepspeed/linear/config.py<lambda>#   s    zLoRAConfig.<lambda>)default_factorytarget_modsN)__name__
__module____qualname____doc__r   int__annotations__r   floatr
   r   boolr   r   r   r   r   strr   r   r   r   r      s   
 r   c                   @   sF   e Zd ZU dZdZeed< dZeed< dZeed< e	j
Ze	jed< d	S )
QuantizationConfigax  
    Configuration settings for quantization for LoRAOptimizedLinear, QuantizedLinear,
    and QuantizedParameter

    Attributes:
        q_bits (int): The number of bits used for quantization. Default is 8.
        mantissa_bits (int): The number of bits reserved for the mantissa in fixed-point quantization. Default is 3.
        group_size (int): The number of elements used for quantization. Default is 512.
        q_dtype (torch.dtype): The data type to quantize to. Default is uint8. (in CUDA, buffers are allocated as
                                     uint8, but inside the kernels the quantization is done to fp8)
       q_bits   mantissa_bitsi   
group_sizeq_dtypeN)r   r   r   r   r%   r   r   r'   r(   torchuint8r)   dtyper   r   r   r   r#   &   s   
 r#   )dataclassesr   r   typingr   r*   r   r#   r   r   r   r   <module>   s   