from functools import partial
from typing import List

from torchtune.models.gemma2._component_builders import gemma2, lora_gemma2
from torchtune.modules import TransformerDecoder
from torchtune.modules.peft import LORA_ATTN_MODULES


def gemma2_2b() -> TransformerDecoder:
    """
    Builder for creating a Gemma2 2B model initialized w/ the default 2b parameter values
    from: https://github.com/google/gemma_pytorch/blob/main/gemma/config.py

    Returns:
        TransformerDecoder: Instantiation of Gemma2 2B model
    """
    return gemma2(
        vocab_size=256_000,
        num_layers=26,
        num_heads=8,
        head_dim=256,
        num_kv_heads=4,
        embed_dim=2304,
        intermediate_dim=9216,
        max_seq_len=8192,
        attn_dropout=0.0,
        norm_eps=1e-6,
        hidden_capping_value=30.0,
        final_capping_value=50.0,
        sliding_window_size=4096,
    )
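

# Usage sketch (illustrative only): ``gemma2_2b`` takes no arguments and returns a
# freshly constructed ``TransformerDecoder``; the weights are not pretrained and are
# typically loaded afterwards from a checkpoint by the training recipe.
#
#     model = gemma2_2b()
#     num_params = sum(p.numel() for p in model.parameters())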


def lora_gemma2_2b(
    lora_attn_modules: List[LORA_ATTN_MODULES],
    apply_lora_to_mlp: bool = False,
    lora_rank: int = 8,
    lora_alpha: float = 16,
    lora_dropout: float = 0.0,
    use_dora: bool = False,
    quantize_base: bool = False,
) -> TransformerDecoder:
    """
    Builder for creating a Gemma2 2B model with LoRA enabled.

    The Gemma2 defaults are the same as in :func:`~torchtune.models.gemma2.gemma2_2b`,
    while LoRA default params are based on
    https://github.com/tloen/alpaca-lora/blob/8bb8579e403dc78e37fe81ffbb253c413007323f/finetune.py#L41-L43.

    Args:
        lora_attn_modules (List[LORA_ATTN_MODULES]): list of which linear layers
            LoRA should be applied to in each self-attention block. Options are
            ``{"q_proj", "k_proj", "v_proj", "output_proj"}``.
        apply_lora_to_mlp (bool): whether to apply LoRA to the MLP in each transformer layer.
            Default: False
        lora_rank (int): rank of each low-rank approximation
        lora_alpha (float): scaling factor for the low-rank approximation
        lora_dropout (float): dropout probability for the low-rank approximation. Default: 0.0
        use_dora (bool): Decompose the LoRA weight into magnitude and direction, as
            introduced in "DoRA: Weight-Decomposed Low-Rank Adaptation" (https://arxiv.org/abs/2402.09353).
        quantize_base (bool): Whether to quantize base model weights

    Returns:
        TransformerDecoder: Instantiation of Gemma2 2B model with LoRA applied
    """
    return lora_gemma2(
        lora_attn_modules=lora_attn_modules,
        apply_lora_to_mlp=apply_lora_to_mlp,
        vocab_size=256_000,
        num_layers=26,
        num_heads=8,
        head_dim=256,
        num_kv_heads=4,
        embed_dim=2304,
        intermediate_dim=9216,
        max_seq_len=8192,
        attn_dropout=0.0,
        norm_eps=1e-6,
        hidden_capping_value=30.0,
        final_capping_value=50.0,
        sliding_window_size=4096,
        lora_rank=lora_rank,
        lora_alpha=lora_alpha,
        lora_dropout=lora_dropout,
        use_dora=use_dora,
        quantize_base=quantize_base,
    )


qlora_gemma2_2b = partial(lora_gemma2_2b, quantize_base=True)
qlora_gemma2_2b.__doc__ = """
Builder for creating a Gemma2 model with QLoRA enabled. Base model weights in linear layers
that LoRA is applied to are quantized per the QLoRA paper: https://arxiv.org/abs/2305.14314.
Please see `lora_gemma2_2b` for full API arguments.
"""


def gemma2_9b() -> TransformerDecoder:
    """
    Builder for creating a Gemma2 9B model initialized w/ the default 9b parameter values
    from: https://github.com/google/gemma_pytorch/blob/main/gemma/config.py

    Returns:
        TransformerDecoder: Instantiation of Gemma2 9B model
    """
    return gemma2(
        vocab_size=256_000,
        num_layers=42,
        num_heads=16,
        head_dim=256,
        num_kv_heads=8,
        embed_dim=3584,
        intermediate_dim=14336,
        max_seq_len=8192,
        attn_dropout=0.0,
        norm_eps=1e-6,
        hidden_capping_value=30.0,
        final_capping_value=50.0,
        sliding_window_size=4096,
    )


def lora_gemma2_9b(
    lora_attn_modules: List[LORA_ATTN_MODULES],
    apply_lora_to_mlp: bool = False,
    lora_rank: int = 8,
    lora_alpha: float = 16,
    lora_dropout: float = 0.0,
    use_dora: bool = False,
    quantize_base: bool = False,
) -> TransformerDecoder:
    """
    Builder for creating a Gemma2 9B model with LoRA enabled.

    The Gemma2 defaults are the same as in :func:`~torchtune.models.gemma2.gemma2_9b`,
    while LoRA default params are based on
    https://github.com/tloen/alpaca-lora/blob/8bb8579e403dc78e37fe81ffbb253c413007323f/finetune.py#L41-L43.

    Args:
        lora_attn_modules (List[LORA_ATTN_MODULES]): list of which linear layers
            LoRA should be applied to in each self-attention block. Options are
            ``{"q_proj", "k_proj", "v_proj", "output_proj"}``.
        apply_lora_to_mlp (bool): whether to apply LoRA to the MLP in each transformer layer.
            Default: False
        lora_rank (int): rank of each low-rank approximation
        lora_alpha (float): scaling factor for the low-rank approximation
        lora_dropout (float): dropout probability for the low-rank approximation. Default: 0.0
        use_dora (bool): Decompose the LoRA weight into magnitude and direction, as
            introduced in "DoRA: Weight-Decomposed Low-Rank Adaptation" (https://arxiv.org/abs/2402.09353).
        quantize_base (bool): Whether to quantize base model weights

    Returns:
        TransformerDecoder: Instantiation of Gemma2 9B model with LoRA applied
    """
    return lora_gemma2(
        lora_attn_modules=lora_attn_modules,
        apply_lora_to_mlp=apply_lora_to_mlp,
        vocab_size=256_000,
        num_layers=42,
        num_heads=16,
        head_dim=256,
        num_kv_heads=8,
        embed_dim=3584,
        intermediate_dim=14336,
        max_seq_len=8192,
        attn_dropout=0.0,
        norm_eps=1e-6,
        hidden_capping_value=30.0,
        final_capping_value=50.0,
        sliding_window_size=4096,
        lora_rank=lora_rank,
        lora_alpha=lora_alpha,
        lora_dropout=lora_dropout,
        use_dora=use_dora,
        quantize_base=quantize_base,
    )


qlora_gemma2_9b = partial(lora_gemma2_9b, quantize_base=True)
qlora_gemma2_9b.__doc__ = """
Builder for creating a Gemma2 model with QLoRA enabled. Base model weights in linear layers
that LoRA is applied to are quantized per the QLoRA paper: https://arxiv.org/abs/2305.14314.
Please see `lora_gemma2_9b` for full API arguments.
"""


def gemma2_27b() -> TransformerDecoder:
    """
    Builder for creating a Gemma2 27B model initialized w/ the default 27b parameter values
    from: https://github.com/google/gemma_pytorch/blob/main/gemma/config.py

    Returns:
        TransformerDecoder: Instantiation of Gemma2 27B model
    """
    return gemma2(
        vocab_size=256_000,
        num_layers=46,
        num_heads=32,
        head_dim=128,
        num_kv_heads=16,
        embed_dim=4608,
        intermediate_dim=36864,
        max_seq_len=8192,
        attn_dropout=0.0,
        norm_eps=1e-6,
        hidden_capping_value=30.0,
        final_capping_value=50.0,
        sliding_window_size=4096,
        query_pre_attn_scalar=144,
    )


def lora_gemma2_27b(
    lora_attn_modules: List[LORA_ATTN_MODULES],
    apply_lora_to_mlp: bool = False,
    lora_rank: int = 8,
    lora_alpha: float = 16,
    lora_dropout: float = 0.0,
    use_dora: bool = False,
    quantize_base: bool = False,
) -> TransformerDecoder:
    """
    Builder for creating a Gemma2 27B model with LoRA enabled.

    The Gemma2 defaults are the same as in :func:`~torchtune.models.gemma2.gemma2_27b`,
    while LoRA default params are based on
    https://github.com/tloen/alpaca-lora/blob/8bb8579e403dc78e37fe81ffbb253c413007323f/finetune.py#L41-L43.

    Args:
        lora_attn_modules (List[LORA_ATTN_MODULES]): list of which linear layers
            LoRA should be applied to in each self-attention block. Options are
            ``{"q_proj", "k_proj", "v_proj", "output_proj"}``.
        apply_lora_to_mlp (bool): whether to apply LoRA to the MLP in each transformer layer.
            Default: False
        lora_rank (int): rank of each low-rank approximation
        lora_alpha (float): scaling factor for the low-rank approximation
        lora_dropout (float): dropout probability for the low-rank approximation. Default: 0.0
        use_dora (bool): Decompose the LoRA weight into magnitude and direction, as
            introduced in "DoRA: Weight-Decomposed Low-Rank Adaptation" (https://arxiv.org/abs/2402.09353).
        quantize_base (bool): Whether to quantize base model weights

    Returns:
        TransformerDecoder: Instantiation of Gemma2 27B model with LoRA applied
    """
    return lora_gemma2(
        lora_attn_modules=lora_attn_modules,
        apply_lora_to_mlp=apply_lora_to_mlp,
        vocab_size=256_000,
        num_layers=46,
        num_heads=32,
        head_dim=128,
        num_kv_heads=16,
        embed_dim=4608,
        intermediate_dim=36864,
        max_seq_len=8192,
        attn_dropout=0.0,
        norm_eps=1e-6,
        hidden_capping_value=30.0,
        final_capping_value=50.0,
        sliding_window_size=4096,
        query_pre_attn_scalar=144,
        lora_rank=lora_rank,
        lora_alpha=lora_alpha,
        lora_dropout=lora_dropout,
        use_dora=use_dora,
        quantize_base=quantize_base,
    )


qlora_gemma2_27b = partial(lora_gemma2_27b, quantize_base=True)
qlora_gemma2_27b.__doc__ = """
Builder for creating a Gemma2 model with QLoRA enabled. Base model weights in linear layers
that LoRA is applied to are quantized per the QLoRA paper: https://arxiv.org/abs/2305.14314.
Please see `lora_gemma2_27b` for full API arguments.
N)Fr   r*   r   FF)	functoolsr   typingr   +torchtune.models.gemma2._component_buildersr   r   torchtune.modulesr   torchtune.modules.peftr   r(   boolintfloatr5   qlora_gemma2_2b__doc__r:   r;   qlora_gemma2_9brC   rD   qlora_gemma2_27br&   r&   r&   r'   <module>   s   
8
8
9

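

# Usage sketch (illustrative only): each ``qlora_*`` name defined above is simply the
# matching ``lora_*`` builder with ``quantize_base=True`` pre-applied via
# ``functools.partial``, so the two calls below construct the same model.
#
#     model_a = qlora_gemma2_27b(lora_attn_modules=["q_proj", "v_proj"])
#     model_b = lora_gemma2_27b(
#         lora_attn_modules=["q_proj", "v_proj"], quantize_base=True
#     )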