o
    ̳i;                     @   s  d dl mZ d dlmZmZ d dlmZmZ d dlm	Z	m
Z
 d dlmZ d dlmZ d dlmZ d dlmZ 	 d	efd
dZ					d%dededee dee dee d	efddZ							d&dee dededededededed	efd d!Zeed"d#Zd$e_dS )'    )partial)ListOptional)_get_prompt_template_TemplateType)	lora_phi3phi3)Phi4Tokenizer)TransformerDecoder)LORA_ATTN_MODULES)parse_hf_tokenizer_jsonreturnc                   C   s   t dddddddddd		S )
z
    Builder for creating the Phi4 (14B) Instruct Model.

    Returns:
        TransformerDecoder: Instantiation of Phi4 (14B) Instruct Model
      (   
       F   @          h㈵>)	
vocab_size
num_layers	num_headsnum_kv_heads	embed_dimintermediate_dimmax_seq_lenattn_dropoutnorm_eps)r    r   r   Y/home/ubuntu/.local/lib/python3.10/site-packages/torchtune/models/phi4/_model_builders.pyphi4_14b   s   r!   N
vocab_pathmerges_pathspecial_tokens_pathr   prompt_templatec                 C   s:   |durt |nd}|durt|nd}t| ||||dS )a  
    Phi4 tokenizer.

    Args:
        vocab_path (str): Path to vocab.json.
        merges_path (str): Path to merges.txt.
        special_tokens_path (Optional[str]): Path to ``tokenizer.json`` from Hugging Face
            model files that contains all registered special tokens, or a local json file
            structured similarly. Default is None to use the canonical Phi4 special tokens.
        max_seq_len (Optional[int]): maximum sequence length for tokenizing a single list of messages,
            after which the input will be truncated. Default is None.
        prompt_template (Optional[_TemplateType]): optional specified prompt template.
            If a string, it is assumed to be the dotpath of a :class:`~torchtune.data.PromptTemplateInterface`
            class. If a dictionary, it is assumed to be a custom prompt template mapping role to the
            prepend/append tags.

    Returns:
        Phi4Tokenizer: Instantiation of the Phi-4 (14B) tokenizer.
    N)r"   r#   special_tokensr   r%   )r   r   r	   )r"   r#   r$   r   r%   r&   templater   r   r    phi4_tokenizer(   s   r(   F      r   lora_attn_modulesapply_lora_to_mlpapply_lora_to_output	lora_rank
lora_alphalora_dropoutuse_doraquantize_basec                 C   sp   t di d| d|d|ddddddd	d
ddddddddddd|d|d|d|d|S )a  
    Builder for creating a Phi4 (14b) model with LoRA enabled.

    The Phi4 defaults are the same as in :func:`~torchtune.models.phi4.phi4`.

    Args:
        lora_attn_modules (List[LORA_ATTN_MODULES]): list of which linear layers
            LoRA should be applied to in each self-attention block. Options are
            ``{"q_proj", "k_proj", "v_proj", "output_proj"}``.
        apply_lora_to_mlp (bool): whether to apply LoRA to the MLP in each transformer layer.
            Default: False
        apply_lora_to_output (bool): whether to apply LoRA to the model's final output projection.
            Default: False
        lora_rank (int): rank of each low-rank approximation
        lora_alpha (float): scaling factor for the low-rank approximation
        lora_dropout (float): dropout probability for the low-rank approximation. Default: 0.0
        use_dora (bool): Decompose the LoRA weight into magnitude and direction, as
            introduced in "DoRA: Weight-Decomposed Low-Rank Adaptation" (https://arxiv.org/abs/2402.09353).
        quantize_base (bool): Whether to quantize base model weights

    Returns:
        TransformerDecoder: Instantiation of Phi4 (14B) model with LoRA applied
    r+   r,   r-   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r.   r/   r0   r1   r2   Nr   )r   )r+   r,   r-   r.   r/   r0   r1   r2   r   r   r    lora_phi4_14bS   sF   !	
r3   T)r2   z
Builder for creating a Phi4 (14B) model with QLoRA enabled. Base model weights in linear layers
that LoRA is applied to are quantized per the QLoRA paper: https://arxiv.org/abs/2305.14314.
Please see `lora_phi4_14b` for full API arguments.
)NNNNN)FFr)   r*   r   FF)	functoolsr   typingr   r    torchtune.data._prompt_templatesr   r   )torchtune.models.phi3._component_buildersr   r    torchtune.models.phi4._tokenizerr	   torchtune.modulesr
   torchtune.modules.peftr   torchtune.modules.tokenizersr   r!   strintr(   boolfloatr3   qlora_phi4_14b__doc__r   r   r   r    <module>   sp    
-	
6
