o
    ©Ì³i’  ã                   @   s¼   d dl mZ d dlZd dlmZ 	 ddddddd	d
ddddddddœZ				ddeeejf dededededeeejf fdd„Z					ddeeejf dedededef
dd„Z
dS ) é    )ÚDictN)Úget_mapped_keyztok_embeddings.weightzlayers.{}.attn.q_proj.weightzlayers.{}.attn.k_proj.weightzlayers.{}.attn.v_proj.weightz!layers.{}.attn.output_proj.weightzlayers.{}.mlp.w1.weightzlayers.{}.mlp.w3.weightzlayers.{}.mlp.w2.weightzlayers.{}.sa_norm.scalezlayers.{}.sa_scale.scalezlayers.{}.mlp_norm.scalezlayers.{}.mlp_scale.scaleznorm.rms_norm.scalezoutput.weight)zmodel.embed_tokens.weightz'model.layers.{}.self_attn.q_proj.weightz'model.layers.{}.self_attn.k_proj.weightz'model.layers.{}.self_attn.v_proj.weightz'model.layers.{}.self_attn.o_proj.weightz-model.layers.{}.self_attn.rotary_emb.inv_freqz$model.layers.{}.mlp.gate_proj.weightz"model.layers.{}.mlp.up_proj.weightz$model.layers.{}.mlp.down_proj.weightz&model.layers.{}.input_layernorm.weightz/model.layers.{}.post_attention_layernorm.weightz1model.layers.{}.post_feedforward_layernorm.weightz0model.layers.{}.pre_feedforward_layernorm.weightzmodel.norm.weightzlm_head.weighté    é   Ú
state_dictÚ	num_headsÚnum_kv_headsÚdimÚhead_dimÚreturnc           
         sx   i }ˆdu r
ˆ | ‰‡ ‡fdd„}|   ¡ D ]$\}}d|vr9t|tƒ}	d|v r,|||ƒ}n	d|v r5|||ƒ}|||	< q|S )a(  
    Convert a state dict from HF's format to torchtune's format. State dicts
    from multiple checkpoint files should be consolidated into a single state dict
    before calling this function.

    Eg of HF-format state dict can be found in the ``meta-llama/Llama-2-7b-hf``
    repo in HF (https://huggingface.co/meta-llama/Llama-2-7b-hf).

    Args:
        state_dict (Dict[str, torch.Tensor]): State dict in HF's format.
        num_heads (int): Number of heads in the model.
        num_kv_heads (int): Number of heads in the key/value projection layers.
        dim (int): Dimension of the model.
        head_dim (int): Dimension of the head. If not provided, it will be calculated
            as dim // num_heads.

    Returns:
        Dict[str, torch.Tensor]: State dict in torchtune's format.
    Nc                    s(   |   |dˆd ˆ ¡ dd¡ ˆ| ˆ ¡S ©Né   é   ©ÚviewÚ	transposeÚreshape©ÚtÚn_heads©r	   r
   © ú\/home/ubuntu/.local/lib/python3.10/site-packages/torchtune/models/gemma2/_convert_weights.pyÚ_permuteF   ó   ýz#gemma2_hf_to_tune.<locals>._permutezrotary_emb.inv_freqÚq_projÚk_proj)Úitemsr   Ú_GEMMA2_FROM_HF)
r   r   r   r	   r
   Úconverted_state_dictr   ÚkeyÚvalueÚnew_keyr   r   r   Úgemma2_hf_to_tune(   s   

€r#   c                    s‚   i }dd„ t  ¡ D ƒ}ˆdu rˆ | ‰‡ ‡fdd„}|  ¡ D ] \}}	t||ƒ}
d|v r1||	|ƒ}	n	d|v r:||	|ƒ}	|	||
< q|S )an  
    Convert a state dict from torchtune's format to HF's format. This function
    doesn't handle any sharding or splitting of state dicts. It follows the
    state_dict IN -> state_dict OUT pattern.

    Args:
        state_dict (Dict[str, torch.Tensor]): State dict in torchtune's format.
        num_heads (int): Number of heads in the model.
        num_kv_heads (int): Number of heads in the key/value projection layers.
        dim (int): Dimension of the model.
        head_dim (int): Dimension of model attention heads. Default None.

    Returns:
        Dict[str, torch.Tensor]: State dict in HF's format.
    c                 S   s   i | ]\}}||“qS r   r   )Ú.0ÚkÚvr   r   r   Ú
<dictcomp>p   s    z%gemma2_tune_to_hf.<locals>.<dictcomp>Nc                    s(   |   |ˆd dˆ ¡ dd¡ ˆ| ˆ ¡S r   r   r   r   r   r   r   u   r   z#gemma2_tune_to_hf.<locals>._permuter   r   )r   r   r   )r   r   r   r	   r
   r   Úinverted_mapping_dictr   r    r!   r"   r   r   r   Úgemma2_tune_to_hfY   s   


r)   )r   r   r   N)Útypingr   ÚtorchÚ torchtune.models.convert_weightsr   r   ÚstrÚTensorÚintr#   r)   r   r   r   r   Ú<module>   sh   	ñûÿþýüû
ú3ûÿþýüû