o
    ©Ì³iÖ  ã                   @   s
  d dl mZ d dlZd dlmZ i dd“dd“dd	“d
d“dd“dd“dd“dd“dd“dd“dd“dd“dd“dd“dd “d!d"“Zd!Z	#	#	$		%d1d&eeejf d'e	d(e	d)e	d*e	d+e
d,eeejf fd-d.„Z	#	#	$		%d1d&eeejf d'e	d(e	d)e	d*e	d+e
fd/d0„ZdS )2é    )ÚDictN)Úget_mapped_keyzmodel.embed_tokens.weightztok_embeddings.weightz'model.layers.{}.self_attn.q_proj.weightzlayers.{}.attn.q_proj.weightz%model.layers.{}.self_attn.q_proj.biaszlayers.{}.attn.q_proj.biasz'model.layers.{}.self_attn.k_proj.weightzlayers.{}.attn.k_proj.weightz%model.layers.{}.self_attn.k_proj.biaszlayers.{}.attn.k_proj.biasz'model.layers.{}.self_attn.v_proj.weightzlayers.{}.attn.v_proj.weightz%model.layers.{}.self_attn.v_proj.biaszlayers.{}.attn.v_proj.biasz'model.layers.{}.self_attn.o_proj.weightz!layers.{}.attn.output_proj.weightz-model.layers.{}.self_attn.rotary_emb.inv_freqz$model.layers.{}.mlp.gate_proj.weightzlayers.{}.mlp.w1.weightz"model.layers.{}.mlp.up_proj.weightzlayers.{}.mlp.w3.weightz$model.layers.{}.mlp.down_proj.weightzlayers.{}.mlp.w2.weightz&model.layers.{}.input_layernorm.weightzlayers.{}.sa_norm.scalez/model.layers.{}.post_attention_layernorm.weightzlayers.{}.mlp_norm.scalezmodel.norm.weightz
norm.scalezlm_head.weightzoutput.weighté    é   FÚ
state_dictÚ	num_headsÚnum_kv_headsÚdimÚhead_dimÚtie_word_embeddingsÚreturnc           
      C   sT   i }|du r
|| }|   ¡ D ]\}}|rt|v rqd|v rqt|tƒ}	|||	< q|S )a³  
    Convert a state dict from HF's format to TorchTune's format, which contains the weights
    of a Qwen2 model.
    State dicts from multiple checkpoint files should be consolidated into a single state dict
    before calling this function.
    The logic is identical to :func:`~torchtune.models.convert_weights.hf_to_tune`, but may not load
    output projection weights.

    Args:
        state_dict (Dict[str, torch.Tensor]): State dict in HF's format.
        num_heads (int): Number of heads in the model.
        num_kv_heads (int): Number of heads in the key/value projection layers.
        dim (int): Dimension of the model.
        head_dim (int): Dimension of the head. If not provided, it will be calculated
            as dim // num_heads.
        tie_word_embeddings (bool): Whether the model's input and output word embeddings should be tied.

    Returns:
        Dict[str, torch.Tensor]: State dict in torchtune's format.
    Nzrotary_emb.inv_freq)ÚitemsÚQWEN2_TIED_KEYr   Ú_FROM_HF)
r   r   r   r	   r
   r   Úconverted_state_dictÚkeyÚvalueÚnew_key© r   ú[/home/ubuntu/.local/lib/python3.10/site-packages/torchtune/models/qwen2/_convert_weights.pyÚqwen2_hf_to_tune%   s   ÿ

r   c                 C   sN   i }dd„ t  ¡ D ƒ}|du r|| }|  ¡ D ]\}}	t||ƒ}
|	||
< q|S )a  
    Convert a state dict from torchtune's format to HF's format. This function
    doesn't handle any sharding or splitting of state dicts. It follows the
    state_dict IN -> state_dict OUT pattern.

    Args:
        state_dict (Dict[str, torch.Tensor]): State dict in torchtune's format.
        num_heads (int): Number of heads in the model.
        num_kv_heads (int): Number of heads in the key/value projection layers.
        dim (int): Dimension of the model.
        head_dim (int): Dimension of the head. If not provided, it will be calculated
            as dim // num_heads.
        tie_word_embeddings (bool): Whether the model's input and output word embeddings should be tied.

    Returns:
        Dict[str, torch.Tensor]: State dict in HF's format.
    c                 S   s   i | ]\}}||“qS r   r   )Ú.0ÚkÚvr   r   r   Ú
<dictcomp>l   s    z$qwen2_tune_to_hf.<locals>.<dictcomp>N)r   r   r   )r   r   r   r	   r
   r   r   Úinverted_mapping_dictr   r   r   r   r   r   Úqwen2_tune_to_hfR   s   

r   )r   r   r   NF)Útypingr   ÚtorchÚ torchtune.models.convert_weightsr   r   r   ÚstrÚTensorÚintÚboolr   r   r   r   r   r   Ú<module>   s–   ÿþýüûúùø	÷
öõôóòñðúÿþýüûú
ù/úÿþýüûú