o
    ©Ì³iK  ã                   @   s¼   d dl mZ d dlZd dlmZ dddddd	d
ddddddœZ				ddeeejf dededededeeejf fdd„Z				ddeeejf dedededeeejf f
dd„Z
dS )é    )ÚDictN)Úget_mapped_keyztok_embeddings.weightzlayers.{}.attn.q_proj.weightzlayers.{}.attn.k_proj.weightzlayers.{}.attn.v_proj.weightz!layers.{}.attn.output_proj.weightzlayers.{}.mlp.w1.weightzlayers.{}.mlp.w3.weightzlayers.{}.mlp.w2.weightzlayers.{}.sa_norm.scalezlayers.{}.mlp_norm.scalez
norm.scalezoutput.weight)zmodel.embed_tokens.weightz'model.layers.{}.self_attn.q_proj.weightz'model.layers.{}.self_attn.k_proj.weightz'model.layers.{}.self_attn.v_proj.weightz'model.layers.{}.self_attn.o_proj.weightz$model.layers.{}.mlp.gate_proj.weightz"model.layers.{}.mlp.up_proj.weightz$model.layers.{}.mlp.down_proj.weightz&model.layers.{}.input_layernorm.weightz/model.layers.{}.post_attention_layernorm.weightzmodel.norm.weightzscore.weighté    é   Ú
state_dictÚ	num_headsÚnum_kv_headsÚdimÚhead_dimÚreturnc           
         s‚   i }ˆdu r
ˆ | ‰‡ ‡fdd„}|   ¡ D ])\}}|dkrqd|vr't|tƒ}	d|v r1|||ƒ}n	d|v r:|||ƒ}|||	< q|S )aã  
    Convert a state dict from HF's format to torchtune's format, which contains the weights
    of a reward model (i.e. a classifier with a single class).
    State dicts from multiple checkpoint files should be consolidated into a single state dict
    before calling this function.
    The logic is identical to :func:`~torchtune.models.convert_weights.hf_to_tune`, but with a different mapping.

    Eg of HF-format state dict can be found in the ``Ray2333/reward-model-Mistral-7B-instruct-Unified-Feedback``
    repo in HF.

    Args:
        state_dict (Dict[str, torch.Tensor]): State dict in HF's format.
        num_heads (int): Number of heads in the model.
        num_kv_heads (int): Number of heads in the key/value projection layers.
        dim (int): Dimension of the model.
        head_dim (int): Dimension of the head. If not provided, it will be calculated
            as dim // num_heads.

    Returns:
        Dict[str, torch.Tensor]: State dict in torchtune's format.
    Nc                    s(   |   |dˆd ˆ ¡ dd¡ ˆ| ˆ ¡S ©Né   é   ©ÚviewÚ	transposeÚreshape©ÚtÚn_heads©r	   r
   © úY/home/ubuntu/.local/lib/python3.10/site-packages/torchtune/rlhf/utils/_convert_weights.pyÚ_permute=   ó   ýz#reward_hf_to_tune.<locals>._permutez
score.biaszrotary_emb.inv_freqÚq_projÚk_proj)Úitemsr   Ú_REWARD)
r   r   r   r	   r
   Úconverted_state_dictr   ÚkeyÚvalueÚnew_keyr   r   r   Úreward_hf_to_tune   s   


r#   c           
         sz   i }dd„ t  ¡ D ƒ}ˆ | ‰‡ ‡fdd„}|  ¡ D ] \}}t||ƒ}	d|v r-|||ƒ}n	d|v r6|||ƒ}|||	< q|S )ap  
    Convert a state dict from torchtune's format to Hugging Face's format for a reward model.

    This function takes a state dictionary in torchtune's format, which contains the weights of a reward model
    (i.e. a classifier with a single class), and converts it into a format that can be loaded into a Hugging Face model.
    The logic is identical to :func:`~torchtune.models.convert_weights.tune_to_hf`, but with a different mapping.

    Args:
        state_dict (Dict[str, torch.Tensor]): State dict in torchtune's format.
        num_heads (int, optional): Number of heads in the model. Defaults to 32.
        num_kv_heads (int, optional): Number of heads in the key/value projection layers. Defaults to 32.
        dim (int, optional): Dimension of the model. Defaults to 4096.

    Returns:
        Dict[str, torch.Tensor]: State dict in Hugging Face's format.

    c                 S   s   i | ]\}}||“qS r   r   )Ú.0ÚkÚvr   r   r   Ú
<dictcomp>l   s    z%reward_tune_to_hf.<locals>.<dictcomp>c                    s(   |   |ˆd dˆ ¡ dd¡ ˆ| ˆ ¡S r   r   r   r   r   r   r   o   r   z#reward_tune_to_hf.<locals>._permuter   r   )r   r   r   )
r   r   r   r	   r   Úinverted_mapping_dictr   r    r!   r"   r   r   r   Úreward_tune_to_hfT   s   


r)   )r   r   r   N)r   r   r   )Útypingr   ÚtorchÚ torchtune.models.convert_weightsr   r   ÚstrÚTensorÚintr#   r)   r   r   r   r   Ú<module>   s^   ôûÿþýüû
ú9üÿþýüû