import re

from typing import Any, Dict, Optional

import torch

# State dict key mappings from Meta's format to torchtune's format
_FROM_META = {
    "tok_embeddings.weight": "tok_embeddings.weight",
    "norm.weight": "norm.scale",
    "output.weight": "output.weight",
    "layers.{}.attention.wk.weight": "layers.{}.attn.k_proj.weight",
    "layers.{}.attention.wq.weight": "layers.{}.attn.q_proj.weight",
    "layers.{}.attention.wv.weight": "layers.{}.attn.v_proj.weight",
    "layers.{}.attention.wo.weight": "layers.{}.attn.output_proj.weight",
    "layers.{}.attention_norm.weight": "layers.{}.sa_norm.scale",
    "layers.{}.ffn_norm.weight": "layers.{}.mlp_norm.scale",
    "layers.{}.feed_forward.w1.weight": "layers.{}.mlp.w1.weight",
    "layers.{}.feed_forward.w2.weight": "layers.{}.mlp.w2.weight",
    "layers.{}.feed_forward.w3.weight": "layers.{}.mlp.w3.weight",
}

# State dict key mappings from HF's format to torchtune's format
_FROM_HF = {
    "model.embed_tokens.weight": "tok_embeddings.weight",
    "model.layers.{}.self_attn.q_proj.weight": "layers.{}.attn.q_proj.weight",
    "model.layers.{}.self_attn.k_proj.weight": "layers.{}.attn.k_proj.weight",
    "model.layers.{}.self_attn.v_proj.weight": "layers.{}.attn.v_proj.weight",
    "model.layers.{}.self_attn.o_proj.weight": "layers.{}.attn.output_proj.weight",
    "model.layers.{}.self_attn.rotary_emb.inv_freq": None,
    "model.layers.{}.mlp.gate_proj.weight": "layers.{}.mlp.w1.weight",
    "model.layers.{}.mlp.up_proj.weight": "layers.{}.mlp.w3.weight",
    "model.layers.{}.mlp.down_proj.weight": "layers.{}.mlp.w2.weight",
    "model.layers.{}.input_layernorm.weight": "layers.{}.sa_norm.scale",
    "model.layers.{}.post_attention_layernorm.weight": "layers.{}.mlp_norm.scale",
    "model.norm.weight": "norm.scale",
    "lm_head.weight": "output.weight",
}


def get_mapped_key(key: str, mapping_dict: Dict[str, str]) -> str:
    try:
        if any(k.isdigit() for k in key.split(".")):
            # Replace the layer number with "{}" to create an abstract key for lookup
            abstract_key = re.sub(r"(\.\d+)", ".{}", key)
            layer_num = re.search(r"\d+", key).group(0)
            new_key = mapping_dict[abstract_key]
            new_key = new_key.format(layer_num)
        else:
            new_key = mapping_dict[key]
    except KeyError as e:
        raise Exception(
            f'Error converting the state dict. Found unexpected key: "{key}". '
            "Please make sure you're loading a checkpoint with the right format. "
        ) from e

    return new_key


def meta_to_tune(state_dict: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
    """
    Convert a state dict from Meta's format to torchtune's format. State dicts
    from multiple checkpoint files should be consolidated into a single state dict
    before calling this function.

    An example of a Meta-format state dict can be found in the ``meta-llama/Llama-2-7b``
    repo in HF (https://huggingface.co/meta-llama/Llama-2-7b).

    Args:
        state_dict (Dict[str, torch.Tensor]): State dict in Meta's format.

    Returns:
        Dict[str, torch.Tensor]: State dict in torchtune's format.
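
    Example (illustrative; tensor values and shapes are arbitrary, only the key
    names are mapped):
        >>> sd = {"tok_embeddings.weight": torch.randn(2, 2)}
        >>> list(meta_to_tune(sd).keys())
        ['tok_embeddings.weight']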
    """
    converted_state_dict = {}
    for key, value in state_dict.items():
        if key not in ["rope.freqs"]:  # Skip loading the position embeddings
            new_key = get_mapped_key(key, _FROM_META)
            converted_state_dict[new_key] = value

    return converted_state_dict


def tune_to_meta(state_dict: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
    """
    Convert a state dict from torchtune's format to Meta's format. This function
    doesn't handle any sharding or splitting of state dicts. It follows the
    state_dict IN -> state_dict OUT pattern.

    Args:
        state_dict (Dict[str, torch.Tensor]): State dict in torchtune's format.

    Returns:
        Dict[str, torch.Tensor]: State dict in Meta's format.
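
    Example (illustrative; tensor values are arbitrary, only the key names are
    mapped):
        >>> sd = {"norm.scale": torch.randn(2)}
        >>> list(tune_to_meta(sd).keys())
        ['norm.weight']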
    """
    converted_state_dict = {}
    inverted_mapping_dict = {v: k for k, v in _FROM_META.items()}

    for key, value in state_dict.items():
        new_key = get_mapped_key(key, inverted_mapping_dict)
        converted_state_dict[new_key] = value

    return converted_state_dict


def hf_to_tune(
    state_dict: Dict[str, torch.Tensor],
    num_heads: int = 32,
    num_kv_heads: int = 32,
    dim: int = 4096,
    head_dim: Optional[int] = None,
) -> Dict[str, torch.Tensor]:
    """
    Convert a state dict from HF's format to torchtune's format. State dicts
    from multiple checkpoint files should be consolidated into a single state dict
    before calling this function.

    An example of an HF-format state dict can be found in the ``meta-llama/Llama-2-7b-hf``
    repo in HF (https://huggingface.co/meta-llama/Llama-2-7b-hf).

    Args:
        state_dict (Dict[str, torch.Tensor]): State dict in HF's format.
        num_heads (int): Number of heads in the model.
        num_kv_heads (int): Number of heads in the key/value projection layers.
        dim (int): Dimension of the model.
        head_dim (int): Dimension of the head. If not provided, it will be calculated
            as dim // num_heads.

    Returns:
        Dict[str, torch.Tensor]: State dict in torchtune's format.
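
    Example (illustrative; tensor values and shapes are arbitrary, and this key
    involves no q/k permutation):
        >>> hf_sd = {"model.embed_tokens.weight": torch.randn(4, 8)}
        >>> list(hf_to_tune(hf_sd, num_heads=32, num_kv_heads=32, dim=4096).keys())
        ['tok_embeddings.weight']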
    """
    converted_state_dict = {}
    if head_dim is None:
        head_dim = dim // num_heads

    def _permute(t, n_heads):
        # Undo HF's interleaved RoPE layout for q/k projection weights
        return (
            t.view(n_heads, 2, head_dim // 2, dim)
            .transpose(1, 2)
            .reshape((head_dim * n_heads), dim)
        )

    for key, value in state_dict.items():
        if "rotary_emb.inv_freq" not in key:  # Skip loading the position embeddings
            new_key = get_mapped_key(key, _FROM_HF)
            if "q_proj" in key:
                value = _permute(value, num_heads)
            elif "k_proj" in key:
                value = _permute(value, num_kv_heads)
            converted_state_dict[new_key] = value
    return converted_state_dict


def tune_to_hf(
    state_dict: Dict[str, torch.Tensor],
    num_heads: int = 32,
    num_kv_heads: int = 32,
    dim: int = 4096,
    head_dim: Optional[int] = None,
) -> Dict[str, torch.Tensor]:
    """
    Convert a state dict from torchtune's format to HF's format. This function
    doesn't handle any sharding or splitting of state dicts. It follows the
    state_dict IN -> state_dict OUT pattern.

    Args:
        state_dict (Dict[str, torch.Tensor]): State dict in torchtune's format.
        num_heads (int): Number of heads in the model.
        num_kv_heads (int): Number of heads in the key/value projection layers.
        dim (int): Dimension of the model.
        head_dim (int): Dimension of model attention heads. Default None.

    Returns:
        Dict[str, torch.Tensor]: State dict in HF's format.
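
    Example (illustrative; tensor values and shapes are arbitrary, only the key
    names are mapped):
        >>> tune_sd = {"output.weight": torch.randn(4, 8)}
        >>> list(tune_to_hf(tune_sd, num_heads=32, num_kv_heads=32, dim=4096).keys())
        ['lm_head.weight']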
    """
    converted_state_dict = {}
    inverted_mapping_dict = {v: k for k, v in _FROM_HF.items()}

    if head_dim is None:
        head_dim = dim // num_heads

    def _permute(t, n_heads):
        # Re-apply HF's interleaved RoPE layout for q/k projection weights
        return (
            t.view(n_heads, head_dim // 2, 2, dim)
            .transpose(1, 2)
            .reshape((head_dim * n_heads), dim)
        )

    for key, value in state_dict.items():
        new_key = get_mapped_key(key, inverted_mapping_dict)
        if "q_proj" in key:
            value = _permute(value, num_heads)
        elif "k_proj" in key:
            value = _permute(value, num_kv_heads)
        converted_state_dict[new_key] = value

    return converted_state_dict


# Mapping from torchtune LoRA parameter names to the names PEFT expects
_TO_PEFT_KEYS = {
    "lora_a": "lora_A",
    "lora_b": "lora_B",
    "magnitude": "lora_magnitude_vector",
}

# Mapping from torchtune module names to the HF/PEFT target module names
_TO_PEFT_TARGET_MODULES = {
    "q_proj": "q_proj",
    "k_proj": "k_proj",
    "v_proj": "v_proj",
    "output_proj": "o_proj",
    "w1": "gate_proj",
    "w2": "down_proj",
    "w3": "up_proj",
    "output": "lm_head",
}

# Keys expected in the adapter config passed to tune_to_peft_adapter_config
_PEFT_CONFIG_EXPECTED_KEYS = ["target_modules", "r", "lora_alpha"]


def tune_to_peft_adapter_config(
    adapter_config: Dict[str, Any],
    base_model_name_or_path: Optional[str] = None,
):
    if not all([x in adapter_config.keys() for x in _PEFT_CONFIG_EXPECTED_KEYS]):
        raise ValueError(
            f"PEFT adapter config requires {_PEFT_CONFIG_EXPECTED_KEYS}, found {adapter_config.keys()}"
        )

    for k in adapter_config["target_modules"]:
        if k not in _TO_PEFT_TARGET_MODULES:
            raise ValueError(f"Unknown target module {k}")
    adapter_config["target_modules"] = list(
        map(_TO_PEFT_TARGET_MODULES.get, adapter_config["target_modules"])
    )

    if base_model_name_or_path:
        adapter_config["base_model_name_or_path"] = base_model_name_or_path

    return adapter_config


def tune_to_peft_adapter_weights(
    state_dict: Dict[str, torch.Tensor],
    num_heads: int = 32,
    num_kv_heads: int = 32,
    dim: int = 4096,
    head_dim: Optional[int] = None,
):
    converted_state_dict = {}
    full_mapping = {}
    # Rather than recreate a separate mapping for LoRA adapter weights, re-use the
    # _FROM_HF mapping for base-model weights, building one adapter-key mapping per
    # PEFT key type (lora_A, lora_B, magnitude).
    for peft_key, peft_val in _TO_PEFT_KEYS.items():
        for hf_key, hf_val in _FROM_HF.items():
            if hf_val is None:
                continue

            if peft_key == "magnitude":
                # e.g. attn.q_proj.magnitude -> self_attn.q_proj.lora_magnitude_vector
                adapter_key = hf_val.replace(".weight", f".{peft_key}")
                adapter_val = hf_key.replace(".weight", f".{peft_val}")
            else:
                # e.g. attn.q_proj.lora_a.weight -> self_attn.q_proj.lora_A.weight
                adapter_key = hf_val.replace(".weight", f".{peft_key}.weight")
                adapter_val = hf_key.replace(".weight", f".{peft_val}.weight")

            full_mapping.update({adapter_key: adapter_val})

    if head_dim is None:
        head_dim = dim // num_heads

    def _permute_lora_matrix(t, n_heads):
        rank = t.shape[-1]
        return (
            t.view(n_heads, head_dim // 2, 2, rank)
            .transpose(1, 2)
            .reshape((head_dim * n_heads), rank)
        )

    for key, value in state_dict.items():
        new_key = get_mapped_key(key, full_mapping)
        # Only LoRA B matrices of the q/k projections need the RoPE permutation
        if "q_proj" in key and "lora_B" in new_key:
            value = _permute_lora_matrix(value, num_heads)
        elif "k_proj" in key and "lora_B" in new_key:
            value = _permute_lora_matrix(value, num_kv_heads)
        converted_state_dict[f"base_model.model.{new_key}"] = value
    return converted_state_dict
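

# Minimal usage sketch for the PEFT adapter-config conversion above (illustrative
# only; the rank/alpha values and module list are arbitrary assumptions, not
# torchtune defaults):
#
#   adapter_config = {
#       "r": 8,
#       "lora_alpha": 16,
#       "target_modules": ["q_proj", "v_proj", "output_proj"],
#   }
#   peft_config = tune_to_peft_adapter_config(adapter_config)
#   assert peft_config["target_modules"] == ["q_proj", "v_proj", "o_proj"]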