o
    i@                     @   s  d dl Z d dlZd dlZd dlZd dlmZ d dlmZ d dlm	Z	m
Z
mZ d dlZd dlm  mZ d dlmZ d dlmZmZ d dlmZ de	eejf d	ed
e	eejf fddZde	eejf d	ed
e	eejf fddZde	eejf d	ed
e	eejf fddZdeeejf ded
efddZdeeejf ded
efddZ	ddeeejf ded
efddZ deeejf ded
e
e! fddZ"ded
efddZ#dS )     N)OrderedDict)Path)DictListUnion)SentencePieceProcessor)
GPT2ConfigLlamaConfig)	rearrange
state_dictconfigreturnc              	      s2  dd t fdd|  D } dd t fdd|  D } | d}t|d	d
}t|jd | | }t|ddd||jd  f| d< t|drV| d | d< n"| d}t|jd | | }t|ddd||jd  f| d< dd t fdd|  D } t	|j
D ]#}| d| d}| d| d}tj||gdd| d| d< qdd t fdd|  D } t	|j
D ]7}| d| d}	| d| d}
| d| d}tj|	|
|gdd| d| d< | d| dd qd d!  t  fd"d|  D } | d#d | S )$zsConvert the state_dict in Meta format to standard GPT format.

    This function modifies state_dict in place.
    c                 S   s   |  ds
d|  S | S )Nzoutput.ztransformer.)
startswithkey r   M/home/ubuntu/vllm_env/lib/python3.10/site-packages/flash_attn/models/llama.pykey_mapping_layers   s   z7remap_state_dict_meta_llama.<locals>.key_mapping_layersc                 3        | ]\}} ||fV  qd S Nr   .0kv)r   r   r   	<genexpr>       z.remap_state_dict_meta_llama.<locals>.<genexpr>c                 S      t dd| S )Nz^transformer.tok_embeddings.'transformer.embeddings.word_embeddings.resubr   r   r   r   key_mapping_emb!   s   z4remap_state_dict_meta_llama.<locals>.key_mapping_embc                 3   r   r   r   r   r!   r   r   r   &   r   -transformer.embeddings.word_embeddings.weightpad_vocab_size_multiple   r   tie_word_embeddingslm_head.weightzoutput.weightc                 S   .   t dd| } t dd| } t dd| } | S )Nz^transformer.norm.transformer.ln_f.z)^transformer.layers.(\d+).attention_norm.transformer.layers.\1.norm1.z#^transformer.layers.(\d+).ffn_norm.transformer.layers.\1.norm2.r   r   r   r   r   key_mapping_ln@   s   z3remap_state_dict_meta_llama.<locals>.key_mapping_lnc                 3   r   r   r   r   r,   r   r   r   J   r   transformer.layers.z.feed_forward.w1.weightz.feed_forward.w3.weightdim.mlp.fc1.weightc                 S   r   )Nz*^transformer.layers.(\d+).feed_forward.w2.transformer.layers.\1.mlp.fc2.r   r   r   r   r   key_mapping_mlpS   
   z4remap_state_dict_meta_llama.<locals>.key_mapping_mlpc                 3   r   r   r   r   r3   r   r   r   Z   r   z.attention.wq.weightz.attention.wk.weightz.attention.wv.weight.mixer.Wqkv.weight%.attention.inner_attention.rope.freqsNc                 S   r   )Nz'^transformer.layers.(\d+).attention.wo.%transformer.layers.\1.mixer.out_proj.r   r   r   r   r   key_mapping_attne   r4   z5remap_state_dict_meta_llama.<locals>.key_mapping_attnc                 3   r   r   r   r   r9   r   r   r   l   r   ztransformer.rope.freqsr   itemspopgetattrmathceilshapeFpadrangen_layertorchcat)r   r   word_embeddingsr$   
vocab_sizeoutput_embeddingslw1w3WqWkWvr   )r9   r!   r   r,   r3   r   remap_state_dict_meta_llama   sN   



  rQ   c              	      s  dd t fdd|  D } | d}t dd}t|jd | | }t|ddd||jd  f| d< t d	rE| d | d
< n"| d
}t|jd | | }t|ddd||jd  f| d
< t	 j
D ]#}| d| d}| d| d}tj||gdd| d| d< qldd t fdd|  D } dd t fdd|  D }  fdd}	t	 j
D ];}| d| d}
| d| d}| d| d}tj|	|
|	||gdd| d| d< | d| dd qdd  t fd!d|  D } | S )"z{Convert the state_dict in Hugging Face format to standard GPT format.

    This function modifies state_dict in place.
    c                 S   r   )Nz^model.embed_tokens.r   r   r   r   r   r   r!   |      z2remap_state_dict_hf_llama.<locals>.key_mapping_embc                 3   r   r   r   r   r"   r   r   r      r   z,remap_state_dict_hf_llama.<locals>.<genexpr>r#   r$   r%   r   r&   r'   model.layers..mlp.gate_proj.weight.mlp.up_proj.weightr/   r.   r1   c                 S   r   )Nz"^model.layers.(\d+).mlp.down_proj.r2   r   r   r   r   r   r3      r4   z2remap_state_dict_hf_llama.<locals>.key_mapping_mlpc                 3   r   r   r   r   r5   r   r   r      r   c                 S   r(   )Nz^model.norm.r)   z$^model.layers.(\d+).input_layernorm.r*   z-^model.layers.(\d+).post_attention_layernorm.r+   r   r   r   r   r   r,         z1remap_state_dict_hf_llama.<locals>.key_mapping_lnc                 3   r   r   r   r   r-   r   r   r      r   c                       t | d j j d ddS )Nz(h two d) n -> (h d two) n   dtwor
   n_embdn_headwr   r   r   inv_permute   s   z.remap_state_dict_hf_llama.<locals>.inv_permute.self_attn.q_proj.weight.self_attn.k_proj.weight.self_attn.v_proj.weightr6   z.self_attn.rotary_emb.inv_freqNc                 S   r   )Nz%^model.layers.(\d+).self_attn.o_proj.r8   r   r   r   r   r   r9      r4   z3remap_state_dict_hf_llama.<locals>.key_mapping_attnc                 3   r   r   r   r   r:   r   r   r      r   r;   )r   r   rH   r$   rI   rJ   rK   rL   rM   rb   rN   rO   rP   r   r   r9   r!   r,   r3   r   remap_state_dict_hf_llamas   sN   	


 rg   c              	      sl  dd t fdd|  D } | d}t dd}t|jd | | }t|ddd||jd  f| d< t d	rE| d | d
< n"| d
}t|jd | | }t|ddd||jd  f| d
< t	 j
D ]#}tj| d| dddd\}}|| d| d< || d| d< qldd t fdd|  D } dd t fdd|  D }  fdd}	 j}
t d|
} j}||
 }|
| }||  }}t	 j
D ]K}| d| d}|d| }||||  }||| || |  }|	|| d| d< |	|| d| d< || d| d< | d| d d qd!d" t fd#d|  D } | S )$a  Convert the state_dict in standard GPT format to Hugging Face format.

    This function is meant to be the inverse of remap_state_dict_hf_llama, up to a
    multiplier pad in the embedding and lm_head. That is if the original embedding
    isn't a multiple of pad_vocab_size_multiple, then
    inv_remap_state_dict_hf_llama(remap_state_dict_hf_llama(state_dict)) != state_dict.

    This function modifies state_dict in place.
    c                 S   r   )Nz(^transformer.embeddings.word_embeddings.zmodel.embed_tokens.r   r   r   r   r   r!      rR   z6inv_remap_state_dict_hf_llama.<locals>.key_mapping_embc                 3   r   r   r   r   r"   r   r   r      r   z0inv_remap_state_dict_hf_llama.<locals>.<genexpr>zmodel.embed_tokens.weightr$   r%   r   r&   r'   r.   r1   rX   )chunksr0   rS   rT   rU   c                 S   r   )Nz"^transformer.layers.(\d+).mlp.fc2.zmodel.layers.\1.mlp.down_proj.r   r   r   r   r   r3     r4   z6inv_remap_state_dict_hf_llama.<locals>.key_mapping_mlpc                 3   r   r   r   r   r5   r   r   r     r   c                 S   r(   )Nz^transformer.ln_f.zmodel.norm.z ^transformer.layers.(\d+).norm1.z model.layers.\1.input_layernorm.z ^transformer.layers.(\d+).norm2.z)model.layers.\1.post_attention_layernorm.r   r   r   r   r   r,     rV   z5inv_remap_state_dict_hf_llama.<locals>.key_mapping_lnc                 3   r   r   r   r   r-   r   r   r   #  r   c                    rW   )Nz(h d two) n -> (h two d) nrX   rY   r\   r_   ra   r   r   permute%  s   z.inv_remap_state_dict_hf_llama.<locals>.permute	n_head_kvr6   Nrc   rd   re   r7   c                 S   r   )Nz)^transformer.layers.(\d+).mixer.out_proj.z!model.layers.\1.self_attn.o_proj.r   r   r   r   r   r9   >  r4   z7inv_remap_state_dict_hf_llama.<locals>.key_mapping_attnc                 3   r   r   r   r   r:   r   r   r   E  r   )r   r<   r=   r>   r?   r@   rA   rB   rC   rD   rE   rF   chunkr^   hidden_size)r   r   rH   r$   rI   rJ   rK   rM   rL   ri   r^   rj   	embed_dimhead_dimq_dimk_dimv_dimWqkvrN   rO   rP   r   rf   r   inv_remap_state_dict_hf_llama   s`   



rs   checkpoint_path
model_namec           	   
   C   s  t t| | d }t|}W d   n1 sw   Y  t|d d|d |d |d |ddd}|d	d
}|dd}d|j }td| d }|durXt|| }||| d
 |  }||_d|v rn|d |_	d|_
t| | d }| rtt|
 |_
|S )z*Load a LlamaConfig from a checkpoint path.zparams.jsonNr0   n_headsn_layersnorm_eps
n_kv_heads)rl   intermediate_sizenum_attention_headsnum_hidden_layersrms_norm_epsnum_key_value_headsmultiple_ofr%   ffn_dim_multiplier   rX      
rope_thetai }  ztokenizer.model)openr   jsonloadr	   getrl   intrz   rotary_emb_baserI   is_filer   str)	rt   ru   fparamsr   r   r   rz   	tokenizerr   r   r   config_from_meta_checkpointI  s4   


r   c                 C   s   t t| | d d S )Nz-hfzconfig.json)r	   from_pretrainedr   rt   ru   r   r   r   config_from_hf_checkpointp  s   r   metac                 C   s   |dkr	t | |S t| |S )Nr   )r   r   )rt   ru   checkpoint_formatr   r   r   config_from_checkpointv  s   

r   c                 C   s    dd t t| | dD S )Nc                 S   s   g | ]	}t j|d dqS )cpu)map_location)rF   r   )r   pathr   r   r   
<listcomp>  s    z/state_dicts_from_checkpoint.<locals>.<listcomp>zconsolidated.*.pth)sortedr   globr   r   r   r   state_dicts_from_checkpoint  s   r   llama_configc                 C   s   t d!i d| jddd| jd| jd| jd| jdd	d
dddddd| jd| jd| jd| j	d| j
dddddddddddddddddt| ddd | jS )"NrI   n_positionsr   r]   rE   r^   n_inneractivation_functionswigluresid_pdropg        
embd_pdrop
attn_pdroplayer_norm_epsiloninitializer_rangebos_token_ideos_token_idpad_token_idrms_normTrotary_emb_fractiong      ?rotary_emb_interleavedr&   Fqkv_proj_biasout_proj_biasmlp_fc1_biasmlp_fc2_biasr   g     @rj   r   )r   rI   rl   r|   r{   rz   r}   r   r   r   r   r>   r~   )r   r   r   r   llama_config_to_gpt2_config  sf   	
r   )r   )$r   r?   osr   collectionsr   pathlibr   typingr   r   r   rF   torch.nn.functionalnn
functionalrB   sentencepiecer   transformersr   r	   einopsr
   r   TensorrQ   rg   rs   PathLiker   r   r   dictr   r   r   r   r   r   <module>   s~   
`
h
n
'

	

