o
    i                     @   s   d dl Z d dlZd dlZd dlmZ d dlmZ d dlZd dlm	  m
Z d dlmZ d dlmZmZmZ dd Zded	efd
dZdS )    N)Path)OrderedDict)	rearrange)
GPT2Config
AutoConfigPretrainedConfigc              	      s  dd d| v rt fdd|  D } | d}t|dd}t|j| | }t|d	d	d	||j	d	  f| d
< | d
 | d< dd t fdd|  D } t
|jD ][}| d| d}| d| d}tj| | gd	d| d| d< | d| d}| d| d}	tj||	gd	d| d| d< | d| d}
|
 | d| d< qWdd t fdd|  D } t
|jD ](}| d| d}| | d| d< | d| d}| | d| d < q| d! d"d#  t  fd$d|  D } | S )%Nc                 S   s   t dd| S )Nz^transformer.wpe.z+transformer.embeddings.position_embeddings.resubkey r   L/home/ubuntu/vllm_env/lib/python3.10/site-packages/flash_attn/models/btlm.pykey_mapping_pos_emb   s   z5remap_state_dict_hf_btlm.<locals>.key_mapping_pos_embztransformer.wpe.weightc                 3        | ]\}} ||fV  qd S Nr   .0kv)r   r   r   	<genexpr>       z+remap_state_dict_hf_btlm.<locals>.<genexpr>ztransformer.wte.weightpad_vocab_size_multiple   r   z-transformer.embeddings.word_embeddings.weightzlm_head.weightc                 S       t dd| } t dd| } | S )Nz^transformer.ln_f.(weight|bias)ztransformer.ln_f.\1z+^transformer.h.(\d+).ln_(1|2).(weight|bias)ztransformer.layers.\1.norm\2.\3r   r   r   r   r   key_mapping_ln"   s   z0remap_state_dict_hf_btlm.<locals>.key_mapping_lnc                 3   r   r   r   r   )r   r   r   r   '   r   ztransformer.h.z.mlp.c_fc.weightz.mlp.c_fc2.weight)dimztransformer.layers.z.mlp.fc1.weightz.mlp.c_fc.biasz.mlp.c_fc2.biasz.mlp.fc1.biasz.mlp.c_proj.weightz.mlp.fc2.weightc                 S   s   t dd| } | S )Nz$^transformer.h.(\d+).mlp.c_proj.biasz"transformer.layers.\1.mlp.fc2.biasr   r   r   r   r   key_mapping_mlp4   s   z1remap_state_dict_hf_btlm.<locals>.key_mapping_mlpc                 3   r   r   r   r   )r   r   r   r   8   r   z.attn.c_attn.weightz.mixer.Wqkv.weightz.attn.c_proj.weightz.mixer.out_proj.weightztransformer.relative_pe.slopesc                 S   r   )Nz%^transformer.h.(\d+).attn.c_attn.biasz%transformer.layers.\1.mixer.Wqkv.biasz%^transformer.h.(\d+).attn.c_proj.biasz)transformer.layers.\1.mixer.out_proj.biasr   r   r   r   r   key_mapping_attnB   s
   z2remap_state_dict_hf_btlm.<locals>.key_mapping_attnc                 3   r   r   r   r   )r   r   r   r   I   r   )r   itemspopgetattrmathceil
vocab_sizeFpadshaperangenum_hidden_layerstorchcatt)
state_dictconfigword_embeddingsr   r$   dW1W3b1b3W2WqkvWoutr   )r   r   r   r   r   remap_state_dict_hf_btlm   s@   
&
r8   btlm_configreturnc                 C   sj  t di d| jd| jdkrdnT| jd| jd| jd| jd| jd	| jd
| j	d| j
d| jd| jd| jd| jd| jd| jdkd| jdkd| jd| jd| jd| jddS d| jd| jd| jd| jd	| jd
| j	d| j
d| jd| jd| jd| jd| jd| jdkd| jdkd| jd| jd| jd| jddS )Nr$   n_positionsalibir   n_embdn_layern_headn_inneractivation_functionresid_pdrop
embd_pdrop
attn_pdroplayer_norm_epsiloninitializer_rangebos_token_ideos_token_id	use_alibiuse_flash_attnmup_width_scalemup_embeddings_multipliermup_output_multipliermup_scale_qk_dot_by_dmlp_multiple_ofr   r   )r   r$   position_embedding_typer;   hidden_sizer)   num_attention_headsr@   rA   rB   rC   rD   rE   rF   rG   rH   rK   mup_embeddings_scalemup_output_alpharN   )r9   r   r   r   btlm_config_to_gpt2_configN   s   	


	


rU   )r"   jsonr	   pathlibr   collectionsr   r*   torch.nn.functionalnn
functionalr%   einopsr   transformersr   r   r   r8   rU   r   r   r   r   <module>   s   =