o
    i                     @   sp   d dl Z d dlZd dlmZ d dlZd dlm  mZ d dl	m
Z
 d dlmZmZ dd Zdedefd	d
ZdS )    N)OrderedDict)	rearrange)FalconConfig
GPT2Configc              	      s"  dd t fdd|  D } dd t fdd|  D } | d}t|d	d
}t|j| | }t|ddd||j	d  f| d< t|drT| d | d< n*| d}t|ddd||j	d  f| d< | d}t|d||j	d  f| d< dd t fdd|  D } dd t fdd|  D } dd  t  fdd|  D } |j
}t|dd
}|j| }	t|jD ]J}
t| d|
 dd|| d |	d}t|d d d df d}t|d d dgf d}t|d d d gf d}tj|||gdd!| d|
 d< q| S )"Nc                 S      t dd| S )Nz^transformer.h.transformer.layers.resubkey r   N/home/ubuntu/vllm_env/lib/python3.10/site-packages/flash_attn/models/falcon.pykey_mapping_layers   s   z6remap_state_dict_hf_falcon.<locals>.key_mapping_layersc                 3        | ]\}} ||fV  qd S Nr   .0kv)r   r   r   	<genexpr>       z-remap_state_dict_hf_falcon.<locals>.<genexpr>c                 S   r   )Nz^transformer.word_embeddings.z'transformer.embeddings.word_embeddings.r   r   r   r   r   key_mapping_emb   s   z3remap_state_dict_hf_falcon.<locals>.key_mapping_embc                 3   r   r   r   r   )r   r   r   r      r   z-transformer.embeddings.word_embeddings.weightpad_vocab_size_multiple   r   tie_word_embeddingszlm_head.weightzlm_head.biasc                 S   s<   t dd| } t dd| } t dd| } t dd| } | S )Nz*^transformer.layers.(\d+).input_layernorm.ztransformer.layers.\1.norm1.z3^transformer.layers.(\d+).post_attention_layernorm.ztransformer.layers.\1.norm2.z"^transformer.layers.(\d+).ln_attn.z!^transformer.layers.(\d+).ln_mlp.r   r   r   r   r   key_mapping_ln.   s   z2remap_state_dict_hf_falcon.<locals>.key_mapping_lnc                 3   r   r   r   r   )r   r   r   r   ;   r   c                 S       t dd| } t dd| } | S )Nz,^transformer.layers.(\d+).mlp.dense_h_to_4h.ztransformer.layers.\1.mlp.fc1.z,^transformer.layers.(\d+).mlp.dense_4h_to_h.ztransformer.layers.\1.mlp.fc2.r   r   r   r   r   key_mapping_mlp>   s   z3remap_state_dict_hf_falcon.<locals>.key_mapping_mlpc                 3   r   r   r   r   )r   r   r   r   G   r   c                 S   r   )Nz9^transformer.layers.(\d+).self_attention.query_key_value.z!transformer.layers.\1.mixer.Wqkv.z/^transformer.layers.(\d+).self_attention.dense.z%transformer.layers.\1.mixer.out_proj.r   r   r   r   r   key_mapping_attnI   s   z4remap_state_dict_hf_falcon.<locals>.key_mapping_attnc                 3   r   r   r   r   )r   r   r   r   V   r   	n_head_kvr   z.mixer.Wqkv.weightz4(group ratio headdim) ... -> group ratio headdim ...   )ratioheaddimz4group ratio headdim ... -> (group ratio headdim) ...)dim)r   itemspopgetattrmathceil
vocab_sizeFpadshapen_headhidden_sizerangen_layerr   torchcat)
state_dictconfigword_embeddingsr   r,   output_embeddingsoutput_embeddings_biasr0   r    r#   lWqkvWqWkWvr   )r   r   r   r   r   r   remap_state_dict_hf_falcon   sR   



	

"r@   falcon_configreturnc                 C   s   t | dt | ddrdn| j}|dk}td#i d| jddd| jd	| jd
| jd| jd ddd| jddd| jd| jd| j	d| j
d| jd| jd|d|ddddddd| jd| jd | jd!| jd"dS )$Nr    multi_queryFr   r,   n_positionsr   n_embdr3   r0   n_inner   activation_functiongeluresid_pdrop
embd_pdropg        
attn_pdroplayer_norm_epsiloninitializer_rangebos_token_ideos_token_idparallel_blockparallel_block_tied_normrotary_emb_fractiong      ?rotary_emb_interleavedr   Tqkv_proj_biasout_proj_biasmlp_fc1_biasmlp_fc2_biaslm_head_biasr   )r)   r0   r   r,   r1   r3   hidden_dropoutattention_dropoutrM   rN   rO   rP   parallel_attnbias)rA   r    rR   r   r   r   falcon_config_to_gpt2_configj   sr   
	
r^   )r*   r	   collectionsr   r4   torch.nn.functionalnn
functionalr-   einopsr   transformersr   r   r@   r^   r   r   r   r   <module>   s   ]