o
    
۾i                     @   s|   d Z ddlmZ ddlZddlmZ ddlmZ ddlm	Z	m
Z
 ddlmZ ddlmZ d	d
lmZmZ G dd deZdS )z!Llama model for fairseq2 weights.    )IterableN)	Parameter)
VllmConfig)get_tensor_model_parallel_rank$get_tensor_model_parallel_world_size)set_weight_attrs)LlamaForCausalLM   )AutoWeightsLoaderWeightsMapperc                
       s   e Zd Zdddedef fddZdeeeej	f  de
e fd	d
Zdeeef fddZdedej	deeef deeej	f fddZ  ZS )Fairseq2LlamaForCausalLM )prefixvllm_configr   c                   s8   t  j||d t | _t | _dd| j dg| _d S )N)r   r   zmodel.ptmodel.z.pt)super__init__r   tp_rankr   tp_sizeallow_patterns_overrides)selfr   r   	__class__ ]/home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/fairseq2_llama.pyr   %   s   
z!Fairseq2LlamaForCausalLM.__init__weightsreturnc              
      s   t |}||d   }tdddddddd	d
dddd}||}t   tjjr2dgnd d}| fdd|D S )N	model_keyzmodel.embed_tokens.r   zlm_head.)zdecoder_frontend.embed.zdecoder.zfinal_proj.z.input_layernorm.z.post_attention_layernorm.z.self_attn.o_proj.z.mlp.gate_proj.z.mlp.up_proj.z.mlp.down_proj.z.norm.)z.self_attn_layer_norm.z.ffn_layer_norm.z.self_attn.output_proj.z.ffn.gate_proj.z.ffn.inner_proj.z.ffn.output_proj.z.layer_norm.)orig_to_new_prefixorig_to_new_substr)skip_prefixesc                 3   s"    | ]\}} || V  qd S N)reshape_fairseq2_weights).0nameloaded_weightparamsr   r   r   	<genexpr>R   s
    
z8Fairseq2LlamaForCausalLM.load_weights.<locals>.<genexpr>)	dictitemsr   applynamed_parametersr
   configtie_word_embeddingsload_weights)r   r   weights_wrappedfs2_to_vllm_mapperloaderr   r&   r   r/   1   s6   
z%Fairseq2LlamaForCausalLM.load_weightsr'   c                    s`   |  D ])\}}|d d|v rt| dk rqt fdddD r&qt|ddi qd	S )
zASets the `is_sharded_weight` flag to True for all sharded weights.norm   c                 3       | ]}| v V  qd S r!   r   r#   embmodulesr   r   r(   _       z@Fairseq2LlamaForCausalLM.flag_sharded_weights.<locals>.<genexpr>embed_tokenslm_headis_sharded_weightTN)r*   splitlensizeanyr   )r   r'   r$   paramr   r9   r   flag_sharded_weightsX   s   
z-Fairseq2LlamaForCausalLM.flag_sharded_weightsr$   r%   c                    s   dt jdtdt jffdd}|d d v r ||jj}nd v r+||jj}t fd	d
dD r{d}jdkr{|j	| jj
k r{|j	| j jj
ksVJ ddgt|  }j||< ||}t|| ddi d v r{| ||fS )zReshape fairseq2's weights.wn_headsr   c                    sf    j j| }| j |  d kr| j }| j } j j}| ||| d d|dd||S )Nr   r5   r	   )r-   head_dimr   rB   hidden_sizeview	transposereshape)rF   rG   attn_inattn_out)r   r   r   permuten   s   

zBFairseq2LlamaForCausalLM.reshape_fairseq2_weights.<locals>.permuter3   k_projq_projc                 3   r6   r!   r   r7   r9   r   r   r(      r;   zDFairseq2LlamaForCausalLM.reshape_fairseq2_weights.<locals>.<genexpr>r<   r   r	   z*vocab_size should be divisible by tp_size.r?   Fr=   )torchTensorintr@   r-   num_key_value_headsnum_attention_headsrC   r   shape
vocab_sizerA   rB   repeatr   rE   )r   r$   r%   r'   rO   dimrepeatsr   )r:   r   r   r"   f   s&   



z1Fairseq2LlamaForCausalLM.reshape_fairseq2_weights)__name__
__module____qualname__r   strr   r   tuplerR   rS   setr/   r)   r   rE   r"   __classcell__r   r   r   r   r   $   s    $'
r   )__doc__collections.abcr   rR   torch.nnr   vllm.configr   vllm.distributedr   r   !vllm.model_executor.layers.linearr    vllm.model_executor.models.llamar   utilsr
   r   r   r   r   r   r   <module>   s   