o
    }oiJ                     @   s   d dl mZmZ d dlmZmZmZ d dlZG dd deZG dd deZ	G dd	 d	eZ
G d
d deZG dd deZe	e	e
eedZdd Zdee fddZdS )    )ABCabstractmethod)	GeneratorOptionalTupleNc                
   @   s   e Zd ZdZdefddZedee fddZde	d	e	dd
fddZ
ede	de	deeeejf d
d
f fddZdefddZd
S )ModelConverterz
    Abstract class that defines the interface for a converter that implements model-specific conversion functions
    for deploying NeMo checkpoints on vLLM.
    
model_typec                 C   s
   || _ d S )Nr   )selfr    r   U/home/ubuntu/.local/lib/python3.10/site-packages/nemo/export/vllm/model_converters.py__init__   s   
zModelConverter.__init__returnc                 C      dS )ze
        Returns the HF architecture name for the current model, such as 'LlamaForCausalLM'.
        Nr   r
   r   r   r   get_architecture      zModelConverter.get_architecturenemo_model_config	hf_configNc                 C   r   )z
        Implements any custom HF configuration adjustments in the 'hf_config' dict that are necessary
        for this model after the common translation takes place in NemoModelConfig's constructor.
        Nr   )r
   r   r   r   r   r   convert_config%   r   zModelConverter.convert_config
state_dictc                 C   r   )zt
        Returns or yields a sequence of (name, tensor) tuples that contain model weights in the HF format.
        Nr   )r
   r   r   r   r   r   convert_weights,   s   zModelConverter.convert_weightsc                 C   r   )z
        Returns True if the model requires a 'bos' token to be used at the beginning of the input sequence.
        NeMo checkpoints do not store this information.
        Fr   r   r   r   r   requires_bos_token5   r   z!ModelConverter.requires_bos_token)__name__
__module____qualname____doc__strr   r   r   r   dictr   r   r   torchtensorr   boolr   r   r   r   r   r      s    r   c                   @   $   e Zd Zdd Zdd Zdd ZdS )LlamaConverterc                 C   s    | j dkrdS | j dkrdS d S )NllamaLlamaForCausalLMmistralMistralForCausalLMr	   r   r   r   r   r   ?   s
   

zLlamaConverter.get_architecturec                 #   s   |d }|d }|d }|d }|| }||  |d|  }d|d fV  d|d	 fV  | d
ds:d|d fV  tt|D ]}	|d |	 }
|
|||g}
t fddt|D }t | d }t d | d }d|fd|fd|ffD ]\}}d|	 d| d}||
| d|fV  q}|d |	 }d|	 d|fV  tj|d |	 ddd\}}d|	 d|fV  d|	 d|fV  |d  |	 }d|	 d!|fV  |d" |	 }d|	 d#|fV  |d$ |	 }d|	 d%|fV  q@d S )&Nhidden_sizenum_attention_headsnum_query_groups
num_layers   model.embed_tokens.weight&model.embedding.word_embeddings.weightmodel.norm.weight$model.decoder.final_layernorm.weight#share_embeddings_and_output_weightsFlm_head.weightmodel.output_layer.weight5model.decoder.layers.self_attention.linear_qkv.weightc                    ,   g | ]}t  d  |  d  |   qS r,   r   arange.0iheads_per_groupr   r   
<listcomp>Y       z2LlamaConverter.convert_weights.<locals>.<listcomp>   q_projk_projv_projmodel.layers..self_attn..weight6model.decoder.layers.self_attention.linear_proj.weight.self_attn.o_proj.weight*model.decoder.layers.mlp.linear_fc1.weightr   dim.mlp.gate_proj.weight.mlp.up_proj.weight*model.decoder.layers.mlp.linear_fc2.weight.mlp.down_proj.weight@model.decoder.layers.self_attention.linear_qkv.layer_norm_weight.input_layernorm.weight5model.decoder.layers.mlp.linear_fc1.layer_norm_weight .post_attention_layernorm.weight)getrangeintreshaper   catr8   chunk)r
   r   r   r(   head_numr*   r+   	head_sizeqkv_total_dimlayerqkv_weightsq_slicek_slicev_slicenamesliceweight_namelinear_proj_weightgate_proj_weightup_proj_weightmlp_up_weightinput_layernorm_weightpost_attn_layernorm_weightr   r<   r   r   F   sR   

zLlamaConverter.convert_weightsc                 C   r   NTr   r   r   r   r   r   y      z!LlamaConverter.requires_bos_tokenNr   r   r   r   r   r   r   r   r   r   r#   =   s    3r#   c                   @   r"   )MixtralConverterc                 C      | j dkrdS d S )NmixtralMixtralForCausalLMr	   r   r   r   r   r         
z!MixtralConverter.get_architecturec                 #   s0   |d }|d }|d }|d }|d }|| }||  |d|  }	d|d fV  d	|d
 fV  d|d fV  t t|D ]}
|d |
 }||	||g}t fddt |D }t |	 d }t d |	 d }d|fd|fd|ffD ]\}}d|
 d| d}||| d|fV  q{|d |
 }d|
 d|fV  |d |
 }d|
 d|fV  t |D ]@}|d |
 | }tj|ddd\}}d|
 d| d |fV  d|
 d| d!|fV  |d" |
 | }d|
 d| d#|fV  q|d$ |
 }d|
 d%|fV  |d& |
 }d|
 d'|fV  q>d S )(Nr(   r)   r*   r+   num_moe_expertsr,   r-   r.   r/   r0   r2   r3   r4   c                    r5   r6   r7   r9   r<   r   r   r>      r?   z4MixtralConverter.convert_weights.<locals>.<listcomp>r@   rA   rB   rC   rD   rE   rF   rG   rH   rI   z&model.decoder.layers.mlp.router.weightz.block_sparse_moe.gate.weightz:model.decoder.layers.mlp.experts.experts.linear_fc1.weightr   rK   z.block_sparse_moe.experts.z
.w1.weightz
.w3.weightz:model.decoder.layers.mlp.experts.experts.linear_fc2.weightz
.w2.weightrQ   rR   z-model.decoder.layers.pre_mlp_layernorm.weightrT   )rV   rW   rX   r   rY   r8   rZ   )r
   r   r   r(   r[   r*   r+   rt   r\   r]   r^   r_   r`   ra   rb   rc   rd   re   rf   mlp_router_weightexpertlinear_fc1_weightrg   rh   linear_fc2_weightrj   rk   r   r<   r   r      s^   


z MixtralConverter.convert_weightsc                 C   r   rl   r   r   r   r   r   r      rm   z#MixtralConverter.requires_bos_tokenNrn   r   r   r   r   ro   }   s    :ro   c                   @   r"   )GemmaConverterc                 C   rp   )NgemmaGemmaForCausalLMr	   r   r   r   r   r      rs   zGemmaConverter.get_architecturec                 c   s    |d }|d }|d }|d }|d }| dd}|| }	d|d	 fV  |d
 }
|s0|
d8 }
d|
fV  tt|D ]Q}|d | }|sJ|d8 }d| d|fV  |d | }|s_|d8 }d| d|fV  |d | }|jd d }d| d|d |d d f fV  d| d||d d d f fV  |d | }d| d|fV  |d | }d| d|fV  |d | }|d|  }||||}tj|||f|jd}tj|||f|jd}tj|||f|jd}d}t|D ]c}||||	 d d f |||	 |d |	 d d d d f< ||	7 }|||d d d d d f |||d d d d d f< |d7 }|||d d d d d f |||d d d d d f< |d7 }q||ksZJ ||| |}||| |}||| |}d| d|fV  d| d|fV  d| d |fV  q;d S )!Nr+   r*   r)   kv_channelsr(   layernorm_zero_centered_gammaFr-   r.   r0   g      ?r/   rQ   rD   rR   rS   rT   rJ   r   r,   rM   rN   rO   rP   rH   rI   r4   )dtyper@   z.self_attn.q_proj.weightz.self_attn.k_proj.weightz.self_attn.v_proj.weight)rU   rV   rW   shaperX   r   emptyr~   )r
   r   r   r+   r*   r[   r\   r(   zero_centered_gammar=   final_layernorm_weightr^   rj   post_attention_layernorm_weightgate_up_combined_weight	gate_sizedown_proj_weightself_attn_o_proj_weight
qkv_weightqkv_intermediate_sizeq_weightk_weightv_weightptrr;   r   r   r   r      st   
""&88
zGemmaConverter.convert_weightsc                 C   r   rl   r   r   r   r   r   r     rm   z!GemmaConverter.requires_bos_tokenNrn   r   r   r   r   ry      s    Fry   c                   @   r"   )Starcoder2Converterc                 C   rp   )N
starcoder2Starcoder2ForCausalLMr	   r   r   r   r   r     rs   z$Starcoder2Converter.get_architecturec                 C   s*   | d}|d ur|d |d< d|d< d S )Nwindow_sizer   sliding_windowFtie_word_embeddings)rU   )r
   r   r   window_sizesr   r   r   r     s   
z"Starcoder2Converter.convert_configc                 #   s   |d }|d }|d }|d }|| }||  |d|  }d|v r(|d }	n|d }	d|d	 fV  d
|d fV  |	rCd|d fV  d|d fV  t t|D ]
}
|d |
 }||||g}|	rp|d |
 }|||g}t fddt |D }t | d }t d | d }d|fd|fd|ffD ]-\}}|| d|}d|
 d| d|fV  |	r|| d}d|
 d| d|fV  qd|
 d|d |
 fV  |	rd|
 d|d  |
 fV  d|
 d!|d" |
 fV  |	rd|
 d#|d$ |
 fV  d|
 d%|d& |
 fV  |	r!d|
 d'|d( |
 fV  d|
 d)|d* |
 fV  |	r>d|
 d+|d, |
 fV  d|
 d-|d. |
 fV  |	r[d|
 d/|d0 |
 fV  qPd S )1Nr+   r*   r)   r(   r,   biasadd_bias_linearr-   r.   r/   r0   zmodel.norm.biasz"model.decoder.final_layernorm.biasr2   r3   r4   z3model.decoder.layers.self_attention.linear_qkv.biasc                    r5   r6   r7   r9   r<   r   r   r>   F  r?   z7Starcoder2Converter.convert_weights.<locals>.<listcomp>r@   rA   rB   rC   rG   rD   rE   rF   z.biasrI   rH   z.self_attn.o_proj.biasz4model.decoder.layers.self_attention.linear_proj.biasz.mlp.c_fc.weightrJ   z.mlp.c_fc.biasz(model.decoder.layers.mlp.linear_fc1.biasz.mlp.c_proj.weightrO   z.mlp.c_proj.biasz(model.decoder.layers.mlp.linear_fc2.biasrR   rQ   z.input_layernorm.biasz>model.decoder.layers.self_attention.linear_qkv.layer_norm_biasrT   rS   z.post_attention_layernorm.biasz3model.decoder.layers.mlp.linear_fc1.layer_norm_bias)rV   rW   rX   r   rY   r8   )r
   r   r   r+   r*   r[   r(   r\   r]   has_biasr^   r_   qkv_biasr`   ra   rb   rc   rd   qkv_weights_sliceqkv_bias_slicer   r<   r   r   '  s   





















z#Starcoder2Converter.convert_weightsN)r   r   r   r   r   r   r   r   r   r   r     s    r   )r$   r&   rq   rz   r   c                 C   s   |t | < dS )z
    Establishes a mapping from short model type to a class that converts the model from Nemo format
    to a vLLM compatible format.
    N)_MODEL_CONVERTERSr   clsr   r   r   register_model_converter  s   r   r   c                 C   s    t | d}|du rdS || S )zb
    Returns an instance of the the model conversion class for the given model type, or None.
    N)r   rU   r   r   r   r   get_model_converter  s   r   )abcr   r   typingr   r   r   r   r   r#   ro   ry   r   r   r   r   r   r   r   r   <module>   s    (@EQ{	