o
    iC                     @  s   d dl mZ d dlmZ d dlmZ d dlZd dlZd dl	m
Z
 d dlmZ d dlmZ d dlmZ eeejf ZedZG d	d
 d
eZdS )    )annotations)defaultdict)IterableN)default_weight_loader)
Qwen3Model)WeightsMapperz^layers\.(\d+)\.(.+)$c                      sX   e Zd ZdZeddidZ fddZ fddZdddZdddZ	dddZ
  ZS )"VoyageQwen3BidirectionalEmbedModelae  
    Qwen3Model + Voyage embedding head + bidirectional attention.

    Checkpoint conventions (HF):
      - MLP: gate_proj + up_proj (unfused)
      - Attn: q_proj + k_proj + v_proj (unfused)
      - Linear head: linear.weight
      - Weights prefixed with "model." (e.g., model.layers.0...)

    vLLM Qwen3Model expects:
      - mlp.gate_up_proj (fused)
      - self_attn.qkv_proj (fused)
      - No "model." prefix

    We remap/fuse weights using generator pipeline and load directly
    (bypassing parent's stacked_params_mapping which would cause
    double-transformation like qkv_proj -> qkqkv_proj).
    zmodel. )orig_to_new_prefixc                   s0   t  j|i | tj| jj| jjdd| _d S )NF)bias)super__init__nnLinearconfighidden_size
num_labelslinear)selfargskwargs	__class__ W/home/ubuntu/vllm_env/lib/python3.10/site-packages/vllm/model_executor/models/voyage.pyr   +   s   z+VoyageQwen3BidirectionalEmbedModel.__init__c                   s   t  j|i |}| |S N)r   forwardr   )r   r   r   outr   r   r   r   5   s   
z*VoyageQwen3BidirectionalEmbedModel.forwardweightsIterable[WeightItem]returnc           
      #  s   t t}dddd}|D ]*\}}t|}|r2|d|v r2t|d}||| ||d < q||fV  qt| D ]@}||  t fddd	D rit	j
 d  d  d gd
d}d| d|fV  q> r~ fddd	D }	td| d|	 q>dS )z*Fuse q_proj, k_proj, v_proj into qkv_proj.qkv)zself_attn.q_proj.weightzself_attn.k_proj.weightzself_attn.v_proj.weight      c                 3      | ]}| v V  qd S r   r   .0ppartsr   r   	<genexpr>M       zDVoyageQwen3BidirectionalEmbedModel._fuse_qkv_proj.<locals>.<genexpr>)r!   r"   r#   r   dimlayers.z.self_attn.qkv_proj.weightc                      g | ]}| vr|qS r   r   r'   r*   r   r   
<listcomp>Q       zEVoyageQwen3BidirectionalEmbedModel._fuse_qkv_proj.<locals>.<listcomp>Layer z missing QKV parts: Nr   dict	_LAYER_REmatchgroupintsortedkeysalltorchcat
ValueError)
r   r   qkv_bufqkv_suffixesnametensorm	layer_idxfusedmissingr   r*   r   _fuse_qkv_proj9   s,   
 z1VoyageQwen3BidirectionalEmbedModel._fuse_qkv_projc           
      #  s    t t}ddd}|D ]*\}}t|}|r1|d|v r1t|d}||| ||d < q||fV  qt| D ]=}||  t fdddD ret	j
 d  d gd	d
}d| d|fV  q= rz fdddD }	td| d|	 q=dS )z-Fuse gate_proj and up_proj into gate_up_proj.gateup)zmlp.gate_proj.weightzmlp.up_proj.weightr$   r%   c                 3  r&   r   r   r'   r*   r   r   r,   g   r-   zHVoyageQwen3BidirectionalEmbedModel._fuse_gate_up_proj.<locals>.<genexpr>)rJ   rK   r   r.   r0   z.mlp.gate_up_proj.weightc                   r1   r   r   r'   r*   r   r   r2   k   r3   zIVoyageQwen3BidirectionalEmbedModel._fuse_gate_up_proj.<locals>.<listcomp>r4   z missing MLP parts: Nr5   )
r   r   mlp_bufmlp_suffixesrC   rD   rE   rF   rG   rH   r   r*   r   _fuse_gate_up_projT   s*   
z5VoyageQwen3BidirectionalEmbedModel._fuse_gate_up_projset[str]c                 C  sv   | j |}| |}| |}t|  }t }|D ]\}}||vr$q|| }t|dt}||| |	| q|S )z7Remap, fuse, and load weights using generator pipeline.weight_loader)
hf_to_vllm_mapperapplyrI   rN   r6   named_parameterssetgetattrr   add)r   r   params_dictloaded_paramsrC   loaded_weightparamrP   r   r   r   load_weightsn   s   


z/VoyageQwen3BidirectionalEmbedModel.load_weights)r   r   r    r   )r   r   r    rO   )__name__
__module____qualname____doc__r   rQ   r   r   rI   rN   r[   __classcell__r   r   r   r   r      s    


r   )
__future__r   collectionsr   collections.abcr   regexrer>   torch.nnr   -vllm.model_executor.model_loader.weight_utilsr    vllm.model_executor.models.qwen3r    vllm.model_executor.models.utilsr   tuplestrTensor
WeightItemcompiler7   r   r   r   r   r   <module>   s   
