o
    i                      @   s$  d dl mZ d dlZd dlmZ d dlmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZ d dlmZ d d	lmZmZmZ d d
lmZ d dlmZ d dlmZ G dd dejZG dd dejZG dd dejZG dd dejZG dd dejZ G dd dejZ!G dd dejjZ"dS )    )IterableN)$get_tensor_model_parallel_world_size)divide)
SiluAndMul)MMEncoderAttention)Conv2dLayer)RMSNorm)MergedColumnParallelLinearQKVParallelLinearRowParallelLinear)QuantizationConfig)default_weight_loader)AIMv2Configc                       @   e Zd Zdededef fddZdejdejfdd	Z	  Z
S )
AIMv2SwiGLUFFNconfigquant_configprefixc                    sb   t    |j}|j}|j}t||gd ||| dd| _t||||| dd| _t	 | _
d S )N   .fc13)biasr   r   z.fc2
input_sizeoutput_sizer   r   r   )super__init__intermediate_sizehidden_sizeuse_biasr	   fc13r   fc2r   act_fn)selfr   r   r   hidden_featuresin_featuresr   	__class__ V/home/ubuntu/vllm_env/lib/python3.10/site-packages/vllm/model_executor/models/aimv2.pyr      s&   
zAIMv2SwiGLUFFN.__init__xreturnc                 C   s*   |  |\}}| |}| |\}}|S N)r   r!   r    )r"   r)   _r'   r'   r(   forward4   s   
zAIMv2SwiGLUFFN.forward__name__
__module____qualname__r   r   strr   torchTensorr-   __classcell__r'   r'   r%   r(   r      s    r   c                       8   e Zd Zdef fddZdejdejfddZ  ZS )AIMv2PatchEmbedr   c                    sF   t    t|j|j|j|jf|j|jfd| _t|j|jd| _	d S )N)kernel_sizestrideeps)
r   r   r   num_channelsr   
patch_sizeprojr   rms_norm_epsnorm)r"   r   r%   r'   r(   r   <   s   


zAIMv2PatchEmbed.__init__r)   r*   c                 C   s(   |  |ddd}| j|}|S Nr      )r>   flatten	transposer@   forward_nativer"   r)   r'   r'   r(   r-   F   s   zAIMv2PatchEmbed.forward	r/   r0   r1   r   r   r3   r4   r-   r5   r'   r'   r%   r(   r7   ;   s    
r7   c                       r6   )AIMv2ViTPreprocessorr   c                    sB   t    |j|j d }t|| _tt	d||j
f| _d S rA   )r   r   
image_sizer=   r7   
patchifiernn	Parameterr3   zerosr   	pos_embed)r"   r   num_patchesr%   r'   r(   r   M   s   

zAIMv2ViTPreprocessor.__init__r)   r*   c                 C   s@   |  |}|j\}}}| j|j}||d d d |f  }|S r+   )rJ   shaperN   todevice)r"   r)   tokensr,   NrN   r'   r'   r(   r-   T   s
   
zAIMv2ViTPreprocessor.forwardrG   r'   r'   r%   r(   rH   L   s    rH   c                       r   )
AIMv2Attentionr   r   r   c                    s   t    || _|j| _|j| _| j| j | _| j| j | jkr-td| j d| j d| jd | _	t
| j| j| j|j|| dd| _t| j| j|j|| dd| _t | _t| j| j| _t| j| j| j	| d	d
| _d S )Nz;embed_dim must be divisible by num_heads (got `embed_dim`: z and `num_heads`: z).g      z.qkv)r   	head_sizetotal_num_headsr   r   r   z.projr   .attnr   )r   r   r   r   	embed_dimnum_attention_heads	num_headshead_dim
ValueErrorscaler
   qkv_biasqkvr   r   r>   r   tp_sizer   num_heads_per_partitionr   attnr"   r   r   r   r%   r'   r(   r   ]   sJ   
	zAIMv2Attention.__init__r)   r*   c                 C   sB   |  |\}}|jddd\}}}| |||}| |\}}|S )N   )dim)ra   chunkrd   r>   )r"   r)   ra   r,   qkvr'   r'   r(   r-      s
   zAIMv2Attention.forwardr.   r'   r'   r%   r(   rU   \   s    +rU   c                       r   )

AIMv2Blockr   r   r   c                    s^   t    t||| dd| _t|j|jd| _t||| dd| _	t|j|jd| _
d S )NrX   )r   r   r:   z.mlp)r   r   rU   rd   r   r   r?   norm_1r   mlpnorm_2re   r%   r'   r(   r      s   
zAIMv2Block.__init__r)   r*   c                 C   s0   ||  | j| }|| | j| }|S r+   )rd   rn   rE   ro   rp   rF   r'   r'   r(   r-      s   zAIMv2Block.forwardr.   r'   r'   r%   r(   rm      s    rm   c                	       sP   e Zd ZddddedededB def fdd	Zd
ej	dej	fddZ
  ZS )AIMv2TransformerN require_post_normr   r   r   rt   r   c                   sR   t    t fddt jD | _|r$t j j	d| _
d S d | _
d S )Nc                    s$   g | ]}t   d | dqS )z.blocks.rY   )rm   ).0ir   r   r   r'   r(   
<listcomp>   s    z-AIMv2Transformer.__init__.<locals>.<listcomp>r:   )r   r   rK   
ModuleListrangenum_hidden_layersblocksr   r   r?   post_trunk_normr"   r   r   rt   r   r%   rw   r(   r      s   

zAIMv2Transformer.__init__rS   r*   c                 C   s,   | j D ]}||}q| jd ur| |}|S r+   )r|   r}   )r"   rS   blockr'   r'   r(   r-      s
   



zAIMv2Transformer.forward)r/   r0   r1   r   r   boolr2   r   r3   r4   r-   r5   r'   r'   r%   r(   rq      s    rq   c                	       st   e Zd ZddddedededB def fdd	Zd
ej	dej	fddZ
deeeej	f  dee fddZ  ZS )
AIMv2ModelNrr   rs   r   r   rt   r   c                   s0   t    t|| _t|||| dd| _d S )Nz.trunk)r   rt   r   )r   r   rH   preprocessorrq   trunkr~   r%   r'   r(   r      s   

zAIMv2Model.__init__pixel_valuesr*   c                 C   s   |  |}| |}|S r+   )r   r   )r"   r   r)   r'   r'   r(   r-      s   

zAIMv2Model.forwardweightsc                 C   s   ddg}t |  }t }|D ]E\}}|dr| jjd u rq|D ]\}}}	||vr+q!|||}|| }
|
j}||
||	  n|| }
t|
dt	}||
| |
| q|S )N)r   z.fc1r   )r   z.fc3rB   ztrunk.post_trunk_normweight_loader)dictnamed_parametersset
startswithr   r}   replacer   getattrr   add)r"   r   stacked_params_mappingparams_dictloaded_paramsnameloaded_weight
param_nameweight_nameshard_idparamr   r'   r'   r(   load_weights   s.   
zAIMv2Model.load_weights)r/   r0   r1   r   r   r   r2   r   r3   r4   r-   r   tupler   r   r5   r'   r'   r%   r(   r      s    ,r   )#collections.abcr   r3   torch.nnrK   vllm.distributedr   vllm.distributed.utilsr   %vllm.model_executor.layers.activationr   $vllm.model_executor.layers.attentionr   vllm.model_executor.layers.convr   $vllm.model_executor.layers.layernormr   !vllm.model_executor.layers.linearr	   r
   r   'vllm.model_executor.layers.quantizationr   -vllm.model_executor.model_loader.weight_utilsr   $vllm.transformers_utils.configs.ovisr   Moduler   r7   rH   rU   rm   rq   r   r'   r'   r'   r(   <module>   s(    5