o
    
۾ix                     @   s  d dl mZmZ d dlZd dlmZ d dlmZ d dlmZ d dl	m
Z
mZmZmZ d dlmZ d dlmZ d d	lmZ d d
lmZmZmZ d dlmZmZmZ d dlmZ d dlmZm Z m!Z!m"Z" d dl#m$Z$m%Z% d dl&m'Z' d dl(m)Z) d dl*m+Z+ d dl,m-Z- d dl.m/Z/ ddl0m1Z1m2Z2 ddl3m4Z4m5Z5 ddl6m7Z7m8Z8m9Z9 G dd dej:Z;G dd de Z<G dd dej:Z=G dd dej:Z>G d d! d!ej:Z?G d"d# d#ej:Z@G d$d% d%ej:ZAG d&d' d'ej:ZBG d(d) d)ej:ZCee5d*d+G d,d- d-ej:e2ZDG d.d/ d/eDZEe5d*d+G d0d1 d1ej:e2ZFd2ZGd3ejHd4ejHd5dfd6d7ZId3ejHd5ejHfd8d9ZJG d:d; d;ej:ZKG d<d= d=eZLe5d*d+G d>d? d?eFZMe5d*d+G d@dA dAej:e1e2ZNe4dBe5dCdDG dEdF dFej:ZOdS )G    )IterableSetN)nn)
BertConfig)support_torch_compile)CacheConfigModelConfigPoolerConfig
VllmConfig)$get_tensor_model_parallel_world_size)
get_act_fn)EncoderOnlyAttention)ColumnParallelLinearQKVParallelLinearRowParallelLinear)DispatchPoolerPoolerPoolingParamsUpdate)LambdaPoolerActivation)EmbeddingPoolerHeadSequencePoolerSequencePoolerOutputget_seq_pooling_method)pooler_for_token_classifypooler_for_token_embed)QuantizationConfig)VocabParallelEmbedding)IntermediateTensors)PoolingTask)PoolingMetadata   )SupportsCrossEncodingSupportsQuant)	attn_typedefault_pooling_type)AutoWeightsLoaderWeightsMappermaybe_prefixc                	       sL   e Zd Zdef fddZ	ddejdejdejdB dejfd	d
Z  ZS )BertEmbeddingconfigc                    s   t    |j| _t|j|j| _t|j|j| _t|j	|j| _
tj|j|jd| _| dt|jd t|dd| _| jdkrHtdd S )Nepsposition_idsr   position_embedding_typeabsolutez4Only 'absolute' position_embedding_type is supported)super__init__hidden_sizesizer   
vocab_sizeword_embeddingsmax_position_embeddingsposition_embeddingstype_vocab_sizetoken_type_embeddingsr   	LayerNormlayer_norm_epsregister_buffertorcharange	unsqueezegetattrr-   
ValueError)selfr)   	__class__ S/home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/bert.pyr0   2   s0   

zBertEmbedding.__init__N	input_idsr,   inputs_embedsreturnc                 C   sH   t |}|d u r| |}| |}| |}|| | }| |}|S N)_decode_token_type_idsr4   r6   r8   r9   )rA   rF   r,   rG   token_type_idsr6   r8   
embeddingsrD   rD   rE   forwardL   s   



zBertEmbedding.forwardrI   )	__name__
__module____qualname__r   r0   r<   TensorrM   __classcell__rD   rD   rB   rE   r(   1   s    r(   c                       s"   e Zd Zdef fddZ  ZS )
BertPoolermodel_configc                    sz   |j }|d us	J |j}t jt|jt d |j}tj	|j
|j
|d _t  _t| fddt jd _d S )N)poolingheaddtypec                    s
     | S rI   )dense)xrA   rD   rE   <lambda>x   s   
 z%BertPooler.__init__.<locals>.<lambda>)
head_dtype	projector
activation)pooler_config	hf_configr/   r0   r   seq_pooling_typer   Identityr]   Linearr1   rY   Tanhact_fnr   r   rV   )rA   rT   r`   r)   r]   rB   r[   rE   r0   a   s&   

zBertPooler.__init__)rN   rO   rP   r   r0   rR   rD   rD   rB   rE   rS   `   s    rS   c                       s>   e Zd Zd
dedef fddZdejdejfdd	Z  Z	S )BertEncoder vllm_configprefixc                    sH   t    |jj|j |jt fddtj	D | _
d S )Nc              	      s&   g | ]}t   d | dqS )z.layer.)r)   cache_configquant_configrj   )	BertLayer).0	layer_idxrk   r)   rj   rl   rD   rE   
<listcomp>   s    z(BertEncoder.__init__.<locals>.<listcomp>)r/   r0   rT   ra   rk   rl   r   
ModuleListrangenum_hidden_layerslayerrA   ri   rj   rB   rp   rE   r0   ~   s   

zBertEncoder.__init__hidden_statesrH   c                 C   s   | j D ]}||}q|S rI   )ru   )rA   rw   ru   rD   rD   rE   rM      s   

zBertEncoder.forwardrh   )
rN   rO   rP   r
   strr0   r<   rQ   rM   rR   rD   rD   rB   rE   rg   }   s    rg   c                	       sN   e Zd Z			ddededB dedB def fddZd	ej	fd
dZ
  ZS )rm   Nrh   r)   rk   rl   rj   c                    sp   t    t|j|j|j||| dd| _t|j|j|j	|| dd| _
t|j|j|j|| dd| _d S )Nz
.attention)r1   num_attention_headsr:   rk   rl   rj   z.intermediate)r1   intermediate_size
hidden_actrl   rj   .output)r1   r{   r:   rl   rj   )r/   r0   BertAttentionr1   rz   r:   	attentionBertIntermediater{   r|   intermediate
BertOutputoutput)rA   r)   rk   rl   rj   rB   rD   rE   r0      s.   
	zBertLayer.__init__rw   c                 C   s$   |  |}| |}| ||}|S rI   )r   r   r   )rA   rw   attn_outputintermediate_outputr   rD   rD   rE   rM      s   

zBertLayer.forwardNNrh   )rN   rO   rP   r   r   r   ry   r0   r<   rQ   rM   rR   rD   rD   rB   rE   rm      s    "rm   c                       s\   e Zd Z			ddededededB dedB def fd	d
Zde	j
de	j
fddZ  ZS )r~   Nrh   r1   rz   r:   rk   rl   rj   c                    s@   t    t||||| dd| _t|||| dd| _d S )Nr}   )r1   rz   rk   rl   rj   )r1   r:   rl   rj   )r/   r0   BertSelfAttentionrA   BertSelfOutputr   )rA   r1   rz   r:   rk   rl   rj   rB   rD   rE   r0      s   
	zBertAttention.__init__rw   rH   c                 C   s   |  |}| ||S rI   )rA   r   )rA   rw   self_outputrD   rD   rE   rM      s   
zBertAttention.forwardr   )rN   rO   rP   intfloatr   r   ry   r0   r<   rQ   rM   rR   rD   rD   rB   rE   r~      s,    r~   c                       sX   e Zd Z			ddedededB dedB def
 fdd	Zd
ej	dej	fddZ
  ZS )r   Nrh   r1   rz   rk   rl   rj   c              	      s   t    || _t }|| _| j| dksJ | j| | _| j| _| j| j | _| j| j | jks3J td| j| | _	| j| j | _
| j	| j | _| jd | _t| j| j| j| jd|| dd| _t| j| j| j| j	||| dd| _d S )	Nr   r    g      Tz	.qkv_proj)r1   	head_sizetotal_num_headstotal_num_kv_headsbiasrl   rj   z.attn)	num_headsr   scalenum_kv_headsrk   rl   rj   )r/   r0   r1   r   r   r   r   head_dimmaxr   q_sizekv_sizescalingr   qkv_projr   attn)rA   r1   rz   rk   rl   rj   tp_sizerB   rD   rE   r0      s>   

zBertSelfAttention.__init__rw   rH   c                 C   s@   |  |\}}|j| j| j| jgdd\}}}| |||}|S )Ndim)r   splitr   r   r   )rA   rw   qkv_qkvr   rD   rD   rE   rM     s    zBertSelfAttention.forwardr   )rN   rO   rP   r   r   r   ry   r0   r<   rQ   rM   rR   rD   rD   rB   rE   r      s(    -r   c                	       sT   e Zd Z		ddedededB def fddZd	ej	d
ej	dej	fddZ
  ZS )r   Nrh   r1   r:   rl   rj   c                    s8   t    t||d|| dd| _tj||d| _d S NT.dense
input_sizeoutput_sizer   rl   rj   r*   r/   r0   r   rY   r   r9   )rA   r1   r:   rl   rj   rB   rD   rE   r0     s   
zBertSelfOutput.__init__rw   input_tensorrH   c                 C       |  |\}}| || }|S rI   rY   r9   rA   rw   r   r   rD   rD   rE   rM   /     zBertSelfOutput.forwardNrh   rN   rO   rP   r   r   r   ry   r0   r<   rQ   rM   rR   rD   rD   rB   rE   r     s&    r   c                       sR   e Zd Z		ddededededB def
 fdd	Zd
ejdejfddZ	  Z
S )r   Nrh   r1   r{   r|   rl   rj   c                    s2   t    t||d|| dd| _t|| _d S )NTr   r   )r/   r0   r   rY   r   intermediate_act_fn)rA   r1   r{   r|   rl   rj   rB   rD   rE   r0   8  s   
zBertIntermediate.__init__rw   rH   c                 C   s   |  |\}}| |}|S rI   )rY   r   )rA   rw   r   rD   rD   rE   rM   J  s   
zBertIntermediate.forwardr   )rN   rO   rP   r   ry   r   r0   r<   rQ   rM   rR   rD   rD   rB   rE   r   7  s    r   c                       sX   e Zd Z		ddededededB def
 fdd	Zd
ej	dej	dej	fddZ
  ZS )r   Nrh   r1   r{   r:   rl   rj   c                    s8   t    t||d|| dd| _tj||d| _d S r   r   )rA   r1   r{   r:   rl   rj   rB   rD   rE   r0   Q  s   
zBertOutput.__init__rw   r   rH   c                 C   r   rI   r   r   rD   rD   rE   rM   e  r   zBertOutput.forwardr   r   rD   rD   rB   rE   r   P  s*    r   CLS)rb   c                       s   e Zd ZdZdg diZdeddededee	j
 d	d
f fddZdejd	ejfddZ	
	
ddejdejded
B dejd
B d	ejf
ddZdeeeejf  fddZdeeeejf  d	ee fddZ  ZS )	BertModelTr   )querykeyvaluerh   rj   embedding_classri   rj   r   rH   Nc                   s8   t    |jj| _|| j| _t|| dd| _d S )Nz.encoderri   rj   )r/   r0   rT   ra   r)   rL   rg   encoderrA   ri   rj   r   rB   rD   rE   r0   t  s   

zBertModel.__init__rF   c                 C      | j |S rI   )rL   r4   rA   rF   rD   rD   rE   embed_input_ids     zBertModel.embed_input_ids	positionsintermediate_tensorsrG   c                 C   s   | j |||d}| |S )N)rF   r,   rG   )rL   r   )rA   rF   r   r   rG   rw   rD   rD   rE   rM     s   
zBertModel.forwardweightsc                 C   s   g d}g }g }t |  }|D ]:\}}|D ](\}}	}
|	|vr q||	|}||vr+q|| }|j}||||
 ||  n||v rJ|||f q||fS )N))r   r   r   )r   r   r   )r   r   r   )dictnamed_parametersreplaceweight_loaderappend)rA   r   stacked_params_mappingloaded_stacked_paramsother_weightsparams_dictnameloaded_weight
param_nameweight_nameshard_idparamr   rD   rD   rE   _load_weights  s(   
zBertModel._load_weightsc                 C   s4   |  |\}}t| dgd}||}|| |S )Nzpooler.skip_prefixesr   r%   load_weightsupdaterA   r   r   r   loaderloaded_paramsrD   rD   rE   r     s
   

zBertModel.load_weightsNN)rN   rO   rP   is_pooling_modelpacked_modules_mappingr(   r
   ry   typer   Moduler0   r<   rQ   r   r   rM   r   tupler   setr   rR   rD   rD   rB   rE   r   m  s<    
,r   c                	       sd   e Zd ZdZdeddededeej	 ddf fd	d
Z
deeeejf  dee fddZ  ZS )BertPoolingModelTrh   r   ri   rj   r   rH   Nc                   s"   t  j|||d t|j| _d S Nri   rj   r   )r/   r0   rS   rT   poolerr   rB   rD   rE   r0     s   zBertPoolingModel.__init__r   c                 C   s.   |  |\}}t| }||}|| |S rI   r   r   rD   rD   rE   r     s
   

zBertPoolingModel.load_weights)rN   rO   rP   r   r(   r
   ry   r   r   r   r0   r   r   r<   rQ   r   r   rR   rD   rD   rB   rE   r     s    ,r   c                       s   e Zd ZdZdZdddedef fddZd	ej	d
ej	fddZ
		dd	ej	dej	dedB dej	dB d
ej	f
ddZdeeeej	f  fddZddeded
efddZded
efddZ  ZS )BertEmbeddingModel_  A model that uses Bert to provide embedding functionalities.

    This class encapsulates the BertModel and provides an interface for
    embedding operations and customized pooling functions.

    Attributes:
        model: An instance of BertModel used for forward operations.
        _pooler: An instance of Pooler used for pooling operations.
    Trh   rj   ri   rj   c                   sD   t    |jj}|d usJ | j|t|dd| _| || _d S )Nmodelr   )	r/   r0   rT   r`   _build_modelr'   r   _build_poolerr   )rA   ri   rj   r`   rB   rD   rE   r0     s   

zBertEmbeddingModel.__init__rF   rH   c                 C   r   rI   )r   r   r   rD   rD   rE   r     r   z"BertEmbeddingModel.embed_input_idsNr   r   rG   c                 C   s   | j ||||dS )NrF   r   rG   r   )r   )rA   rF   r   r   rG   rD   rD   rE   rM     s   zBertEmbeddingModel.forwardr   c                 C   sH   t |}tdd |D }|stddid}t| dgd}|j||dS )	Nc                 s   s    | ]
\}}| d V  qdS )model.N)
startswith)rn   r   r   rD   rD   rE   	<genexpr>  s    z2BertEmbeddingModel.load_weights.<locals>.<genexpr>rh   r   )orig_to_new_prefixzlm_head.r   )mapper)listanyr&   r%   r   )rA   r   weights_listhas_model_prefixr   r   rD   rD   rE   r     s   zBertEmbeddingModel.load_weightsc                 C   s   t ||tdS r   )r   r(   rv   rD   rD   rE   r   	  s   zBertEmbeddingModel._build_modelr`   c                 C   s
   t |S rI   )r   for_embedding)rA   r`   rD   rD   rE   r        
z BertEmbeddingModel._build_poolerr   rx   )rN   rO   rP   __doc__r   r
   ry   r0   r<   rQ   r   r   rM   r   r   r   r   r   r	   r   r   rR   rD   rD   rB   rE   r     s*    


r      rF   rK   rH   c                 C   s    | d |j d  |t>  d S )Nr   )shapebitwise_or_TOKEN_TYPE_SHIFT)rF   rK   rD   rD   rE   _encode_token_type_ids*  s    r   c                 C   s<   t j| t j| jdt> }| }| |t? }| | |S )N)rX   device)r<   	ones_likeint32r   r   bitwise_notbitwise_andbitwise_and_)rF   ids_masktokens_maskrK   rD   rD   rE   rJ   1  s   
rJ   c                       sT   e Zd Z	ddededef fddZdejfdd	Zd
ejdejfddZ	  Z
S )BertMLMHead-q=r1   r3   r:   c                    sH   t    t||| _t | _tj||d| _tj||dd| _	d S )Nr*   T)r   )
r/   r0   r   rd   rY   GELUr_   r9   
layer_normdecoder)rA   r1   r3   r:   rB   rD   rE   r0   @  s
   

zBertMLMHead.__init__embeddings_weightc                 C   s   || j _d S rI   )r
  weight)rA   r  rD   rD   rE   tie_weights_with_embeddingsI  r   z'BertMLMHead.tie_weights_with_embeddingsrw   rH   c                 C   s,   |  |}| |}| |}| |}|S rI   )rY   r_   r	  r
  )rA   rw   rZ   logitsrD   rD   rE   rM   L  s
   



zBertMLMHead.forward)r  )rN   rO   rP   r   r   r0   r<   rQ   r  rM   rR   rD   rD   rB   rE   r  ?  s    	r  c                       s   e Zd ZdZ				ddejdedB d	edB d
edef
 fddZ	de
e fddZdedefddZdejdedefddZ  ZS )SPLADESparsePoolera  
    SPLADE sparse pooling:
    logits = mlm_head(hidden_states)
            -> log1p(relu(logits))
            -> (max|sum over L)
            -> [V]

    Padding is masked with an attention mask,
    [CLS]/[SEP] is removed (selected),
    and then pooled.
    e   f   r   Tmlm_headcls_token_idNsep_token_idrU   remove_cls_sepc                    s8   t    |dv sJ || _|| _|| _|| _|| _d S )N)r   sum)r/   r0   r  r  r  rU   r  )rA   r  r  r  rU   r  rB   rD   rE   r0   a  s   

zSPLADESparsePooler.__init__rH   c                 C   s   dhS )NembedrD   r[   rD   rD   rE   get_supported_tasksr  s   z&SPLADESparsePooler.get_supported_taskstaskc                 C   s
   t ddS )NT)requires_token_ids)r   )rA   r  rD   rD   rE   get_pooling_updatesu  r   z&SPLADESparsePooler.get_pooling_updatesrw   pooling_metadatac                 C   s\  |j }| }t|}|j}d}g }t|D ]}	t||	 }
||||
  }d}|
}| jr\|d ur\| jd urC||	df  | jkrCd}| j	d ur\||	|
d f  | j	kr\t
||
d }||krut| jjj}|||f ||
7 }q| ||| }tt|}| jdkr|jdd}n|j
ddj}||  ||
7 }qtj|dd S )Nr   r    r  r   )prompt_lenstolistlenprompt_token_idsrs   r   r  r  itemr  r   r  r
  out_featuresr   	new_zerosr<   log1prelurU   r  values
contiguousstack)rA   rw   r  lens_tensorlensB	token_idsoffsetpooled_listiLhs	start_idxend_idxVlogits_iscores_ipooled_irD   rD   rE   rM   x  s>   



zSPLADESparsePooler.forward)r  r  r   T)rN   rO   rP   r   r   r   r   ry   boolr0   r   r   r  r   r  r<   rQ   r   r   rM   rR   rD   rD   rB   rE   r  T  s4    r  c                       sd   e Zd ZdZddddededef fdd	Zd
edefddZ	de
eeejf  fddZ  ZS )BertSpladeSparseEmbeddingModelz
    BertEmbeddingModel + SPLADE sparse embedding.
    - Make logits by self.mlm_head
    - pooler: SPLADESparsePooler(mlm_head...)
    rh   r   )rj   splade_poolingri   rj   r:  c                   s^   t  j||d |jj}t|j|jt|ddd| _|| _	|jj
}|d us'J | || _d S )Nr   r:   r  r1   r3   r:   )r/   r0   rT   ra   r  r1   r3   r?   r  _splade_poolingr`   r   r   )rA   ri   rj   r:  cfgr`   rB   rD   rE   r0     s   
z'BertSpladeSparseEmbeddingModel.__init__r`   rH   c              	   C   st   | j j}t| dst|j|jt|ddd| _t| dd}t|dd }t|dd }tt	|t
| j|||d	d
dS )Nr  r:   r  r;  r<  r   r  r  T)r  r  r  rU   r  )token_embedr  )r   r)   hasattrr  r1   r3   r?   r  r   r   r  )rA   r`   r=  pooling_modecls_idsep_idrD   rD   rE   r     s*   

z,BertSpladeSparseEmbeddingModel._build_poolerr   c                    s   t | ds| jj}t|j|jt|ddd| _dtdtfdd}t	|}g }g }|D ]\}}||}	|	
d	r?||	|f q*||	|f q*t }
| j|}|
d
d |D  |r|ddddddd  fdd|D }|r|t| |}|
| |
S )Nr  r:   r  r;  r   rH   c                 S   s(   dD ]}|  |r| t|d  } q| S )N)r   zbert.)r   r  )r   prD   rD   rE   _strip  s
   
z;BertSpladeSparseEmbeddingModel.load_weights.<locals>._stripzcls.predictions.c                 S   s   h | ]}d | qS )r   rD   )rn   nrD   rD   rE   	<setcomp>  s    z>BertSpladeSparseEmbeddingModel.load_weights.<locals>.<setcomp>zmlm_head.dense.weightzmlm_head.dense.biaszmlm_head.layer_norm.weightzmlm_head.layer_norm.biaszmlm_head.decoder.weightzmlm_head.decoder.bias)z&cls.predictions.transform.dense.weightz$cls.predictions.transform.dense.biasz*cls.predictions.transform.LayerNorm.weightz(cls.predictions.transform.LayerNorm.biaszcls.predictions.decoder.weightzcls.predictions.decoder.biasc                    s$   g | ]\}}| v r | |fqS rD   rD   )rn   rE  wname_maprD   rE   rq     s   $ z?BertSpladeSparseEmbeddingModel.load_weights.<locals>.<listcomp>)r?  r   r)   r  r1   r3   r?   r  ry   r   r   r   r   r   r   r%   )rA   r   r=  rD  r   
model_sidemlm_sider   rG  r   loadedloaded_modelremapped
loaded_mlmrD   rH  rE   r     s@   



z+BertSpladeSparseEmbeddingModel.load_weights)rN   rO   rP   r   r
   ry   r0   r	   r   r   r   r   r<   rQ   r   rR   rD   rD   rB   rE   r9    s    $r9  c                       s   e Zd ZdZdZdddedef fddZd	ej	d
ej	fddZ
deeeej	f  fddZ			dd	ej	dB dej	dedB dej	dB dej	dB d
ej	fddZ  ZS )BertForSequenceClassificationr   Trh   r   ri   rj   c                   sz   t    |jj}|j| _t|t|dtd| _t	j
|j|j|jjd| _|jj}|d us/J tj|| jj| jd| _d S )Nbertr   rW   )rU   
classifier)r/   r0   rT   ra   
num_labelsr   r'   r(   rQ  r   rd   r1   r]   rR  r`   r   for_seq_clsr   rA   ri   rj   r)   r`   rB   rD   rE   r0   $  s(   
z&BertForSequenceClassification.__init__rF   rH   c                 C   r   rI   rQ  r   r   rD   rD   rE   r   =  r   z-BertForSequenceClassification.embed_input_idsr   c                 C      t | }||}|S rI   r%   r   rA   r   r   r   rD   rD   rE   r   @     
z*BertForSequenceClassification.load_weightsNr   r   rG   rK   c                 C   sF   |d ur| j jjdt> k sJ |d usJ t|| | j ||||dS Nr    r   )rQ  r)   r3   r   r   )rA   rF   r   r   rG   rK   rD   rD   rE   rM   E  s   
z%BertForSequenceClassification.forwardNNN)rN   rO   rP   r   r   r
   ry   r0   r<   rQ   r   r   r   r   r   rM   rR   rD   rD   rB   rE   rP    s,    
	rP  encoder_onlyALL)tok_pooling_typec                       s   e Zd ZdZdddedef fddZdejd	ejfd
dZ	de
eeejf  fddZ			ddejdB dejdedB dejdB dejdB d	ejfddZ  ZS )BertForTokenClassificationTrh   r   ri   rj   c                   st   t    |jj}|jj| _|j| _t|t|dtd| _	t
j|j|j| jd| _|jj}|d us3J t|| _d S )NrQ  r   rW   )r/   r0   rT   ra   r]   rS  r   r'   r(   rQ  r   rd   r1   rR  r`   r   r   rU  rB   rD   rE   r0   _  s   

z#BertForTokenClassification.__init__rF   rH   c                 C   r   rI   rV  r   rD   rD   rE   r   r  r   z*BertForTokenClassification.embed_input_idsr   c                 C   rW  rI   rX  rY  rD   rD   rE   r   u  rZ  z'BertForTokenClassification.load_weightsNr   r   rG   rK   c                 C   s\   |d ur| j jjdt> k sJ |d usJ t|| | j ||||d}|| j}| |S r[  )rQ  r)   r3   r   r   tor]   rR  )rA   rF   r   r   rG   rK   rw   rD   rD   rE   rM   z  s   

z"BertForTokenClassification.forwardr\  )rN   rO   rP   r   r
   ry   r0   r<   rQ   r   r   r   r   r   rM   rR   rD   rD   rB   rE   r`  Z  s*    	r`  )Pcollections.abcr   r   r<   r   transformersr   vllm.compilation.decoratorsr   vllm.configr   r   r	   r
   vllm.distributedr   %vllm.model_executor.layers.activationr   $vllm.model_executor.layers.attentionr   !vllm.model_executor.layers.linearr   r   r   !vllm.model_executor.layers.poolerr   r   r   -vllm.model_executor.layers.pooler.activationsr   )vllm.model_executor.layers.pooler.seqwiser   r   r   r   )vllm.model_executor.layers.pooler.tokwiser   r   'vllm.model_executor.layers.quantizationr   3vllm.model_executor.layers.vocab_parallel_embeddingr   vllm.sequencer   
vllm.tasksr   vllm.v1.pool.metadatar   
interfacesr!   r"   interfaces_baser#   r$   utilsr%   r&   r'   r   r(   rS   rg   rm   r~   r   r   r   r   r   r   r   r   rQ   r   rJ   r  r  r9  rP  r`  rD   rD   rD   rE   <module>   sn   /*#8KQ
WjC