o
    in                     @   s   d Z ddlmZ ddlmZmZ ddlZddlmZ ddlm	Z	m
Z
 ddlmZ ddlmZ d	d
lmZmZ d	dlmZ edddG dd deZdS )z
ColBERT late interaction model for retrieval and reranking.

ColBERT uses per-token embeddings and late interaction (MaxSim) scoring
instead of single-vector representations or cross-encoder concatenation.

Reference: https://arxiv.org/abs/2004.12832
    )Iterable)ClassVarLiteralN)nn)PoolerConfig
VllmConfig)Pooler)pooler_for_token_embed   )BertEmbeddingModel	BertModel)default_pooling_typeCLSALL)seq_pooling_typetok_pooling_typec                       s   e Zd ZU dZdZeed  ed< dddede	f fdd	Z
ddede	d
efddZd
ejfddZded
efddZdeee	ejf  fddZ  ZS )ColBERTModelaF  ColBERT late interaction model for retrieval/reranking.

    This model extends BertEmbeddingModel with a ColBERT-style linear
    projection layer for per-token embeddings. It supports only:
    - "token_embed" task: Per-token embeddings for late interaction

    ColBERT is fundamentally a per-token embedding model - the linear
    projection is trained for per-token representations, not for CLS
    pooling. Use a dedicated dense embedding model if you need single-
    vector representations.

    The ColBERT scoring (MaxSim) is computed externally, either client-side
    or via the late interaction scoring path in ServingScores.

    Attributes:
        colbert_linear: Linear projection from hidden_size to colbert_dim
        supports_late_interaction: Flag indicating this model uses late
            interaction scoring
    Tsupports_late_interaction )prefixvllm_configr   c                   sT   |j j}|j| _|j j| _t|dd pt|dd pt|dd | _t j||d d S )Ncolbert_dimdimprojection_dimr   r   )model_config	hf_confighidden_size
head_dtypegetattrr   super__init__)selfr   r   config	__class__ X/home/ubuntu/vllm_env/lib/python3.10/site-packages/vllm/model_executor/models/colbert.pyr!   3   s   


zColBERTModel.__init__returnc                 C   s   t ||dS )Nr   )r   )r"   r   r   r&   r&   r'   _build_modelE   s   zColBERTModel._build_modelc                 C   s*   | j du r	tdtj| j| j d| jdS )z*Build the ColBERT linear projection layer.Nz8colbert_dim must be set before building the linear layerF)biasdtype)r   
ValueErrorr   Linearr   r   )r"   r&   r&   r'   _build_colbert_linearH   s   
z"ColBERTModel._build_colbert_linearpooler_configc                 C   s*   | j d ur|  | _nd | _t|| jdS )N)	projector)r   r.   colbert_linearr	   )r"   r/   r&   r&   r'   _build_poolerS   s   
zColBERTModel._build_poolerweightsc                 C   sR  dt dt fdd}t|}g }g }|D ]3\}}||}|dv r'|d|f q|ds1|dr?|dd}	||	|f q|||f qt }
| j|}|
d	d
 |D  |r|D ]H\}}|dkr| j	d u r|j
d | _	|  | _t| j j}| j| | j| jj_|| jjjjj}| jjjjj| |
d  |
S q^|
S )Nnamer(   c                 S   s(   dD ]}|  |r| t|d  } q| S )N)model.zbert.)
startswithlen)r4   pr&   r&   r'   _stripf   s
   
z)ColBERTModel.load_weights.<locals>._strip)zlinear.weightcolbert_linear.weightr:   zlinear.zcolbert_linear.c                 S   s   h | ]}d | qS )r5   r&   ).0nr&   r&   r'   	<setcomp>   s    z,ColBERTModel.load_weights.<locals>.<setcomp>r   zpooler.head.projector.weight)strlistappendr6   replacesetmodelload_weightsupdater   shaper.   r1   next
parametersdevicetopoolerheadr0   weightdatacopy_add)r"   r3   r9   weights_list
model_sidecolbert_sider4   rM   strippednew_nameloadedloaded_modelrI   r&   r&   r'   rD   e   sB   


zColBERTModel.load_weights)r   )__name__
__module____qualname____doc__r   r   r   __annotations__r   r>   r!   r   r)   r   r-   r.   r   r   r2   r   tupletorchTensorrD   __classcell__r&   r&   r$   r'   r      s   
 $r   )r[   collections.abcr   typingr   r   r^   r   vllm.configr   r   !vllm.model_executor.layers.poolerr   )vllm.model_executor.layers.pooler.tokwiser	   bertr   r   interfaces_baser   r   r&   r&   r&   r'   <module>   s   	
