o
    
۾iW                     @   s  d dl mZ d dlmZ d dlmZmZmZmZ d dl	Z	d dl
mZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ ddlmZ ddlmZmZ erdd dlmZmZ d dlmZ ede ej! dZ"ee#Z$g dZ%dddej!dB fddZ&dej'de(ddde)fddZ*de(de(de(fddZ+d e"de"fd!d"Z,d#e"de"fd$d%Z-d#e"de"fd&d'Z.G d(d) d)eZ/dej!fd*d+Z0ed,e)fd-d.Z1d/ee2e(e	j3f  fd0d1Z4d/ee2e(e	j3f  fd2d3Z5e4e5d4Z6d/ee2e(e	j3f  fd5d6Z7dS )7    )Iterable)contextmanager)TYPE_CHECKINGAnyTypeVarcastN)
VllmConfig)init_logger)
get_act_fn)VerifyAndUpdateConfig)try_get_dense_modules)get_hf_file_bytes   )supports_multimodal)VllmModelForPoolingis_pooling_model)ModelConfigr   )Pooler_T)bound)ForCausalLMForConditionalGeneration	ChatModelLMHeadModelmodel_configr   returnc                 C   s   t | j| jd}|du rdS zBg }|D ]2}|d }tj|d |d |dd| jd}t||| s2q|| |d	 }rE|t	| qtj
| j| jd
W S  ty_   td Y dS w )z3Load Sentence-Transformers Dense projection layers.)revisionNfolderin_featuresout_featuresbiasT)r    dtypeactivation_function)r!   zST projector loading failed)r   modelr   nnLinearget
head_dtype_load_dense_weightsappendr
   
Sequentialto	Exceptionlogger	exception)r   dense_moduleslayerslayer_configr   linearact_name r4   W/home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/adapters.py_load_st_projector'   s4   

r6   r2   r   c              	   C   s  ddl m} dD ]}|r| d| n|}zht||j|j}|s"W q|dr2ddlm} ||}nddl}	t	j|	
|dd	d
}dD ]8}
|
|v r{t| jd|}|| j||
  |
dd}| jdurv||v rvt| jd|}|| j||   W  d	S qCW q ty   td| Y qw dS )z0Load weights using vLLM's weight_loader pattern.r   default_weight_loader)zmodel.safetensorszpytorch_model.bin/z.safetensors)loadNcpuT)map_locationweights_only)weightzlinear.weightzdense.weightweight_loaderr>   r    zFailed to load %sF)-vllm.model_executor.model_loader.weight_utilsr8   r   r#   r   endswithsafetensors.torchr:   iotorchBytesIOgetattrr>   replacer    r,   r-   r.   )r2   r   r   r8   filename	file_path
file_bytesload_safetensors
state_dictrC   
weight_keyr?   bias_keybias_loaderr4   r4   r5   r(   G   sH   



r(   orig_model_namepooling_suffixc                 C   s    | }t D ]}||}q|| S N)_GENERATE_SUFFIXESremovesuffix)rP   rQ   
model_namegenerate_suffixr4   r4   r5   _get_pooling_model_namew   s   rW   orig_clsc                    sN   ddl m ddlm ddlm mm G  fddd| t}|S )Nr   )LogitsProcessorParallelLMHeadr   )AutoWeightsLoaderStageMissingLayerno_init_weightsc                	       s|   e Zd ZdZdddddededd	f fd
dZ	ddddeddfddZdee	ee
jf  f fddZ  ZS )z2_create_pooling_model_cls.<locals>.ModelForPoolingT prefixvllm_configr   ra   kwargsr   Nc                   s   | fdd fd t  jd||d| W d    n1 s$w   Y  || _t| dd }|sDt| rD|  }t|drD|j}|sM| j||d}|| _d S )Nc                    s
    d| S )Noutputr4   )mod)r]   r4   r5   <lambda>   s   
 zM_create_pooling_model_cls.<locals>.ModelForPooling.__init__.<locals>.<lambda>)targets)rb   ra   poolerr`   r4   )	super__init__rb   rF   r   get_language_modelhasattrrh   _init_pooler)selfrb   ra   rc   rh   language_model)rY   r[   r]   	__class__r^   r4   r5   rj      s    


z;_create_pooling_model_cls.<locals>.ModelForPooling.__init__r   c                 S   s   t rR   )NotImplementedError)rn   rb   ra   r4   r4   r5   rm      s   z?_create_pooling_model_cls.<locals>.ModelForPooling._init_poolerweightsc           
   	      s   t  ddg}dttttjf   }|D ]#\ }| |f zt fdd|D W  n	 t	y;   Y qw rZ}
dD ]	}|rNt|}qEtd|   fddg ||R D }fdd	}tt d
|}	|	|S )Nr_   zmodel.c                 3   s     | ]}|  v r|V  qd S rR   r4   ).0ra   )nameparams_dictr4   r5   	<genexpr>   s    zR_create_pooling_model_cls.<locals>.ModelForPooling.load_weights.<locals>.<genexpr>.zGMapping weights to %s as they are relative to this model instead of %s.c                 3   s     | ]\}} | |fV  qd S rR   r4   )rs   rt   r>   )target_prefixr4   r5   rv      s
    

c                    s    }| | S rR   )load_weights)rr   loader)r\   rn   r4   r5   default_load_weights   s   
z]_create_pooling_model_cls.<locals>.ModelForPooling.load_weights.<locals>.default_load_weightsry   )dictnamed_parameterslisttuplestrrD   Tensorr)   nextStopIterationsplitrF   r-   info	_get_nameri   )
rn   rr   candidate_prefixesseen_weightsloaded_weighttarget_modelattrmapped_weightsr{   ry   )r\   rp   )rt   ru   rn   rx   r5   ry      s>   

z?_create_pooling_model_cls.<locals>.ModelForPooling.load_weightsr_   )__name__
__module____qualname__r   r   r   rj   rm   r   r   rD   r   ry   __classcell__r4   r\   rY   r[   r]   r^   rp   r5   ModelForPooling   s*    !
*r   )	+vllm.model_executor.layers.logits_processorrY   3vllm.model_executor.layers.vocab_parallel_embeddingr[   utilsr\   r]   r^   r   )rX   r   r4   r   r5   _create_pooling_model_cls   s
   Xr   clsc                    sB   t | r| S ddlm  G  fdddt| }t| jd|_|S )a\  
    Subclass an existing vLLM model to support embeddings.

    By default, the embeddings of the whole prompt are extracted from the
    normalized hidden state corresponding to the last token.

    Note:
        We assume that no extra layers are added to the original model;
        please implement your own model if this is not the case.
    r   DispatchPoolerc                       s*   e Zd Z	d
dddeddf fddZd	S )z-as_embedding_model.<locals>.ModelForEmbeddingr_   rb   r   ra   r   r   c                    s   |j j}|d us
J  |S rR   )r   pooler_configfor_embedding)rn   rb   ra   r   r   r4   r5   rm      s   
z:as_embedding_model.<locals>.ModelForEmbedding._init_poolerNr   )r   r   r   r   rm   r4   r   r4   r5   ModelForEmbedding   s    r   ForEmbedding)r   !vllm.model_executor.layers.poolerr   r   rW   r   )r   r   r4   r   r5   as_embedding_model   s   r   c                    sl   t | r| S ddlm ddlm  ddlm} ddlm G  fdddt	| |}t
| jd	|_|S )
a  
    Subclass an existing vLLM model to support classify and score tasks.

    By default, the class probabilities are extracted from the softmaxed
    hidden state corresponding to the last token.

    Note:
        We assume that the classification head is a single linear layer
        stored as the attribute `score` of the top-level model;
        please implement your own model if this is not the case.
    r   )ReplicatedLinearr   )SupportsCrossEncodingr   )maybe_prefixc                       sR   e Zd Z	ddddeddffddZd	eeeejf  f fd
dZ	  Z
S )z8as_seq_cls_model.<locals>.ModelForSequenceClassificationr_   rb   r   ra   r   r   c              
      sd   |j j }|j }|j}| |jd|j j|d|dd| _|j j}|d us*J  j	|| jdS )NFscore)r    params_dtypequant_configreturn_biasra   )
classifier)
r   	hf_configget_text_configr   get_hidden_size
num_labelsr'   r   r   for_seq_cls)rn   rb   ra   text_configr   r   r   r   r   r   r4   r5   rm     s   
zEas_seq_cls_model.<locals>.ModelForSequenceClassification._init_poolerrr   c                    sp    j }| }t|dt|dd }t|dt|dd } fdd}||}|d u r3|d u r3t |S t |S )Nclassifier_from_tokenmethodc                 3   sd    | D ],\}}|dkr* j jj} j jj}|||}tj| j _d j _	q||fV  qd S )Nz
score.biasF)
r   r>   devicer!   r+   rD   r$   	Parameterr    skip_bias_add)rr   rt   r>   r   r!   r    rn   r4   r5   auto_set_score_bias@  s   


zbas_seq_cls_model.<locals>.ModelForSequenceClassification.load_weights.<locals>.auto_set_score_bias)configr   rF   ri   ry   seq_cls_model_loader)rn   rr   r   r   tokensr   r   r   r   r5   ry   6  s   

zEas_seq_cls_model.<locals>.ModelForSequenceClassification.load_weightsr   )r   r   r   r   rm   r   r   rD   r   ry   r   r4   r   r   r5   ModelForSequenceClassification  s    (r   ForSequenceClassification)r   !vllm.model_executor.layers.linearr   r   r   %vllm.model_executor.models.interfacesr   r   r   r   rW   r   )r   r   r   r4   r   r5   as_seq_cls_model  s   8r   c                   @   s   e Zd ZedddZdS )SequenceClassificationConfigrb   r   r   Nc                 C   s   | j j}| }t|dt|dd }t|dt|dd }|d u r"d S |d us(J |tv s4J d| d|dkrGt|dks@J d|_d|_n
t||_t||_t|dd	}||_d S )
Nr   r   method  not supportedfrom_2_way_softmax   r   use_sep_tokenF)r   r   r   rF   SEQ_CLS_LOAD_METHODSlenr   r   )rb   r   r   r   r   r   r4   r4   r5   verify_and_update_config[  s(   



z5SequenceClassificationConfig.verify_and_update_config)rb   r   r   N)r   r   r   staticmethodr   r4   r4   r4   r5   r   Z  s    r   c                 C   s   t | rz|  }|| ur|W S W n	 ty   Y nw dD ]}t| |r<t| |}t|tjr<|| ur<t|dr<|  S q|  D ]\}}t	|j
}d|v sRd|v r[t|dr[|  S qA| S )z
    Get the language model component for sequence classification conversion.
    For VLMs, returns the inner language model. For standard LLMs, returns model itself.
    )ro   lm
text_modelr#   r   LMHead)r   rk   r,   rl   rF   
isinstancer$   Modulenamed_childrentyper   )r#   r   	attr_name	candidatert   child
child_namer4   r4   r5   _get_language_model_for_seq_clsy  s6   



r   is_vlmc                 c   s    |sdV  dS t | dd}|du rdV  dS | }t |dd}t |dd}t |dd}z2|dur5d|_|dur<d|_|durCd|_dV  W |durN||_|durU||_|dur^||_dS dS |durg||_|durn||_|duru||_w )z
    Context manager to temporarily disable sequence classification loading
    on inner VLM models to prevent recursive seq_cls_model_loader calls.
    Nr   r   r   )rF   r   r   r   )ro   r   inner_hf_configinner_text_configoriginal_methodoriginal_tokensoriginal_hf_tokensr4   r4   r5   '_disable_seq_cls_loading_on_inner_model  sB   
r   rr   c                 C   s  ddl m} ddlm} | jj}| jj}| j}| }t	|dt	|dg }t
tt |}t|dks4J t| }	|	| u}
|
oBt|	d}||j|j|d|	_|jrf|	j}t|dr[|jn| }|	j||	_t|	|
 td	d
 t| jD }|| |}W d    n1 sw   Y  ddlm} ||j|j |j!|j"d}|#|d }|#|d }|	jj$}|j%|g &t'j(|j%|g &t'j( }|r|	j)n| j)}|j$}t	|d|}||| |	`|rdnd}|*| d}t	| dd  }r|+|}|,| |S )Nr   rZ   r7   r   r   r   r   embed_tokensc                 s       | ]
}|j d kr|V  qdS r   Nr   rs   xr4   r4   r5   rv         z8load_weights_using_from_2_way_softmax.<locals>.<genexpr>get_tokenizerr   tokenizer_modetrust_remote_coder   r?   language_model.score.weightscore.weightlm_head.weighthf_to_vllm_mapper)-r   r[   r@   r8   rb   r   r   r   r   rF   r   r~   intr   r   rl   
vocab_sizehidden_sizelm_headtie_word_embeddingsr#   r   get_input_embeddingstie_weightsr   r   r   __mro__ry   vllm.tokenizersr   	tokenizertokenizer_revisionr   r   convert_tokens_to_idsr>   datar+   rD   float32r   add	_map_namediscard)r#   rr   r[   r8   r   r   r   r   r   ro   r   using_vlm_headtext_backboner   pooling_model_clsloaded_weightsr   r   false_idtrue_idlm_head_weightscore_weightscore_layerparamr?   score_weight_namelm_head_namer   r4   r4   r5   %load_weights_using_from_2_way_softmax  sx   






r  c                    s  ddl m} ddlm} | jj}| jj}| j }t	|dg }t
tt |}t|dks.J t| }|| u}	|	o<t|d}
||j|j|d|_|jr`|j}t|drU|jn| }|j||_t||	 tdd	 t| jD }|| |}W d    n1 sw   Y  dd
lm} ||j|j |j!|j"d  fdd|D }|jj#j$| }|
r|j%n| j%}|j#}t	|d|}||| |`|
rdnd}|&| d}t	| dd  }r|'|}|(| |S )Nr   rZ   r7   r   r   r   r   c                 s   r   r   r   r   r4   r4   r5   rv   1  r   z2load_weights_no_post_processing.<locals>.<genexpr>r   r   c                    s   g | ]}  |qS r4   )r   )rs   tr   r4   r5   
<listcomp>@  s    z3load_weights_no_post_processing.<locals>.<listcomp>r?   r   r   r   r   ))r   r[   r@   r8   rb   r   r   r   r   rF   r   r~   r   r   r   rl   r   r   r   r   r#   r   r   r   r   r   r   r   ry   r   r   r   r   r   r   r>   r   r   r   r   r   )r#   rr   r[   r8   r   r   r   r   ro   r   r   r   r   r  r  r   	token_idsr  r  r  r?   r	  r
  r   r4   r  r5   load_weights_no_post_processing  sb   






r  )r   no_post_processingc                 C   sL   | j jj}| }t|dt|dd }|tv sJ d| dt| | |S )Nr   r   r   )rb   r   r   r   rF   r   )r#   rr   r   r   r   r4   r4   r5   r   \  s
   
r   )8collections.abcr   
contextlibr   typingr   r   r   r   rD   torch.nnr$   vllm.configr   vllm.loggerr	   %vllm.model_executor.layers.activationr
   !vllm.model_executor.models.configr   vllm.transformers_utils.configr   "vllm.transformers_utils.repo_utilsr   
interfacesr   interfaces_baser   r   r   r   r   r   r   r   r   r-   rS   r6   r%   r   boolr(   rW   r   r   r   r   r   r   r   r   r  r  r   r   r4   r4   r4   r5   <module>   sZ    
0	b"V!$
RF 