o
    
۾i)                     @   s  d Z ddlZddlZddlmZ ddlmZmZ ddlZddlm	Z	 ddl
mZ ddlmZmZmZ ddlmZ dd	lmZmZ dd
lmZmZ ddlmZmZ ddlmZ ddlmZ ddlm Z  ee!Z"edddddddede#de$e	j% dB dedB de	j%f
ddZ&de	j%dedej'ddfddZ(edej	j%dej'fdd Z)e*e+e,e$e	j% e#f f  Z-	 dede,e$e	j% e#f fd!d"Z.dede,e$e	j% e#f fd#d$Z/dede$e	j% fd%d&Z0dede#fd'd(Z1eG d)d* d*Z2d+ede$e	j% fd,d-Z3dS ).z+Utilities for selecting and loading models.    N)contextmanager)	dataclassfield)nn)assert_never)ModelConfig
VllmConfigset_current_vllm_config)init_logger)	AttentionMLAAttention)QuantizationConfigQuantizeMethodBase)record_metadata_for_reloadingset_torchao_reload_attrs)SupportsQuant)
instrument)is_pin_memory_availablezInitialize model)	span_name )prefixmodel_classmodel_configvllm_configr   r   r   returnc          
      C   s  |du r| j }|du rt|\}}| jdurt| j| t|j}dd |j D }d|v rWd|v rWt	| d|d || |d}t
| |W  d   S 1 sRw   Y  d	}tj|td
d td| i }	d|v rq||	d< d|v rz|j|	d< d|v r| j|	d< d|v r| j|	d< d|v r| j|	d< d|v r| j|	d< t	| d|d |di |	}t
| W d   |S 1 sw   Y  |S )z1Initialize a model with the given configurations.Nc                 S   s   g | ]}|j qS  )name).0paramr   r   Z/home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/model_loader/utils.py
<listcomp>2   s    z$initialize_model.<locals>.<listcomp>r   r   T)check_compiler   )r   r   a/  vLLM model class should accept `vllm_config` and `prefix` as input arguments. Possibly you have an old-style model class registered from out of tree and it is used for new vLLM version. Check https://docs.vllm.ai/en/latest/design/arch_overview.html for the design and update the model class accordingly.   )
stacklevelz:Trying to guess the arguments for old-style model class %sconfigcache_configquant_configlora_configscheduler_configr   )r   get_model_architecturer&   configure_quant_configinspect	signature__init__
parametersvaluesr	   r   warningswarnDeprecationWarningloggerwarning	hf_configr%   r'   r(   )
r   r   r   r   _
signatures
all_paramsmodelmsgkwargsr   r   r   initialize_model    sR   	
 






r<   r9   target_devicec              	   C   s   |   D ])\}}t|dd }t|tr-t|| || W d    n1 s(w   Y  q|   D ]\}}t|ttfrHt|drH||j	 q2|j
dkrUt| | d S d S )Nquant_methodprocess_weights_after_loadingtorchao)named_modulesgetattr
isinstancer   device_loading_contextr?   r   r   hasattrdtypequantizationr   )r9   r   r=   r6   moduler>   r   r   r   r?   \   s    

r?   rH   c                 c   s\   |j dkr| V  d S i }|  D ]\}}|jj dkr'|j||< |j||_qzE| V  W t }|  D ]7\}}||v rk|| }|j dkrdtj|j |j	 |jj
|jjd|d}||j ||_q4|j||_q4d S t }|  D ]7\}}||v r|| }|j dkrtj|j |j	 |jj
|jjd|d}||j ||_qu|j||_quw )Ncpu)sizestriderF   layoutdevice
pin_memory)typenamed_parametersrM   datator   torchempty_stridedrJ   rK   rF   rL   copy_)rH   r=   original_device_statesr   prN   original_devicecpu_datar   r   r   rD   z   s`   



rD   c                 C   s   ddl m}m} t| jdg }| jj|| d\}}||  kr1| jdks&J | jdkr1t	
d| | j}|dkr=	 ||fS |d	krNt	d
 ||}||fS |dkr_t	d ||}||fS t| ||fS )Nr   )as_embedding_modelas_seq_cls_modelarchitecturesr   vllmautoz%s has no vLLM implementation, falling back to Transformers implementation. Some features may not be supported and performance may not be optimal.noneembedzConverting to embedding model.classifyz,Converting to sequence classification model.)#vllm.model_executor.models.adaptersrZ   r[   rB   r5   registryresolve_model_cls_get_transformers_backend_cls
model_implr3   warning_onceconvert_type
debug_oncer   )r   rZ   r[   r\   	model_clsarchri   r   r   r   _get_model_architecture   s6   




rm   c                 C   sP   t | j| j| j| j| jtt| jdg f}|t	v rt	| S t
| }|t	|< |S )Nr\   )hashr9   ri   runner_typetrust_remote_coderg   tuplerB   r5   _MODEL_ARCH_BY_HASHrm   )r   key
model_archr   r   r   r)      s   
r)   c                 C      t | d S )Nr   r)   r]   r   r   r   get_model_cls      rw   c                 C   ru   )N   rv   r]   r   r   r   get_architecture_class_name   rx   rz   c                   @   sr   e Zd ZU dZeeee f ed< eedZ	eee
eef f ed< dd Zdede
eee f d	B fd
dZd	S )ParamMappingz
    A class to handle parameter mapping for model weight loading.
    It creates a bidirectional mapping between packed parameters and their
    constituent parts.
    packed_mapping)default_factoryinverse_packed_mappingc                 C   sR   | j  D ]!\}}t|dkr|d |krqt|D ]\}}||f| j|< qqd S )Nry   r   )r|   itemslen	enumerater~   )selfpacked_name
sub_paramsindex
param_namer   r   r   __post_init__   s   zParamMapping.__post_init__module_namer   Nc                 C   s.   | j  D ]\}}||r||f  S qd S )N)r|   r   endswith)r   r   rs   valuer   r   r   get_sub_modules   s
   
zParamMapping.get_sub_modules)__name__
__module____qualname____doc__dictstrlist__annotations__r   r~   rq   intr   r   r   r   r   r   r{      s   
 "&r{   r&   c                 C   sN   t |ts#t|dd}t|dd}|dur| | |dur%|| _dS dS dS )as  
    Pass packed_modules_mapping by reference to quant_config so that
    quant_config can properly match fused modules

    Note that model attributes are passed by reference to quant_config,
    enabling them to be updated by model_class.__new__ (ex. chatglm, qwen)

    Once the `SupportsQuant` mixin has been added to all models, this
    function can be removed
    hf_to_vllm_mapperNpacked_modules_mapping)
issubclassr   rB   apply_vllm_mapperr   )r&   r   r   r|   r   r   r   r*     s   


r*   )4r   r+   r0   
contextlibr   dataclassesr   r   rS   r   typing_extensionsr   vllm.configr   r   r	   vllm.loggerr
   $vllm.model_executor.layers.attentionr   r   3vllm.model_executor.layers.quantization.base_configr   r   'vllm.model_executor.model_loader.reloadr   r   %vllm.model_executor.models.interfacesr   vllm.tracingr   vllm.utils.platform_utilsr   r   r3   r   rO   Moduler<   rM   r?   rD   r   r   rq   rr   rm   r)   rw   rz   r{   r*   r   r   r   r   <module>   sp   ;
)  #