o
    i                     @   s  d Z ddlZddlZddlZddlZddlZddlZddlZddlm	Z	m
Z
 ddlmZmZ ddlmZmZmZ ddlmZ ddlmZ ddlmZmZmZ ddlmZ ddlZdd	lmZ dd
lm Z m!Z!m"Z" ddl#m$Z$ ddl%m&Z& ddl'm(Z( ddl)m*Z* erddl+m,Z, ddl-m.Z.m/Z/ neZ,eZ.eZ/ddl0m1Z1m2Z2m3Z3m4Z4m5Z5m6Z6m7Z7m8Z8m9Z9m:Z:m;Z;m<Z<m=Z= ddl>m?Z?m@Z@mAZAmBZBmCZC e$eDZEi dddddddddddddd d!d"d#d$d%d&d'd(d)d*d+d,d-d,d.d/d0d/d1di d2d3d4d5d6d7d8d9d:d;d<d;d=d>d?d@dAdBdCdDdEdFdGdHdIdJdKdLdMdNdOdPdQdRi dSdTdUdVdWdXdYdZd[d\d]d^d_d`dadbdcdddedfdgdhdidjdkdldmdndodpdqdrdsdti dudvdwdxdydzd{d|d}d|d~dddddddddddddddddddddddi ddddddddddddddddNddddddddddddddddddi ddddddddddddddddddddddddēddƓddȓddʓdd̓ddΓi ddГddғddԓdd֓ddؓddړddܓddޓddddddddddLdddddddddddddddddd	ZFdddd5dVdd^ddzddddd dddd eFG D i dddddddd֓d	dddd
ddddddddddddddddddddddddddd ZHd!d"d#d$d%d&d'd(d(d)	ZIi d*d+d,d-d.d/d0d1d2d3d4d5d6d7d8d9d:d;d<d=d>d?d@dAdBdCdDdEdFdGdHdIdJdKi dLdMdNdOdPdQdRdSdTdUdVdWdXdYdZd[d\d]d^d_d`dadbdcdddedfdgdhdidjdidkdli dmdndodpdqdrdsdtdudvdwdxdydzd{d|d}d~ddddddddddddddddi ddddddddddddddddddddddddddddddddddi ddddddddddddddddddddddddÓdĐdœdƐdǓdȐdɓdʐd˓d̐d͓dΐdϐdМZJi dѐdғdӐdԓdՐd֓dאdؓdِdړdېdړdܐdړdݐdړdސdߓdddddddddddddddddddddZKdddZLddddddddddd 
ZMi eFeHeIeJeKeLeMZNejOddgZPddddddddddd
ZQeddG d	d
 d
ZRG dd de	ZSeddG dd deSZTeddG dd deSZUedddeVdeSdeWejX dB fddZYedddeVdeSdeRdB fddZZeG dd dZ[e[dd eNG D Z\edZ]deg e]f de]fdd Z^d$d!d"Z_eDd#kre_  dS dS (%  z
Whenever you add an architecture to this page, please also update
`tests/models/registry.py` with example HuggingFace models for it.
    N)ABCabstractmethod)CallableSet)asdict	dataclassfield)	lru_cache)Path)TYPE_CHECKINGAnyTypeVar)envs)ModelConfigiter_architecture_defaultstry_match_architecture_defaults)init_logger)logtime)!try_get_class_from_dynamic_module)	safe_hash)AttnTypeStr)SequencePoolingTypeTokenPoolingType   )has_inner_state	has_noopsis_attention_free	is_hybridrequires_raw_input_tokenssupports_cross_encodingsupports_late_interactionsupports_mamba_prefix_cachingsupports_multimodal#supports_multimodal_encoder_tp_data"supports_multimodal_raw_input_onlysupports_ppsupports_transcription)get_attn_typeget_default_seq_pooling_typeget_default_tok_pooling_typeis_pooling_modelis_text_generation_modelAfmoeForCausalLM)afmoer,   ApertusForCausalLM)apertusr.   AquilaModel)llamaLlamaForCausalLMAquilaForCausalLMArceeForCausalLM)arceer4   ArcticForCausalLM)arcticr6   BaiChuanForCausalLM)baichuanr8   BaichuanForCausalLM)r9   r:   BailingMoeForCausalLM)bailing_moer;   BailingMoeV2ForCausalLM)r<   r=   BambaForCausalLM)bambar>   BloomForCausalLM)bloomr@   ChatGLMModel)chatglmChatGLMForCausalLMChatGLMForConditionalGenerationCohereForCausalLM)commandrrF   Cohere2ForCausalLMCwmForCausalLMDbrxForCausalLM)dbrxrJ   DeciLMForCausalLM)nemotron_nasrL   DeepseekForCausalLM)deepseek_v2rN   DeepseekV2ForCausalLM)rO   rP   DeepseekV3ForCausalLM)rO   rQ   DeepseekV32ForCausalLMDots1ForCausalLM)dots1rS   Ernie4_5ForCausalLM)ernie45rU   Ernie4_5_MoeForCausalLM)ernie45_moerW   ExaoneForCausalLM)exaonerY   Exaone4ForCausalLM)exaone4r[   ExaoneMoEForCausalLM)
exaone_moeExaoneMoeForCausalLMFairseq2LlamaForCausalLM)fairseq2_llamar`   FalconForCausalLM)falconrb   FalconMambaForCausalLM)mambaMambaForCausalLMFalconH1ForCausalLM)	falcon_h1rg   FlexOlmoForCausalLM)	flex_olmori   GemmaForCausalLM)gemmark   Gemma2ForCausalLM)gemma2rm   Gemma3ForCausalLM)gemma3ro   Gemma3nForCausalLM)gemma3nrq   Qwen3NextForCausalLM)
qwen3_nextrs   GlmForCausalLM)glmru   Glm4ForCausalLM)glm4rw   Glm4MoeForCausalLM)glm4_moery   Glm4MoeLiteForCausalLM)glm4_moe_liter{   GlmMoeDsaForCausalLM)rO   r}   GptOssForCausalLM)gpt_ossr~   GPT2LMHeadModel)gpt2r   GPTBigCodeForCausalLM)gpt_bigcoder   GPTJForCausalLM)gpt_jr   GPTNeoXForCausalLM)gpt_neoxr   GraniteForCausalLM)graniter   GraniteMoeForCausalLM)
granitemoer   GraniteMoeHybridForCausalLM)granitemoehybridr   GraniteMoeSharedForCausalLM)granitemoesharedr   GritLM)gritlmr   Grok1ModelForCausalLM)grok1GrokForCausalLMGrok1ForCausalLMHunYuanMoEV1ForCausalLM)
hunyuan_v1r   HunYuanDenseV1ForCausalLM)r   r   HCXVisionForCausalLM)hyperclovax_visionr   InternLMForCausalLMInternLM2ForCausalLM)	internlm2r   InternLM2VEForCausalLM)internlm2_ver   InternLM3ForCausalLMIQuestCoderForCausalLMIQuestLoopCoderForCausalLM)iquest_loopcoderr   JAISLMHeadModel)jaisr   Jais2ForCausalLM)jais2r   JambaForCausalLM)jambar   KimiLinearForCausalLM)kimi_linearr   Lfm2ForCausalLM)lfm2r   Lfm2MoeForCausalLM)lfm2_moer   r2   Llama4ForCausalLM)llama4r   LLaMAForCausalLMLongcatFlashForCausalLM)longcat_flashr   rf   Mamba2ForCausalLM)mamba2r   MiniCPMForCausalLM)minicpmr   MiniCPM3ForCausalLM)minicpm3r   MiniMaxForCausalLM)minimax_text_01MiniMaxText01ForCausalLMr   MiniMaxM1ForCausalLMMiniMaxM2ForCausalLM)
minimax_m2r   MistralForCausalLM)mistralr   MistralLarge3ForCausalLM)mistral_large_3r   MixtralForCausalLM)mixtralr   MptForCausalLM)mptMPTForCausalLMr   MiMoForCausalLM)mimor   MiMoV2FlashForCausalLM)mimo_v2_flashr   NemotronForCausalLM)nemotronr   NemotronHForCausalLM)
nemotron_hr   NemotronHPuzzleForCausalLMOlmoForCausalLM)olmor   Olmo2ForCausalLM)olmo2r   Olmo3ForCausalLMOlmoeForCausalLM)olmoer   OPTForCausalLM)optr   OrionForCausalLM)orionr   OuroForCausalLM)ouror   PanguEmbeddedForCausalLM)	openpangur   PanguProMoEV2ForCausalLM)r   r   PanguUltraMoEForCausalLM)r   r   PersimmonForCausalLM)	persimmonr   PhiForCausalLM)phir   Phi3ForCausalLM)phi3r   PhiMoEForCausalLM)phimoer   Plamo2ForCausalLM)plamo2r   Plamo3ForCausalLM)plamo3r   QWenLMHeadModel)qwenr   Qwen2ForCausalLM)qwen2r   Qwen2MoeForCausalLM)	qwen2_moer   Qwen3ForCausalLM)qwen3r   Qwen3MoeForCausalLM)	qwen3_moer   RWForCausalLMSeedOssForCausalLM)seed_ossr   Step1ForCausalLM)step1r   Step3TextForCausalLM)
step3_textr   Step3p5ForCausalLM)step3p5r  )stablelmStablelmForCausalLM)
starcoder2Starcoder2ForCausalLM)solarSolarForCausalLM)	telechat2TeleChat2ForCausalLM)teleflmTeleFLMForCausalLM)zamba2Zamba2ForCausalLM)	StableLMEpochForCausalLMStableLmForCausalLMr  r  TeleChatForCausalLMr
  r  XverseForCausalLMr  )bertBertEmbeddingModel)r  BertSpladeSparseEmbeddingModel)colbertColBERTModel)rp   Gemma3Model)r   GPT2ForSequenceClassification)bert_with_ropeSnowflakeGteNewModel)r  GteNewModel)r   InternLM2ForRewardModel)r   JambaForSequenceClassification)r1   LlamaBidirectionalModel)	BertModelr  
HF_ColBERTrL   Gemma2ModelGemma3TextModelru   r  r   GteModelr  r  r  r  
LlamaModelc                 C   s&   i | ]\}\}}|d kr|||fqS )r2    ).0kmodarchr&  r&  Y/home/ubuntu/vllm_env/lib/python3.10/site-packages/vllm/model_executor/models/registry.py
<dictcomp>   s
    
r,  MistralModelModernBertModel)
modernbertr.  NomicBertModel)r  r0  
Qwen2ModelQwen2ForRewardModel)qwen2_rmr2  Qwen2ForProcessRewardModel)r3  r4  RobertaForMaskedLM)robertaRobertaEmbeddingModelRobertaModelr  r
  "VoyageQwen3BidirectionalEmbedModel)voyager9  XLMRobertaModelBgeM3EmbeddingModel)r6  r<  	CLIPModel)clipCLIPEmbeddingModel!LlavaNextForConditionalGeneration)
llava_nextr@  )phi3vPhi3VForCausalLM)qwen2_vlQwen2VLForConditionalGeneration)siglipSiglipEmbeddingModel)
terratorch
Terratorch)rC  rE  SiglipModelPrithviGeoSpatialMAErI  )r  BertForSequenceClassification)r  BertForTokenClassification)r  GteNewForSequenceClassification)jina_vlJinaVLForSequenceClassification)r1   +LlamaBidirectionalForSequenceClassification)r/  #ModernBertForSequenceClassification)r/   ModernBertForTokenClassification)r6   RobertaForSequenceClassification)	rL  rM  rN  JinaVLForRankingrQ  rR  rS  rT  #XLMRobertaForSequenceClassificationAriaForConditionalGeneration)ariarW  &AudioFlamingo3ForConditionalGeneration)audioflamingo3rY  %MusicFlamingoForConditionalGeneration)musicflamingor[  !AyaVisionForConditionalGeneration)
aya_visionr]  BagelForConditionalGeneration)bagelr_  BeeForConditionalGeneration)beera  Blip2ForConditionalGeneration)blip2rc  !ChameleonForConditionalGeneration)	chameleonre  %Cohere2VisionForConditionalGeneration)cohere2_visionrg  DeepseekVLV2ForCausalLM)deepseek_vl2ri  DeepseekOCRForCausalLM)deepseek_ocrrk  DeepseekOCR2ForCausalLM)deepseek_ocr2rm  DotsOCRForCausalLM)dots_ocrro  #Eagle2_5_VLForConditionalGeneration)eagle2_5_vlrq  &Ernie4_5_VLMoeForConditionalGeneration)
ernie45_vlrs  $FunAudioChatForConditionalGeneration)funaudiochatru  FuyuForCausalLM)fuyurw  Gemma3ForConditionalGeneration)	gemma3_mmry  Gemma3nForConditionalGeneration)
gemma3n_mmr{  GlmAsrForConditionalGeneration)glmasrr}  GLM4VForCausalLM)glm4vr  Glm4vForConditionalGeneration)glm4_1vr   Glm4vMoeForConditionalGeneration)r  r  GlmOcrForConditionalGeneration)glm_ocrr  %GraniteSpeechForConditionalGeneration)granite_speechr  H2OVLChatModel)h2ovlr  !HunYuanVLForConditionalGeneration)hunyuan_visionr  StepVLForConditionalGeneration)step_vlr  InternVLChatModel)internvlr  NemotronH_Nano_VL_V2)nano_nemotron_vlr  OpenCUAForConditionalGeneration)opencuar   InternS1ForConditionalGeneration)interns1r   InternVLForConditionalGeneration#InternS1ProForConditionalGeneration)interns1_pror   Idefics3ForConditionalGeneration)idefics3r  IsaacForConditionalGeneration)isaacr  SmolVLMForConditionalGeneration)smolvlmr  KananaVForConditionalGeneration)kanana_vr  KeyeForConditionalGeneration)keyer  !KeyeVL1_5ForConditionalGeneration)
keye_vl1_5r  RForConditionalGeneration)rvlr  KimiVLForConditionalGeneration)kimi_vlr  KimiK25ForConditionalGeneration)kimi_k25r  "LightOnOCRForConditionalGeneration)
lightonocrr  Lfm2VlForConditionalGeneration)lfm2_vlLfm2VLForConditionalGenerationLlama_Nemotron_Nano_VL)nemotron_vlLlamaNemotronVLChatModelLlama4ForConditionalGeneration)mllama4r  LlavaForConditionalGeneration)llavar  &LlavaNextVideoForConditionalGeneration)llava_next_videor  &LlavaOnevisionForConditionalGeneration)llava_onevisionr  MantisForConditionalGeneration)r  r  MiDashengLMModel)midashenglmr  #MiniMaxVL01ForConditionalGeneration)minimax_vl_01r  MiniCPMO)minicpmor  MiniCPMV)minicpmvr   Mistral3ForConditionalGeneration)mistral3r  MolmoForCausalLM)molmor  Molmo2ForConditionalGeneration)molmo2r  NVLM_D)nvlm_dNVLM_D_Model#OpenPanguVLForConditionalGeneration)openpangu_vlr  Ovis)ovisr  Ovis2_5)ovis2_5r  #PaddleOCRVLForConditionalGeneration)paddleocr_vlr  !PaliGemmaForConditionalGeneration)	paligemmar  rC  Phi4MMForCausalLM)phi4mmr  PixtralForConditionalGeneration)pixtralr  QwenVLForConditionalGeneration)qwen_vlr  rE  "Qwen2_5_VLForConditionalGeneration)
qwen2_5_vlr  "Qwen2AudioForConditionalGeneration)qwen2_audior  Qwen2_5OmniModel)qwen2_5_omni_thinker*Qwen2_5OmniThinkerForConditionalGeneration#Qwen2_5OmniForConditionalGeneration$Qwen3OmniMoeForConditionalGeneration)qwen3_omni_moe_thinker+Qwen3OmniMoeThinkerForConditionalGeneration Qwen3ASRForConditionalGeneration)	qwen3_asrr  Qwen3VLForConditionalGeneration)qwen3_vlr  "Qwen3VLMoeForConditionalGeneration)qwen3_vl_moer  SkyworkR1VChatModel)
skyworkr1vr  Step3VLForConditionalGeneration)step3_vlr  TarsierForConditionalGeneration)tarsierr   Tarsier2ForConditionalGeneration)rD  r  UltravoxModel)ultravoxr  VoxtralForConditionalGeneration)voxtralr  VoxtralRealtimeGeneration)voxtral_realtimer  )nemotron_parse%NemotronParseForConditionalGeneration)whisperWhisperForConditionalGeneration)r  r  MiMoMTPModel)mimo_mtpMiMoMTPEagleLlamaForCausalLM)llama_eagler  EagleLlama4ForCausalLM)llama4_eagler  EagleMiniCPMForCausalLM)minicpm_eagler  Eagle3LlamaForCausalLM)llama_eagle3r  LlamaForCausalLMEagle3Eagle3Qwen2_5vlForCausalLMEagle3Qwen3vlForCausalLMEagleMistralLarge3ForCausalLM)mistral_large_3_eagler  EagleDeepSeekMTPModel)deepseek_eagleEagleDeepseekV3ForCausalLMDeepSeekMTPModel)deepseek_mtpDeepSeekMTPErnieMTPModel)	ernie_mtpErnieMTPExaoneMoeMTP)exaone_moe_mtpr  LongCatFlashMTPModel)longcat_flash_mtpLongCatFlashMTPGlm4MoeMTPModel)glm4_moe_mtp
Glm4MoeMTPGlm4MoeLiteMTPModel)glm4_moe_lite_mtpGlm4MoeLiteMTPGlmOcrMTPModel)glm_ocr_mtp	GlmOcrMTP)medusaMedusa)openpangu_mtpOpenPanguMTP)qwen3_next_mtpQwen3NextMTP)step3p5_mtp
Step3p5MTP)MedusaModelOpenPanguMTPModelr+  r-  )transformersTransformersForCausalLM)r0  !TransformersMultiModalForCausalLM)SmolLM3ForCausalLMEmu3ForConditionalGeneration)r0  TransformersMoEForCausalLM)r0  $TransformersMultiModalMoEForCausalLM)r0  TransformersEmbeddingModel)r0  TransformersMoEEmbeddingModel)r0  $TransformersMultiModalEmbeddingModel)r0  %TransformersForSequenceClassification)r0  (TransformersMoEForSequenceClassification)r0  /TransformersMultiModalForSequenceClassification)
r1  r5  r2  r6  r7  r8  r9  r:  r;  r<  z-mz#vllm.model_executor.models.registryz0.10.2z0.9.2z0.12.0)
MotifForCausalLMPhi3SmallForCausalLMPhi4FlashForCausalLMPhi4MultimodalForCausalLM	BartModelBartForConditionalGenerationDonutForConditionalGeneration!Florence2ForConditionalGenerationMBartForConditionalGenerationMllamaForConditionalGenerationT)frozenc                   @   s   e Zd ZU eed< eed< eed< eed< eed< eed< eed< eed< eed	< eed
< eed< eed< eed< eed< eed< eed< eed< eed< eed< eed< e	de
ej dd fddZdS )
_ModelInfoarchitecturer+   r*   	attn_typedefault_seq_pooling_typedefault_tok_pooling_typer   r    r"   r$   r   r#   r%   r   r   r   r   r!   r&   supports_transcription_onlymodelreturnc                 C   s   t di d| jdt| dt| dt| dt| dt| dt| dt| d	t	| d
t
| dt| dt| dt| dt| dt| dt| dt| dt| dt| ok| jdt| S dt| S )NrI  r+   r*   rK  rL  rJ  r   r    r"   r$   r   r#   r%   r   r   r   r!   r&   rM  r   r&  )rH  __name__r+   r*   r(   r)   r'   r   r    r"   r$   r   r#   r%   r   r   r   r!   r&   rM  r   )rN  r&  r&  r+  from_model_clsi  sb   	
z_ModelInfo.from_model_clsN)rP  
__module____qualname__str__annotations__boolr   r   r   staticmethodtypennModulerQ  r&  r&  r&  r+  rH  R  s.   
 rH  c                   @   s6   e Zd ZedefddZedeej fddZ	dS )_BaseRegisteredModelrO  c                 C      t NNotImplementedErrorselfr&  r&  r+  inspect_model_cls     z&_BaseRegisteredModel.inspect_model_clsc                 C   r\  r]  r^  r`  r&  r&  r+  load_model_cls  rc  z#_BaseRegisteredModel.load_model_clsN)
rP  rR  rS  r   rH  rb  rX  rY  rZ  rd  r&  r&  r&  r+  r[    s
    r[  c                   @   sb   e Zd ZU dZeed< eej ed< e	deej fddZ
defddZdeej fd	d
ZdS )_RegisteredModelzP
    Represents a model that has already been imported in the main process.
    
interfaces	model_clsc                 C   s   t t| | dS )N)rf  rg  )re  rH  rQ  rg  r&  r&  r+  rQ    s   z_RegisteredModel.from_model_clsrO  c                 C      | j S r]  )rf  r`  r&  r&  r+  rb       z"_RegisteredModel.inspect_model_clsc                 C   ri  r]  rh  r`  r&  r&  r+  rd    rj  z_RegisteredModel.load_model_clsN)rP  rR  rS  __doc__rH  rU  rX  rY  rZ  rW  rQ  rb  rd  r&  r&  r&  r+  re    s   
 re  c                   @   s   e Zd ZU dZeed< eed< edefddZdefddZ	d	ede
d
B fddZde
d	edd
fddZeeddde
fddZdeej fddZd
S )_LazyRegisteredModelzL
    Represents a model that has not been imported in the main process.
    module_name
class_namerO  c                   C   s   t tjd S )N
modelinfos)r
   r   VLLM_CACHE_ROOTr&  r&  r&  r+  _get_cache_dir  s   z#_LazyRegisteredModel._get_cache_dirc                 C   s$   | j  d| j dd}| dS )N-.z.json)rm  rn  replace)ra  cls_namer&  r&  r+  _get_cache_filename  s   
z(_LazyRegisteredModel._get_cache_filenamemodule_hashNc                 C   s   zVz%|   |   }t|dd}t|}W d    n1 s w   Y  W n ty:   td| j| j	 Y W d S w |d |krMtd| j| j	 W d S t
di |d W S  tyi   td| j| j	 Y d S w )	Nutf-8encodingz0Cached model info file for class %s.%s not foundhashz/Cached model info file for class %s.%s is stale	modelinfoz)Cached model info for class %s.%s error. r&  )rq  rv  openjsonloadFileNotFoundErrorloggerdebugrm  rn  rH  	Exception)ra  rw  modelinfo_pathfilemi_dictr&  r&  r+  _load_modelinfo_from_cache  s>   z/_LazyRegisteredModel._load_modelinfo_from_cachemic                 C   s   ddl m} z;|t|d}|  }|jddd ||   }||dd}tj||dd	 W d
   W d
S 1 s:w   Y  W d
S  tyP   t	
d Y d
S w )z"save dictionary json file to cacher   )atomic_writer)r{  r|  T)parentsexist_okrx  ry     )indentNzError saving model info cache.)-vllm.model_executor.model_loader.weight_utilsr  r   rq  mkdirrv  r~  dumpr  r  	exception)ra  r  rw  r  modelinfo_dict	cache_dirr  fr&  r&  r+  _save_modelinfo_to_cache  s   &z-_LazyRegisteredModel._save_modelinfo_to_cachezRegistry inspect model class)r  msgc                    s   t tj jdd  d }d }| rRt|d}t| dd	 }W d    n1 s0w   Y   
|}|d urItd j j |S td j j t fd	d
}td j j |d urm || |S )Nrs  z.pyrbF)usedforsecurityz,Loaded model info for class %s.%s from cachez=Cache model info for class %s.%s miss. Loading model instead.c                      s   t   S r]  )rH  rQ  rd  r&  r`  r&  r+  <lambda>  s    z8_LazyRegisteredModel.inspect_model_cls.<locals>.<lambda>z!Loaded model info for class %s.%s)r
   __file__parentrm  splitexistsr}  r   read	hexdigestr  r  r  rn  _run_in_subprocessr  )ra  
model_pathrw  r  r  r&  r`  r+  rb    s8    


z&_LazyRegisteredModel.inspect_model_clsc                 C   s   t | j}t|| jS r]  )	importlibimport_modulerm  getattrrn  )ra  r)  r&  r&  r+  rd    s   z#_LazyRegisteredModel.load_model_cls)rP  rR  rS  rk  rT  rU  rW  r
   rq  rv  rH  r  r  r   r  rb  rX  rY  rZ  rd  r&  r&  r&  r+  rl    s   
  
%rl     )maxsize
model_archrN  rO  c                 C   sB   ddl m} ||  z| W S  ty    td|  Y d S w )Nr   )current_platformz(Error in loading model architecture '%s')vllm.platformsr  verify_model_archrd  r  r  r  )r  rN  r  r&  r&  r+  _try_load_model_cls  s   

r  c                 C   s,   z|  W S  ty   td|  Y d S w )Nz+Error in inspecting model architecture '%s')rb  r  r  r  )r  rN  r&  r&  r+  _try_inspect_model_cls&  s   
r  c                   @   s  e Zd ZU eedZeeef ed< de	e fddZ
dedeej eB ddfd	d
Zdee fddZdedeej dB fddZdededB fddZdedededB fddZdededefddZdeee B dedeeef fddZdeee B dedeeej ef fddZdeee B dedefddZdeee B dedefddZdeee B dedefd d!Zdeee B dedefd"d#Zdeee B dedefd$d%Zdeee B dedefd&d'Z deee B dedefd(d)Z!deee B dedefd*d+Z"deee B dedefd,d-Z#deee B dedefd.d/Z$deee B dedefd0d1Z%deee B dedefd2d3Z&dS )4_ModelRegistry)default_factorymodelsrO  c                 C   s
   | j  S r]  )r  keysr`  r&  r&  r+  get_supported_archs7  s   
z"_ModelRegistry.get_supported_archsr  rg  Nc                 C   s   t |tsdt| }t||| jv rtd|| t |tr7|d}t|dkr2d}t	|t
| }nt |trHt|tjrHt|}ndt| }t||| j|< dS )a  
        Register an external model to be used in vLLM.

        `model_cls` can be either:

        - A [`torch.nn.Module`][] class directly referencing the model.
        - A string in the format `<module>:<class>` which can be used to
          lazily import the model. This is useful to avoid initializing CUDA
          when importing the model and thus the related error
          `RuntimeError: Cannot re-initialize CUDA in forked subprocess`.
        z'`model_arch` should be a string, not a z_Model architecture %s is already registered, and will be overwritten by the new model class %s.:r  z2Expected a string in the format `<module>:<class>`z=`model_cls` should be a string or PyTorch model class, not a N)
isinstancerT  rX  	TypeErrorr  r  warningr  len
ValueErrorrl  
issubclassrY  rZ  re  rQ  )ra  r  rg  r  	split_strrN  r&  r&  r+  register_model:  s.   




z_ModelRegistry.register_modelarchitecturesc                    sr   |    t fdd|D rtd| d|D ]}|tv r.t| }td| d| dqtd| d  )	Nc                 3   s    | ]}| v V  qd S r]  r&  r'  r*  all_supported_archsr&  r+  	<genexpr>k  s    z8_ModelRegistry._raise_for_unsupported.<locals>.<genexpr>zModel architectures z@ failed to be inspected. Please check the logs for more details.zModel architecture z was supported in vLLM until vzo, and is not supported anymore. Please use an older version of vLLM if you want to use this model architecture.z5 are not supported for now. Supported architectures: )r  anyr  _PREVIOUSLY_SUPPORTED_MODELS)ra  r  r*  previous_versionr&  r  r+  _raise_for_unsupportedh  s&   

z%_ModelRegistry._raise_for_unsupportedc                 C      || j vrd S t|| j | S r]  )r  r  ra  r  r&  r&  r+  r       
z"_ModelRegistry._try_load_model_clsc                 C   r  r]  )r  r  r  r&  r&  r+  r    r  z%_ModelRegistry._try_inspect_model_clsrI  model_configc              	   C   s   |t v r|S t|jdd pt }dD ]}| D ]\}}||r-t||j|j|j	dd qqtt
|d }|d u rh| D ]\}}|drXt||j|j|j	dd}|d urX nq=|jdkr`d S td|d	| s{|jdkrsd S td
|d| S )Nauto_map)
AutoConfig	AutoModelF)revisiontrust_remote_codewarn_on_failr  Tr0  zCannot find model module. z is not a registered model in the Transformers library (only relevant if the model is meant to be in Transformers) and 'AutoModel' is not present in the model config's 'auto_map' (relevant if the model is custom).z#The Transformers implementation of z is not compatible with vLLM.)_TRANSFORMERS_BACKEND_MODELSr  	hf_configdictitems
startswithr   rN  r  r  r0  
model_implr  is_backend_compatible_get_transformers_backend_cls)ra  rI  r  r  prefixnamemodulemodel_moduler&  r&  r+  _try_resolve_transformers  sT   






z(_ModelRegistry._try_resolve_transformersc                 C   sj   || j v r|S t|t|dd t|dd d}|r3|\}}t D ]\}}|||}|| j v r2|  S q|S )Nrunner_typeconvert_type)r  r  )r  r   r  r   rt  )ra  rI  r  matchsuffix_repl_suffix	base_archr&  r&  r+  _normalize_arch  s   



z_ModelRegistry._normalize_archc                    sh  t |tr|g}|std|jdkr- |d |}|d ur, |}|d ur,||fS n|jdkr; d}|dfS t fdd|D rl|jdkrlt|d	d
d
krl |d |}|d url |}|d url||fS |D ]} ||} |}|d ur||f  S qnt fdd|D r|jdkr |d |}|d ur |}|d ur||fS  	|S )N$No model architectures are specifiedr0  r   rH  rI  c                 3       | ]}| j vV  qd S r]  r  r  r`  r&  r+  r        z3_ModelRegistry.inspect_model_cls.<locals>.<genexpr>autor  nonec                 3   r  r]  r  r  r`  r&  r+  r    r  )
r  rT  r  r  r  r  allr  r  r  )ra  r  r  r*  
model_infonormalized_archr&  r`  r+  rb    sL   










z _ModelRegistry.inspect_model_clsc                    st  t |tr|g}|std|jdkr- |d |}|d ur, |}|d ur,||fS n|jdkrAd} |}|d urA||fS t fdd|D rr|jdkrrt|d	d
d
krr |d |}|d urr |}|d urr||fS |D ]} ||} |}|d ur||f  S qtt fdd|D r|jdkr |d |}|d ur |}|d ur||fS  	|S )Nr  r0  r   rH  rI  c                 3   r  r]  r  r  r`  r&  r+  r  8  r  z3_ModelRegistry.resolve_model_cls.<locals>.<genexpr>r  r  r  c                 3   r  r]  r  r  r`  r&  r+  r  J  r  )
r  rT  r  r  r  r  r  r  r  r  )ra  r  r  r*  rg  r  r&  r`  r+  resolve_model_cls  sP   










z _ModelRegistry.resolve_model_clsc                 C      |  ||\}}|jS r]  )rb  r+   ra  r  r  rg  r  r&  r&  r+  r+   U     z'_ModelRegistry.is_text_generation_modelc                 C   r  r]  )rb  r*   r  r&  r&  r+  r*   ]  r  z_ModelRegistry.is_pooling_modelc                 C   r  r]  )rb  r   r  r&  r&  r+  is_cross_encoder_modele  r  z%_ModelRegistry.is_cross_encoder_modelc                 C   r  r]  )rb  r"   r  r&  r&  r+  is_multimodal_modelm  r  z"_ModelRegistry.is_multimodal_modelc                 C   r  r]  )rb  r$   r  r&  r&  r+  "is_multimodal_raw_input_only_modelu  r  z1_ModelRegistry.is_multimodal_raw_input_only_modelc                 C   r  r]  )rb  r%   r  r&  r&  r+  is_pp_supported_model}  r  z$_ModelRegistry.is_pp_supported_modelc                 C   r  r]  )rb  r   r  r&  r&  r+  model_has_inner_state  r  z$_ModelRegistry.model_has_inner_statec                 C   r  r]  )rb  r   r  r&  r&  r+  is_attention_free_model  r  z&_ModelRegistry.is_attention_free_modelc                 C   r  r]  )rb  r   r  r&  r&  r+  is_hybrid_model  r  z_ModelRegistry.is_hybrid_modelc                 C   r  r]  )rb  r   r  r&  r&  r+  is_noops_model  r  z_ModelRegistry.is_noops_modelc                 C   r  r]  )rb  r&   r  r&  r&  r+  is_transcription_model  r  z%_ModelRegistry.is_transcription_modelc                 C   r  r]  )rb  rM  r  r&  r&  r+  is_transcription_only_model  r  z*_ModelRegistry.is_transcription_only_model)'rP  rR  rS  r   r  r  rT  r[  rU  r   r  rX  rY  rZ  r  listr  r  rH  r  r   r  r  tuplerb  r  rV  r+   r*   r  r  r  r  r  r  r  r  r  r  r&  r&  r&  r+  r  2  s   
 
.
D



4

6






















r  c                 C   s(   i | ]\}\}}|t d | |dqS )zvllm.model_executor.models.)rm  rn  )rl  )r'  r  mod_relnameru  r&  r&  r+  r,    s    
_Tfnc                 C   s   t  b}tj|d}dd l}|| |f}tjt	|dd}z|
  W n ty< } ztd|j  |d }~ww t|d}t|W  d    W  d    S 1 sYw   Y  W d    d S 1 siw   Y  d S )Nzregistry_output.tmpr   T)inputcapture_outputzError raised in subprocess:
r  )tempfileTemporaryDirectoryospathjoincloudpickledumps
subprocessrun_SUBPROCESS_COMMANDcheck_returncoder  RuntimeErrorstderrdecoder}  pickler  )r  tempdiroutput_filepathr  input_bytesreturneder  r&  r&  r+  r    s.   
"r  c                  C   sn   ddl m}  |   ttjj \}}| }t|d}|	t
| W d    d S 1 s0w   Y  d S )Nr   )load_general_pluginswb)vllm.pluginsr  r  loadssysstdinbufferr  r}  writer  )r  r  output_fileresultr  r&  r&  r+  _run  s   "r   __main__)rO  N)`rk  r  r~  r  r  r	  r  r  abcr   r   collections.abcr   r   dataclassesr   r   r   	functoolsr	   pathlibr
   typingr   r   r   torch.nnrY  r0  vllmr   vllm.configr   r   r   vllm.loggerr   vllm.logging_utilsr   &vllm.transformers_utils.dynamic_moduler   vllm.utils.hashingr   vllm.config.modelr   vllm.config.poolerr   r   rf  r   r   r   r   r   r   r    r!   r"   r#   r$   r%   r&   interfaces_baser'   r(   r)   r*   r+   rP  r  _TEXT_GENERATION_MODELSr  _EMBEDDING_MODELS_CROSS_ENCODER_MODELS_MULTIMODAL_MODELS_SPECULATIVE_DECODING_MODELS_TRANSFORMERS_SUPPORTED_MODELSr  _VLLM_MODELS
executabler  r  rH  r[  re  rl  rT  rX  rZ  r  r  r  ModelRegistryr  r  r   r&  r&  r&  r+  <module>   s  <	 !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLNOPQRSTUVWXYZ\]^_`abcdefghijklmnopqrstuvwxyz{|}  
 !"#'(*+/
:"&*+,0123459:>?@AEIMQUVWXY]^_`defghlptuvz{|           	  
                    #  '  +  /  0  4  5  6  7  ;  <  =  ? 
 F	
 #6
l   


"
