o
    ein                    @   s  d Z ddlZddlmZ ddlmZ ddlmZ ddlm	Z	m
Z
mZmZ dd	lmZ erAdd
lmZ ddlmZ G dd deeZeeZeg dZeg dZeg dZeg dZeg dZedgZeg dZedgZeg dZedgZ eg dZ!eg dZ"edgZ#eg dZ$eg e%e$& ddddd d!d"d#d$Z'eg d%Z(eg d&Z)eg d'Z*eg d(Z+eg d)Z,eg d*Z-eg d+Z.eg d,Z/ed-gZ0eg d.Z1eg d/Z2eg d0Z3eg d1Z4eg d2Z5eg d3Z6eg d4Z7eg d5Z8eg d6Z9ed7d8gZ:eg d9Z;eg d:Z<eg d;Z=eg d<Z>ed=gZ?eg d>Z@eg d?ZAed@dAgZBedBdCgZCedDgZDedEgZEedFdGgZFeeeZGeeeZHeeeZIeeeZJeeeZKeee<ZLeeeZMeeeZNeee ZOeee!ZPeee"ZQeee$ZReee'ZSeee#ZTeee1ZUeee2ZVeee(ZWeeeZXeeeZYeee)ZZeee*Z[eee+Z\eee,Z]eee.Z^eee/Z_eee0Z`eee3Zaeee4Zbeee5Zceee6Zdeee7Zeeee-Zfeee8Zgeee9Zheee:Zieee;Zjeee=Zkeee>Zleee?Zmeee@ZneeeAZoeeeBZpeeeCZqeeeDZreeeEZseeeFZtG dHdI dIe
ZuG dJdK dKe
ZvG dLdM dMe
ZwG dNdO dOe
ZxG dPdQ dQe
ZyG dRdS dSe
ZzeezZzG dTdU dUe
Z{ee{dVdWZ{G dXdY dYe
Z|ee|dZdWZ|G d[d\ d\e
Z}ee}d]dWZ}G d^d_ d_e
Z~ee~d`dadbZ~G dcdd dde
ZeededWZG dfdg dge
ZeedhdWZG didj dje
ZeedkdldbZG dmdn dne
ZeedodpdbZG dqdr dre
ZeedsdtdbZG dudv dve
ZeedwdWZG dxdy dye
ZeedzdWZG d{d| d|e
Zeed}dWZG d~d de
ZeeddWZG dd de
ZeeddWZG dd de
ZeeddWZG dd de
ZeeddWZG dd de
ZeeddWZG dd de
ZeeddWZG dd de
ZeeddWZG dd de
ZeeddWZG dd de
ZeeddWZG dd de
ZeeddWZG dd de
ZeeddWZG dd de
ZeeddWZG dd de
ZeeddWZG dd de
ZeeddWZG dd de
ZeeddWZG dd de
ZeeddWZG dd de
ZeeddWZG dd de
ZG dd de
ZG dd de
ZG dd de	ZeeddWZG dd de
ZeeddWZG dd de
ZeeddWZg dZdS )zAuto Model class.    N)OrderedDict)TYPE_CHECKING   )logging   )_BaseAutoBackboneClass_BaseAutoModelClass_LazyAutoMappingauto_class_update)CONFIG_MAPPING_NAMES)GenerationMixin)PreTrainedModelc                   @   s   e Zd ZdS )_BaseModelWithGenerateN)__name__
__module____qualname__ r   r   d/home/ubuntu/transcripts/venv/lib/python3.10/site-packages/transformers/models/auto/modeling_auto.pyr   #   s    r   (  )afmoe
AfmoeModel)aimv2
Aimv2Modelaimv2_vision_modelAimv2VisionModelalbertAlbertModelalign
AlignModelaltclipAltCLIPModel)apertusApertusModel)arcee
ArceeModel)aria	AriaModel)	aria_textAriaTextModel)audio-spectrogram-transformerASTModelaudioflamingo3&AudioFlamingo3ForConditionalGeneration)audioflamingo3_encoderAudioFlamingo3Encoder)
autoformerAutoformerModel)
aya_visionAyaVisionModel)bamba
BambaModelbark	BarkModel)bart	BartModelbeit	BeitModelbert	BertModel)bert-generationBertGenerationEncoderbig_birdBigBirdModel)bigbird_pegasusBigBirdPegasusModel)biogptBioGptModelbitBitModel)bitnetBitNetModel)
blenderbotBlenderbotModel)blenderbot-smallBlenderbotSmallModelblip	BlipModel)blip-2
Blip2Model)blip_2_qformerBlip2QFormerModel)bloom
BloomModel)bltBltModel)bridgetowerBridgeTowerModel)bros	BrosModel)	camembertCamembertModel)canineCanineModel)	chameleonChameleonModelchinese_clipChineseCLIPModel)chinese_clip_vision_modelChineseCLIPVisionModel)clap	ClapModelclip	CLIPModelclip_text_modelCLIPTextModel)clip_vision_modelCLIPVisionModelclipsegCLIPSegModel)clvp!ClvpModelForConditionalGeneration)
code_llama
LlamaModel)codegenCodeGenModel)cohereCohereModel)cohere2Cohere2Modelcohere2_visionCohere2VisionModelconditional_detrConditionalDetrModel)convbertConvBertModelconvnextConvNextModel
convnextv2ConvNextV2Model)cpmantCpmAntModelcsmCsmForConditionalGeneration)ctrl	CTRLModel)cvtCvtModel)cwmCwmModel)d_fine
DFineModeldab-detrDabDetrModeldacDacModel)data2vec-audioData2VecAudioModeldata2vec-textData2VecTextModeldata2vec-visionData2VecVisionModel)dbrx	DbrxModeldebertaDebertaModel
deberta-v2DebertaV2Model)decision_transformerDecisionTransformerModel)deepseek_v2DeepseekV2Model)deepseek_v3DeepseekV3Model)deepseek_vlDeepseekVLModel)deepseek_vl_hybridDeepseekVLHybridModeldeformable_detrDeformableDetrModeldeit	DeiTModel	depth_proDepthProModeldetr	DetrModel)diaDiaModel)	diffllamaDiffLlamaModeldinat
DinatModeldinov2Dinov2Modeldinov2_with_registersDinov2WithRegistersModeldinov3_convnextDINOv3ConvNextModel
dinov3_vitDINOv3ViTModel
distilbertDistilBertModel)doge	DogeModel)
donut-swinDonutSwinModel)dots1
Dots1Model)dprDPRQuestionEncoderdptDPTModeledgetamEdgeTamModel)edgetam_videoEdgeTamVideoModel)edgetam_vision_modelEdgeTamVisionModel)efficientloftrEfficientLoFTRModelefficientnetEfficientNetModelelectraElectraModel)emu3	Emu3Model)encodecEncodecModel)ernie
ErnieModel)ernie4_5Ernie4_5Model)ernie4_5_moeErnie4_5_MoeModel)ernie4_5_vl_moeErnie4_5_VL_MoeModel)esmEsmModel)evollaEvollaModel)exaone4Exaone4Model)
exaone_moeExaoneMoeModel)falconFalconModel)	falcon_h1FalconH1Model)falcon_mambaFalconMambaModel)fast_vlmFastVlmModelfastspeech2_conformerFastSpeech2ConformerModel"fastspeech2_conformer_with_hifiganFastSpeech2ConformerWithHifiGanflaubertFlaubertModel)flava
FlavaModel)	flex_olmoFlexOlmoModel)	florence2Florence2Model)fnet	FNetModelfocalnetFocalNetModel)fsmt	FSMTModel)funnel)FunnelModelFunnelBaseModel)fuyu	FuyuModel)gemma
GemmaModel)gemma2Gemma2Model)gemma3Gemma3Model)gemma3_textGemma3TextModel)gemma3nGemma3nModel)gemma3n_audioGemma3nAudioEncoder)gemma3n_textGemma3nTextModel)gemma3n_visionTimmWrapperModel)gitGitModel)glmGlmModel)glm4	Glm4Model)glm46vGlm46VModel)glm4_moeGlm4MoeModel)glm4_moe_liteGlm4MoeLiteModel)glm4v
Glm4vModel)	glm4v_moeGlm4vMoeModel)glm4v_moe_textGlm4vMoeTextModel)glm4v_moe_visionGlm4vMoeVisionModel)
glm4v_textGlm4vTextModel)glm4v_visionGlm4vVisionModel)	glm_imageGlmImageModel)glm_image_textGlmImageTextModel)glm_image_visionGlmImageVisionModel)glm_image_vqmodelGlmImageVQVAE)glm_moe_dsaGlmMoeDsaModel)glm_ocrGlmOcrModel)glm_ocr_textGlmOcrTextModel)glm_ocr_visionGlmOcrVisionModelglmasrGlmAsrForConditionalGeneration)glmasr_encoderGlmAsrEncoderglpn	GLPNModel)got_ocr2GotOcr2Model)gpt-sw3	GPT2Model)gpt2rx  )gpt_bigcodeGPTBigCodeModel)gpt_neoGPTNeoModel)gpt_neoxGPTNeoXModel)gpt_neox_japaneseGPTNeoXJapaneseModel)gpt_ossGptOssModel)gptj	GPTJModel)graniteGraniteModel)
granitemoeGraniteMoeModel)granitemoehybridGraniteMoeHybridModel)granitemoesharedGraniteMoeSharedModel)grounding-dinoGroundingDinoModel)groupvitGroupViTModel)heliumHeliumModelhgnet_v2HGNetV2Backbonehiera
HieraModel)hubertHubertModel)hunyuan_v1_denseHunYuanDenseV1Model)hunyuan_v1_moeHunYuanMoEV1Modelibert
IBertModel)ideficsIdeficsModel)idefics2Idefics2Model)idefics3Idefics3Model)idefics3_visionIdefics3VisionTransformerijepa
IJepaModelimagegptImageGPTModel)informerInformerModel)instructblipInstructBlipModel)instructblipvideoInstructBlipVideoModel)internvlInternVLModel)internvl_visionInternVLVisionModel)jais2
Jais2Model)jamba
JambaModel)janus
JanusModel)jetmoeJetMoeModel)kosmos-2Kosmos2Model)
kosmos-2.5Kosmos2_5Model)kyutai_speech_to_textKyutaiSpeechToTextModellasr_ctc
LasrForCTC)lasr_encoderLasrEncoder)layoutlmLayoutLMModel)
layoutlmv2LayoutLMv2Model)
layoutlmv3LayoutLMv3Model)ledLEDModellevit
LevitModel)lfm2	Lfm2Model)lfm2_moeLfm2MoeModel)lfm2_vlLfm2VlModel	lightglueLightGlueForKeypointMatching)lighton_ocrLightOnOcrModel)lilt	LiltModel)llamar   llama4Llama4ForConditionalGeneration)llama4_textLlama4TextModel)llava
LlavaModel)
llava_nextLlavaNextModel)llava_next_videoLlavaNextVideoModel)llava_onevisionLlavaOnevisionModel)longcat_flashLongcatFlashModel
longformerLongformerModel)longt5LongT5Model)luke	LukeModel)lw_detrLwDetrModel)lxmertLxmertModel)m2m_100M2M100Model)mamba
MambaModel)mamba2Mamba2Model)marianMarianModel)markuplmMarkupLMModel)mask2formerMask2FormerModel)
maskformerMaskFormerModel)maskformer-swinMaskFormerSwinModel)mbart
MBartModel)megatron-bertMegatronBertModel
metaclip_2MetaClip2Model)zmgp-strMgpstrForSceneTextRecognition)mimi	MimiModel)minimaxMiniMaxModel)
minimax_m2MiniMaxM2Model)	ministralMinistralModel)
ministral3Ministral3Model)mistralMistralModel)mistral3Mistral3Model)mixtralMixtralModelmlcdMLCDVisionModel)mllamaMllamaModel)mm-grounding-dinoMMGroundingDinoModel
mobilebertMobileBertModelmobilenet_v1MobileNetV1Modelmobilenet_v2MobileNetV2Model	mobilevitMobileViTModelmobilevitv2MobileViTV2Model)
modernbertModernBertModel)modernbert-decoderModernBertDecoderModel)	moonshineMoonshineModel)moonshine_streamingMoonshineStreamingModel)moshi
MoshiModel)mpnet
MPNetModel)mptMptModel)mraMraModel)mt5MT5Model)musicgenMusicgenModel)musicgen_melodyMusicgenMelodyModel)mvpMvpModel)nanochatNanoChatModel)nemotronNemotronModel)nllb-moeNllbMoeModelnystromformerNystromformerModel)olmo	OlmoModel)olmo2
Olmo2Model)olmo3
Olmo3Model)olmoe
OlmoeModelzomdet-turboOmDetTurboForObjectDetection)	oneformerOneFormerModel)
openai-gptOpenAIGPTModel)optOPTModel)ovis2
Ovis2Model)owlv2
Owlv2Model)owlvitOwlViTModel)	paligemmaPaliGemmaModelparakeet_ctcParakeetForCTC)parakeet_encoderParakeetEncoder)patchtsmixerPatchTSMixerModel)patchtstPatchTSTModel)pe_audioPeAudioModel)pe_audio_encoderPeAudioEncoder)pe_audio_videoPeAudioVideoModel)pe_audio_video_encoderPeAudioVideoEncoder)pe_videoPeVideoModel)pe_video_encoderPeVideoEncoder)pegasusPegasusModel)	pegasus_xPegasusXModel)	perceiverPerceiverModel)perception_lmPerceptionLMModel)	persimmonPersimmonModel)phiPhiModel)phi3	Phi3Model)phi4_multimodalPhi4MultimodalModel)phimoePhimoeModelpixio
PixioModel)pixtralPixtralVisionModel)plbartPLBartModel
poolformerPoolFormerModel)pp_doclayout_v3PPDocLayoutV3Model)
prophetnetProphetNetModelpvtPvtModel)pvt_v2
PvtV2Model)qwen2
Qwen2Model)
qwen2_5_vlQwen2_5_VLModel)qwen2_5_vl_textQwen2_5_VLTextModel)qwen2_audio_encoderQwen2AudioEncoder)	qwen2_moeQwen2MoeModel)qwen2_vlQwen2VLModel)qwen2_vl_textQwen2VLTextModel)qwen3
Qwen3Model)qwen3_5Qwen3_5Model)qwen3_5_moeQwen3_5MoeModel)qwen3_5_moe_textQwen3_5MoeTextModel)qwen3_5_textQwen3_5TextModel)	qwen3_moeQwen3MoeModel)
qwen3_nextQwen3NextModel)qwen3_vlQwen3VLModel)qwen3_vl_moeQwen3VLMoeModel)qwen3_vl_moe_textQwen3VLMoeTextModel)qwen3_vl_textQwen3VLTextModel)recurrent_gemmaRecurrentGemmaModelreformerReformerModelregnetRegNetModelrembertRemBertModelresnetResNetModelrobertaRobertaModelroberta-prelayernormRobertaPreLayerNormModelroc_bertRoCBertModelroformerRoFormerModel)rt_detrRTDetrModel)
rt_detr_v2RTDetrV2Model)rwkv	RwkvModelsamSamModelsam2	Sam2Model)sam2_hiera_det_modelSam2HieraDetModel)
sam2_videoSam2VideoModel)sam2_vision_modelSam2VisionModel)sam3	Sam3Modelsam3_trackerSam3TrackerModelr  )sam3_tracker_videoSam3TrackerVideoModel)
sam3_videoSam3VideoModel)sam3_vision_modelSam3VisionModel)sam3_vit_modelSam3ViTModelsam_hq
SamHQModel)sam_hq_vision_modelSamHQVisionModel)sam_vision_modelSamVisionModel)seamless_m4tSeamlessM4TModel)seamless_m4t_v2SeamlessM4Tv2Model)seed_ossSeedOssModel	segformerSegformerModel)seggptSegGptModel)sewSEWModel)sew-d	SEWDModelsiglipSiglipModelsiglip2Siglip2Model)siglip2_vision_modelSiglip2VisionModelsiglip_vision_modelSiglipVisionModel)smollm3SmolLM3Model)smolvlmSmolVLMModel)smolvlm_visionSmolVLMVisionTransformer)
solar_openSolarOpenModel)speech_to_textSpeech2TextModel)speecht5SpeechT5Model)splinterSplinterModelsqueezebertSqueezeBertModel)stablelmStableLmModel)
starcoder2Starcoder2ModelswiftformerSwiftFormerModelswin	SwinModelswin2srSwin2SRModelswinv2Swinv2Model)switch_transformersSwitchTransformersModel)t5T5Model)t5gemmaT5GemmaModel)t5gemma2T5Gemma2Model)t5gemma2_encoderT5Gemma2Encodertable-transformerTableTransformerModel)tapas
TapasModel)textnetTextNetModel)time_series_transformerTimeSeriesTransformerModel)timesfmTimesFmModeltimesformerTimesformerModeltimm_backboneTimmBackbonetimm_wrapperrD  )tvpTvpModel)udop	UdopModel)umt5	UMT5Model)	unispeechUniSpeechModel)unispeech-satUniSpeechSatModel)univnetUnivNetModel)
vaultgemmaVaultGemmaModelvibevoice_acoustic_tokenizerVibeVoiceAcousticTokenizerModel)video_llama_3VideoLlama3Model)video_llama_3_visionVideoLlama3VisionModel)video_llavaVideoLlavaModelvideomaeVideoMAEModel)vilt	ViltModel)vipllavaVipLlavaModel)zvision-text-dual-encoderVisionTextDualEncoderModel)visual_bertVisualBertModelvitViTModelvit_maeViTMAEModelvit_msnViTMSNModelvitdetVitDetModelvits	VitsModelvivit
VivitModel)vjepa2VJEPA2ModelvoxtralVoxtralForConditionalGeneration)voxtral_encoderVoxtralEncodervoxtral_realtime'VoxtralRealtimeForConditionalGeneration)voxtral_realtime_encoderVoxtralRealtimeEncoder)voxtral_realtime_textVoxtralRealtimeTextModel)wav2vec2Wav2Vec2Model)wav2vec2-bertWav2Vec2BertModel)wav2vec2-conformerWav2Vec2ConformerModel)wavlm
WavLMModel)whisperWhisperModel)xclip
XCLIPModel)xcodecXcodecModel)xglm	XGLMModelxlmXLMModelxlm-robertaXLMRobertaModelxlm-roberta-xlXLMRobertaXLModel)xlnet
XLNetModel)xlstm
xLSTMModel)xmod	XmodModelyolos
YolosModel)yoso	YosoModel)youtu
YoutuModel)zamba
ZambaModel)zamba2Zamba2Model)X)r   AlbertForPreTrainingr.   r<   BartForConditionalGeneration)rB   BertForPreTraining)rG   BigBirdForPreTrainingr]   BloomForCausalLMre   CamembertForMaskedLMcolpaliColPaliForRetrieval)colqwen2ColQwen2ForRetrievalr   CTRLLMHeadModelr   Data2VecTextForMaskedLMr   DebertaForMaskedLMr   DebertaV2ForMaskedLMr   DistilBertForMaskedLM)r   ElectraForPreTraining)r  ErnieForPreTrainingr  EvollaForProteinText2Textr  Exaone4ForCausalLMr  ExaoneMoeForCausalLMr  FalconMambaForCausalLMr!  FlaubertWithLMHeadModel)r#  FlavaForPreTrainingr'  !Florence2ForConditionalGeneration)r)  FNetForPreTrainingr.  FSMTForConditionalGeneration)r0  FunnelForPreTrainingr9  Gemma3ForConditionalGenerationrm  rw  GPT2LMHeadModelry  r  rz  GPTBigCodeForCausalLM)r  HieraForPreTrainingr  IBertForMaskedLMr  IdeficsForVisionText2Textr   Idefics2ForConditionalGenerationr   Idefics3ForConditionalGenerationr  JanusForConditionalGenerationr  LayoutLMForMaskedLMr  LlavaForConditionalGenerationr  !LlavaNextForConditionalGenerationr  &LlavaNextVideoForConditionalGenerationr  &LlavaOnevisionForConditionalGenerationr  LongformerForMaskedLMr  LukeForMaskedLM)r  LxmertForPreTrainingr  MambaForCausalLMr  Mamba2ForCausalLM)r  MegatronBertForPreTrainingr%   Mistral3ForConditionalGenerationr,  MllamaForConditionalGeneration)r1  MobileBertForPreTrainingrI  MPNetForMaskedLMrK  MptForCausalLMrM  MraForMaskedLMrU  MvpForConditionalGenerationrW  NanoChatForCausalLMr[  NllbMoeForConditionalGenerationrl  OpenAIGPTLMHeadModelrv  !PaliGemmaForConditionalGenerationqwen2_audio"Qwen2AudioForConditionalGenerationr  RobertaForMaskedLMr  RobertaPreLayerNormForMaskedLM)r  RoCBertForPreTrainingr  RwkvForCausalLM)r<  SplinterForPreTrainingr?  SqueezeBertForMaskedLMrQ  *SwitchTransformersForConditionalGenerationrS  T5ForConditionalGenerationrU  T5GemmaForConditionalGenerationrW   T5Gemma2ForConditionalGenerationr^  TapasForMaskedLM)rt  UniSpeechForPreTraining)rv  UniSpeechSatForPreTrainingr  "VideoLlavaForConditionalGeneration)r  VideoMAEForPreTrainingr   VipLlavaForConditionalGeneration)r  VisualBertForPreTraining)r  ViTMAEForPreTrainingr  r  )r  Wav2Vec2ForPreTraining)r  Wav2Vec2ConformerForPreTrainingr  XLMWithLMHeadModelr  XLMRobertaForMaskedLMr  XLMRobertaXLForMaskedLMr  XLNetLMHeadModelr  xLSTMForCausalLMr  XmodForMaskedLM))r   AfmoeForCausalLM)r$   ApertusForCausalLM)r&   ArceeForCausalLM)r*   AriaTextForCausalLM)r7   BambaForCausalLM)r<   BartForCausalLM)rB   BertLMHeadModel)rD   BertGenerationDecoder)rG   BigBirdForCausalLM)rI   BigBirdPegasusForCausalLM)rK   BioGptForCausalLM)rP   BitNetForCausalLM)rR   BlenderbotForCausalLM)rT   BlenderbotSmallForCausalLMr  )r_   BltForCausalLM)re   CamembertForCausalLM)r   LlamaForCausalLM)r   CodeGenForCausalLM)r   CohereForCausalLM)r   Cohere2ForCausalLM)r   CpmAntForCausalLMr  )r   CwmForCausalLM)r   Data2VecTextForCausalLM)r   DbrxForCausalLM)r   DeepseekV2ForCausalLM)r   DeepseekV3ForCausalLM)r   DiffLlamaForCausalLM)r   DogeForCausalLM)r   Dots1ForCausalLM)r   ElectraForCausalLM)r   Emu3ForCausalLM)r  ErnieForCausalLM)r  Ernie4_5ForCausalLM)r  Ernie4_5_MoeForCausalLMr  r  )r  FalconForCausalLM)r  FalconH1ForCausalLMr  )r%  FlexOlmoForCausalLMr3  FuyuForCausalLM)r5  GemmaForCausalLM)r7  Gemma2ForCausalLMr  )r;  Gemma3ForCausalLMr=  Gemma3nForConditionalGeneration)rA  Gemma3nForCausalLMrE  GitForCausalLM)rG  GlmForCausalLM)rI  Glm4ForCausalLM)rM  Glm4MoeForCausalLM)rO  Glm4MoeLiteForCausalLM)re  GlmMoeDsaForCausalLMru  GotOcr2ForConditionalGenerationr  r  r	  )r|  GPTNeoForCausalLM)r~  GPTNeoXForCausalLM)r  GPTNeoXJapaneseForCausalLM)r  GptOssForCausalLM)r  GPTJForCausalLM)r  GraniteForCausalLM)r  GraniteMoeForCausalLM)r  GraniteMoeHybridForCausalLM)r  GraniteMoeSharedForCausalLM)r  HeliumForCausalLM)r  HunYuanDenseV1ForCausalLM)r  HunYuanMoEV1ForCausalLM)r  Jais2ForCausalLM)r  JambaForCausalLM)r  JetMoeForCausalLM)r  Lfm2ForCausalLM)r  Lfm2MoeForCausalLM)r  r}  )r  Llama4ForCausalLM)r  r  )r  LongcatFlashForCausalLMr%  r'  )r  MarianForCausalLM)r  MBartForCausalLM)r  MegatronBertForCausalLM)r  MiniMaxForCausalLM)r  MiniMaxM2ForCausalLM)r  MinistralForCausalLM)r!  Ministral3ForCausalLM)r#  MistralForCausalLM)r'  MixtralForCausalLM)r,  MllamaForCausalLM)rA  ModernBertDecoderForCausalLM)rG  MoshiForCausalLMr1  )rQ  MusicgenForCausalLM)rS  MusicgenMelodyForCausalLM)rU  MvpForCausalLMr7  )rY  NemotronForCausalLM)r`  OlmoForCausalLM)rb  Olmo2ForCausalLM)rd  Olmo3ForCausalLM)rf  OlmoeForCausalLMr;  )rn  OPTForCausalLM)r  PegasusForCausalLM)r  PersimmonForCausalLM)r  PhiForCausalLM)r  Phi3ForCausalLMr  Phi4MultimodalForCausalLM)r  PhimoeForCausalLM)r  PLBartForCausalLM)r  ProphetNetForCausalLM)r  Qwen2ForCausalLM)r  Qwen2MoeForCausalLM)r  Qwen3ForCausalLM)r  Qwen3_5ForCausalLM)r  Qwen3_5MoeForCausalLM)r  r  )r  r  )r  Qwen3MoeForCausalLM)r  Qwen3NextForCausalLM)r  RecurrentGemmaForCausalLM)r  ReformerModelWithLMHead)r  RemBertForCausalLM)r  RobertaForCausalLM)r  RobertaPreLayerNormForCausalLM)r  RoCBertForCausalLM)r  RoFormerForCausalLMrG  )r  SeedOssForCausalLM)r0  SmolLM3ForCausalLM)r6  SolarOpenForCausalLM)rA  StableLmForCausalLM)rC  Starcoder2ForCausalLM)trocrTrOCRForCausalLM)rz  VaultGemmaForCausalLM)r  WhisperForCausalLM)r  XGLMForCausalLMra  )r  XLMRobertaForCausalLM)r  XLMRobertaXLForCausalLMrg  ri  )r  XmodForCausalLM)r  YoutuForCausalLM)r  ZambaForCausalLM)r  Zamba2ForCausalLM)7r   r>   rM   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r+  rr  r  r  r  r  )r  Llama4VisionModelr)  )r,  MllamaVisionModelr3  r6  r9  r<  r  r  r  r  r  r  r-  rE  rH  rK  rN  r[  rf  ri  rl  r  r  r  r  r  r  r  ))r   DeiTForMaskedImageModeling)r,  FocalNetForMaskedImageModeling)rI  SwinForMaskedImageModeling)rO  Swinv2ForMaskedImageModeling)r  ViTForMaskedImageModeling)r  ImageGPTForCausalImageModeling)))r?   BeitForImageClassification)rN   BitForImageClassification)rs   CLIPForImageClassification)r   ConvNextForImageClassification)r    ConvNextV2ForImageClassification)r   CvtForImageClassification)r   $Data2VecVisionForImageClassification)r   )DeiTForImageClassification%DeiTForImageClassificationWithTeacher)r   DinatForImageClassification)r   Dinov2ForImageClassification)r   )Dinov2WithRegistersForImageClassification)r   DonutSwinForImageClassification)r   "EfficientNetForImageClassification)r,  FocalNetForImageClassification)r  HGNetV2ForImageClassification)r  HieraForImageClassification)r  IJepaForImageClassification)r  ImageGPTForImageClassification)r  )LevitForImageClassification&LevitForImageClassificationWithTeacher)r  MetaClip2ForImageClassification)r4  !MobileNetV1ForImageClassification)r7  !MobileNetV2ForImageClassification)r:  MobileViTForImageClassification)r=  !MobileViTV2ForImageClassification)r  )&PerceiverForImageClassificationLearned&PerceiverForImageClassificationFourier-PerceiverForImageClassificationConvProcessing)r   PoolFormerForImageClassification)r  PvtForImageClassification)r  PvtV2ForImageClassification)r  RegNetForImageClassification)r  ResNetForImageClassification)r  SegformerForImageClassification)shieldgemma2"ShieldGemma2ForImageClassification)r&  SiglipForImageClassification)r)  Siglip2ForImageClassification)rF  !SwiftFormerForImageClassification)rI  SwinForImageClassification)rO  Swinv2ForImageClassification)r`  TextNetForImageClassification)rm  !TimmWrapperForImageClassification)r  ViTForImageClassification)r  ViTMSNForImageClassificationr   DetrForSegmentation))r?   BeitForSemanticSegmentation)r   %Data2VecVisionForSemanticSegmentation)r   DPTForSemanticSegmentation)r7  "MobileNetV2ForSemanticSegmentation)r:   MobileViTForSemanticSegmentation)r=  "MobileViTV2ForSemanticSegmentation)r   SegformerForSemanticSegmentation)upernetUperNetForSemanticSegmentationr  !MaskFormerForInstanceSegmentation)r(  )eomtEomtForUniversalSegmentation)eomt_dinov3"EomtDinov3ForUniversalSegmentation)r  #Mask2FormerForUniversalSegmentationr3  )rj  !OneFormerForUniversalSegmentation))rg  !TimesformerForVideoClassification)r  VideoMAEForVideoClassification)r  VivitForVideoClassification)r  VJEPA2ForVideoClassificationr  )<)r(   AriaForConditionalGeneration)r5   !AyaVisionForConditionalGeneration)rW   BlipForConditionalGenerationrY   Blip2ForConditionalGeneration)ri   !ChameleonForConditionalGeneration)r   %Cohere2VisionForConditionalGeneration)r   "DeepseekVLForConditionalGeneration)r   (DeepseekVLHybridForConditionalGeneration)r   Emu3ForConditionalGeneration)r  'Ernie4_5_VL_MoeForConditionalGenerationr  )r  FastVlmForConditionalGenerationr  r  r  r  r  )rK  Glm46VForConditionalGeneration)rQ  Glm4vForConditionalGeneration)rS   Glm4vMoeForConditionalGeneration)rg  GlmOcrForConditionalGenerationr  r  r  r  )r  $InstructBlipForConditionalGeneration)r  )InstructBlipVideoForConditionalGeneration)r   InternVLForConditionalGenerationr  )r  Kosmos2ForConditionalGeneration)r  !Kosmos2_5ForConditionalGeneration)r  Lfm2VlForConditionalGeneration)r  "LightOnOcrForConditionalGenerationr  r  r  r  r  r*  r,  )rp  Ovis2ForConditionalGeneration)paddleocr_vl#PaddleOCRVLForConditionalGenerationr=  )r  $PerceptionLMForConditionalGeneration)
pix2struct"Pix2StructForConditionalGeneration)r  r  )r  "Qwen2_5_VLForConditionalGeneration)r  Qwen2VLForConditionalGeneration)r  Qwen3_5ForConditionalGeneration)r  "Qwen3_5MoeForConditionalGeneration)r  Qwen3VLForConditionalGeneration)r  "Qwen3VLMoeForConditionalGeneration)r  r  )r2  SmolVLMForConditionalGenerationrR  )rp  UdopForConditionalGeneration)r  #VideoLlama3ForConditionalGenerationrX  r[  )zvision-encoder-decoderVisionEncoderDecoderModelrm  granite_speech%GraniteSpeechForConditionalGenerationr  *KyutaiSpeechToTextForConditionalGenerationr  qwen2_5_omni#Qwen2_5OmniForConditionalGenerationr?  qwen3_omni_moe$Qwen3OmniMoeForConditionalGenerationr  r  )*)r   AlbertForMaskedLMr  )rB   BertForMaskedLM)rG   BigBirdForMaskedLMr  )r   ConvBertForMaskedLMr  r  r  r  )r   ElectraForMaskedLM)r  ErnieForMaskedLM)r
  EsmForMaskedLMr  )r)  FNetForMaskedLM)r0  FunnelForMaskedLMr  r  r   r"  r  MBartForConditionalGeneration)r  MegatronBertForMaskedLM)r1  MobileBertForMaskedLM)r?  ModernBertForMaskedLMr/  r3  r5  )r^  NystromformerForMaskedLM)r  PerceiverForMaskedLM)r  ReformerForMaskedLM)r  RemBertForMaskedLMrB  rD  )r  RoCBertForMaskedLM)r  RoFormerForMaskedLMrJ  rT  ra  rc  re  rk  )r  YosoForMaskedLM))r   !ConditionalDetrForObjectDetection)r   DFineForObjectDetection)r   DabDetrForObjectDetection)r    DeformableDetrForObjectDetection)r   DetrForObjectDetection)r  LwDetrForObjectDetection)r  PPDocLayoutV3ForObjectDetection)r  RTDetrForObjectDetection)r  RTDetrV2ForObjectDetection)r\  "TableTransformerForObjectDetection)r  YolosForObjectDetection))r  GroundingDinoForObjectDetection)r.  !MMGroundingDinoForObjectDetectionrh  )rr  Owlv2ForObjectDetection)rt  OwlViTForObjectDetection))depth_anythingDepthAnythingForDepthEstimation)r   DepthProForDepthEstimation)r   DPTForDepthEstimation)rs  GLPNForDepthEstimation)prompt_depth_anything%PromptDepthAnythingForDepthEstimation)zoedepthZoeDepthForDepthEstimation)r.   r  )rI   &BigBirdPegasusForConditionalGeneration)rR   "BlenderbotForConditionalGeneration)rT   'BlenderbotSmallForConditionalGeneration)zencoder-decoderEncoderDecoderModelr  rm  rf  )r  LEDForConditionalGeneration)r  LongT5ForConditionalGeneration)r  M2M100ForConditionalGeneration)r  MarianMTModelrz  )rO  MT5ForConditionalGenerationr5  r9  )r  PegasusForConditionalGeneration)r   PegasusXForConditionalGeneration)r  PLBartForConditionalGeneration)r  "ProphetNetForConditionalGenerationr?  )r  SeamlessM4TForTextToText)r  SeamlessM4Tv2ForTextToTextrL  rN  rP  rR  )rr  UMT5ForConditionalGenerationr  r  ))r   DiaForConditionalGenerationrf  ri  )rC  !MoonshineForConditionalGeneration)rE  *MoonshineStreamingForConditionalGeneration)	pop2piano!Pop2PianoForConditionalGeneration)r  SeamlessM4TForSpeechToText)r  SeamlessM4Tv2ForSpeechToText)zspeech-encoder-decoderSpeechEncoderDecoderModel)r8  #Speech2TextForConditionalGeneration)r:  SpeechT5ForSpeechToTextr  r  )r  WhisperForConditionalGeneration)n)r   AlbertForSequenceClassification)r&   ArceeForSequenceClassification)r<   BartForSequenceClassification)rB   BertForSequenceClassification)rG    BigBirdForSequenceClassification)rI   'BigBirdPegasusForSequenceClassification)rK   BioGptForSequenceClassification)r]   BloomForSequenceClassification)re   "CamembertForSequenceClassification)rg   CanineForSequenceClassification)r   LlamaForSequenceClassification)r   !ConvBertForSequenceClassification)r   CTRLForSequenceClassification)r   %Data2VecTextForSequenceClassification)r    DebertaForSequenceClassification)r   "DebertaV2ForSequenceClassification)r   #DeepseekV2ForSequenceClassification)r   #DeepseekV3ForSequenceClassification)r   "DiffLlamaForSequenceClassification)r   #DistilBertForSequenceClassification)r   DogeForSequenceClassification)r    ElectraForSequenceClassification)r  ErnieForSequenceClassification)r
  EsmForSequenceClassification)r   Exaone4ForSequenceClassification)r  FalconForSequenceClassification)r!  !FlaubertForSequenceClassification)r)  FNetForSequenceClassification)r0  FunnelForSequenceClassification)r5  GemmaForSequenceClassification)r7  Gemma2ForSequenceClassification)r9  Gemma3ForSequenceClassification)r;  #Gemma3TextForSequenceClassification)rG  GlmForSequenceClassification)rI  Glm4ForSequenceClassification)rw  GPT2ForSequenceClassification)ry  r  )rz  #GPTBigCodeForSequenceClassification)r|  GPTNeoForSequenceClassification)r~   GPTNeoXForSequenceClassification)r  GptOssForSequenceClassification)r  GPTJForSequenceClassification)r  HeliumForSequenceClassification)r  'HunYuanDenseV1ForSequenceClassification)r  %HunYuanMoEV1ForSequenceClassification)r  IBertForSequenceClassification)r  JambaForSequenceClassification)r  JetMoeForSequenceClassification)r  !LayoutLMForSequenceClassification)r  #LayoutLMv2ForSequenceClassification)r  #LayoutLMv3ForSequenceClassification)r  LiltForSequenceClassification)r  r  )r  #LongformerForSequenceClassification)r  LukeForSequenceClassification)r	  !MarkupLMForSequenceClassification)r  MBartForSequenceClassification)r  %MegatronBertForSequenceClassification)r   MiniMaxForSequenceClassification)r  "MinistralForSequenceClassification)r!  #Ministral3ForSequenceClassification)r#   MistralForSequenceClassification)r'   MixtralForSequenceClassification)r1  #MobileBertForSequenceClassification)r?  #ModernBertForSequenceClassification)rA  *ModernBertDecoderForSequenceClassification)rI  MPNetForSequenceClassification)rK  MptForSequenceClassification)rM  MraForSequenceClassification)rO  MT5ForSequenceClassification)rU  MvpForSequenceClassification)rY  !NemotronForSequenceClassification)r^  &NystromformerForSequenceClassification)rl  "OpenAIGPTForSequenceClassification)rn  OPTForSequenceClassification)r  "PerceiverForSequenceClassification)r  "PersimmonForSequenceClassification)r  PhiForSequenceClassification)r  Phi3ForSequenceClassification)r  PhimoeForSequenceClassification)r  PLBartForSequenceClassification)r  Qwen2ForSequenceClassification)r  !Qwen2MoeForSequenceClassification)r  Qwen3ForSequenceClassification)r  !Qwen3MoeForSequenceClassification)r  "Qwen3NextForSequenceClassification)r  !ReformerForSequenceClassification)r   RemBertForSequenceClassification)r   RobertaForSequenceClassification)r  ,RobertaPreLayerNormForSequenceClassification)r   RoCBertForSequenceClassification)r  !RoFormerForSequenceClassification)r   SeedOssForSequenceClassification)r0   SmolLM3ForSequenceClassification)r?  $SqueezeBertForSequenceClassification)rA  !StableLmForSequenceClassification)rC  #Starcoder2ForSequenceClassification)rS  T5ForSequenceClassification)rU   T5GemmaForSequenceClassification)rW  !T5Gemma2ForSequenceClassification)r^  TapasForSequenceClassification)rr  UMT5ForSequenceClassification)r  XLMForSequenceClassification)r  #XLMRobertaForSequenceClassification)r  %XLMRobertaXLForSequenceClassification)r  XLNetForSequenceClassification)r  XmodForSequenceClassification)r  YosoForSequenceClassification)r  ZambaForSequenceClassification)r  Zamba2ForSequenceClassification)L)r   AlbertForQuestionAnswering)r&   ArceeForQuestionAnswering)r<   BartForQuestionAnswering)rB   BertForQuestionAnswering)rG   BigBirdForQuestionAnswering)rI   "BigBirdPegasusForQuestionAnswering)r]   BloomForQuestionAnswering)re   CamembertForQuestionAnswering)rg   CanineForQuestionAnswering)r   ConvBertForQuestionAnswering)r    Data2VecTextForQuestionAnswering)r   DebertaForQuestionAnswering)r   DebertaV2ForQuestionAnswering)r   DiffLlamaForQuestionAnswering)r   DistilBertForQuestionAnswering)r   ElectraForQuestionAnswering)r  ErnieForQuestionAnswering)r  Exaone4ForQuestionAnswering)r  FalconForQuestionAnswering)r!  "FlaubertForQuestionAnsweringSimple)r)  FNetForQuestionAnswering)r0  FunnelForQuestionAnswering)ry  GPT2ForQuestionAnswering)r|  GPTNeoForQuestionAnswering)r~  GPTNeoXForQuestionAnswering)r  GPTJForQuestionAnswering)r  IBertForQuestionAnsweringr  LayoutLMv2ForQuestionAnsweringr  LayoutLMv3ForQuestionAnswering)r  LEDForQuestionAnswering)r  LiltForQuestionAnswering)r  LlamaForQuestionAnswering)r  LongformerForQuestionAnswering)r  LukeForQuestionAnswering)r  LxmertForQuestionAnswering)r	  MarkupLMForQuestionAnswering)r  MBartForQuestionAnswering)r   MegatronBertForQuestionAnswering)r  MiniMaxForQuestionAnswering)r  MinistralForQuestionAnswering)r!  Ministral3ForQuestionAnswering)r#  MistralForQuestionAnswering)r'  MixtralForQuestionAnswering)r1  MobileBertForQuestionAnswering)r?  ModernBertForQuestionAnswering)rI  MPNetForQuestionAnswering)rK  MptForQuestionAnswering)rM  MraForQuestionAnswering)rO  MT5ForQuestionAnswering)rU  MvpForQuestionAnswering)rY  NemotronForQuestionAnswering)r^  !NystromformerForQuestionAnswering)rn  OPTForQuestionAnswering)r  Qwen2ForQuestionAnswering)r  Qwen2MoeForQuestionAnswering)r  Qwen3ForQuestionAnswering)r  Qwen3MoeForQuestionAnswering)r  Qwen3NextForQuestionAnswering)r  ReformerForQuestionAnswering)r  RemBertForQuestionAnswering)r  RobertaForQuestionAnswering)r  'RobertaPreLayerNormForQuestionAnswering)r  RoCBertForQuestionAnswering)r  RoFormerForQuestionAnswering)r  SeedOssForQuestionAnswering)r0  SmolLM3ForQuestionAnswering)r<  SplinterForQuestionAnswering)r?  SqueezeBertForQuestionAnswering)rS  T5ForQuestionAnswering)rr  UMT5ForQuestionAnswering)r  XLMForQuestionAnsweringSimple)r  XLMRobertaForQuestionAnswering)r   XLMRobertaXLForQuestionAnswering)r  XLNetForQuestionAnsweringSimple)r  XmodForQuestionAnswering)r  YosoForQuestionAnswering)r^  TapasForQuestionAnswering))rW   BlipForQuestionAnsweringrB  )r  ViltForQuestionAnswering))r  LayoutLMForQuestionAnsweringr@  rB  )W)r   AlbertForTokenClassification)r$   ApertusForTokenClassification)r&   ArceeForTokenClassification)rB   BertForTokenClassification)rG   BigBirdForTokenClassification)rK   BioGptForTokenClassification)r]   BloomForTokenClassification)rc   BrosForTokenClassification)re   CamembertForTokenClassification)rg   CanineForTokenClassification)r   ConvBertForTokenClassification)r   "Data2VecTextForTokenClassification)r   DebertaForTokenClassification)r   DebertaV2ForTokenClassification)r    DeepseekV3ForTokenClassification)r   DiffLlamaForTokenClassification)r    DistilBertForTokenClassification)r   ElectraForTokenClassification)r  ErnieForTokenClassification)r
  EsmForTokenClassification)r  Exaone4ForTokenClassification)r  FalconForTokenClassification)r!  FlaubertForTokenClassification)r)  FNetForTokenClassification)r0  FunnelForTokenClassification)r5  GemmaForTokenClassification)r7  Gemma2ForTokenClassification)rG  GlmForTokenClassification)rI  Glm4ForTokenClassification)rw  GPT2ForTokenClassification)ry  r  )rz   GPTBigCodeForTokenClassification)r|  GPTNeoForTokenClassification)r~  GPTNeoXForTokenClassification)r  GptOssForTokenClassification)r  HeliumForTokenClassification)r  IBertForTokenClassification)r  LayoutLMForTokenClassification)r   LayoutLMv2ForTokenClassification)r   LayoutLMv3ForTokenClassification)r  LiltForTokenClassification)r  LlamaForTokenClassification)r   LongformerForTokenClassification)r  LukeForTokenClassification)r	  MarkupLMForTokenClassification)r  "MegatronBertForTokenClassification)r  MiniMaxForTokenClassification)r  MinistralForTokenClassification)r!   Ministral3ForTokenClassification)r#  MistralForTokenClassification)r'  MixtralForTokenClassification)r1   MobileBertForTokenClassification)r?   ModernBertForTokenClassification)rI  MPNetForTokenClassification)rK  MptForTokenClassification)rM  MraForTokenClassification)rO  MT5ForTokenClassification)rY  NemotronForTokenClassification)r^  #NystromformerForTokenClassification)r  PersimmonForTokenClassification)r  PhiForTokenClassification)r  Phi3ForTokenClassification)r  Qwen2ForTokenClassification)r  Qwen2MoeForTokenClassification)r  Qwen3ForTokenClassification)r  Qwen3MoeForTokenClassification)r  Qwen3NextForTokenClassification)r  RemBertForTokenClassification)r  RobertaForTokenClassification)r  )RobertaPreLayerNormForTokenClassification)r  RoCBertForTokenClassification)r  RoFormerForTokenClassification)r  SeedOssForTokenClassification)r0  SmolLM3ForTokenClassification)r?  !SqueezeBertForTokenClassification)rA  StableLmForTokenClassification)rC   Starcoder2ForTokenClassification)rS  T5ForTokenClassification)rU  T5GemmaForTokenClassification)rW  T5Gemma2ForTokenClassification)rr  UMT5ForTokenClassification)r  XLMForTokenClassification)r   XLMRobertaForTokenClassification)r  "XLMRobertaXLForTokenClassification)r  XLNetForTokenClassification)r  XmodForTokenClassification)r  YosoForTokenClassification)#)r   AlbertForMultipleChoice)rB   BertForMultipleChoice)rG   BigBirdForMultipleChoice)re   CamembertForMultipleChoice)rg   CanineForMultipleChoice)r   ConvBertForMultipleChoice)r   Data2VecTextForMultipleChoice)r   DebertaV2ForMultipleChoice)r   DistilBertForMultipleChoice)r   ElectraForMultipleChoice)r  ErnieForMultipleChoice)r!  FlaubertForMultipleChoice)r)  FNetForMultipleChoice)r0  FunnelForMultipleChoice)r  IBertForMultipleChoice)r  LongformerForMultipleChoice)r  LukeForMultipleChoice)r  MegatronBertForMultipleChoice)r1  MobileBertForMultipleChoice)r?  ModernBertForMultipleChoice)rI  MPNetForMultipleChoice)rM  MraForMultipleChoice)r^  NystromformerForMultipleChoice)r  RemBertForMultipleChoice)r  RobertaForMultipleChoice)r  $RobertaPreLayerNormForMultipleChoice)r  RoCBertForMultipleChoice)r  RoFormerForMultipleChoice)r?  SqueezeBertForMultipleChoice)r  XLMForMultipleChoice)r  XLMRobertaForMultipleChoice)r  XLMRobertaXLForMultipleChoice)r  XLNetForMultipleChoice)r  XmodForMultipleChoice)r  YosoForMultipleChoice))rB   BertForNextSentencePrediction)r  ErnieForNextSentencePrediction)r)  FNetForNextSentencePrediction)r  %MegatronBertForNextSentencePrediction)r1  #MobileBertForNextSentencePrediction))r,   ASTForAudioClassification)r   &Data2VecAudioForSequenceClassification)r  HubertForSequenceClassification)r!  SEWForSequenceClassification)r#  SEWDForSequenceClassification)rt  "UniSpeechForSequenceClassification)rv  %UniSpeechSatForSequenceClassification)r  !Wav2Vec2ForSequenceClassification)r  %Wav2Vec2BertForSequenceClassification)r  *Wav2Vec2ConformerForSequenceClassification)r  WavLMForSequenceClassification)r  WhisperForAudioClassification))r   Data2VecAudioForCTC)r  HubertForCTCr  rx  )r!  	SEWForCTC)r#  
SEWDForCTC)rt  UniSpeechForCTC)rv  UniSpeechSatForCTC)r  Wav2Vec2ForCTC)r  Wav2Vec2BertForCTC)r  Wav2Vec2ConformerForCTC)r  WavLMForCTC))r   (Data2VecAudioForAudioFrameClassification)rv  'UniSpeechSatForAudioFrameClassification)r  #Wav2Vec2ForAudioFrameClassification)r  'Wav2Vec2BertForAudioFrameClassification)r  ,Wav2Vec2ConformerForAudioFrameClassification)r   WavLMForAudioFrameClassification))r   Data2VecAudioForXVector)rv  UniSpeechSatForXVector)r  Wav2Vec2ForXVector)r  Wav2Vec2BertForXVector)r  Wav2Vec2ConformerForXVector)r  WavLMForXVectorr  )r:  SpeechT5ForTextToSpeech)r9   r   )r  r  r  )rQ   MusicgenForConditionalGeneration)rS  &MusicgenMelodyForConditionalGenerationrk  rn  )r  SeamlessM4TForTextToSpeech)r  SeamlessM4Tv2ForTextToSpeechr  )
r   r!   rV   )rY   Blip2ForImageTextRetrievalrk   rr   rz   r  r%  r(  ))r?   BeitBackbone)rN   BitBackbone)r   ConvNextBackbone)r   ConvNextV2Backbone)r   DinatBackbone)r   Dinov2Backbone)r   Dinov2WithRegistersBackbone)r   DINOv3ConvNextBackbone)r   DINOv3ViTBackbone)r,  FocalNetBackboner  )r  HieraBackbone)lw_detr_vitLwDetrViTBackbone)r  MaskFormerSwinBackbone)r  PixioBackbone)r  PvtV2Backbone)r  ResNetBackbone)rt_detr_resnetRTDetrResNetBackbone)rI  SwinBackbone)rO  Swinv2Backbone)r`  TextNetBackboneri  )r  VitDetBackbone)vitpose_backboneVitPoseBackbone)r   )r   r   r  r  )r  r  r  )r	  r  r  )
superpointSuperPointForKeypointDetection))r   !EfficientLoFTRForKeypointMatchingr  )	superglueSuperGlueForKeypointMatching)r   rA   rF   ru   r   r   r   r   r   )r   Emu3TextModelr   r  )r  r  r  )r,  MllamaTextModelr0  )rO  MT5EncoderModelr]  r  r  r  r  r  r  r>  )rS  T5EncoderModel)rU  T5GemmaEncoderModel)rr  UMT5EncoderModelr  r  r  )r}  'PatchTSMixerForTimeSeriesClassification)r  PatchTSTForClassification)r}  PatchTSMixerForRegression)r  PatchTSTForRegression)rd  TimesFmModelForPrediction)rL  Swin2SRForImageSuperResolutionr   r|  c                   @      e Zd ZeZdS )AutoModelForMaskGenerationN)r   r   r   !MODEL_FOR_MASK_GENERATION_MAPPING_model_mappingr   r   r   r   rH  @      rH  c                   @   rG  )AutoModelForKeypointDetectionN)r   r   r   $MODEL_FOR_KEYPOINT_DETECTION_MAPPINGrJ  r   r   r   r   rL  D  rK  rL  c                   @   rG  )AutoModelForKeypointMatchingN)r   r   r   #MODEL_FOR_KEYPOINT_MATCHING_MAPPINGrJ  r   r   r   r   rN  H  rK  rN  c                   @   rG  )AutoModelForTextEncodingN)r   r   r   MODEL_FOR_TEXT_ENCODING_MAPPINGrJ  r   r   r   r   rP  L  rK  rP  c                   @   rG  )AutoModelForImageToImageN)r   r   r    MODEL_FOR_IMAGE_TO_IMAGE_MAPPINGrJ  r   r   r   r   rR  P  rK  rR  c                   @   rG  )	AutoModelN)r   r   r   MODEL_MAPPINGrJ  r   r   r   r   rT  T  rK  rT  c                   @   rG  )AutoModelForPreTrainingN)r   r   r   MODEL_FOR_PRETRAINING_MAPPINGrJ  r   r   r   r   rV  [  rK  rV  pretraining)head_docc                       @   e Zd ZeZeded  deej	e B ddf fddZ
  ZS )AutoModelForCausalLMclspretrained_model_name_or_pathreturnr   c                       t  j|g|R i |S Nsuperfrom_pretrainedr\  r]  
model_argskwargs	__class__r   r   rc  f     z$AutoModelForCausalLM.from_pretrained)r   r   r   MODEL_FOR_CAUSAL_LM_MAPPINGrJ  classmethodtypestrosPathLikerc  __classcell__r   r   rg  r   r[  b      r[  zcausal language modelingc                   @   rG  )AutoModelForMaskedLMN)r   r   r   MODEL_FOR_MASKED_LM_MAPPINGrJ  r   r   r   r   rr  s  rK  rr  zmasked language modelingc                   @   rG  )AutoModelForSeq2SeqLMN)r   r   r   &MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPINGrJ  r   r   r   r   rt  z  rK  rt  z&sequence-to-sequence language modelingzgoogle-t5/t5-base)rY  checkpoint_for_examplec                   @   rG  )"AutoModelForSequenceClassificationN)r   r   r   )MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPINGrJ  r   r   r   r   rw    rK  rw  zsequence classificationc                   @   rG  )AutoModelForQuestionAnsweringN)r   r   r   $MODEL_FOR_QUESTION_ANSWERING_MAPPINGrJ  r   r   r   r   ry    rK  ry  zquestion answeringc                   @   rG  )"AutoModelForTableQuestionAnsweringN)r   r   r   *MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPINGrJ  r   r   r   r   r{    rK  r{  ztable question answeringzgoogle/tapas-base-finetuned-wtqc                   @   rG  )#AutoModelForVisualQuestionAnsweringN)r   r   r   +MODEL_FOR_VISUAL_QUESTION_ANSWERING_MAPPINGrJ  r   r   r   r   r}    rK  r}  zvisual question answeringzdandelin/vilt-b32-finetuned-vqac                   @   rG  )%AutoModelForDocumentQuestionAnsweringN)r   r   r   -MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPINGrJ  r   r   r   r   r    rK  r  zdocument question answeringz/impira/layoutlm-document-qa", revision="52e01b3c                   @   rG  )AutoModelForTokenClassificationN)r   r   r   &MODEL_FOR_TOKEN_CLASSIFICATION_MAPPINGrJ  r   r   r   r   r    rK  r  ztoken classificationc                   @   rG  )AutoModelForMultipleChoiceN)r   r   r   !MODEL_FOR_MULTIPLE_CHOICE_MAPPINGrJ  r   r   r   r   r    rK  r  zmultiple choicec                   @   rG  )"AutoModelForNextSentencePredictionN)r   r   r   *MODEL_FOR_NEXT_SENTENCE_PREDICTION_MAPPINGrJ  r   r   r   r   r    rK  r  znext sentence predictionc                   @   rG  )AutoModelForImageClassificationN)r   r   r   &MODEL_FOR_IMAGE_CLASSIFICATION_MAPPINGrJ  r   r   r   r   r    rK  r  zimage classificationc                   @   rG  )'AutoModelForZeroShotImageClassificationN)r   r   r   0MODEL_FOR_ZERO_SHOT_IMAGE_CLASSIFICATION_MAPPINGrJ  r   r   r   r   r    rK  r  zzero-shot image classificationc                   @   rG  )AutoModelForImageSegmentationN)r   r   r   $MODEL_FOR_IMAGE_SEGMENTATION_MAPPINGrJ  r   r   r   r   r    rK  r  zimage segmentationc                   @   rG  ) AutoModelForSemanticSegmentationN)r   r   r   'MODEL_FOR_SEMANTIC_SEGMENTATION_MAPPINGrJ  r   r   r   r   r    rK  r  zsemantic segmentationc                   @   rG  ) AutoModelForTimeSeriesPredictionN)r   r   r   (MODEL_FOR_TIME_SERIES_PREDICTION_MAPPINGrJ  r   r   r   r   r    rK  r  ztime-series predictionc                   @   rG  )!AutoModelForUniversalSegmentationN)r   r   r   (MODEL_FOR_UNIVERSAL_SEGMENTATION_MAPPINGrJ  r   r   r   r   r    rK  r  zuniversal image segmentationc                   @   rG  ) AutoModelForInstanceSegmentationN)r   r   r   'MODEL_FOR_INSTANCE_SEGMENTATION_MAPPINGrJ  r   r   r   r   r    rK  r  zinstance segmentationc                   @   rG  )AutoModelForObjectDetectionN)r   r   r   "MODEL_FOR_OBJECT_DETECTION_MAPPINGrJ  r   r   r   r   r    rK  r  zobject detectionc                   @   rG  )#AutoModelForZeroShotObjectDetectionN)r   r   r   ,MODEL_FOR_ZERO_SHOT_OBJECT_DETECTION_MAPPINGrJ  r   r   r   r   r    rK  r  zzero-shot object detectionc                   @   rG  )AutoModelForDepthEstimationN)r   r   r   "MODEL_FOR_DEPTH_ESTIMATION_MAPPINGrJ  r   r   r   r   r    rK  r  zdepth estimationc                   @   rG  )AutoModelForVideoClassificationN)r   r   r   &MODEL_FOR_VIDEO_CLASSIFICATION_MAPPINGrJ  r   r   r   r   r    rK  r  zvideo classificationc                       rZ  )AutoModelForImageTextToTextr\  r]  r^  r   c                    r_  r`  ra  rd  rg  r   r   rc  *  ri  z+AutoModelForImageTextToText.from_pretrained)r   r   r   $MODEL_FOR_IMAGE_TEXT_TO_TEXT_MAPPINGrJ  rk  rl  rm  rn  ro  rc  rp  r   r   rg  r   r  &  rq  r  zimage-text-to-text modelingc                   @   rG  )AutoModelForMultimodalLMN)r   r   r   MODEL_FOR_MULTIMODAL_LM_MAPPINGrJ  r   r   r   r   r  7  rK  r  zmultimodal generationc                   @   rG  )AutoModelForAudioClassificationN)r   r   r   &MODEL_FOR_AUDIO_CLASSIFICATION_MAPPINGrJ  r   r   r   r   r  >  rK  r  zaudio classificationc                   @   rG  )AutoModelForCTCN)r   r   r   MODEL_FOR_CTC_MAPPINGrJ  r   r   r   r   r  E  rK  r  z%connectionist temporal classificationc                   @   rG  )AutoModelForSpeechSeq2SeqN)r   r   r   "MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPINGrJ  r   r   r   r   r  L  rK  r  z,sequence-to-sequence speech-to-text modelingc                   @   rG  )$AutoModelForAudioFrameClassificationN)r   r   r   ,MODEL_FOR_AUDIO_FRAME_CLASSIFICATION_MAPPINGrJ  r   r   r   r   r  U  rK  r  z"audio frame (token) classificationc                   @   rG  )AutoModelForAudioXVectorN)r   r   r   MODEL_FOR_AUDIO_XVECTOR_MAPPINGrJ  r   r   r   r   r  ^  rK  r  c                   @   rG  )AutoModelForTextToSpectrogramN)r   r   r   %MODEL_FOR_TEXT_TO_SPECTROGRAM_MAPPINGrJ  r   r   r   r   r  b  rK  r  c                   @   rG  )AutoModelForTextToWaveformN)r   r   r   "MODEL_FOR_TEXT_TO_WAVEFORM_MAPPINGrJ  r   r   r   r   r  f  rK  r  c                   @   rG  )AutoBackboneN)r   r   r   MODEL_FOR_BACKBONE_MAPPINGrJ  r   r   r   r   r  j  rK  r  zaudio retrieval via x-vectorc                   @   rG  )AutoModelForMaskedImageModelingN)r   r   r   'MODEL_FOR_MASKED_IMAGE_MODELING_MAPPINGrJ  r   r   r   r   r  q  rK  r  zmasked image modelingc                   @   rG  )AutoModelForAudioTokenizationN)r   r   r   $MODEL_FOR_AUDIO_TOKENIZATION_MAPPINGrJ  r   r   r   r   r  x  rK  r  z$audio tokenization through codebooks)Wr  r  r  r  r  'MODEL_FOR_CAUSAL_IMAGE_MODELING_MAPPINGrj  r  r  r  r  MODEL_FOR_IMAGE_MAPPINGr  rS  rM  rO  r  r  rs  rI  r  r  r  rW  rz  r  ru  rx  r  r|  rQ  r  r  r  r  r  r  MODEL_FOR_RETRIEVAL_MAPPINGr  r  r~  rU  r  r  ,MODEL_FOR_TIME_SERIES_CLASSIFICATION_MAPPING(MODEL_FOR_TIME_SERIES_REGRESSION_MAPPINGrT  r  r  r  r  r  r[  r  r  r  r  rR  r  rL  rN  rH  rP  r  rr  r  r  r  r  rV  ry  r  rt  rw  r  r{  r  r  r  r  r  r  r}  r  r  r  r  )__doc__rn  collectionsr   typingr   utilsr   auto_factoryr   r   r	   r
   configuration_autor   
generationr   modeling_utilsr   r   
get_loggerr   loggerMODEL_MAPPING_NAMES#MODEL_FOR_PRETRAINING_MAPPING_NAMES!MODEL_FOR_CAUSAL_LM_MAPPING_NAMESMODEL_FOR_IMAGE_MAPPING_NAMES-MODEL_FOR_MASKED_IMAGE_MODELING_MAPPING_NAMES-MODEL_FOR_CAUSAL_IMAGE_MODELING_MAPPING_NAMES,MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING_NAMES*MODEL_FOR_IMAGE_SEGMENTATION_MAPPING_NAMES-MODEL_FOR_SEMANTIC_SEGMENTATION_MAPPING_NAMES-MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING_NAMES.MODEL_FOR_UNIVERSAL_SEGMENTATION_MAPPING_NAMES,MODEL_FOR_VIDEO_CLASSIFICATION_MAPPING_NAMES!MODEL_FOR_RETRIEVAL_MAPPING_NAMES*MODEL_FOR_IMAGE_TEXT_TO_TEXT_MAPPING_NAMESlistitems%MODEL_FOR_MULTIMODAL_LM_MAPPING_NAMES!MODEL_FOR_MASKED_LM_MAPPING_NAMES(MODEL_FOR_OBJECT_DETECTION_MAPPING_NAMES2MODEL_FOR_ZERO_SHOT_OBJECT_DETECTION_MAPPING_NAMES(MODEL_FOR_DEPTH_ESTIMATION_MAPPING_NAMES,MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING_NAMES(MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING_NAMES/MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING_NAMES*MODEL_FOR_QUESTION_ANSWERING_MAPPING_NAMES0MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING_NAMES1MODEL_FOR_VISUAL_QUESTION_ANSWERING_MAPPING_NAMES3MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING_NAMES,MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING_NAMES'MODEL_FOR_MULTIPLE_CHOICE_MAPPING_NAMES0MODEL_FOR_NEXT_SENTENCE_PREDICTION_MAPPING_NAMES,MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING_NAMESMODEL_FOR_CTC_MAPPING_NAMES2MODEL_FOR_AUDIO_FRAME_CLASSIFICATION_MAPPING_NAMES%MODEL_FOR_AUDIO_XVECTOR_MAPPING_NAMES+MODEL_FOR_TEXT_TO_SPECTROGRAM_MAPPING_NAMES(MODEL_FOR_TEXT_TO_WAVEFORM_MAPPING_NAMES6MODEL_FOR_ZERO_SHOT_IMAGE_CLASSIFICATION_MAPPING_NAMES MODEL_FOR_BACKBONE_MAPPING_NAMES'MODEL_FOR_MASK_GENERATION_MAPPING_NAMES*MODEL_FOR_KEYPOINT_DETECTION_MAPPING_NAMES)MODEL_FOR_KEYPOINT_MATCHING_MAPPING_NAMES%MODEL_FOR_TEXT_ENCODING_MAPPING_NAMES2MODEL_FOR_TIME_SERIES_CLASSIFICATION_MAPPING_NAMES.MODEL_FOR_TIME_SERIES_REGRESSION_MAPPING_NAMES.MODEL_FOR_TIME_SERIES_PREDICTION_MAPPING_NAMES&MODEL_FOR_IMAGE_TO_IMAGE_MAPPING_NAMES"MODEL_FOR_AUDIO_TOKENIZATION_NAMESrU  rW  rj  r  r  r  r  r  r  r  r  r  r  r  r~  r  rs  r  r  r  r  r  ru  rx  rz  r|  r  r  r  r  r  r  r  r  r  r  r  rI  rM  rO  rQ  r  r  r  rS  r  rH  rL  rN  rP  rR  rT  rV  r[  rr  rt  rw  ry  r{  r}  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  __all__r   r   r   r   <module>   s"  
   9^ =<	C
	
0&tR])
$



















