o
    i                    @   s  d Z ddlZddlZddlmZ ddlmZmZ ddlm	Z	 ddl
mZmZmZmZ dd	lmZ erGdd
lmZ ddlmZ G dd deeZe	eZeg dZeg dZeg dZeg dZeg dZeg dZedgZeg dZ edgZ!eg dZ"edgZ#eg dZ$eg dZ%eg dZ&edgZ'eg dZ(eg dZ)eg dZ*eg d Z+eg d!Z,eg d"Z-eg d#Z.eg d$Z/eg d%Z0ed&gZ1eg d'Z2eg d(Z3eg d)Z4eg d*Z5eg d+Z6eg d,Z7eg d-Z8eg d.Z9eg d/Z:ed0d1gZ;eg d2Z<eg d3Z=eg d4Z>eg d5Z?ed6gZ@eg d7ZAeg d8ZBed9d:gZCed;d<gZDed=gZEed>gZFed?gZGeeeZHeeeZIeeeZJeeeZKeeeZLeee ZMeee=ZNeee!ZOeee"ZPeee#ZQeee$ZReee%ZSeee&ZTeee(ZUeee'ZVeee2ZWeee3ZXeee)ZYeeeZZeeeZ[eee*Z\eee+Z]eee,Z^eee-Z_eee/Z`eee0Zaeee1Zbeee4Zceee5Zdeee6Zeeee7Zfeee8Zgeee.Zheee9Zieee:Zjeee;Zkeee<Zleee>Zmeee?Zneee@ZoeeeAZpeeeBZqeeeCZreeeDZseeeEZteeeFZueeeGZvG d@dA dAeZwG dBdC dCeZxG dDdE dEeZyG dFdG dGeZzG dHdI dIeZ{G dJdK dKeZ|ee|Z|G dLdM dMeZ}ee}dNdOZ}G dPdQ dQeZ~ee~dRdOZ~G dSdT dTeZeedUdOZG dVdW dWeZeedXdOZG dYdZ dZeZeed[d\d]ZG d^d_ d_eZeed`dOZG dadb dbeZeedcdOZG ddde deeZeedfdgd]ZG dhdi dieZeedjdkd]ZG dldm dmeZeedndod]ZG dpdq dqeZeedrdOZG dsdt dteZeedudOZG dvdw dweZeedxdOZG dydz dzeZeed{dOZG d|d} d}eZeed~dOZG dd deZeeddOZG dd deZeeddOZG dd deZeeddOZG dd deZeeddOZG dd deZeeddOZG dd deZeeddOZG dd deZeeddOZG dd deZeeddOZG dd deZeeddOZG dd deZeeddOZG dd deZeeddOZG dd deZeeddOZG dd deZeeddOZG dd deZeeddOZG dd deZeeddOZG dd deZG dd deZG dd deZG dd deZeeddOZG dd deZeeddOZG dd deZeeddOZG dd de~ZG dd deZg dZdS )zAuto Model class.    N)OrderedDict)TYPE_CHECKINGUnion   )logging   )_BaseAutoBackboneClass_BaseAutoModelClass_LazyAutoMappingauto_class_update)CONFIG_MAPPING_NAMES)GenerationMixin)PreTrainedModelc                   @   s   e Zd ZdS )_BaseModelWithGenerateN)__name__
__module____qualname__ r   r   _/home/ubuntu/LTX-2/.venv/lib/python3.10/site-packages/transformers/models/auto/modeling_auto.pyr   %   s    r   (  )aimv2
Aimv2Modelaimv2_vision_modelAimv2VisionModelalbertAlbertModelalign
AlignModelaltclipAltCLIPModel)apertusApertusModel)arcee
ArceeModel)aria	AriaModel)	aria_textAriaTextModel)audio-spectrogram-transformerASTModel)
autoformerAutoformerModel)
aya_visionAyaVisionModel)bamba
BambaModelbark	BarkModel)bart	BartModelbeit	BeitModelbert	BertModel)bert-generationBertGenerationEncoderbig_birdBigBirdModel)bigbird_pegasusBigBirdPegasusModel)biogptBioGptModelbitBitModel)bitnetBitNetModel)
blenderbotBlenderbotModel)blenderbot-smallBlenderbotSmallModelblip	BlipModel)blip-2
Blip2Model)blip_2_qformerBlip2QFormerModel)bloom
BloomModel)bltBltModel)bridgetowerBridgeTowerModel)bros	BrosModel)	camembertCamembertModel)canineCanineModel)	chameleonChameleonModelchinese_clipChineseCLIPModel)chinese_clip_vision_modelChineseCLIPVisionModel)clap	ClapModelclip	CLIPModelclip_text_modelCLIPTextModel)clip_vision_modelCLIPVisionModelclipsegCLIPSegModel)clvp!ClvpModelForConditionalGeneration)
code_llama
LlamaModel)codegenCodeGenModel)cohereCohereModel)cohere2Cohere2Modelcohere2_visionCohere2VisionModelconditional_detrConditionalDetrModel)convbertConvBertModelconvnextConvNextModel
convnextv2ConvNextV2Model)cpmantCpmAntModelcsmCsmForConditionalGeneration)ctrl	CTRLModel)cvtCvtModel)d_fine
DFineModeldab-detrDabDetrModeldacDacModel)data2vec-audioData2VecAudioModeldata2vec-textData2VecTextModeldata2vec-visionData2VecVisionModel)dbrx	DbrxModeldebertaDebertaModel
deberta-v2DebertaV2Model)decision_transformerDecisionTransformerModel)deepseek_v2DeepseekV2Model)deepseek_v3DeepseekV3Model)deepseek_vlDeepseekVLModel)deepseek_vl_hybridDeepseekVLHybridModeldeformable_detrDeformableDetrModeldeit	DeiTModel	depth_proDepthProModeldeta	DetaModeldetr	DetrModel)diaDiaModel)	diffllamaDiffLlamaModeldinat
DinatModeldinov2Dinov2Modeldinov2_with_registersDinov2WithRegistersModeldinov3_convnextDINOv3ConvNextModel
dinov3_vitDINOv3ViTModel
distilbertDistilBertModel)doge	DogeModel)
donut-swinDonutSwinModel)dots1
Dots1Model)dprDPRQuestionEncoderdptDPTModeledgetamEdgeTamModel)edgetam_videoEdgeTamVideoModel)edgetam_vision_modelEdgeTamVisionModelefficientformerEfficientFormerModel)efficientloftrEfficientLoFTRModelefficientnetEfficientNetModelelectraElectraModel)emu3	Emu3Model)encodecEncodecModel)ernie
ErnieModel)ernie4_5Ernie4_5Model)ernie4_5_moeErnie4_5_MoeModel)ernie_mErnieMModel)esmEsmModel)evollaEvollaModel)exaone4Exaone4Model)falconFalconModel)	falcon_h1FalconH1Model)falcon_mambaFalconMambaModelfastspeech2_conformerFastSpeech2ConformerModel"fastspeech2_conformer_with_hifiganFastSpeech2ConformerWithHifiGanflaubertFlaubertModel)flava
FlavaModel)	flex_olmoFlexOlmoModel)	florence2Florence2Model)fnet	FNetModelfocalnetFocalNetModel)fsmt	FSMTModel)funnel)FunnelModelFunnelBaseModel)fuyu	FuyuModel)gemma
GemmaModel)gemma2Gemma2Model)gemma3Gemma3Model)gemma3_textGemma3TextModel)gemma3nGemma3nModel)gemma3n_audioGemma3nAudioEncoder)gemma3n_textGemma3nTextModel)gemma3n_visionTimmWrapperModel)gitGitModel)glmGlmModel)glm4	Glm4Model)glm4_moeGlm4MoeModel)glm4v
Glm4vModel)	glm4v_moeGlm4vMoeModel)glm4v_moe_textGlm4vMoeTextModel)
glm4v_textGlm4vTextModelglpn	GLPNModel)got_ocr2GotOcr2Model)gpt-sw3	GPT2Model)gpt2rU  )gpt_bigcodeGPTBigCodeModel)gpt_neoGPTNeoModel)gpt_neoxGPTNeoXModel)gpt_neox_japaneseGPTNeoXJapaneseModel)gpt_ossGptOssModel)gptj	GPTJModelzgptsan-japanese&GPTSanJapaneseForConditionalGeneration)graniteGraniteModel)
granitemoeGraniteMoeModel)granitemoehybridGraniteMoeHybridModel)granitemoesharedGraniteMoeSharedModel)
graphormerGraphormerModel)grounding-dinoGroundingDinoModel)groupvitGroupViTModel)heliumHeliumModelhgnet_v2HGNetV2Backbonehiera
HieraModel)hubertHubertModel)hunyuan_v1_denseHunYuanDenseV1Model)hunyuan_v1_moeHunYuanMoEV1Modelibert
IBertModel)ideficsIdeficsModel)idefics2Idefics2Model)idefics3Idefics3Model)idefics3_visionIdefics3VisionTransformerijepa
IJepaModelimagegptImageGPTModel)informerInformerModel)instructblipInstructBlipModel)instructblipvideoInstructBlipVideoModel)internvlInternVLModel)internvl_visionInternVLVisionModel)jamba
JambaModel)janus
JanusModel)jetmoeJetMoeModel)jukeboxJukeboxModel)kosmos-2Kosmos2Model)
kosmos-2.5Kosmos2_5Model)kyutai_speech_to_textKyutaiSpeechToTextModel)layoutlmLayoutLMModel)
layoutlmv2LayoutLMv2Model)
layoutlmv3LayoutLMv3Model)ledLEDModellevit
LevitModel)lfm2	Lfm2Model)lfm2_vlLfm2VlModel	lightglueLightGlueForKeypointMatching)lilt	LiltModel)llamarz   llama4Llama4ForConditionalGeneration)llama4_textLlama4TextModel)llava
LlavaModel)
llava_nextLlavaNextModel)llava_next_videoLlavaNextVideoModel)llava_onevisionLlavaOnevisionModel)longcat_flashLongcatFlashModel
longformerLongformerModel)longt5LongT5Model)luke	LukeModel)lxmertLxmertModel)m2m_100M2M100Model)mamba
MambaModel)mamba2Mamba2Model)marianMarianModel)markuplmMarkupLMModel)mask2formerMask2FormerModel)
maskformerMaskFormerModel)maskformer-swinMaskFormerSwinModel)mbart
MBartModel)mctct
MCTCTModel)mega	MegaModel)megatron-bertMegatronBertModel
metaclip_2MetaClip2Model)zmgp-strMgpstrForSceneTextRecognition)mimi	MimiModel)minimaxMiniMaxModel)	ministralMinistralModel)mistralMistralModel)mistral3Mistral3Model)mixtralMixtralModelmlcdMLCDVisionModel)mllamaMllamaModel)mm-grounding-dinoMMGroundingDinoModel
mobilebertMobileBertModelmobilenet_v1MobileNetV1Modelmobilenet_v2MobileNetV2Model	mobilevitMobileViTModelmobilevitv2MobileViTV2Model)
modernbertModernBertModel)modernbert-decoderModernBertDecoderModel)	moonshineMoonshineModel)moshi
MoshiModel)mpnet
MPNetModel)mptMptModel)mraMraModel)mt5MT5Model)musicgenMusicgenModel)musicgen_melodyMusicgenMelodyModel)mvpMvpModelnatNatModel)nemotronNemotronModel)nezha
NezhaModel)nllb-moeNllbMoeModelnystromformerNystromformerModel)olmo	OlmoModel)olmo2
Olmo2Model)olmo3
Olmo3Model)olmoe
OlmoeModelzomdet-turboOmDetTurboForObjectDetection)	oneformerOneFormerModel)
open-llamaOpenLlamaModel)
openai-gptOpenAIGPTModel)optOPTModel)ovis2
Ovis2Model)owlv2
Owlv2Model)owlvitOwlViTModel)	paligemmaPaliGemmaModelparakeet_ctcParakeetForCTC)parakeet_encoderParakeetEncoder)patchtsmixerPatchTSMixerModel)patchtstPatchTSTModel)pegasusPegasusModel)	pegasus_xPegasusXModel)	perceiverPerceiverModel)perception_encoderPerceptionEncoder)perception_lmPerceptionLMModel)	persimmonPersimmonModel)phiPhiModel)phi3	Phi3Model)phi4_multimodalPhi4MultimodalModel)phimoePhimoeModel)pixtralPixtralVisionModel)plbartPLBartModel
poolformerPoolFormerModel)
prophetnetProphetNetModelpvtPvtModel)pvt_v2
PvtV2Model)qdqbertQDQBertModel)qwen2
Qwen2Model)
qwen2_5_vlQwen2_5_VLModel)qwen2_5_vl_textQwen2_5_VLTextModel)qwen2_audio_encoderQwen2AudioEncoder)	qwen2_moeQwen2MoeModel)qwen2_vlQwen2VLModel)qwen2_vl_textQwen2VLTextModel)qwen3
Qwen3Model)	qwen3_moeQwen3MoeModel)
qwen3_nextQwen3NextModel)qwen3_vlQwen3VLModel)qwen3_vl_moeQwen3VLMoeModel)qwen3_vl_moe_textQwen3VLMoeTextModel)qwen3_vl_textQwen3VLTextModel)recurrent_gemmaRecurrentGemmaModelreformerReformerModelregnetRegNetModelrembertRemBertModelresnetResNetModel	retribertRetriBertModelrobertaRobertaModelroberta-prelayernormRobertaPreLayerNormModelroc_bertRoCBertModelroformerRoFormerModel)rt_detrRTDetrModel)
rt_detr_v2RTDetrV2Model)rwkv	RwkvModelsamSamModelsam2	Sam2Model)sam2_hiera_det_modelSam2HieraDetModel)
sam2_videoSam2VideoModel)sam2_vision_modelSam2VisionModelsam_hq
SamHQModel)sam_hq_vision_modelSamHQVisionModel)sam_vision_modelSamVisionModel)seamless_m4tSeamlessM4TModel)seamless_m4t_v2SeamlessM4Tv2Model)seed_ossSeedOssModel	segformerSegformerModel)seggptSegGptModel)sewSEWModel)sew-d	SEWDModelsiglipSiglipModelsiglip2Siglip2Model)siglip2_vision_modelSiglip2VisionModelsiglip_vision_modelSiglipVisionModel)smollm3SmolLM3Model)smolvlmSmolVLMModel)smolvlm_visionSmolVLMVisionTransformer)speech_to_textSpeech2TextModel)speecht5SpeechT5Model)splinterSplinterModelsqueezebertSqueezeBertModel)stablelmStableLmModel)
starcoder2Starcoder2ModelswiftformerSwiftFormerModelswin	SwinModelswin2srSwin2SRModelswinv2Swinv2Model)switch_transformersSwitchTransformersModel)t5T5Model)t5gemmaT5GemmaModeltable-transformerTableTransformerModel)tapas
TapasModel)textnetTextNetModel)time_series_transformerTimeSeriesTransformerModel)timesfmTimesFmModeltimesformerTimesformerModeltimm_backboneTimmBackbonetimm_wrapperr>  )trajectory_transformerTrajectoryTransformerModel)
transfo-xlTransfoXLModel)tvlt	TvltModel)tvpTvpModel)udop	UdopModel)umt5	UMT5Model)	unispeechUniSpeechModel)unispeech-satUniSpeechSatModel)univnetUnivNetModelvanVanModel)
vaultgemmaVaultGemmaModel)video_llavaVideoLlavaModelvideomaeVideoMAEModel)vilt	ViltModel)vipllavaVipLlavaModel)zvision-text-dual-encoderVisionTextDualEncoderModel)visual_bertVisualBertModelvitViTModel
vit_hybridViTHybridModelvit_maeViTMAEModelvit_msnViTMSNModelvitdetVitDetModelvits	VitsModelvivit
VivitModel)vjepa2VJEPA2ModelvoxtralVoxtralForConditionalGeneration)voxtral_encoderVoxtralEncoder)wav2vec2Wav2Vec2Model)wav2vec2-bertWav2Vec2BertModel)wav2vec2-conformerWav2Vec2ConformerModel)wavlm
WavLMModel)whisperWhisperModel)xclip
XCLIPModel)xcodecXcodecModel)xglm	XGLMModelxlmXLMModel)xlm-prophetnetXLMProphetNetModelxlm-robertaXLMRobertaModelxlm-roberta-xlXLMRobertaXLModel)xlnet
XLNetModel)xlstm
xLSTMModel)xmod	XmodModelyolos
YolosModel)yoso	YosoModel)zamba
ZambaModel)zamba2Zamba2Model)X)r   AlbertForPreTrainingr6   BartForConditionalGeneration)r<   BertForPreTraining)rA   BigBirdForPreTrainingrW   BloomForCausalLMr_   CamembertForMaskedLMcolpaliColPaliForRetrieval)colqwen2ColQwen2ForRetrievalr   CTRLLMHeadModelr   Data2VecTextForMaskedLMr   DebertaForMaskedLMr   DebertaV2ForMaskedLMr   DistilBertForMaskedLM)r   ElectraForPreTraining)r   ErnieForPreTrainingr
  EvollaForProteinText2Textr  Exaone4ForCausalLMr  FalconMambaForCausalLMr  FlaubertWithLMHeadModel)r  FlavaForPreTrainingr!  !Florence2ForConditionalGeneration)r#  FNetForPreTrainingr(  FSMTForConditionalGeneration)r*  FunnelForPreTrainingr3  Gemma3ForConditionalGenerationrT  GPT2LMHeadModelrV  r  rW  GPTBigCodeForCausalLMrc  )ry  HieraForPreTrainingr  IBertForMaskedLMr  IdeficsForVisionText2Textr   Idefics2ForConditionalGenerationr   Idefics3ForConditionalGenerationr  JanusForConditionalGenerationr  LayoutLMForMaskedLMr  LlavaForConditionalGenerationr  !LlavaNextForConditionalGenerationr  &LlavaNextVideoForConditionalGenerationr  &LlavaOnevisionForConditionalGenerationr  LongformerForMaskedLMr  LukeForMaskedLM)r  LxmertForPreTrainingr  MambaForCausalLMr  Mamba2ForCausalLMr  MegaForMaskedLM)r  MegatronBertForPreTrainingr   Mistral3ForConditionalGenerationr  MllamaForConditionalGeneration)r  MobileBertForPreTrainingr  MPNetForMaskedLMr  MptForCausalLMr!  MraForMaskedLMr)  MvpForConditionalGeneration)r0  NezhaForPreTrainingr2  NllbMoeForConditionalGenerationrE  OpenAIGPTLMHeadModelrO  !PaliGemmaForConditionalGenerationqwen2_audio"Qwen2AudioForConditionalGenerationr  r  RobertaForMaskedLMr  RobertaPreLayerNormForMaskedLM)r  RoCBertForPreTrainingr  RwkvForCausalLM)r  SplinterForPreTrainingr  SqueezeBertForMaskedLMr	  *SwitchTransformersForConditionalGenerationr  T5ForConditionalGenerationr  T5GemmaForConditionalGenerationr  TapasForMaskedLMr$  TransfoXLLMHeadModel)r&  TvltForPreTraining)r.  UniSpeechForPreTraining)r0  UniSpeechSatForPreTrainingr9  "VideoLlavaForConditionalGeneration)r<  VideoMAEForPreTrainingr@   VipLlavaForConditionalGeneration)rC  VisualBertForPreTraining)rL  ViTMAEForPreTrainingr\  )ra  Wav2Vec2ForPreTraining)re  Wav2Vec2ConformerForPreTrainingrr  XLMWithLMHeadModelrw  XLMRobertaForMaskedLMrz  XLMRobertaXLForMaskedLMr|  XLNetLMHeadModelr~  xLSTMForCausalLMr  XmodForMaskedLM)Tr   AlbertForMaskedLMr  r<   BertForMaskedLMrA   BigBirdForMaskedLMrC   &BigBirdPegasusForConditionalGenerationrN   'BlenderbotSmallForConditionalGenerationr  r  r{   CodeGenForCausalLMr   ConvBertForMaskedLMr   CpmAntForCausalLMr  r  r  r  r   DiaForConditionalGenerationr  r   ElectraForMaskedLMzencoder-decoderEncoderDecoderModelr   ErnieForMaskedLMr  EsmForMaskedLMr  r  r  r#  FNetForMaskedLMr  r*  FunnelForMaskedLMr?  GitForCausalLMr  r  r  rY  GPTNeoForCausalLMr[  GPTNeoXForCausalLMr]  GPTNeoXJapaneseForCausalLMra  GPTJForCausalLMrc  r  r  r  LEDForConditionalGenerationr  r  LongT5ForConditionalGenerationr  r  M2M100ForConditionalGenerationr  r  r  MarianMTModelr  r  MegatronBertForCausalLMr  MobileBertForMaskedLMr  !MoonshineForConditionalGenerationr  r  r  r  r0  NezhaForMaskedLMr  r5  NystromformerForMaskedLMr  r\   PegasusXForConditionalGenerationrp  PLBartForConditionalGeneration	pop2piano!Pop2PianoForConditionalGenerationr|  QDQBertForMaskedLMr  ReformerModelWithLMHeadr  RemBertForMaskedLMr  r  r  RoCBertForMaskedLMr  RoFormerForMaskedLMr  r  #Speech2TextForConditionalGenerationr  r  r  r  r  r  ra  Wav2Vec2ForMaskedLMri  WhisperForConditionalGenerationr  r  r  r  r  r  YosoForMaskedLM))r#   ApertusForCausalLM)r%   ArceeForCausalLM)r)   AriaTextForCausalLM)r1   BambaForCausalLM)r6   BartForCausalLM)r<   BertLMHeadModel)r>   BertGenerationDecoder)rA   BigBirdForCausalLM)rC   BigBirdPegasusForCausalLM)rE   BioGptForCausalLM)rJ   BitNetForCausalLM)rL   BlenderbotForCausalLM)rN   BlenderbotSmallForCausalLMr  )rY   BltForCausalLM)r_   CamembertForCausalLM)ry   LlamaForCausalLMr)  )r}   CohereForCausalLM)r   Cohere2ForCausalLMr-  r  )r   Data2VecTextForCausalLM)r   DbrxForCausalLM)r   DeepseekV2ForCausalLM)r   DeepseekV3ForCausalLM)r   DiffLlamaForCausalLM)r   DogeForCausalLM)r   Dots1ForCausalLM)r   ElectraForCausalLM)r   Emu3ForCausalLM)r   ErnieForCausalLM)r  Ernie4_5ForCausalLM)r  Ernie4_5_MoeForCausalLMr  )r  FalconForCausalLM)r  FalconH1ForCausalLMr  )r  FlexOlmoForCausalLMr-  FuyuForCausalLM)r/  GemmaForCausalLM)r1  Gemma2ForCausalLMr  )r5  Gemma3ForCausalLMr7  Gemma3nForConditionalGeneration)r;  Gemma3nForCausalLMr=  )rA  GlmForCausalLM)rC  Glm4ForCausalLM)rE  Glm4MoeForCausalLMrR  GotOcr2ForConditionalGenerationr  r  r  r?  rA  rC  )r_  GptOssForCausalLMrE  )re  GraniteForCausalLM)rg  GraniteMoeForCausalLM)ri  GraniteMoeHybridForCausalLM)rk  GraniteMoeSharedForCausalLM)rs  HeliumForCausalLM)r}  HunYuanDenseV1ForCausalLM)r  HunYuanMoEV1ForCausalLM)r  JambaForCausalLM)r  JetMoeForCausalLM)r  Lfm2ForCausalLM)r  r  )r  Llama4ForCausalLM)r  r  )r  LongcatFlashForCausalLMr  r  )r  MarianForCausalLM)r  MBartForCausalLM)r  MegaForCausalLMrO  )r  MiniMaxForCausalLM)r  MinistralForCausalLM)r  MistralForCausalLM)r  MixtralForCausalLM)r  MllamaForCausalLM)r  ModernBertDecoderForCausalLM)r  MoshiForCausalLMr  )r%  MusicgenForCausalLM)r'  MusicgenMelodyForCausalLM)r)  MvpForCausalLM)r.  NemotronForCausalLM)r7  OlmoForCausalLM)r9  Olmo2ForCausalLM)r;  Olmo3ForCausalLM)r=  OlmoeForCausalLM)rC  OpenLlamaForCausalLMr  )rG  OPTForCausalLM)rZ  PegasusForCausalLM)rd  PersimmonForCausalLM)rf  PhiForCausalLM)rh  Phi3ForCausalLM)rj  Phi4MultimodalForCausalLM)rl  PhimoeForCausalLM)rp  PLBartForCausalLM)ru  ProphetNetForCausalLM)r|  QDQBertLMHeadModel)r~  Qwen2ForCausalLM)r  Qwen2MoeForCausalLM)r  Qwen3ForCausalLM)r  Qwen3MoeForCausalLM)r  Qwen3NextForCausalLM)r  RecurrentGemmaForCausalLMrb  )r  RemBertForCausalLM)r  RobertaForCausalLM)r  RobertaPreLayerNormForCausalLM)r  RoCBertForCausalLM)r  RoFormerForCausalLMr  )r  SeedOssForCausalLM)r  SmolLM3ForCausalLM)speech_to_text_2Speech2Text2ForCausalLM)r  StableLmForCausalLM)r  Starcoder2ForCausalLMr  )trocrTrOCRForCausalLM)r7  VaultGemmaForCausalLM)ri  WhisperForCausalLM)ro  XGLMForCausalLMr  )rt  XLMProphetNetForCausalLM)rw  XLMRobertaForCausalLM)rz  XLMRobertaXLForCausalLMr  r  )r  XmodForCausalLM)r  ZambaForCausalLM)r  Zamba2ForCausalLM);r   r8   rG   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r%  rO  rx  r  r  r  )r  Llama4VisionModelr  )r  MllamaVisionModelr	  r  r  r  r+  rr  rw  r  r  r  r  r  r   r  r  r  r  r  r   r4  r;  rE  rH  rK  rN  rQ  rW  r  ))r   DeiTForMaskedImageModeling)r&  FocalNetForMaskedImageModeling)r  SwinForMaskedImageModeling)r  Swinv2ForMaskedImageModeling)rF  ViTForMaskedImageModeling)r  ImageGPTForCausalImageModeling)-)r9   BeitForImageClassification)rH   BitForImageClassification)rm   CLIPForImageClassification)r   ConvNextForImageClassification)r    ConvNextV2ForImageClassification)r   CvtForImageClassification)r   $Data2VecVisionForImageClassification)r   )DeiTForImageClassification%DeiTForImageClassificationWithTeacher)r   DinatForImageClassification)r   Dinov2ForImageClassification)r   )Dinov2WithRegistersForImageClassification)r   DonutSwinForImageClassification)r   )%EfficientFormerForImageClassification0EfficientFormerForImageClassificationWithTeacher)r   "EfficientNetForImageClassification)r&  FocalNetForImageClassification)rv  HGNetV2ForImageClassification)ry  HieraForImageClassification)r  IJepaForImageClassification)r  ImageGPTForImageClassification)r  )LevitForImageClassification&LevitForImageClassificationWithTeacher)r  MetaClip2ForImageClassification)r
  !MobileNetV1ForImageClassification)r  !MobileNetV2ForImageClassification)r  MobileViTForImageClassification)r  !MobileViTV2ForImageClassification)r,  NatForImageClassification)r^  )&PerceiverForImageClassificationLearned&PerceiverForImageClassificationFourier-PerceiverForImageClassificationConvProcessing)rs   PoolFormerForImageClassification)rx  PvtForImageClassification)rz  PvtV2ForImageClassification)r  RegNetForImageClassification)r  ResNetForImageClassification)r  SegformerForImageClassification)shieldgemma2"ShieldGemma2ForImageClassification)r  SiglipForImageClassification)r  Siglip2ForImageClassification)r  !SwiftFormerForImageClassification)r  SwinForImageClassification)r  Swinv2ForImageClassification)r  TextNetForImageClassification)r!  !TimmWrapperForImageClassification)r5  VanForImageClassification)rF  ViTForImageClassification)rI  ViTHybridForImageClassification)rO  ViTMSNForImageClassificationr   DetrForSegmentation))r9   BeitForSemanticSegmentation)r   %Data2VecVisionForSemanticSegmentation)r   DPTForSemanticSegmentation)r  "MobileNetV2ForSemanticSegmentation)r   MobileViTForSemanticSegmentation)r  "MobileViTV2ForSemanticSegmentation)r   SegformerForSemanticSegmentation)upernetUperNetForSemanticSegmentationr  !MaskFormerForInstanceSegmentation)r!  )eomtEomtForUniversalSegmentation)r  #Mask2FormerForUniversalSegmentationr,  )rA  !OneFormerForUniversalSegmentation))r  !TimesformerForVideoClassification)r<  VideoMAEForVideoClassification)rX  VivitForVideoClassification)rZ  VJEPA2ForVideoClassification)rQ   BlipForConditionalGenerationrS   Blip2ForConditionalGenerationrc   !ChameleonForConditionalGenerationr=  r  r  r  $InstructBlipForConditionalGeneration)r  )InstructBlipVideoForConditionalGenerationr  Kosmos2ForConditionalGenerationr  !Kosmos2_5ForConditionalGenerationr  r  r  r  r  r  rI  Ovis2ForConditionalGenerationr  
pix2struct"Pix2StructForConditionalGenerationr  "Qwen2_5_VLForConditionalGenerationr  Qwen2VLForConditionalGenerationr  Qwen3VLForConditionalGenerationr  "Qwen3VLMoeForConditionalGenerationr
  r  zvision-encoder-decoderVisionEncoderDecoderModelr  )0)r'   AriaForConditionalGeneration)r/   !AyaVisionForConditionalGenerationr6  r8  r:  )r   %Cohere2VisionForConditionalGeneration)r   "DeepseekVLForConditionalGeneration)r   (DeepseekVLHybridForConditionalGeneration)r   Emu3ForConditionalGenerationr  r  r  r  r  r=  )rG  Glm4vForConditionalGeneration)rI   Glm4vMoeForConditionalGenerationr  r  r  r  r<  )r   InternVLForConditionalGenerationr  r?  rA  )r  Lfm2VlForConditionalGenerationr  r  r  r  r  r  r  rC  r  )rb  $PerceptionLMForConditionalGenerationrE  )rn  r  rH  rJ  rL  rN  )r  r  )r  SmolVLMForConditionalGeneration)r*  UdopForConditionalGenerationr  rP  ).r  r  r!  r#  r  r+  r  r  r  r  r1  r5  r7  r  r9  r;  r  r  r  r  r  MBartForConditionalGenerationr  )r  MegatronBertForMaskedLMrQ  )r  ModernBertForMaskedLMr  r  r  rU  rW  )r^  PerceiverForMaskedLMr`  )r  ReformerForMaskedLMrd  r  r  rf  rh  r  r  rl  r  r  r  r  rp  )
)r   !ConditionalDetrForObjectDetection)r   DFineForObjectDetection)r   DabDetrForObjectDetection)r    DeformableDetrForObjectDetection)r   DetaForObjectDetection)r   DetrForObjectDetection)r  RTDetrForObjectDetection)r  RTDetrV2ForObjectDetection)r  "TableTransformerForObjectDetection)r  YolosForObjectDetection))ro  GroundingDinoForObjectDetection)r  !MMGroundingDinoForObjectDetectionr?  )rK  Owlv2ForObjectDetection)rM  OwlViTForObjectDetection))depth_anythingDepthAnythingForDepthEstimation)r   DepthProForDepthEstimation)r   DPTForDepthEstimation)rP  GLPNForDepthEstimation)prompt_depth_anything%PromptDepthAnythingForDepthEstimation)zoedepthZoeDepthForDepthEstimation)r  r%  )rL   "BlenderbotForConditionalGenerationr'  r3  r  rc  granite_speech%GraniteSpeechForConditionalGenerationrG  rI  rK  rM  r_  )r#  MT5ForConditionalGenerationr  r  )rZ  PegasusForConditionalGenerationrY  r[  )ru  "ProphetNetForConditionalGenerationr  )r  SeamlessM4TForTextToText)r  SeamlessM4Tv2ForTextToTextr  r  r  )r,  UMT5ForConditionalGenerationr\  )rt  %XLMProphetNetForConditionalGeneration)r/  r}  )r  *KyutaiSpeechToTextForConditionalGenerationrS  r]  )r  SeamlessM4TForSpeechToText)r  SeamlessM4Tv2ForSpeechToText)zspeech-encoder-decoderSpeechEncoderDecoderModelrj  )r  SpeechT5ForSpeechToTextrn  )s)r   AlbertForSequenceClassification)r%   ArceeForSequenceClassification)r6   BartForSequenceClassification)r<   BertForSequenceClassification)rA    BigBirdForSequenceClassification)rC   'BigBirdPegasusForSequenceClassification)rE   BioGptForSequenceClassification)rW   BloomForSequenceClassification)r_   "CamembertForSequenceClassification)ra   CanineForSequenceClassification)ry   LlamaForSequenceClassification)r   !ConvBertForSequenceClassification)r   CTRLForSequenceClassification)r   %Data2VecTextForSequenceClassification)r    DebertaForSequenceClassification)r   "DebertaV2ForSequenceClassification)r   #DeepseekV2ForSequenceClassification)r   #DeepseekV3ForSequenceClassification)r   "DiffLlamaForSequenceClassification)r   #DistilBertForSequenceClassification)r   DogeForSequenceClassification)r    ElectraForSequenceClassification)r   ErnieForSequenceClassification)r  ErnieMForSequenceClassification)r  EsmForSequenceClassification)r   Exaone4ForSequenceClassification)r  FalconForSequenceClassification)r  !FlaubertForSequenceClassification)r#  FNetForSequenceClassification)r*  FunnelForSequenceClassification)r/  GemmaForSequenceClassification)r1  Gemma2ForSequenceClassification)r3  Gemma3ForSequenceClassification)r5  #Gemma3TextForSequenceClassification)rA  GlmForSequenceClassification)rC  Glm4ForSequenceClassification)rT  GPT2ForSequenceClassification)rV  r  )rW  #GPTBigCodeForSequenceClassification)rY  GPTNeoForSequenceClassification)r[   GPTNeoXForSequenceClassification)r_  GptOssForSequenceClassification)ra  GPTJForSequenceClassification)rs  HeliumForSequenceClassification)r}  'HunYuanDenseV1ForSequenceClassification)r  %HunYuanMoEV1ForSequenceClassification)r  IBertForSequenceClassification)r  JambaForSequenceClassification)r  JetMoeForSequenceClassification)r  !LayoutLMForSequenceClassification)r  #LayoutLMv2ForSequenceClassification)r  #LayoutLMv3ForSequenceClassification)r  LEDForSequenceClassification)r  LiltForSequenceClassification)r  r  )r  #LongformerForSequenceClassification)r  LukeForSequenceClassification)r  !MarkupLMForSequenceClassification)r  MBartForSequenceClassification)r  MegaForSequenceClassification)r  %MegatronBertForSequenceClassification)r   MiniMaxForSequenceClassification)r  "MinistralForSequenceClassification)r   MistralForSequenceClassification)r   MixtralForSequenceClassification)r  #MobileBertForSequenceClassification)r  #ModernBertForSequenceClassification)r  *ModernBertDecoderForSequenceClassification)r  MPNetForSequenceClassification)r  MptForSequenceClassification)r!  MraForSequenceClassification)r#  MT5ForSequenceClassification)r)  MvpForSequenceClassification)r.  !NemotronForSequenceClassification)r0  NezhaForSequenceClassification)r5  &NystromformerForSequenceClassification)rC  "OpenLlamaForSequenceClassification)rE  "OpenAIGPTForSequenceClassification)rG  OPTForSequenceClassification)r^  "PerceiverForSequenceClassification)rd  "PersimmonForSequenceClassification)rf  PhiForSequenceClassification)rh  Phi3ForSequenceClassification)rl  PhimoeForSequenceClassification)rp  PLBartForSequenceClassification)r|   QDQBertForSequenceClassification)r~  Qwen2ForSequenceClassification)r  !Qwen2MoeForSequenceClassification)r  Qwen3ForSequenceClassification)r  !Qwen3MoeForSequenceClassification)r  "Qwen3NextForSequenceClassification)r  !ReformerForSequenceClassification)r   RemBertForSequenceClassification)r   RobertaForSequenceClassification)r  ,RobertaPreLayerNormForSequenceClassification)r   RoCBertForSequenceClassification)r  !RoFormerForSequenceClassification)r   SeedOssForSequenceClassification)r   SmolLM3ForSequenceClassification)r  $SqueezeBertForSequenceClassification)r  !StableLmForSequenceClassification)r  #Starcoder2ForSequenceClassification)r  T5ForSequenceClassification)r   T5GemmaForSequenceClassification)r  TapasForSequenceClassification)r$  "TransfoXLForSequenceClassification)r,  UMT5ForSequenceClassification)rr  XLMForSequenceClassification)rw  #XLMRobertaForSequenceClassification)rz  %XLMRobertaXLForSequenceClassification)r|  XLNetForSequenceClassification)r  XmodForSequenceClassification)r  YosoForSequenceClassification)r  ZambaForSequenceClassification)r  Zamba2ForSequenceClassification)O)r   AlbertForQuestionAnswering)r%   ArceeForQuestionAnswering)r6   BartForQuestionAnswering)r<   BertForQuestionAnswering)rA   BigBirdForQuestionAnswering)rC   "BigBirdPegasusForQuestionAnswering)rW   BloomForQuestionAnswering)r_   CamembertForQuestionAnswering)ra   CanineForQuestionAnswering)r   ConvBertForQuestionAnswering)r    Data2VecTextForQuestionAnswering)r   DebertaForQuestionAnswering)r   DebertaV2ForQuestionAnswering)r   DiffLlamaForQuestionAnswering)r   DistilBertForQuestionAnswering)r   ElectraForQuestionAnswering)r   ErnieForQuestionAnswering)r  ErnieMForQuestionAnswering)r  Exaone4ForQuestionAnswering)r  FalconForQuestionAnswering)r  "FlaubertForQuestionAnsweringSimple)r#  FNetForQuestionAnswering)r*  FunnelForQuestionAnswering)rV  GPT2ForQuestionAnswering)rY  GPTNeoForQuestionAnswering)r[  GPTNeoXForQuestionAnswering)ra  GPTJForQuestionAnswering)r  IBertForQuestionAnsweringr  LayoutLMv2ForQuestionAnsweringr  LayoutLMv3ForQuestionAnswering)r  LEDForQuestionAnswering)r  LiltForQuestionAnswering)r  LlamaForQuestionAnswering)r  LongformerForQuestionAnswering)r  LukeForQuestionAnswering)r  LxmertForQuestionAnswering)r  MarkupLMForQuestionAnswering)r  MBartForQuestionAnswering)r  MegaForQuestionAnswering)r   MegatronBertForQuestionAnswering)r  MiniMaxForQuestionAnswering)r  MinistralForQuestionAnswering)r  MistralForQuestionAnswering)r  MixtralForQuestionAnswering)r  MobileBertForQuestionAnswering)r  ModernBertForQuestionAnswering)r  MPNetForQuestionAnswering)r  MptForQuestionAnswering)r!  MraForQuestionAnswering)r#  MT5ForQuestionAnswering)r)  MvpForQuestionAnswering)r.  NemotronForQuestionAnswering)r0  NezhaForQuestionAnswering)r5  !NystromformerForQuestionAnswering)rG  OPTForQuestionAnswering)r|  QDQBertForQuestionAnswering)r~  Qwen2ForQuestionAnswering)r  Qwen2MoeForQuestionAnswering)r  Qwen3ForQuestionAnswering)r  Qwen3MoeForQuestionAnswering)r  Qwen3NextForQuestionAnswering)r  ReformerForQuestionAnswering)r  RemBertForQuestionAnswering)r  RobertaForQuestionAnswering)r  'RobertaPreLayerNormForQuestionAnswering)r  RoCBertForQuestionAnswering)r  RoFormerForQuestionAnswering)r  SeedOssForQuestionAnswering)r  SmolLM3ForQuestionAnswering)r  SplinterForQuestionAnswering)r  SqueezeBertForQuestionAnswering)r  T5ForQuestionAnswering)r,  UMT5ForQuestionAnswering)rr  XLMForQuestionAnsweringSimple)rw  XLMRobertaForQuestionAnswering)rz   XLMRobertaXLForQuestionAnswering)r|  XLNetForQuestionAnsweringSimple)r  XmodForQuestionAnswering)r  YosoForQuestionAnswering)r  TapasForQuestionAnswering))rQ   BlipForQuestionAnsweringr8  )r>  ViltForQuestionAnswering))r  LayoutLMForQuestionAnsweringr  r  )Y)r   AlbertForTokenClassification)r#   ApertusForTokenClassification)r%   ArceeForTokenClassification)r<   BertForTokenClassification)rA   BigBirdForTokenClassification)rE   BioGptForTokenClassification)rW   BloomForTokenClassification)r]   BrosForTokenClassification)r_   CamembertForTokenClassification)ra   CanineForTokenClassification)r   ConvBertForTokenClassification)r   "Data2VecTextForTokenClassification)r   DebertaForTokenClassification)r   DebertaV2ForTokenClassification)r    DeepseekV3ForTokenClassification)r   DiffLlamaForTokenClassification)r    DistilBertForTokenClassification)r   ElectraForTokenClassification)r   ErnieForTokenClassification)r  ErnieMForTokenClassification)r  EsmForTokenClassification)r  Exaone4ForTokenClassification)r  FalconForTokenClassification)r  FlaubertForTokenClassification)r#  FNetForTokenClassification)r*  FunnelForTokenClassification)r/  GemmaForTokenClassification)r1  Gemma2ForTokenClassification)rA  GlmForTokenClassification)rC  Glm4ForTokenClassification)rT  GPT2ForTokenClassification)rV  rp  )rW   GPTBigCodeForTokenClassification)rY  GPTNeoForTokenClassification)r[  GPTNeoXForTokenClassification)r_  GptOssForTokenClassification)rs  HeliumForTokenClassification)r  IBertForTokenClassification)r  LayoutLMForTokenClassification)r   LayoutLMv2ForTokenClassification)r   LayoutLMv3ForTokenClassification)r  LiltForTokenClassification)r  LlamaForTokenClassification)r   LongformerForTokenClassification)r  LukeForTokenClassification)r  MarkupLMForTokenClassification)r  MegaForTokenClassification)r  "MegatronBertForTokenClassification)r  MiniMaxForTokenClassification)r  MinistralForTokenClassification)r  MistralForTokenClassification)r  MixtralForTokenClassification)r   MobileBertForTokenClassification)r   ModernBertForTokenClassification)r  MPNetForTokenClassification)r  MptForTokenClassification)r!  MraForTokenClassification)r#  MT5ForTokenClassification)r.  NemotronForTokenClassification)r0  NezhaForTokenClassification)r5  #NystromformerForTokenClassification)rd  PersimmonForTokenClassification)rf  PhiForTokenClassification)rh  Phi3ForTokenClassification)r|  QDQBertForTokenClassification)r~  Qwen2ForTokenClassification)r  Qwen2MoeForTokenClassification)r  Qwen3ForTokenClassification)r  Qwen3MoeForTokenClassification)r  Qwen3NextForTokenClassification)r  RemBertForTokenClassification)r  RobertaForTokenClassification)r  )RobertaPreLayerNormForTokenClassification)r  RoCBertForTokenClassification)r  RoFormerForTokenClassification)r  SeedOssForTokenClassification)r  SmolLM3ForTokenClassification)r  !SqueezeBertForTokenClassification)r  StableLmForTokenClassification)r   Starcoder2ForTokenClassification)r  T5ForTokenClassification)r  T5GemmaForTokenClassification)r,  UMT5ForTokenClassification)rr  XLMForTokenClassification)rw   XLMRobertaForTokenClassification)rz  "XLMRobertaXLForTokenClassification)r|  XLNetForTokenClassification)r  XmodForTokenClassification)r  YosoForTokenClassification)')r   AlbertForMultipleChoice)r<   BertForMultipleChoice)rA   BigBirdForMultipleChoice)r_   CamembertForMultipleChoice)ra   CanineForMultipleChoice)r   ConvBertForMultipleChoice)r   Data2VecTextForMultipleChoice)r   DebertaV2ForMultipleChoice)r   DistilBertForMultipleChoice)r   ElectraForMultipleChoice)r   ErnieForMultipleChoice)r  ErnieMForMultipleChoice)r  FlaubertForMultipleChoice)r#  FNetForMultipleChoice)r*  FunnelForMultipleChoice)r  IBertForMultipleChoice)r  LongformerForMultipleChoice)r  LukeForMultipleChoice)r  MegaForMultipleChoice)r  MegatronBertForMultipleChoice)r  MobileBertForMultipleChoice)r  ModernBertForMultipleChoice)r  MPNetForMultipleChoice)r!  MraForMultipleChoice)r0  NezhaForMultipleChoice)r5  NystromformerForMultipleChoice)r|  QDQBertForMultipleChoice)r  RemBertForMultipleChoice)r  RobertaForMultipleChoice)r  $RobertaPreLayerNormForMultipleChoice)r  RoCBertForMultipleChoice)r  RoFormerForMultipleChoice)r  SqueezeBertForMultipleChoice)rr  XLMForMultipleChoice)rw  XLMRobertaForMultipleChoice)rz  XLMRobertaXLForMultipleChoice)r|  XLNetForMultipleChoice)r  XmodForMultipleChoice)r  YosoForMultipleChoice))r<   BertForNextSentencePrediction)r   ErnieForNextSentencePrediction)r#  FNetForNextSentencePrediction)r  %MegatronBertForNextSentencePrediction)r  #MobileBertForNextSentencePrediction)r0  NezhaForNextSentencePrediction)r|   QDQBertForNextSentencePrediction))r+   ASTForAudioClassification)r   &Data2VecAudioForSequenceClassification)r{  HubertForSequenceClassification)r  SEWForSequenceClassification)r  SEWDForSequenceClassification)r.  "UniSpeechForSequenceClassification)r0  %UniSpeechSatForSequenceClassification)ra  !Wav2Vec2ForSequenceClassification)rc  %Wav2Vec2BertForSequenceClassification)re  *Wav2Vec2ConformerForSequenceClassification)rg  WavLMForSequenceClassification)ri  WhisperForAudioClassification))r   Data2VecAudioForCTC)r{  HubertForCTC)r  MCTCTForCTCrQ  )r  	SEWForCTC)r  
SEWDForCTC)r.  UniSpeechForCTC)r0  UniSpeechSatForCTC)ra  Wav2Vec2ForCTC)rc  Wav2Vec2BertForCTC)re  Wav2Vec2ConformerForCTC)rg  WavLMForCTC))r   (Data2VecAudioForAudioFrameClassification)r0  'UniSpeechSatForAudioFrameClassification)ra  #Wav2Vec2ForAudioFrameClassification)rc  'Wav2Vec2BertForAudioFrameClassification)re  ,Wav2Vec2ConformerForAudioFrameClassification)rg   WavLMForAudioFrameClassification))r   Data2VecAudioForXVector)r0  UniSpeechSatForXVector)ra  Wav2Vec2ForXVector)rc  Wav2Vec2BertForXVector)re  Wav2Vec2ConformerForXVector)rg  WavLMForXVectorr  )r  SpeechT5ForTextToSpeech)r3   r   )r  r  r  )r%   MusicgenForConditionalGeneration)r'  &MusicgenMelodyForConditionalGeneration)qwen2_5_omni#Qwen2_5OmniForConditionalGeneration)qwen3_omni_moe$Qwen3OmniMoeForConditionalGeneration)r  SeamlessM4TForTextToSpeech)r  SeamlessM4Tv2ForTextToSpeechrT  )
r   r    rP   )rS   Blip2ForImageTextRetrievalre   rl   rt   r  r  r  ))r9   BeitBackbone)rH   BitBackbone)r   ConvNextBackbone)r   ConvNextV2Backbone)r   DinatBackbone)r   Dinov2Backbone)r   Dinov2WithRegistersBackbone)r&  FocalNetBackboneru  )ry  HieraBackbone)r  MaskFormerSwinBackbone)r,  NatBackbone)rz  PvtV2Backbone)r  ResNetBackbone)rt_detr_resnetRTDetrResNetBackbone)r  SwinBackbone)r  Swinv2Backbone)r  TextNetBackboner  )rR  VitDetBackbone)vitpose_backboneVitPoseBackbone)r   )r   r   r  r  )r  r  r  )
superpointSuperPointForKeypointDetection))r   !EfficientLoFTRForKeypointMatchingr  )	superglueSuperGlueForKeypointMatching)r   r;   r@   ro   r   r   r   r   r   )r   Emu3TextModelr  r  )r  r  r  )r  MllamaTextModelr  )r#  MT5EncoderModelr4  r  r  r  r  r  r  r  )r  T5EncoderModel)r  T5GemmaEncoderModel)r,  UMT5EncoderModelrq  rv  ry  )rV  'PatchTSMixerForTimeSeriesClassification)rX  PatchTSTForClassification)rV  PatchTSMixerForRegression)rX  PatchTSTForRegression)r  TimesFmModelForPrediction)r  Swin2SRForImageSuperResolutionr   c                   @      e Zd ZeZdS )AutoModelForMaskGenerationN)r   r   r   !MODEL_FOR_MASK_GENERATION_MAPPING_model_mappingr   r   r   r   r,        r,  c                   @   r+  )AutoModelForKeypointDetectionN)r   r   r   $MODEL_FOR_KEYPOINT_DETECTION_MAPPINGr.  r   r   r   r   r0    r/  r0  c                   @   r+  )AutoModelForKeypointMatchingN)r   r   r   #MODEL_FOR_KEYPOINT_MATCHING_MAPPINGr.  r   r   r   r   r2    r/  r2  c                   @   r+  )AutoModelForTextEncodingN)r   r   r   MODEL_FOR_TEXT_ENCODING_MAPPINGr.  r   r   r   r   r4    r/  r4  c                   @   r+  )AutoModelForImageToImageN)r   r   r    MODEL_FOR_IMAGE_TO_IMAGE_MAPPINGr.  r   r   r   r   r6    r/  r6  c                   @   r+  )	AutoModelN)r   r   r   MODEL_MAPPINGr.  r   r   r   r   r8    r/  r8  c                   @   r+  )AutoModelForPreTrainingN)r   r   r   MODEL_FOR_PRETRAINING_MAPPINGr.  r   r   r   r   r:    r/  r:  pretraining)head_docc                   @   r+  )_AutoModelWithLMHeadN)r   r   r   MODEL_WITH_LM_HEAD_MAPPINGr.  r   r   r   r   r>    r/  r>  zlanguage modelingc                       D   e Zd ZeZeded  deee	j
e f ddf fddZ  ZS )AutoModelForCausalLMclspretrained_model_name_or_pathreturnr   c                       t  j|g|R i |S Nsuperfrom_pretrainedrB  rC  
model_argskwargs	__class__r   r   rI       z$AutoModelForCausalLM.from_pretrained)r   r   r   MODEL_FOR_CAUSAL_LM_MAPPINGr.  classmethodtyper   strosPathLikerI  __classcell__r   r   rM  r   rA        rA  zcausal language modelingc                   @   r+  )AutoModelForMaskedLMN)r   r   r   MODEL_FOR_MASKED_LM_MAPPINGr.  r   r   r   r   rX    r/  rX  zmasked language modelingc                   @   r+  )AutoModelForSeq2SeqLMN)r   r   r   &MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPINGr.  r   r   r   r   rZ    r/  rZ  z&sequence-to-sequence language modelingzgoogle-t5/t5-base)r=  checkpoint_for_examplec                   @   r+  )"AutoModelForSequenceClassificationN)r   r   r   )MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPINGr.  r   r   r   r   r]    r/  r]  zsequence classificationc                   @   r+  )AutoModelForQuestionAnsweringN)r   r   r   $MODEL_FOR_QUESTION_ANSWERING_MAPPINGr.  r   r   r   r   r_    r/  r_  zquestion answeringc                   @   r+  )"AutoModelForTableQuestionAnsweringN)r   r   r   *MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPINGr.  r   r   r   r   ra    r/  ra  ztable question answeringzgoogle/tapas-base-finetuned-wtqc                   @   r+  )#AutoModelForVisualQuestionAnsweringN)r   r   r   +MODEL_FOR_VISUAL_QUESTION_ANSWERING_MAPPINGr.  r   r   r   r   rc    r/  rc  zvisual question answeringzdandelin/vilt-b32-finetuned-vqac                   @   r+  )%AutoModelForDocumentQuestionAnsweringN)r   r   r   -MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPINGr.  r   r   r   r   re    r/  re  zdocument question answeringz/impira/layoutlm-document-qa", revision="52e01b3c                   @   r+  )AutoModelForTokenClassificationN)r   r   r   &MODEL_FOR_TOKEN_CLASSIFICATION_MAPPINGr.  r   r   r   r   rg    r/  rg  ztoken classificationc                   @   r+  )AutoModelForMultipleChoiceN)r   r   r   !MODEL_FOR_MULTIPLE_CHOICE_MAPPINGr.  r   r   r   r   ri    r/  ri  zmultiple choicec                   @   r+  )"AutoModelForNextSentencePredictionN)r   r   r   *MODEL_FOR_NEXT_SENTENCE_PREDICTION_MAPPINGr.  r   r   r   r   rk    r/  rk  znext sentence predictionc                   @   r+  )AutoModelForImageClassificationN)r   r   r   &MODEL_FOR_IMAGE_CLASSIFICATION_MAPPINGr.  r   r   r   r   rm    r/  rm  zimage classificationc                   @   r+  )'AutoModelForZeroShotImageClassificationN)r   r   r   0MODEL_FOR_ZERO_SHOT_IMAGE_CLASSIFICATION_MAPPINGr.  r   r   r   r   ro    r/  ro  zzero-shot image classificationc                   @   r+  )AutoModelForImageSegmentationN)r   r   r   $MODEL_FOR_IMAGE_SEGMENTATION_MAPPINGr.  r   r   r   r   rq  %  r/  rq  zimage segmentationc                   @   r+  ) AutoModelForSemanticSegmentationN)r   r   r   'MODEL_FOR_SEMANTIC_SEGMENTATION_MAPPINGr.  r   r   r   r   rs  ,  r/  rs  zsemantic segmentationc                   @   r+  ) AutoModelForTimeSeriesPredictionN)r   r   r   (MODEL_FOR_TIME_SERIES_PREDICTION_MAPPINGr.  r   r   r   r   ru  5  r/  ru  ztime-series predictionc                   @   r+  )!AutoModelForUniversalSegmentationN)r   r   r   (MODEL_FOR_UNIVERSAL_SEGMENTATION_MAPPINGr.  r   r   r   r   rw  >  r/  rw  zuniversal image segmentationc                   @   r+  ) AutoModelForInstanceSegmentationN)r   r   r   'MODEL_FOR_INSTANCE_SEGMENTATION_MAPPINGr.  r   r   r   r   ry  G  r/  ry  zinstance segmentationc                   @   r+  )AutoModelForObjectDetectionN)r   r   r   "MODEL_FOR_OBJECT_DETECTION_MAPPINGr.  r   r   r   r   r{  P  r/  r{  zobject detectionc                   @   r+  )#AutoModelForZeroShotObjectDetectionN)r   r   r   ,MODEL_FOR_ZERO_SHOT_OBJECT_DETECTION_MAPPINGr.  r   r   r   r   r}  W  r/  r}  zzero-shot object detectionc                   @   r+  )AutoModelForDepthEstimationN)r   r   r   "MODEL_FOR_DEPTH_ESTIMATION_MAPPINGr.  r   r   r   r   r  `  r/  r  zdepth estimationc                   @   r+  )AutoModelForVideoClassificationN)r   r   r   &MODEL_FOR_VIDEO_CLASSIFICATION_MAPPINGr.  r   r   r   r   r  g  r/  r  zvideo classificationc                   @   r+  )_AutoModelForVision2SeqN)r   r   r   MODEL_FOR_VISION_2_SEQ_MAPPINGr.  r   r   r   r   r  o  r/  r  zvision-to-text modelingc                       r@  )AutoModelForImageTextToTextrB  rC  rD  r   c                    rE  rF  rG  rJ  rM  r   r   rI  z  rO  z+AutoModelForImageTextToText.from_pretrained)r   r   r   $MODEL_FOR_IMAGE_TEXT_TO_TEXT_MAPPINGr.  rQ  rR  r   rS  rT  rU  rI  rV  r   r   rM  r   r  v  rW  r  zimage-text-to-text modelingc                   @   r+  )AutoModelForAudioClassificationN)r   r   r   &MODEL_FOR_AUDIO_CLASSIFICATION_MAPPINGr.  r   r   r   r   r    r/  r  zaudio classificationc                   @   r+  )AutoModelForCTCN)r   r   r   MODEL_FOR_CTC_MAPPINGr.  r   r   r   r   r    r/  r  z%connectionist temporal classificationc                   @   r+  )AutoModelForSpeechSeq2SeqN)r   r   r   "MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPINGr.  r   r   r   r   r    r/  r  z,sequence-to-sequence speech-to-text modelingc                   @   r+  )$AutoModelForAudioFrameClassificationN)r   r   r   ,MODEL_FOR_AUDIO_FRAME_CLASSIFICATION_MAPPINGr.  r   r   r   r   r    r/  r  z"audio frame (token) classificationc                   @   r+  )AutoModelForAudioXVectorN)r   r   r   MODEL_FOR_AUDIO_XVECTOR_MAPPINGr.  r   r   r   r   r    r/  r  c                   @   r+  )AutoModelForTextToSpectrogramN)r   r   r   %MODEL_FOR_TEXT_TO_SPECTROGRAM_MAPPINGr.  r   r   r   r   r    r/  r  c                   @   r+  )AutoModelForTextToWaveformN)r   r   r   "MODEL_FOR_TEXT_TO_WAVEFORM_MAPPINGr.  r   r   r   r   r    r/  r  c                   @   r+  )AutoBackboneN)r   r   r   MODEL_FOR_BACKBONE_MAPPINGr.  r   r   r   r   r    r/  r  zaudio retrieval via x-vectorc                   @   r+  )AutoModelForMaskedImageModelingN)r   r   r   'MODEL_FOR_MASKED_IMAGE_MODELING_MAPPINGr.  r   r   r   r   r    r/  r  zmasked image modelingc                   @   r+  )AutoModelForAudioTokenizationN)r   r   r   $MODEL_FOR_AUDIO_TOKENIZATION_MAPPINGr.  r   r   r   r   r    r/  r  z$audio tokenization through codebooksc                       0   e Zd Ze fddZe fddZ  ZS )AutoModelWithLMHeadc                        t dt t j|fi |S NzThe class `AutoModelWithLMHead` is deprecated and will be removed in a future version. Please use `AutoModelForCausalLM` for causal language models, `AutoModelForMaskedLM` for masked language models and `AutoModelForSeq2SeqLM` for encoder-decoder models.warningswarnFutureWarningrH  from_configrB  configrL  rM  r   r   r    s
   zAutoModelWithLMHead.from_configc                    &   t dt t j|g|R i |S r  r  r  r  rH  rI  rJ  rM  r   r   rI    s
   z#AutoModelWithLMHead.from_pretrainedr   r   r   rQ  r  rI  rV  r   r   rM  r   r    s
    	r  c                       r  )AutoModelForVision2Seqc                    r  NzThe class `AutoModelForVision2Seq` is deprecated and will be removed in v5.0. Please use `AutoModelForImageTextToText` instead.r  r  rM  r   r   r    s
   z"AutoModelForVision2Seq.from_configc                    r  r  r  rJ  rM  r   r   rI    s
   z&AutoModelForVision2Seq.from_pretrainedr  r   r   rM  r   r    s
    r  )Yr  r  r  r  r  'MODEL_FOR_CAUSAL_IMAGE_MODELING_MAPPINGrP  r  rf  r  rn  MODEL_FOR_IMAGE_MAPPINGrr  r7  r1  r3  rz  r  rY  r-  rj  rl  r|  r;  r`  rt  r[  r^  r  rb  r5  r  r  rv  rh  rx  r  r  MODEL_FOR_RETRIEVAL_MAPPINGr  rd  r9  r?  rp  r~  ,MODEL_FOR_TIME_SERIES_CLASSIFICATION_MAPPING(MODEL_FOR_TIME_SERIES_REGRESSION_MAPPINGr8  r  r  r  r  r  rA  r  r  rm  rq  r6  ry  r0  r2  r,  r4  r  rX  ri  rk  r{  r:  r_  rs  rZ  r]  r  ra  r  r  ru  rg  rw  r  r  rc  re  r  ro  r}  r  )__doc__rT  r  collectionsr   typingr   r   utilsr   auto_factoryr   r	   r
   r   configuration_autor   
generationr   modeling_utilsr   r   
get_loggerr   loggerMODEL_MAPPING_NAMES#MODEL_FOR_PRETRAINING_MAPPING_NAMES MODEL_WITH_LM_HEAD_MAPPING_NAMES!MODEL_FOR_CAUSAL_LM_MAPPING_NAMESMODEL_FOR_IMAGE_MAPPING_NAMES-MODEL_FOR_MASKED_IMAGE_MODELING_MAPPING_NAMES-MODEL_FOR_CAUSAL_IMAGE_MODELING_MAPPING_NAMES,MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING_NAMES*MODEL_FOR_IMAGE_SEGMENTATION_MAPPING_NAMES-MODEL_FOR_SEMANTIC_SEGMENTATION_MAPPING_NAMES-MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING_NAMES.MODEL_FOR_UNIVERSAL_SEGMENTATION_MAPPING_NAMES,MODEL_FOR_VIDEO_CLASSIFICATION_MAPPING_NAMES$MODEL_FOR_VISION_2_SEQ_MAPPING_NAMES!MODEL_FOR_RETRIEVAL_MAPPING_NAMES*MODEL_FOR_IMAGE_TEXT_TO_TEXT_MAPPING_NAMES!MODEL_FOR_MASKED_LM_MAPPING_NAMES(MODEL_FOR_OBJECT_DETECTION_MAPPING_NAMES2MODEL_FOR_ZERO_SHOT_OBJECT_DETECTION_MAPPING_NAMES(MODEL_FOR_DEPTH_ESTIMATION_MAPPING_NAMES,MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING_NAMES(MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING_NAMES/MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING_NAMES*MODEL_FOR_QUESTION_ANSWERING_MAPPING_NAMES0MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING_NAMES1MODEL_FOR_VISUAL_QUESTION_ANSWERING_MAPPING_NAMES3MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING_NAMES,MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING_NAMES'MODEL_FOR_MULTIPLE_CHOICE_MAPPING_NAMES0MODEL_FOR_NEXT_SENTENCE_PREDICTION_MAPPING_NAMES,MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING_NAMESMODEL_FOR_CTC_MAPPING_NAMES2MODEL_FOR_AUDIO_FRAME_CLASSIFICATION_MAPPING_NAMES%MODEL_FOR_AUDIO_XVECTOR_MAPPING_NAMES+MODEL_FOR_TEXT_TO_SPECTROGRAM_MAPPING_NAMES(MODEL_FOR_TEXT_TO_WAVEFORM_MAPPING_NAMES6MODEL_FOR_ZERO_SHOT_IMAGE_CLASSIFICATION_MAPPING_NAMES MODEL_FOR_BACKBONE_MAPPING_NAMES'MODEL_FOR_MASK_GENERATION_MAPPING_NAMES*MODEL_FOR_KEYPOINT_DETECTION_MAPPING_NAMES)MODEL_FOR_KEYPOINT_MATCHING_MAPPING_NAMES%MODEL_FOR_TEXT_ENCODING_MAPPING_NAMES2MODEL_FOR_TIME_SERIES_CLASSIFICATION_MAPPING_NAMES.MODEL_FOR_TIME_SERIES_REGRESSION_MAPPING_NAMES.MODEL_FOR_TIME_SERIES_PREDICTION_MAPPING_NAMES&MODEL_FOR_IMAGE_TO_IMAGE_MAPPING_NAMES"MODEL_FOR_AUDIO_TOKENIZATION_NAMESr9  r;  r?  rP  r  rn  rp  rr  rt  rz  rx  r  r  r  r  rd  rf  rY  r  r  r|  r~  r  r[  r^  r`  rb  rh  rj  rl  r  r  r  r  r  r  r  r  r-  r1  r3  r5  r  r  rv  r7  r  r,  r0  r2  r4  r6  r8  r:  r>  rA  rX  rZ  r]  r_  ra  rc  re  rg  ri  rk  rm  ro  rq  rs  ru  rw  ry  r{  r}  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  __all__r   r   r   r   <module>   s
  
   ^Z AF	54#yU_-$




















