o
    	Ti.                     @   s  d dl mZmZ d dlmZ d dlmZmZmZm	Z	m
Z
mZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZm Z m!Z!m"Z"m#Z#m$Z$m%Z%m&Z&m'Z'm(Z(m)Z)m*Z*m+Z+m,Z,m-Z-m.Z.m/Z/m0Z0m1Z1m2Z2m3Z3m4Z4m5Z5m6Z6m7Z7m8Z8m9Z9m:Z:m;Z;m<Z<m=Z=m>Z>m?Z?m@Z@ dZAdZBe ZCdUddZDd	d
 ZEde
edfdeedfdeedfdeedfdeedfdeedfdeedfdeedfdeedfdeedfde"e#dfde"e#dfde"e#dfde)e*dfde)e*dfd e+e,dfd!e/e0dfd"e3e4d#fd$e3e4d%fd&e8e9dffD ]7\ZFZGZHZIeFd&krd'nd(ZJejKeFeJd)ZLeGeLjMeNeLjOP  d*d+d,d,d-d.ZQeHeQZReEeR eDeReLd/eI qd0e;e<dfd1eedffD ]/\ZFZGZHZIeKeFZLeGeLjMeNeLjOP  d*d+d,d,d-d+d,d2ZQeHeQZReEeR eDeReLd/eI qBeKd"ZLe3eLjMeNeLjOP  d3d+d,d,d-d.ZQe4eQZReDeReLd4d# eKd5ZLe8eLjMeNeLjOP  d3d+d,d,d-d.ZQe9eQZReDeReLd4 de"e$dfd"e3e5d#fd5e8e:dffD ]*\ZFZGZHZIeKeFZLeGeLjMeNeLjOP  d*d+d,d,d-d6d7ZQeHeQZReDeReLd/eI qd8ee	dfd9e?e@dffD ]-\ZFZGZHZIeKeFZLeGeLjMeNeLjOP  d:d,d,d,d;d<d*d d=d>
ZQeHeQZReDeReLd/eI qd?eefd@e-e.fdAe e!fdBe=e>fdCe%e&fdDe'e(fdEe6e7fdFe1e2ffD ]\ZFZGZHeKeFZSi ZTi ZUi ZVeGe-krfd*eTdG< eGe%e'e-fv rrd*eVdG< eGe%e'fv rdHeVdI< dJeVdK< eGe6e1fv rdLeTdM< dNd6gdOeUdP< d+eVdQ< d;eVdR< eGdVeWdVeSjLjMeNeSjLjO d*d+d,d,d-d.eUeWdVd:d+d,d-dSeVdTeTZQeHeQZReDeReSd/ qMdS )W    )HfApi	ModelCard)nn);AutoProcessorAutoTokenizer
BartConfig	BartModelBloomConfigBloomForCausalLMCohereConfigCohereForCausalLM
DbrxConfigDbrxForCausalLMDeepseekV3ConfigDeepseekV3ForCausalLMFalconMambaConfigFalconMambaForCausalLMGemma2ConfigGemma2ForCausalLMGemma3ConfigGemma3ForConditionalGenerationGemmaConfigGemmaForCausalLM
GPT2ConfigGPT2LMHeadModelGPTNeoXConfigGPTNeoXForCausalLMGptOssConfigGptOssForCausalLMIdefics2Config Idefics2ForConditionalGenerationLlamaConfigLlamaForCausalLMLlamaForSequenceClassificationLlavaConfigLlavaForConditionalGenerationLlavaNextConfig!LlavaNextForConditionalGenerationMistralConfigMistralForCausalLM	OPTConfigOPTForCausalLMPaliGemmaConfig!PaliGemmaForConditionalGeneration
Phi3ConfigPhi3ForCausalLMQwen2_5_VLConfig"Qwen2_5_VLForConditionalGenerationQwen2ConfigQwen2ForCausalLMQwen2ForSequenceClassificationQwen2VLConfigQwen2VLForConditionalGenerationQwen3ConfigQwen3ForCausalLMQwen3ForSequenceClassificationQwen3MoeConfigQwen3MoeForCausalLMSmolVLMConfigSmolVLMForConditionalGenerationT5ConfigT5ForConditionalGenerationztrl-internal-testingz
---
library_name: transformers
tags: [trl]
---

# Tiny {model_class_name}

This is a minimal model built for unit tests in the [TRL](https://github.com/huggingface/trl) library.
Nc                 C   s   | j j}tj|d}t|}|d ur| d| }t d| }|d ur+|d| 7 }t|r:td| d d S | 	| |	| |	| d S )N)model_class_name-/zModel z already exists, skipping)
	__class____name__
MODEL_CARDformatr   ORGANIZATIONapirepo_existsprintpush_to_hub)model	tokenizerprefixsuffixr@   content
model_cardrepo_id rS   P/home/ubuntu/.local/lib/python3.10/site-packages/scripts/generate_tiny_models.pyrK   e   s   


rK   c                 C   s   |   D ]`}t|tjr |jdurtj|j tj|j qt|tj	r1tjj
|jddd qt|tjrKtj|j |jdurJtj|j qt|tjrd|jdur]tj|j tj|j qdS )a7  
    Initialize tiny test models to avoid NaNs from uninitialized weights.

    Uses safe defaults:
      - Linear/Conv1d: Xavier uniform (weights), zero (biases)
      - Embedding: Normal(0, 0.02)
      - LayerNorm: Ones (weights), zero (biases)

    Args:
        model: PyTorch model (modified in-place)
    Ng        g{Gz?)meanstd)modules
isinstancer   Linearbiasinitzeros_xavier_uniform_weight	Embeddingnormal_	LayerNormones_Conv1d)rL   modulerS   rS   rT   init_weights_tiny_modelw   s$   


re   zbigscience/bloomz-560mzCohereForAI/aya-expanse-8bzdatabricks/dbrx-instructzdeepseek-ai/DeepSeek-R1zdeepseek-ai/DeepSeek-R1-05280528ztiiuae/falcon-7b-instructzgoogle/gemma-2-2b-itzgoogle/gemma-7b-itzopenai-community/gpt2zEleutherAI/pythia-14mz#meta-llama/Meta-Llama-3-8B-Instruct3z meta-llama/Llama-3.1-8B-Instructz3.1z meta-llama/Llama-3.2-1B-Instructz3.2z"mistralai/Mistral-7B-Instruct-v0.1z0.1z"mistralai/Mistral-7B-Instruct-v0.2z0.2zfacebook/opt-1.3bzmicrosoft/Phi-3.5-mini-instructzQwen/Qwen2.5-32B-Instructz2.5zQwen/Qwen2.5-Coder-0.5Bz	2.5-CoderzQwen/Qwen3-8Bz
refs/pr/14main)revision             )
vocab_sizehidden_sizenum_attention_headsnum_key_value_headsnum_hidden_layersintermediate_sizetinyzQwen/Qwen3-30B-A3Bzopenai/gpt-oss-20b)rn   ro   rp   rq   rr   rs   num_expertsnum_experts_per_tok   smallzQwen/Qwen3-4B   )rn   ro   rp   rq   rr   rs   
num_labelszfacebook/bart-basezgoogle/flan-t5-small   @      T)
rn   d_modelencoder_layersdecoder_layersd_kvd_ff
num_layers	num_headsdecoder_start_token_idis_encoder_decoderzgoogle/gemma-3-4b-itzgoogle/paligemma-3b-pt-224zHuggingFaceM4/idefics2-8bz$HuggingFaceTB/SmolVLM2-2.2B-Instructzllava-hf/llava-1.5-7b-hfz!llava-hf/llava-v1.6-mistral-7b-hfzQwen/Qwen2-VL-2B-InstructzQwen/Qwen2.5-VL-3B-Instructprojection_dimiP  
image_size   
patch_sizeidP vision_start_token_idmrope)typemrope_sectionrope_scalingdepth	embed_dim)ro   rp   rr   rs   )text_configvision_config)NNrS   )Xhuggingface_hubr   r   torchr   transformersr   r   r   r   r	   r
   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r    r!   r"   r#   r$   r%   r&   r'   r(   r)   r*   r+   r,   r-   r.   r/   r0   r1   r2   r3   r4   r5   r6   r7   r8   r9   r:   r;   r<   r=   r>   r?   rG   rE   rH   rK   re   model_idconfig_classmodel_classrO   ri   from_pretrainedrM   rn   lenadded_tokens_encoderkeysconfigrL   	processorkwargstext_kwargsvision_kwargsdictrS   rS   rS   rT   <module>   sJ  ?

&




























	





	