o
    -i                     @   sz   U d Z ddlmZ ddlmZ ddlmZ eeZ	ddddd	Z
eeeeeeef f ed
< dZdZG dd deZdS )z Radio vision model configuration    )Any)PretrainedConfig)logging)i        i   )i   r   r   i   )i         i   )i       r   i   )vit_small_patch16_224vit_base_patch16_224vit_large_patch16_224vit_huge_patch16_224VIT_TIMM_DIM_BY_NAME)g3<4'?gwgM?gy{ ?)gB91?gwt.?g	U?c                        s   e Zd ZdZdZddddddd	d
deedddfdededede	de	dede
de
dededee
e
e
f eB dee
e
e
f eB dedB deeeef  dB de	f fddZ  ZS )RadioConfiga>  
    This is the configuration class to store the configuration of a Radio
    vision model. It is used to instantiate a Radio model according to the
    specified arguments, defining the model architecture.

    Args:
        model_name: Name of the vision transformer model
            (e.g., "vit_base_patch16_224"). Used to determine architecture
            dimensions from `VIT_TIMM_DIM_BY_NAME`.
        image_size: The size (resolution) of each image.
        patch_size: The size (resolution) of each patch.
        qkv_bias: Whether to add a bias to the queries, keys and values.
        qk_normalization: Whether to apply normalization to queries and keys.
        norm_type: The normalization type to use.
        layer_norm_eps: The epsilon used by the layer normalization layers.
        initializer_factor: A factor for initializing all weight matrices.
        hidden_act: The non-linear activation function in the encoder.
        cpe_max_size: Maximum image size for position embeddings.
        norm_mean: Mean values for image normalization (RGB channels).
            Defaults to (0.48145466, 0.4578275, 0.40821073)).
        norm_std: Standard deviation values for image normalization
            (RGB channels). Defaults to (0.26862954, 0.26130258, 0.27577711)).
        register_multiple: Number of register tokens to use.
        teachers: A list of teacher model configurations. Each teacher configuration is
            a dict with keys like "name" and some may have "use_summary".
        cls_token_per_teacher: Whether to use a separate CLS token for each teacher.
    radio   r   TF
layer_normgư>g      ?gelui   N
model_name
image_size
patch_sizeqkv_biasqk_normalization	norm_typelayer_norm_epsinitializer_factor
hidden_actcpe_max_size	norm_meannorm_stdregister_multipleteacherscls_token_per_teacherc                    s   || _ t| \| _| _| _| _|| _|| _|| _|| _	|| _
|| _|| _|	| _|
| _t|ttfr5t|n|| _t|ttfrCt|n|| _|| _|d urO|ng | _|| _t jdi | d S )N )r   r   hidden_sizenum_hidden_layersnum_attention_headsintermediate_sizer   r   r   r   r   r   r   r   r   
isinstancetuplelistr   r   r    r!   r"   super__init__)selfr   r   r   r   r   r   r   r   r   r   r   r   r    r!   r"   kwargs	__class__r#   b/home/ubuntu/veenaModal/venv/lib/python3.10/site-packages/vllm/transformers_utils/configs/radio.pyr,   6   s0   zRadioConfig.__init__)__name__
__module____qualname____doc__
model_typeOPENAI_CLIP_MEANOPENAI_CLIP_STDstrintboolfloatr)   r*   dictr   r,   __classcell__r#   r#   r/   r1   r      s`    	
r   N)r5   typingr    transformers.configuration_utilsr   transformers.utilsr   
get_loggerr2   loggerr   r=   r9   r)   r:   __annotations__r7   r8   r   r#   r#   r#   r1   <module>   s   
"