o
    
۾io                     @   s   d dl mZ d dlmZmZ G dd deZG dd deZG dd deZG d	d
 d
eZe	de e	de G dd deZ
dS )    )Any)
AutoConfigPretrainedConfigc                       s~   e Zd ZU dZdZeed< 								
					ddedededededededededede	de	de
f fddZ  ZS )AIMv2Configa  This is the configuration class to store the configuration of an [`AIMv2Model`].
    Instantiating a configuration with the defaults will yield a similar configuration
    to that of the [apple/aimv2-large-patch14-224](https://huggingface.co/apple/aimv2-large-patch14-224).
    Args:
        hidden_size: Dimension of the hidden representations.
        intermediate_size: Dimension of the SwiGLU representations.
        num_hidden_layers: Number of hidden layers in the Transformer.
        num_attention_heads: Number of attention heads for each attention layer
            in the Transformer.
        num_channels: Number of input channels.
        image_size: Image size.
        patch_size: Patch size.
        rms_norm_eps: Epsilon value used for the RMS normalization layer.
        attention_dropout: Dropout ratio for attention probabilities.
        projection_dropout: Dropout ratio for the projection layer after the attention.
        qkv_bias: Whether to add a bias to the queries, keys and values.
        use_bias: Whether to add a bias in the feed-forward and projection layers.
        kwargs: Keyword arguments for the [`PretrainedConfig`].
    aimv2
model_type                     h㈵>        Fhidden_sizeintermediate_sizenum_hidden_layersnum_attention_headsnum_channels
image_size
patch_sizerms_norm_epsattention_dropoutprojection_dropoutqkv_biasuse_biaskwargsc                    s^   t  jdi | || _|| _|| _|| _|| _|| _|| _|	| _	|| _
|
| _|| _|| _d S )N )super__init__r   r   r   r   r   r   r   r   r   r   r   r   )selfr   r   r   r   r   r   r   r   r   r   r   r   r   	__class__r   X/home/ubuntu/.local/lib/python3.10/site-packages/vllm/transformers_utils/configs/ovis.pyr    $   s   
zAIMv2Config.__init__)r   r	   r
   r   r   r   r   r   r   r   FF)__name__
__module____qualname____doc__r   str__annotations__intfloatboolr   r    __classcell__r   r   r"   r$   r      sT   
 	
r   c                       s>   e Zd Z							ddeeB dB def fd	d
Z  ZS )BaseVisualTokenizerConfig @  softmax      ?NF   backbone_confighidden_stridec           
         s   t  jdi | || _|| _|| _t|tr!dd |dD }|| _t	tt
f  | _|| _|d urgt|tt	fsDJ dt| dt|tsg|d }	|	dkr`|d tj|	fi |}ntdi |}|| _|| _d S )	Nc                 S   s   g | ]}t |qS r   )r+   ).0xr   r   r$   
<listcomp>X   s    z6BaseVisualTokenizerConfig.__init__.<locals>.<listcomp>|zMexpect `backbone_config` to be instance of PretrainedConfig or dict, but got  typer   r   r   )r   r    
vocab_sizetokenize_functiontau
isinstancer)   splitdepthsdictr   backbone_kwargsdrop_cls_tokenr   typepopr   	for_modelr   r4   r5   )
r!   r;   r<   r=   r@   rC   r4   r5   r   r   r"   r   r$   r    H   s2   



z"BaseVisualTokenizerConfig.__init__)r0   r1   r2   NFNr3   )r%   r&   r'   r   rA   r+   r    r.   r   r   r"   r$   r/   G   s    
r/   c                           e Zd ZdZ fddZ  ZS )Aimv2VisualTokenizerConfigaimv2_visual_tokenizerc                    N   t  jdi | | jrd| _| jr%t| jdksJ | jd | jd< d S d S NFr3   r   r   r   r   r    rC   r@   lenrB   r!   r   r"   r   r$   r    p      z#Aimv2VisualTokenizerConfig.__init__r%   r&   r'   r   r    r.   r   r   r"   r$   rH   m       rH   c                       rG   )SiglipVisualTokenizerConfigsiglip_visual_tokenizerc                    rJ   rK   rL   rN   r"   r   r$   r    |   rO   z$SiglipVisualTokenizerConfig.__init__rP   r   r   r"   r$   rR   y   rQ   rR   rS   rI   c                       sJ   e Zd ZdZ							d	deeB dB deeB dB f fddZ  ZS )

OvisConfigovisN    F
llm_configvisual_tokenizer_configc           
         s   t  jdi | |d ur5t|ttfsJ dt| dt|ts5|d }	|d tj|	fi |}|| _	|d urdt|ttfsMJ dt| dt|tsd|d }	|d tj|	fi |}|| _
|| _|| _|| _|| _|| _d S )NzHexpect `llm_config` to be instance of PretrainedConfig or dict, but got r:   r   zUexpect `visual_tokenizer_config` to be instance of PretrainedConfig or dict, but got r   )r   r    r>   r   rA   rD   rE   r   rF   text_configrX   multimodal_max_lengthr   conversation_formatter_classllm_attn_implementationdisable_tie_weight)
r!   rW   rX   rZ   r   r[   r\   r]   r   r   r"   r   r$   r       s8   




zOvisConfig.__init__)NNrV   NNNF)r%   r&   r'   r   r   rA   r    r.   r   r   r"   r$   rT      s    

rT   N)typingr   transformersr   r   r   r/   rH   rR   registerrT   r   r   r   r$   <module>   s   :&