o
    پi                     @   s   d dl mZmZ d dlmZmZmZmZ d dlm	Z	 de
defddZde
defdd	ZeG d
d deZeG dd deZeG dd deZeG dd deZdS )    )	dataclassfield)ImageEncoderArchConfigImageEncoderConfigTextEncoderArchConfigTextEncoderConfig)AttentionBackendEnumnreturnc                 C   s   d| v ot | dd S )Nlayers.)strisdigitsplitr	   m r   f/home/ubuntu/.local/lib/python3.10/site-packages/sglang/multimodal_gen/configs/models/encoders/clip.py_is_transformer_layer   s   r   c                 C   s
   |  dS )N
embeddings)endswithr   r   r   r   _is_embeddings   s   
r   c                   @   s.  e Zd ZU dZeed< dZeed< dZeed< dZeed< dZ	eed	< d
Z
eed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< ed d! d"Zee ed#< ed$d! d"Zeeeeef  ed%< ed&d! d"Zeed'< d(S ))CLIPTextArchConfigi   
vocab_size   hidden_sizei   intermediate_sizeprojection_dim   num_hidden_layers   num_attention_headsM   max_position_embeddings
quick_gelu
hidden_acth㈵>layer_norm_eps        dropoutattention_dropout{Gz?initializer_range      ?initializer_factor   pad_token_idi  bos_token_idi  eos_token_idtext_lenc                   C   s   t jhS N)r   
TORCH_SDPAr   r   r   r   <lambda>+   s   zCLIPTextArchConfig.<lambda>default_factory_supported_attention_backendsc                   C      g dS N))qkv_projq_projq)r=   k_projk)r=   v_projvr   r   r   r   r   r7   0       stacked_params_mappingc                   C   s   t tgS r5   )r   r   r   r   r   r   r7   8   rD   _fsdp_shard_conditionsN) __name__
__module____qualname__r   int__annotations__r   r   r   r    r"   r$   r&   r   r(   floatr*   r+   r-   r/   r1   r2   r3   r4   r   r:   setr   rE   listtuplerF   r   r   r   r   r      s6   
 r   c                   @   s   e Zd ZU dZeed< dZeed< dZeed< dZeed< dZ	eed	< d
Z
eed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< edd dZeeeeef  ed< dS ) CLIPVisionArchConfigi   r   i   r   r   r   r   r    r"      num_channels   
image_size    
patch_sizer%   r&   r'   r(   r)   r*   r+   r,   r-   r.   r/   c                   C   r;   r<   r   r   r   r   r   r7   M   rD   zCLIPVisionArchConfig.<lambda>r8   rE   N)rG   rH   rI   r   rJ   rK   r   r   r    r"   rR   rT   rV   r&   r   r(   rL   r*   r+   r-   r/   r   rE   rN   rO   r   r   r   r   rP   <   s$   
  rP   c                   @   L   e Zd ZU eedZeed< dZe	dB ed< dZ
edB ed< dZeed< dS )CLIPTextConfigr8   arch_configNnum_hidden_layers_overriderequire_post_normclipprefix)rG   rH   rI   r   r   rY   r   rK   rZ   rJ   r[   boolr]   r   r   r   r   r   rX   V   
   
 rX   c                   @   rW   )CLIPVisionConfigr8   rY   NrZ   r[   r\   r]   )rG   rH   rI   r   rP   rY   r   rK   rZ   rJ   r[   r^   r]   r   r   r   r   r   r`   _   r_   r`   N)dataclassesr   r   2sglang.multimodal_gen.configs.models.encoders.baser   r   r   r   'sglang.multimodal_gen.runtime.platformsr   r   r^   r   r   r   rP   rX   r`   r   r   r   r   <module>   s   $