o
    پi6                     @   s   d dl mZmZ d dlmZ d dlZd dlmZmZ d dl	m
Z
 d dlmZ eG dd deZeG d	d
 d
eZeG dd deZeG dd dZeG dd deZeG dd deZeG dd deZdS )    )	dataclassfield)AnyN)
ArchConfigModelConfig)QuantizationConfig)AttentionBackendEnumc                   @   sp   e Zd ZU edd dZeed< edd dZee ed< edd dZ	e
e ed< d	Zeed
< dZeed< dS )EncoderArchConfigc                   C      g S N r   r   r   f/home/ubuntu/.local/lib/python3.10/site-packages/sglang/multimodal_gen/configs/models/encoders/base.py<lambda>       zEncoderArchConfig.<lambda>default_factory_fsdp_shard_conditionsc                   C   r
   r   r   r   r   r   r   r      r   architecturesc                   C   s   t jt jt jhS r   )r   FA
TORCH_SDPASAGE_ATTN_3r   r   r   r   r      s   _supported_attention_backendsFoutput_hidden_statesTuse_return_dictN)__name__
__module____qualname__r   r   list__annotations__r   strr   setr   r   boolr   r   r   r   r   r	      s   
 r	   c                   @   s   e Zd ZU dZeed< dZeed< dZeed< dZeed< dZ	eed< dZ
eed< dZeed< dZeed	< dZeed
< dZeed< dZeed< dZeed< eedZeeeeef  ed< eedZeeef ed< edd dZeed< dddZdS )TextEncoderArchConfigr   
vocab_sizehidden_sizenum_hidden_layersnum_attention_headspad_token_ideos_token_idtext_lenhidden_state_skip_layerdecoder_start_token_idToutput_pastscalable_attentionFtie_word_embeddingsr   stacked_params_mappingtokenizer_kwargsc                   C   r
   r   r   r   r   r   r   r   /   r   zTextEncoderArchConfig.<lambda>r   returnNc                 C   s   d| j dd| _d S )NTpt)
truncation
max_lengthreturn_tensors)r)   r0   )selfr   r   r   __post_init__1   s   z#TextEncoderArchConfig.__post_init__)r1   N)r   r   r   r#   intr   r$   r%   r&   r'   r(   r)   r*   r+   r,   r!   r-   r.   r   r   r/   tupler   dictr0   r   r   r7   r   r   r   r   r"      s&   
 r"   c                   @   s   e Zd ZdS )ImageEncoderArchConfigN)r   r   r   r   r   r   r   r;   9   s    r;   c                   @   sx   e Zd ZU dZejdB ed< dZejdB ed< dZe	ejdf dB ed< dZ
e	ejdf dB ed< dZejdB ed< dS )BaseEncoderOutputNlast_hidden_statepooler_output.hidden_states
attentionsattention_mask)r   r   r   r=   torchFloatTensorr   r>   r?   r9   r@   rA   Tensorr   r   r   r   r<   >   s   
 r<   c                   @   sL   e Zd ZU eedZeed< dZe	ed< dZ
edB ed< dZedB ed< dS )EncoderConfigr   arch_config prefixNquant_configlora_config)r   r   r   r   r	   rF   r   r   rH   r   rI   r   rJ   r   r   r   r   r   rE   G   s
   
 rE   c                   @   s8   e Zd ZU eedZeed< dZe	ed< dZ
eed< dS )TextEncoderConfigr   rF   Fparallel_foldingspparallel_folding_modeN)r   r   r   r   r"   rF   r   r   rL   r!   rN   r   r   r   r   r   rK   P   s   
 rK   c                   @   s    e Zd ZU eedZeed< dS )ImageEncoderConfigr   rF   N)r   r   r   r   r;   rF   r   r   r   r   r   r   rO   Z   s   
 rO   )dataclassesr   r   typingr   rB   )sglang.multimodal_gen.configs.models.baser   r   1sglang.multimodal_gen.runtime.layers.quantizationr   'sglang.multimodal_gen.runtime.platformsr   r	   r"   r;   r<   rE   rK   rO   r   r   r   r   <module>   s(   	