o
    
۾iv                     @   sL   d dl mZ d dlmZ G dd deZG dd deZG dd deZd	S )
    )Any)PretrainedConfigc                       s6   e Zd ZdZ										
	d fdd	Z  ZS )Step3VisionEncoderConfigstep3_vision_encoder         ?              
quick_geluh㈵>c                    sR   || _ || _|| _|| _|| _|| _|| _|| _|
| _|	| _	t
 jdi | d S N )hidden_sizeintermediate_sizeoutput_hidden_sizenum_hidden_layersnum_attention_headsnum_channels
patch_size
image_sizelayer_norm_eps
hidden_actsuper__init__)selfr   r   r   r   r   r   r   r   r   r   kwargs	__class__r   \/home/ubuntu/.local/lib/python3.10/site-packages/vllm/transformers_utils/configs/step3_vl.pyr      s   z!Step3VisionEncoderConfig.__init__)
r   r   r   r	   r
   r   r   r   r   r   )__name__
__module____qualname__
model_typer   __classcell__r   r   r    r"   r      s    r   c                '       s   e Zd ZdZdgZ									
										d)dedededededededededededeee	f dB ded ed!ed"ed#e
d$eed%f d&df& fd'd(Z  ZS )*Step3TextConfig
step3_textStep3TextForCausalLM    H  @      =      / r      0   r   N      F8               	   
            r      r
                                                    !   "   #   $   %   &   '   (   )   *   +   ,   -   .   /   r3   1   2   3   4   5   6   7   8   9   :   ;   r   r   r   num_attention_groupsr   max_seq_len
vocab_sizerms_norm_epsmoe_intermediate_sizemoe_num_experts	moe_top_krope_parametersmax_position_embeddingshare_expert_dimshare_q_dimhead_dimnorm_expert_weightmoe_layers_enum.returnc                    s   || _ || _|| _|| _|| _|| _|| _|| _|	| _|
| _	|| _
|dd }|p.|p.ddi}|dd}d|vr=||d< || _|| _|| _|| _|| _|| _|| _t jdi | d S )Nrope_scaling	rope_typedefault
rope_thetag    Ar   )r   r   r   rl   r   rm   rn   ro   rp   rq   rr   poprs   rt   ru   rv   rw   rx   ry   r   r   )r   r   r   r   rl   r   rm   rn   ro   rp   rq   rr   rs   rt   ru   rv   rw   rx   ry   r   r{   r~   r    r   r"   r   *   s0   OzStep3TextConfig.__init__)r+   r,   r-   r.   r/   r0   r1   r   r2   r3   r   Nr0   r2   r4   r5   Fr6   )r#   r$   r%   r&   architecturesintfloatdictstrr   booltupler   r'   r   r   r    r"   r(   &   sx    	

Nr(   c                       sV   e Zd ZdZ					ddeeB dB deeB dB ded	ed
eddf fddZ	  Z
S )Step3VLConfigstep3_vlNr.   T vision_configtext_configunderstand_projector_strideprojector_biasimage_token_idrz   c                    s   |d u rt  }nt|trt di |}|| _|d u rt }nt|tr+tdi |}|| _|| _|| _|j| _|| _	t
 jdi | d S r   )r   
isinstancer   r   r(   r   r   r   r   r   r   r   )r   r   r   r   r   r   r   r    r   r"   r      s   	

zStep3VLConfig.__init__)NNr.   Tr   )r#   r$   r%   r&   r   r   r(   r   r   r   r'   r   r   r    r"   r      s(    

r   N)typingr    transformers.configuration_utilsr   r   r(   r   r   r   r   r"   <module>   s
   o