o
    پim                     @   s   d dl Z d dlZd dlmZ d dlmZmZmZmZm	Z	m
Z
 d dlZd dlmZmZmZmZmZmZmZmZ d dlmZ ddiZi ZG dd	 d	eZG d
d deZG dd deZG dd deZejeedfdd dS )    N)copyfile)AnyDictListOptionalTupleUnion)TOKENIZER_MAPPINGGptOssConfigLlamaConfigPretrainedConfigPreTrainedTokenizerQwen2ConfigQwen3ConfigQwen3MoeConfig)logger
vocab_filez./tokenizer.modelc                       sX   e Zd ZdZdZdZ									
											d fdd	Zdd Z  ZS )InternLM2Configa  
    This is the configuration class to store the configuration of a [`InternLM2Model`]. It is used to instantiate
    an InternLM2 model according to the specified arguments, defining the model architecture. Instantiating a
    configuration with the defaults will yield a similar configuration to that of the InternLM2-7B.

    Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
    documentation from [`PretrainedConfig`] for more information.


    Args:
        vocab_size (`int`, *optional*, defaults to 32000):
            Vocabulary size of the InternLM2 model. Defines the number of different tokens that can be represented by the
            `inputs_ids` passed when calling [`InternLM2Model`]
        hidden_size (`int`, *optional*, defaults to 4096):
            Dimension of the hidden representations.
        intermediate_size (`int`, *optional*, defaults to 11008):
            Dimension of the MLP representations.
        num_hidden_layers (`int`, *optional*, defaults to 32):
            Number of hidden layers in the Transformer encoder.
        num_attention_heads (`int`, *optional*, defaults to 32):
            Number of attention heads for each attention layer in the Transformer encoder.
        num_key_value_heads (`int`, *optional*):
            This is the number of key_value heads that should be used to implement Grouped Query Attention. If
            `num_key_value_heads=num_attention_heads`, the model will use Multi Head Attention (MHA), if
            `num_key_value_heads=1 the model will use Multi Query Attention (MQA) otherwise GQA is used. When
            converting a multi-head checkpoint to a GQA checkpoint, each group key and value head should be constructed
            by meanpooling all the original heads within that group. For more details checkout [this
            paper](https://arxiv.org/pdf/2305.13245.pdf). If it is not specified, will default to
            `num_attention_heads`.
        hidden_act (`str` or `function`, *optional*, defaults to `"silu"`):
            The non-linear activation function (function or string) in the decoder.
        max_position_embeddings (`int`, *optional*, defaults to 2048):
            The maximum sequence length that this model might ever be used with. Typically set this to something large
            just in case (e.g., 512 or 1024 or 2048).
        initializer_range (`float`, *optional*, defaults to 0.02):
            The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
        rms_norm_eps (`float`, *optional*, defaults to 1e-12):
            The epsilon used by the rms normalization layers.
        use_cache (`bool`, *optional*, defaults to `True`):
            Whether or not the model should return the last key/values attentions (not used by all models). Only
            relevant if `config.is_decoder=True`.
        tie_word_embeddings(`bool`, *optional*, defaults to `False`):
            Whether to tie weight embeddings
        Example:

    	internlm2
AutoConfig      +      Nsilu   {Gz?ư>Tr         F'  eagerc                    s   || _ || _|| _|| _|| _|| _|| _|d u r|}|| _|| _|	| _	|
| _
|| _|| _|| _|   || _| jd u r?d| _t jd||||d| d S )Nr!   )pad_token_idbos_token_ideos_token_idtie_word_embeddings )
vocab_sizemax_position_embeddingshidden_sizeintermediate_sizenum_hidden_layersnum_attention_headsbiasnum_key_value_heads
hidden_actinitializer_rangerms_norm_eps	use_cache
rope_thetarope_scaling_rope_scaling_validationattn_implementationsuper__init__)selfr'   r)   r*   r+   r,   r.   r/   r(   r0   r1   r2   r"   r#   r$   r%   r-   r3   r4   r6   kwargs	__class__r&   O/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/configs/internvl.pyr8   P   s8   

zInternLM2Config.__init__c                 C   s   | j du rdS t| j trt| j dkrtd| j  | j dd}| j dd}|du s2|dvr9td| |du sHt|ttfrH|dk rTtd	|d
t|t|tr_t|}dS dS )z<
        Validate the `rope_scaling` configuration.
        Nr   zS`rope_scaling` must be a dictionary with with two fields, `type` and `factor`, got typefactor)lineardynamiczF`rope_scaling`'s type field must be one of ['linear', 'dynamic'], got g      ?zP`rope_scaling`'s factor field must be a float|int >= 1, got rope_scaling_factor=z, type(rope_scaling_factor)=)	r4   
isinstancedictlen
ValueErrorgetfloatintr>   )r9   rope_scaling_typerope_scaling_factorr&   r&   r=   r5      s0   

z(InternLM2Config._rope_scaling_validation)r   r   r   r   r   Nr   r   r   r   Tr   r   r   FTr    Nr!   )	__name__
__module____qualname____doc__
model_type_auto_classr8   r5   __classcell__r&   r&   r;   r=   r      s2    /6r   c                       sh   e Zd ZdZdZ									
		
							d fdd	Zedeee	j
f ddfddZ  ZS )InternVisionConfiga  
    This is the configuration class to store the configuration of a [`InternVisionModel`]. It is used to
    instantiate a vision encoder according to the specified arguments, defining the model architecture.

    Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
    documentation from [`PretrainedConfig`] for more information.

    Args:
        num_channels (`int`, *optional*, defaults to 3):
            Number of color channels in the input images (e.g., 3 for RGB).
        patch_size (`int`, *optional*, defaults to 14):
            The size (resolution) of each patch.
        image_size (`int`, *optional*, defaults to 224):
            The size (resolution) of each image.
        qkv_bias (`bool`, *optional*, defaults to `False`):
            Whether to add a bias to the queries and values in the self-attention layers.
        hidden_size (`int`, *optional*, defaults to 3200):
            Dimensionality of the encoder layers and the pooler layer.
        num_attention_heads (`int`, *optional*, defaults to 25):
            Number of attention heads for each attention layer in the Transformer encoder.
        intermediate_size (`int`, *optional*, defaults to 12800):
            Dimensionality of the "intermediate" (i.e., feed-forward) layer in the Transformer encoder.
        qk_normalization (`bool`, *optional*, defaults to `True`):
            Whether to normalize the queries and keys in the self-attention layers.
        num_hidden_layers (`int`, *optional*, defaults to 48):
            Number of hidden layers in the Transformer encoder.
        use_flash_attn (`bool`, *optional*, defaults to `True`):
            Whether to use flash attention mechanism.
        hidden_act (`str` or `function`, *optional*, defaults to `"gelu"`):
            The non-linear activation function (function or string) in the encoder and pooler. If string, `"gelu"`,
            `"relu"`, `"selu"` and `"gelu_new"` ``"gelu"` are supported.
        layer_norm_eps (`float`, *optional*, defaults to 1e-6):
            The epsilon used by the layer normalization layers.
        dropout (`float`, *optional*, defaults to 0.0):
            The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.
        drop_path_rate (`float`, *optional*, defaults to 0.0):
            Dropout rate for stochastic depth.
        attention_dropout (`float`, *optional*, defaults to 0.0):
            The dropout ratio for the attention probabilities.
        initializer_range (`float`, *optional*, defaults to 0.02):
            The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
        initializer_factor (`float`, *optional*, defaults to 0.1):
            A factor for layer scale.
    intern_vit_6b         F      2  T0   gelur           r   皙?c                    s|   t  jdi | || _|| _|| _|| _|	| _|| _|| _|| _	|| _
|| _|| _|| _|| _|| _|| _|| _|
| _d S )Nr&   )r7   r8   r)   r*   dropoutdrop_path_rater+   r,   num_channels
patch_size
image_sizer0   initializer_factorattention_dropoutlayer_norm_epsr/   qkv_biasqk_normalizationuse_flash_attn)r9   r`   ra   rb   rf   r)   r,   r*   rg   r+   rh   r/   re   r^   r_   rd   r0   rc   r:   r;   r&   r=   r8      s$   
zInternVisionConfig.__init__pretrained_model_name_or_pathreturnr   c                 K   sv   | j |fi |\}}d|v r|d }d|v r2t| dr2|d | jkr2td|d  d| j d | j|fi |S )Nvision_configrO   zYou are using a model of type z  to instantiate a model of type zN. This is not supported for all configurations of models and can yield errors.)get_config_dicthasattrrO   r   warning	from_dict)clsri   r:   config_dictr&   r&   r=   from_pretrained   s"   
z"InternVisionConfig.from_pretrained)rT   rU   rV   FrW   rX   rY   TrZ   Tr[   r   r\   r\   r\   r   r]   )rK   rL   rM   rN   rO   r8   classmethodr   strosPathLikerr   rQ   r&   r&   r;   r=   rR      s6    -)rR   c                       sJ   e Zd ZdZdZ															
d fdd	Zdd Z  ZS )InternVLChatConfiginternvl_chatTNr   F      ?v1r      c                    s  t  jdi | |d u rddgi}td |d u r%ddgi}td tdi || _|dd dkr?tdi || _nf|dd dkrQt	di || _nT|dd dkrct
di || _nB|dd d	krutdi || _n0|dd d
krtdi || _n|dd dkrtdi || _ntd|dd || _|| _|| _|| _|| _|| _|	| _|
| _|| _|| _|| _|| _| jj| _d| _| j| j_d S )NarchitecturesInternVisionModelzOvision_config is None. Initializing the InternVisionConfig with default values.InternLM2ForCausalLMz\llm_config is None. Initializing the LlamaConfig config with default values (`LlamaConfig`).r   LlamaForCausalLMQwen2ForCausalLMQwen3MoeForCausalLMQwen3ForCausalLMGptOssForCausalLMzUnsupported architecture: {}Fr&   )r7   r8   r   inforR   rk   rF   r   
llm_configr   r   r   r   r
   rE   formatuse_backbone_lorause_llm_lora
pad2squareselect_layerforce_image_sizedownsample_ratiotemplatedynamic_image_sizeuse_thumbnail
ps_versionmin_dynamic_patchmax_dynamic_patchr)   r%   )r9   rk   r   r   r   r   r   r   r   r   r   r   r   r   r   r:   r;   r&   r=   r8     sX   


zInternVLChatConfig.__init__c                 C   s   t | j}| j |d< | j |d< | jj|d< | j|d< | j	|d< | j
|d< | j|d< | j|d< | j|d	< | j|d
< | j|d< | j|d< | j|d< | j|d< |S )z
        Serializes this instance to a Python dictionary. Override the default [`~PretrainedConfig.to_dict`].

        Returns:
            `Dict[str, any]`: Dictionary of all the attributes that make up this configuration instance,
        rk   r   rO   r   r   r   r   r   r   r   r   r   r   r   )copydeepcopy__dict__rk   to_dictr   r<   rO   r   r   r   r   r   r   r   r   r   r   r   )r9   outputr&   r&   r=   r   _  s    










zInternVLChatConfig.to_dict)NNr   r   Fry   Nrz   NFFr{   r   r|   )rK   rL   rM   rO   is_compositionr8   r   rQ   r&   r&   r;   r=   rw     s&    Frw   c                
       sT  e Zd ZdZeZeZddgZdZ									
	
	
d/de
eeef  f fddZedd Zedd Zede
e fddZede
e fddZdd Zdd Zdd Zdd Zdd  Zd!d" Z	d0d#e
e dee fd$d%Zd0d&d'Z		
d1d(ee d)e
ee  d*edee f fd+d,Z 	d0d(ee d)e
ee  dee fd-d.Z!  Z"S )2InternLM2Tokenizerz
    Construct a InternLM2 tokenizer. Based on byte-level Byte-Pair-Encoding.

    Args:
        vocab_file (`str`):
            Path to the vocabulary file.
    	input_idsattention_maskAutoTokenizer<unk><s></s>NTFsp_model_kwargsc                    sz   t d |d u r
i n|| _|| _|| _|| _|	| _tjdi | j| _| j	| d | _
t jd|||||
d| d S )Nzregister succeed)	bos_token	eos_token	unk_token	pad_tokenclean_up_tokenization_spacesr&   )printr   r   add_bos_tokenadd_eos_tokendecode_with_prefix_spacespmSentencePieceProcessorsp_modelLoad_no_prefix_space_tokensr7   r8   )r9   r   r   r   r   r   r   r   r   r   r   r:   r;   r&   r=   r8     s$   
zInternLM2Tokenizer.__init__c                 C   s8   | j d u r| tt| j}dd t|D | _ | j S )Nc                 S   s   h | ]\}}| d s|qS )u   ▁)
startswith).0itokr&   r&   r=   	<setcomp>  s
    z<InternLM2Tokenizer.no_prefix_space_tokens.<locals>.<setcomp>)r   convert_ids_to_tokenslistranger'   	enumerater9   vocabr&   r&   r=   no_prefix_space_tokens  s   
z)InternLM2Tokenizer.no_prefix_space_tokensc                 C   
   | j  S )zReturns vocab size)r   get_piece_sizer9   r&   r&   r=   r'     s   
zInternLM2Tokenizer.vocab_sizerj   c                 C   r   N)r   bos_idr   r&   r&   r=   r#   "     
zInternLM2Tokenizer.bos_token_idc                 C   r   r   )r   eos_idr   r&   r&   r=   r$   &  r   zInternLM2Tokenizer.eos_token_idc                    s(    fddt  jD }| j |S )zReturns vocab as a dictc                    s   i | ]}  ||qS r&   )r   )r   r   r   r&   r=   
<dictcomp>,  s    z0InternLM2Tokenizer.get_vocab.<locals>.<dictcomp>)r   r'   updateadded_tokens_encoderr   r&   r   r=   	get_vocab*  s   zInternLM2Tokenizer.get_vocabc                 C   s   | j j|tdS )zReturns a tokenized string.)out_type)r   encodert   )r9   textr&   r&   r=   	_tokenize0  s   zInternLM2Tokenizer._tokenizec                 C   s   | j |S )z0Converts a token (str) in an id using the vocab.)r   piece_to_id)r9   tokenr&   r&   r=   _convert_token_to_id4  s   z'InternLM2Tokenizer._convert_token_to_idc                 C   s   | j |}|S )z=Converts an index (integer) in a token (str) using the vocab.)r   	IdToPiece)r9   indexr   r&   r&   r=   _convert_id_to_token8  s   z'InternLM2Tokenizer._convert_id_to_tokenc                 C   s   |r|d | j vrd| S |S )Nr    )r   )r9   tokensdecodedr&   r&   r=   _maybe_add_prefix_space=  s   z*InternLM2Tokenizer._maybe_add_prefix_spacec                 C   s   g }d}d}|D ]#}|| j v r$|s|d7 }|| j|| 7 }d}g }q|| d}q|| j|7 }| |}| j||d}|dd S )z:Converts a sequence of tokens (string) in a single string. Fr   T)r   r   r   N)all_special_tokensr   decodeappendclean_up_tokenizationr   )r9   r   current_sub_tokens
out_stringprev_is_specialr   r&   r&   r=   convert_tokens_to_stringC  s    


z+InternLM2Tokenizer.convert_tokens_to_stringfilename_prefixc                 C   s   t j|std| d dS t j||r|d ndtd  }t j| jt j|kr?t j	| jr?t
| j| |fS t j	| jsgt|d}| j }|| W d   |fS 1 sbw   Y  |fS )a  
        Save the vocabulary and special tokens file to a directory.

        Args:
            save_directory (`str`):
                The directory in which to save the vocabulary.

        Returns:
            `Tuple(str)`: Paths to the files saved.
        zVocabulary path (z) should be a directoryN-r   r   wb)ru   pathisdirr   errorjoinVOCAB_FILES_NAMESabspathr   isfiler   openr   serialized_model_protowrite)r9   save_directoryr   out_vocab_fileficontent_spiece_modelr&   r&   r=   save_vocabularyX  s0   

z"InternLM2Tokenizer.save_vocabularyc                 C   sB   | j r| jg}ng }|| }|d ur|| }| jr|| jg }|S r   )r   r#   r   r$   )r9   token_ids_0token_ids_1bos_token_idsr   r&   r&   r=    build_inputs_with_special_tokensy  s   
z3InternLM2Tokenizer.build_inputs_with_special_tokensr   r   already_has_special_tokensc                    sh   |rt  j||ddS |du rdgdgt|  dg S dgdgt|  ddg dgt|  dg S )a  
        Retrieve sequence ids from a token list that has no special tokens added. This method is called when adding
        special tokens using the tokenizer `prepare_for_model` method.

        Args:
            token_ids_0 (`List[int]`):
                List of IDs.
            token_ids_1 (`List[int]`, *optional*):
                Optional second list of IDs for sequence pairs.
            already_has_special_tokens (`bool`, *optional*, defaults to `False`):
                Whether or not the token list is already formatted with special tokens for the model.

        Returns:
            `List[int]`: A list of integers in the range [0, 1]: 1 for a special token, 0 for a sequence token.
        T)r   r   r   Nr   r   )r7   get_special_tokens_maskrD   )r9   r   r   r   r;   r&   r=   r     s   0z*InternLM2Tokenizer.get_special_tokens_maskc                 C   s<   | j g}|du rt|| dg S t|| | | dg S )a  
        Create a mask from the two sequences passed to be used in a sequence-pair classification task. T5 does not make
        use of token type ids, therefore a list of zeros is returned.

        Args:
            token_ids_0 (`List[int]`):
                List of IDs.
            token_ids_1 (`List[int]`, *optional*):
                Optional second list of IDs for sequence pairs.

        Returns:
            `List[int]`: List of zeros.
        Nr   )r$   rD   )r9   r   r   eosr&   r&   r=   $create_token_type_ids_from_sequences  s   z7InternLM2Tokenizer.create_token_type_ids_from_sequences)	r   r   r   r   NTFFFr   )NF)#rK   rL   rM   rN   r   vocab_files_namesPRETRAINED_VOCAB_FILES_MAPpretrained_vocab_files_mapmodel_input_namesrP   r   r   rt   r   r8   propertyr   r'   rH   r#   r$   r   r   r   r   r   r   r   r   r   r   boolr   r   rQ   r&   r&   r;   r=   r     st     



!
!
r   T)exist_ok)r   ru   shutilr   typingr   r   r   r   r   r   sentencepiecer   transformersr	   r
   r   r   r   r   r   r   sglang.utilsr   r   r   r   rR   rw   r   registerr&   r&   r&   r=   <module>   s&     ( q R [

