o
    پi9`                     @   s:  U d Z ddlZddlZddlZddlZddlZddlZddlmZ ddl	m
Z
mZmZmZmZmZ ddlZddlmZ ddlmZ edrNddlmZmZ nddlmZmZ dd	lmZmZmZmZmZmZ dd
lm Z  ddl!m"Z"m#Z#m$Z$m%Z%m&Z&m'Z'm(Z(m)Z)m*Z*m+Z+m,Z,m-Z-m.Z.m/Z/m0Z0m1Z1m2Z2m3Z3m4Z4m5Z5m6Z6m7Z7m8Z8m9Z9m:Z: ddl;m<Z< ddl=m>Z> ddl?m@Z@ ddlAmBZB ddlmCZCmDZDmEZEmFZF ddlGmHZH e"e#e$e%e)e&e2e0e>e:e1e5e/e8e*e+e(e'e3e4e<e6e7e,e-e.e9gZIeee  eJd< dd eID ZIeIK D ] \ZLZMeNeO ePeLeM W d   n	1 sw   Y  q	dTdeQdeeeQeRf  fddZSdefddZT		dUdeQdeUdeeQ fdd ZV		dUdeQdeUdeeQ fd!d"ZWded#eUfd$d%ZXded#eUfd&d'ZYde<d#dfd(d)ZZdVded+e[d#dfd,d-Z\dWd.d/Z]eEd0d1		dXd2eQdeUdeeQ d3ee^ fd4d5Z_eEd0d1	dTd2eQdeUdeeQ fd6d7Z`	8dYd2eQd9eQd#eeQe
f fd:d;Zag d<Zbd=d> Zcd?ZdG d@dA dAejeZfdBdddCdDeQdEeQdeUdFeeQ d#eeef f
dGdHZgdIdJ ZhdBdddKdLdDeQdEeQdeUdFeeQ dMeeU f
dNdOZidPdQ Zjd2eeQejkf d#eUfdRdSZldS )Zz'Utilities for Huggingface Transformers.    N)Path)AnyDictListOptionalTypeUnion)snapshot_download)get_bool_env_varSGLANG_USE_MODELSCOPE)
AutoConfigGenerationConfig)AutoProcessorAutoTokenizerPretrainedConfigPreTrainedTokenizerPreTrainedTokenizerBasePreTrainedTokenizerFast)!MODEL_FOR_CAUSAL_LM_MAPPING_NAMES)AfmoeConfigBailingHybridConfigChatGLMConfig
DbrxConfigDeepseekVL2ConfigDotsOCRConfigDotsVLMConfigExaoneConfigFalconH1ConfigGraniteMoeHybridConfigJetNemotronConfigJetVLMConfigKimiK25ConfigKimiLinearConfigKimiVLConfigLongcatFlashConfigMultiModalityConfigNemotronH_Nano_VL_V2_ConfigNemotronHConfigOlmo3ConfigQwen3_5ConfigQwen3_5MoeConfigQwen3NextConfigStep3p5ConfigStep3VLConfig)DeepseekVLV2Config)InternVLChatConfig)create_remote_connector)_CUSTOMIZED_MM_PROCESSOR)is_remote_urlloggerlru_cache_frozensetmistral_utils)patch_tokenizer_CONFIG_REGISTRYc                 C   s   i | ]}|j |qS  )
model_type).0
config_clsr8   r8   Z/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/utils/hf_transformers_utils.py
<dictcomp>n   s    r=   
model_pathallow_patternsc                 C   s(   t j| r| S |sg d}t| |dS )N)*.json*.binz*.modelr?   )ospathexistsr	   )r>   r?   r8   r8   r<   download_from_hfw   s
   rF   configc                 C   s   | j dur| j d }|dr|drt| dtj | S t| dr-t| jds*J | jS t| dr=t| jds:J | jS t| d	rE| j	S t| d
rb| j
}t|dr`t|jdt|dd |jS |S t| drj| jS | S )zaGet the "sub" config relevant to llm for multi modal models.
    No op for pure text models.
    Nr   LlavaForCausalLMdtypetext_confignum_attention_heads
llm_configlanguage_configthinker_configtorch_dtype)architectures
startswithendswithsetattrtorchfloat16hasattrrK   rM   rN   rO   getattr)rG   
class_namerO   r8   r8   r<   get_hf_text_config   s4   








rZ   Ftrust_remote_coderevisionc           
      K   s   t | }tj|d}tj|std| dt|d}t|}W d    n1 s.w   Y  dg|d< d|d< tjt	
 d	}tj|d
d tj|dt  }	t|	d}t|| W d    n1 snw   Y  tj|	f||d|S )Nzconfig.jsonzCan't find config file in .rDeepseekV3ForCausalLMrQ   deepseek_v3r9   _tmp_config_folderT)exist_okdeepseek_v32_wr[   r\   )rF   rC   rD   joinrE   RuntimeErroropenjsonloadtempfile
gettempdirmakedirsgetpiddumpr   from_pretrained)
r>   r[   r\   kwargs
local_pathconfig_filefconfig_jsontmp_pathunique_pathr8   r8   r<   _load_deepseek_v32_model   s,   
rx   c                 K   s   t | }t }||\}}tjddd}t|| |  t	j
|jf||d|}	W d    n1 s8w   Y  t|	dd }
|
d urZt|
trZt	jdi |
}
t|	d|
 t|	dd }|d urwt|trwt	jdi |}t|	d| |	S )Nzw+.json)modesuffixre   rK   vision_configr8   )rF   r5   MistralConfigParserparserk   NamedTemporaryFileri   ro   flushr   rp   namerX   
isinstancedict	for_modelrT   )r>   r[   r\   rq   rr   parserconfig_dict_rt   loaded_configrK   r|   r8   r8   r<   #_load_mistral_large_3_for_causal_LM   s.   r   returnc                 C      t | dd pi }|ddkS )Nauto_map	AutoModelz+modeling_deepseekocr.DeepseekOCRForCausalLMrX   getrG   r   r8   r8   r<   _is_deepseek_ocr_model   s   r   c                 C   r   )Nr   r   z-modeling_deepseekocr2.DeepseekOCR2ForCausalLMr   r   r8   r8   r<   _is_deepseek_ocr2_model   s   r   c                 C   s2   | j jdkrd}|| j _td| d d S d S )Nr      z/Overriding deepseek-ocr's v_head_dim from 0 to  to avoid potential issues.)rK   
v_head_dimr3   warning)rG   V_HEAD_DIM_PATCHr8   r8   r<   !_override_deepseek_ocr_v_head_dim   s   
r   r   patchc                 C   sb   t | dd }t | dd }|p|}|d u rd S t |dd dkr/t|d| td| d d S d S )NrK   rN   r   r   z Overriding v_head_dim from 0 to r   )rX   rT   r3   r   )rG   r   rK   rN   targetr8   r8   r<   _override_v_head_dim_if_zero  s   
r   c                  C   sN   zddl m}  W n
 ty   Y dS w t| ds#t| dr%| j| _dS dS dS )zHEnsure LlamaFlashAttention2 symbol exists for remote code compatibility.r   modeling_llamaNLlamaFlashAttention2LlamaAttention)transformers.models.llamar   	ExceptionrW   r   r   r   r8   r8   r<   %_ensure_llama_flash_attention2_compat  s   

r       )maxsizemodelmodel_override_argsc              
   K   s6  t | }|r| |d< t| j} t| r#t| }|jg dd | } dt|  v r7t	| f||d|}n7t
  ztj| f||d|}W n% tym } zdt|vrX|t| f||d|}W Y d }~nd }~ww |jd ur|jd dkrdd	lm}	 d
ddddddd}
|	di |
|_t|d}t| tr|d urt|dr| nt| }|D ]\}}t||s|d urt||| qt|rt| d|_|ddgi tj| |d}t| |ddgi t|d|  nN|jtv r@|j}|dkrt|s
t|rd}t| }|j| |d}t|r)t | |ddgi nt|r:t| |ddgi t|d|  t| trd|jdkrd|j!j" D ]\}}t||sbt||| qR|jdkrr|ddgi |rz|| |r|jt#vrt$d|j dt#|j }|d|gi |S ) N	gguf_filez*.ptz*.safetensorsrA   ignore_patternmistral-large-3re   deepseek_v32r   Phi4MMForCausalLM)SiglipVisionConfigi  i  i  siglip_vision_model         )hidden_size
image_sizeintermediate_sizer9   rL   num_hidden_layers
patch_size)rG   itemsdeepseek-ocrrQ   DeepseekOCRForCausalLM)r\   _name_or_pathdeepseek_vl_v2internvl_chatmulti_modalityMultiModalityCausalLMzCan't get gguf config for r]   r8   )%check_gguf_filer   parentr2   r0   
pull_filesget_local_dirstrlowerr   r   r   rp   
ValueErrorrx   rQ   transformersr   r|   rZ   r   rW   r   varsrT   r   r   r9   updater.   r7   r   r   rM   __dict__r   rg   )r   r[   r\   r   rq   is_ggufclientrG   er   r|   rK   r   keyvalr9   config_classr8   r8   r<   
get_config  s   











r   c              
   K   sB   zt j| f||d|W S  ty  } zW Y d }~d S d }~ww )Nre   )r   rp   OSError)r   r[   r\   rq   r   r8   r8   r<   get_generation_config  s   r   sparse_attention_config.json sparse_attention_config_filenamec                 C   st   t j| }|st| dgd} t j| |}t j|si S t|}t|}W d    |S 1 s3w   Y  |S )Nr@   rB   )	rC   rD   isdirrF   rf   rE   rh   ri   rj   )r   r   is_localrs   rt   rG   r8   r8   r<   get_sparse_attention_config  s   

r   )max_sequence_length
seq_lengthmax_seq_lenmodel_max_lengthmax_position_embeddingsc                 C   sx   | }t |dd}|r!|dd}d|v rd}|dddkr d}nd}tD ]}t ||d}|dur9t||   S q%dS )	zCGet the context length of a model from a huggingface model configs.rope_scalingNfactor    original_max_position_embeddings	rope_typellama3i   )rX   r   CONTEXT_LENGTH_KEYSint)rG   rK   r   rope_scaling_factorr   r   r8   r8   r<   get_context_length  s    r   z#hf-internal-testing/llama-tokenizerc                   @   s    e Zd ZdejdefddZdS )TokenizerWarningsFilterrecordr   c                 C   s   d|  vS )NzCalling super().encode with)
getMessage)selfr   r8   r8   r<   filter  s   zTokenizerWarningsFilter.filterN)__name__
__module____qualname__logging	LogRecordboolr   r8   r8   r8   r<   r     s    r   auto)tokenizer_moder[   tokenizer_revisiontokenizer_namer   r   c             
   O   sj  |  drddlm} || S |dkr!|ddrtdd|d< | dkr'd	} t| }|r6| |d
< t| j} t| rJt	| }|j
g dd | } ztj| g|R ||dd|}	t|	jjt  W n9 ty~ }
 zdt d}t||
d}
~
w ty }
 z|sdt|
v sdt|
v rd}t||
|
d}
~
ww t|	tstd t|	 t|	}	|	S )z:Gets a tokenizer for the given model name via Huggingface.ry   r   )TiktokenTokenizerslowuse_fastFz5Cannot use the fast tokenizer in slow tokenizer mode.zmistralai/Devstral-Small-2505z-mistralai/Mistral-Small-3.1-24B-Instruct-2503r   r   r   )r[   r   clean_up_tokenization_spaceszPFailed to load the tokenizer. If you are using a LLaMA V1 model consider using 'z$' instead of the original tokenizer.Nz,does not exist or is not currently imported.z*requires you to execute the tokenizer filezFailed to load the tokenizer. If the tokenizer is a custom tokenizer not yet available in the HuggingFace transformers library, consider setting `trust_remote_code=True` in LLM or using the `--trust-remote-code` flag in the CLI.ziUsing a slow tokenizer. This might cause a significant slowdown. Consider using a fast tokenizer instead.)rS   'sglang.srt.tokenizer.tiktoken_tokenizerr   r   r   r   r   r   r2   r0   r   r   r   rp   r   	getLogger	__class__r   	addFilterr   	TypeError_FAST_LLAMA_TOKENIZERrg   r   r   r   warningswarn attach_additional_stop_token_idsr6   )r   r   r[   r   argsrq   r   r   r   	tokenizerr   err_msgr8   r8   r<   get_tokenizer  sp   
	
	


r  c                 C   s   t | tr| S | jS N)r   r   r  )	processorr8   r8   r<   get_tokenizer_from_processor9  s   
r
  T)r   r[   r   r   r   c             
   O   s  | d|}dt|  v rt| f||d|}nt  tj| f||d|}t|r9d|_|	ddgi nt
|rLd|_|	ddgi t| |jdv r\d|vr\d	d
d|d< |jdvre||d< z=d| v rztj| g|R ||d|}	n'|jtv rt|j j| g|R ||d|}	ntj| g|R ||d|}	W n9 ty }
 z-t|
}d|v rtd|  d d|d< tj| g|R ||d|}	n|
W Y d }
~
nd }
~
ww t|	}t| |	S )Nr\   r   re   r   rQ   r   >   qwen2_vlsarashina2_visionsizei@  i P )shortest_edgelongest_edge>   clipllavar   InternVL3_5zdoes not have a slow versionz
Processor z= does not have a slow version. Automatically use fast versionT)popr   r   r   r   r   rp   r   r9   r   r   r   r   r1   r   r   r3   infor
  r  )r   r   r[   r   r   r  rq   r\   rG   r	  r   error_messager  r8   r8   r<   get_processor?  s   





r  c                 C   s.   d|   v rt|   d g| _d S d | _d S )Nz
<|eom_id|>)get_added_vocabsetadditional_stop_token_ids)r  r8   r8   r<   r    s
   

r  c                 C   sd   t | } |  s
dS | jdkrdS t| d}|d}W d   |dkS 1 s)w   Y  |dkS )z"Check if the file is a GGUF model.Fz.ggufTrb   Ns   GGUF)r   is_filer{   rh   read)r   rt   headerr8   r8   r<   r     s   

r   r  )FN)r   )r   N)NN)r   )m__doc__
contextlibri   r   rC   rk   r  pathlibr   typingr   r   r   r   r   r   rU   huggingface_hubr	   sglang.srt.utilsr
   
modelscoper   r   r   r   r   r   r   r   r   &transformers.models.auto.modeling_autor   sglang.srt.configsr   r   r   r   r   r   r   r   r   r   r   r    r!   r"   r#   r$   r%   r&   r'   r(   r)   r*   r+   r,   r-   sglang.srt.configs.deepseek_ocrr.   sglang.srt.configs.internvlr/   sglang.srt.connectorr0   3sglang.srt.multimodal.customized_mm_processor_utilsr1   r2   r3   r4   r5    sglang.srt.utils.patch_tokenizerr6   r7   __annotations__r   r   clssuppressr   registerr   listrF   rZ   r   rx   r   r   r   r   r   r   r   r   r   r   r   r   r   r   Filterr   r  r
  r  r  PathLiker   r8   r8   r8   r<   <module>   s6    l
0
"

u

	

V	
\ 
