o
    پi                     @   s  d Z ddlZddlZddlZddlZddlZddlZddlmZ ddl	m
Z
 ddlmZ ddlmZ ddlmZ ddlmZ dd	lmZ dd
lmZ ejddd ejd ejd ejd ejG dd dZeddedefddZdd Z dedefddZ!dedefddZ"dedefddZ#e$dkre% Z&e'e& e'e& e&( Z)e*e)Z+e*e)Z,e"e+e, e#e+e, dS dS ) aC  
Compile DeepGEMM Kernels for a model with specify server arguments

This script launches a server for capturing DeepGEMM calls and then compiles the kernels.
It accepts server arguments (the same as launch_server.py).

Usage:
python3 -m sglang.compile_deep_gemm --model deepseek-ai/DeepSeek-V3 --tp 8 --trust-remote-code

    N)FAKE_BOOTSTRAP_HOST)launch_server)warmup)envs)GenerateReqInput)TokenizerManager)
ServerArgs)kill_process_treespawnT)forcec                   @   sB   e Zd ZU dZeed< edejfddZ	e
dejfddZd	S )
CompileArgsi  timeoutparserc                 C   s   | j dttjd d S )Nz	--timeout)typedefault)add_argumentintr   r   )r    r   L/home/ubuntu/.local/lib/python3.10/site-packages/sglang/compile_deep_gemm.pyadd_cli_args+   s   zCompileArgs.add_cli_argsargsc                    s0   dd t | D }| di  fdd|D S )Nc                 S   s   g | ]
}|j t|jfqS r   )namer   r   ).0attrr   r   r   
<listcomp>2   s    z-CompileArgs.from_cli_args.<locals>.<listcomp>c                    s    i | ]\}}||t  |qS r   )getattr)r   r   	attr_typer   r   r   
<dictcomp>4   s     z-CompileArgs.from_cli_args.<locals>.<dictcomp>r   )dataclassesfields)clsr   attrsr   r   r   from_cli_args/   s   zCompileArgs.from_cli_argsN)__name__
__module____qualname__r   r   __annotations__staticmethodargparseArgumentParserr   classmethod	Namespacer#   r   r   r   r   r   '   s   
 r   compile-deep-gemmdisaggregation_modetokenizer_managerc                    sP   t d tg dddddd}| dkrd	|_t|_||d  I d H  d S )
Nz4
Generate warm up request for compiling DeepGEMM...
r            g           T)temperaturemax_new_tokens
ignore_eos	input_idssampling_paramsnullr   )printr   bootstrap_roomr   bootstrap_hostgenerate_request	__anext__)r.   r/   generate_req_inputr   r   r   warm_up_compile8   s   rB   c              
   C   sT   z zt |  W n ty } z|d }~ww W tt dd d S tt dd w )NF)include_parent)r   	Exceptionr	   osgetpid)server_argser   r   r   launch_server_internalL   s   &rI   rG   compile_argsc                 C   s  t jt| fd}|  d| j d| j }|j}t }t | |k rzddi}| j	dkr:t
j| d|d}n
t
j| d	|d}|jd
kr| j	dkrg ddddd}| jdkret|d< d|d< t
j| d|dd}|jd
kr| }	td|	 |W S t }
| rt |
 |k rtd ntd| s|W S W n
 t
jy   Y nw td t | |k s%td)N)targetr   zhttp://:zContent-Typezapplication/json; charset=utf-8r   z
/v1/models)headersz/health   r0   r4   )r6   r5   r8   r;   r>   r=   z	/generateiX  )jsonr   zSync request failed: 
   zWaiting for main node timeout!zPDeepGEMM Kernels compilation timeout.

Feel free and please restart the command.)multiprocessingProcessrI   starthostportr   timeperf_counter	node_rankrequestsgetstatus_coder.   r   postrO   RuntimeErroris_alivesleepTimeoutErrorRequestException)rG   rJ   procbase_urlr   
start_timerM   responsepayloaderrorstart_time_waitingr   r   r   *launch_server_process_and_send_one_requestU   s^   




	 
-ri   c                 C   s&   d| _ d| _td |j| _d| _d S )NTFz4Disable CUDA Graph and Torch Compile to save time...r-   )disable_cuda_graphenable_torch_compiler<   r   watchdog_timeoutwarmups)rG   rJ   r   r   r   refine_server_args   s
   
rn   c                 C   s`   t d t| |}t d td | rt|j d S zt|j W d S  ty/   Y d S w )NzBegin DeepGEMM Kernels compilation...
It may take a long time and timeout maybe raised while the compilation is still in progress.
Just feel free to restart the command until the compilation is fully finished.
z4
DeepGEMM Kernels compilation finished successfully.rP   )r<   ri   rV   r_   r^   r	   pidrD   )rG   rJ   rb   r   r   r   run_compile   s   

rp   __main__)-__doc__r)   r   rQ   rE   rV   rY   sglang.srt.disaggregation.utilsr   "sglang.srt.entrypoints.http_serverr   sglang.srt.entrypoints.warmupr   sglang.srt.environr   sglang.srt.managers.io_structr   %sglang.srt.managers.tokenizer_managerr   sglang.srt.server_argsr   sglang.srt.utilsr	   set_start_method#SGLANG_IN_DEEPGEMM_PRECOMPILE_STAGEsetSGLANG_ENABLE_JIT_DEEPGEMM%SGLANG_CHUNKED_PREFIX_CACHE_THRESHOLD	dataclassr   strrB   rI   ri   rn   rp   r$   r*   r   r   
parse_argsr   r#   rG   rJ   r   r   r   r   <module>   sZ    	
<




