o
    پiR<                     @   s  U d dl Z d dlZd dlZd dlZd dlZd dlZd dlmZ d dlm	Z	 d dl
mZmZmZ d dlZd dlmZ d dlmZ d dlmZ d dlmZ d dlmZmZ d d	lmZ d d
lmZ d dlmZ d dl m!Z!m"Z" e#e$Z%defddZ&dej'dede(e)ef dede*de*de+e* defddZ,G dd dZ-ej.G dd dZ/dej'de+e) de0ej'e+e/ f fd d!Z1dZ2d"a3G d#d$ d$ejj4Z5d%a6e)e7d&< e	d'e)fd(d)Z8G d*d+ d+Z9dS ),    N)Sequence)contextmanager)AnyCallableOptional)enable_python_dispatcher)CompilationConfig)compilation_counter)EagerAdapterInductorAdaptor)CUDAPiecewiseBackend)NPUPiecewiseBackend)PostGradPassManager)is_npu	rank0_logconfigc                 C   s0   | j dkrt S | j dkrt S td| j  )NeagerinductorzUnknown compiler: )compilerr
   r   
ValueError)r    r   R/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/compilation/backend.pymake_compiler   s
   

r   graphcompile_configinductor_config
graph_poolpiecewise_compile_indextotal_piecewise_compilessym_shape_indices compiled_graph_for_general_shapec	           
   
   C   s&   t  stnt}	|	| ||||||||	S N)r   r   r   )
r   r   r   r   r   r   r   r    sglang_backendbackend_clsr   r   r   make_backend&   s   r$   c                   @   s   e Zd ZdefddZdd Z	dded	ed
efddZdd Z		dde
jdee dedee dee f
ddZ			dde
jdeeef dededee defddZdS ) CompilerManagerr   c                 C   s   t  | _d| _t|| _d S )NF)dictcacheis_cache_updatedr   r   )selfr   r   r   r   __init__A   s   zCompilerManager.__init__c                 C   s
   | j  S r!   )r   compute_hashr)   r   r   r   r+   I   s   
zCompilerManager.compute_hashF 	cache_dirdisable_cacheprefixc                 C   s~   || _ || _tj|d| _|s4tj| jr4t| j}t	|
 | _W d    n1 s/w   Y  | jj|||d d S )Nzsglang_compile_cache.py)r.   r/   r0   )r/   r.   ospathjoincache_file_pathexistsopenastliteral_evalreadr'   r   initialize_cache)r)   r.   r/   r0   fr   r   r   r:   L   s   
z CompilerManager.initialize_cachec                 C   sd   | j s| jsd S tjdd}|| j}t| jd}|| W d    d S 1 s+w   Y  d S )N   )indentw)	r/   r(   pprintPrettyPrinterpformatr'   r6   r4   write)r)   printerdatar;   r   r   r   save_to_file[   s   "zCompilerManager.save_to_fileNr   example_inputsgraph_indexruntime_shapereturnc                 C   sf   | j ||| jjf }| j|||||}|d u r$td|| jj| |S td|t|| jj| |S )NzEDirectly load the %s-th graph for dynamic shape from %s via handle %sz@Directly load the %s-th graph for shape %s from %s via handle %s)r'   r   nameloadloggerdebugstr)r)   r   rF   rG   rH   handlecompiled_graphr   r   r   rK   c   s(   
zCompilerManager.loadr      r   
num_graphsc                 C   sF  |dkrt   at jd7  _d }t| jtrd }nd| d| }| j|||||\}}	|d us6J d|	d ur|	| j||| jj	f< t j
d7  _
d| _|dkrd|d u r\td ntdt| |d u rstd	|| jj	|	 ntd
|t|| jj	|	 ||d krt   }
|
t }|d u rtd| |S td|| |S )Nr   rQ   artifact_shape_
_subgraph_zFailed to compile the graphTz/Cache the graph for dynamic shape for later usez)Cache the graph of shape %s for later usez=Store the %s-th graph for dynamic shape from %s via handle %sz8Store the %s-th graph for shape %s from %s via handle %sz0Compiling a graph for dynamic shape takes %.2f sz+Compiling a graph for shape %s takes %.2f s)timecompilation_start_timer	   num_backend_compilations
isinstancer   r   compiler'   rJ   num_cache_entries_updatedr(   rL   inforN   rM   )r)   r   rF   r   rG   rR   rH   rP   	maybe_keyrO   nowelapsedr   r   r   rY      s^   	
	zCompilerManager.compile)Fr-   r!   )r   rQ   N)__name__
__module____qualname__r   r*   r+   rN   boolr:   rE   fxGraphModulelistr   intr   r   rK   r&   rY   r   r   r   r   r%   @   sT    


"
r%   c                   @   s0   e Zd ZU eed< eed< eed< ejed< dS )	SplitItemsubmod_namegraph_idis_splitting_graphr   N)	r_   r`   ra   rN   __annotations__rf   rb   rc   rd   r   r   r   r   rg      s
   
 rg   opsrI   c                    s  d}i  g }| j jD ]*}|jdv rq
|jdkr0t|j|v r0|d7 }| |< || |d7 }q
| |< q
tjjj	j	| d  fdddd}g }d	d
 |
 D }|D ]$}d|v s\|dkr]qRt||}	t|dd}
|t||
|
|v |	 qR|jdd d ||fS )Nr   )outputplaceholdercall_functionrQ   c                    s    |  S r!   r   )nodenode_to_subgraph_idr   r   <lambda>   s    zsplit_graph.<locals>.<lambda>T)keep_original_orderc                 S   s   g | ]\}}|qS r   r   ).0rJ   moduler   r   r   
<listcomp>   s    zsplit_graph.<locals>.<listcomp>.r-   submod_c                 S   s   | j S r!   )ri   )xr   r   r   rs      s    )key)r   nodesoprN   targetappendtorchrc   passessplit_modulenamed_modulesgetattrrf   replacerg   sort)r   rl   subgraph_idsplit_op_graphsrp   split_gmoutputsnamesrJ   rv   ri   r   rq   r   split_graph   s2   





r   g        c                
       s   e Zd Zdejjdee deee	f de
ddf
 fddZ fd	d
Zdejjjdeejjjdf deee	f de	f fddZ  ZS )PiecewiseCompileInterpreterrv   compile_submod_namesr   r   r"   SGLangBackendc                    sH   t  | ddlm} | | _|| _|| _|| _d| _|| _	|| _
d S )Nr   )detect_fake_modeF)superr*   torch._guardsr   	fake_moder   r   r"   extra_tracebackr   r   )r)   rv   r   r   r   r   r"   r   	__class__r   r   r*   
  s   	
z$PiecewiseCompileInterpreter.__init__c              	      s~    fdd|D } j ) t  t j| W  d    W  d    S 1 s(w   Y  W d    d S 1 s8w   Y  d S )Nc                    s(   g | ]}t |tjr j|n|qS r   )rX   r   Tensorr   from_tensor)ru   tr,   r   r   rw      s    z3PiecewiseCompileInterpreter.run.<locals>.<listcomp>)r   r   r   run)r)   args	fake_argsr   r,   r   r     s   

RzPiecewiseCompileInterpreter.runr~   r   .kwargsrI   c           	   
      s   t |tsJ t |||}|| jv rW| j|}| |}dd t|D }| jj	j
||| j|t| jd d}t|| j| j| j|t| j||| j	| jj|< t jd7  _|S )Nc                 S   s    g | ]\}}t |tjr|qS r   )rX   r   SymInt)ru   irz   r   r   r   rw   3  s
    z;PiecewiseCompileInterpreter.call_module.<locals>.<listcomp>)rG   rR   rH   rQ   )rX   rN   r   call_moduler   index
fetch_attr	enumerater"   compiler_managerrY   r   lenr$   r   r   rv   __dict__r	   $num_piecewise_capturable_graphs_seen)	r)   r~   r   r   rm   r   submodr    compiled_graph_for_dynamic_shaper   r   r   r   '  s<   

z'PiecewiseCompileInterpreter.call_module)r_   r`   ra   r   rc   rd   re   rN   r&   r   r   r*   r   rp   TargettupleArgumentr   __classcell__r   r   r   r   r   	  s,    

r   backbone	model_tagtagc                 c   s@    | t ksJ d|  dt  dt }| a zdV  W |a dS |a w )z%Context manager to set the model tag.z
Model tag z  is the same as the current tag rx   N)r   )r   old_tagr   r   r   set_model_tagV  s   
r   c                   @   s   e Zd ZU eed< dZeed< ejed< ejed< e	e
 ed< eed< ee ed< e	e ed	< e	ej ed
< eed< dedefddZdd ZdejdefddZdS )r   r   F_calledr   r   piecewise_graphsreturned_callablepost_grad_passessym_tensor_indicesinput_buffersr   r   c                 C   sL   t d |d us
J || _t | _g | _g | _t|| _ddi| _|| _	d S )NzInitializing SGLangBackendenable_auto_functionalized_v2F)
r   r   r   post_grad_pass_managerr   r   r%   r   r   r   )r)   r   r   r   r   r   r*   u  s   

zSGLangBackend.__init__c                 C   s   | j   | j | jd< d S )Npost_grad_custom_post_pass)r   	configurer   r,   r   r   r   configure_post_pass  s   
z!SGLangBackend.configure_post_passrI   c                 C   s  t d tjtdd}| j }tj|d|}tj|dd d}d}tj|d| d	| t	}tj|dd | jj
|d
dd t jd7  _| jrRJ d|| _|   t|| jj\| _| _ddlm}	 |	d| j |	d| j t jt| j7  _dd | jD }
t| j|
| j| j| j| j|  tj }|dkrtj|dt    d}tj!|sd| jj"d
d }|#dd}t$|d}|%| W d    n1 sw   Y  t d|  d| _| jS )NzSGLangBackend __call__SGLANG_CACHE_DIRz~/.cache/sglang/torch_compile_cacheT)exist_okr   rank__Fr-   )r/   r0   rQ   z%SGLangBackend can only be called once)lazy_format_graph_codezbefore splitzafter splitc                 S   s   g | ]}|j s|jqS r   )rj   rh   )ru   itemr   r   r   rw     s    z*SGLangBackend.__call__.<locals>.<listcomp>computation_graph_z.pyz0from __future__ import annotations
import torch
)print_outputz<lambda>rd   r>   zComputation graph saved to )&r   r1   r2   
expandusergetenvr   r+   r3   makedirsr   r:   r	   num_graphs_seenr   r   r   r   r   	split_opsr   r   torch._dynamo.utilsr   num_piecewise_graphs_seenr   r   r   r   r   r   distributedget_rankrU   r5   print_readabler   r6   rB   )r)   r   rF   base_cache_dir
cache_hashr.   rankdp_ranklocal_cache_dirr   submod_names_to_compile
graph_pathsrcr;   r   r   r   __call__  sx   


	zSGLangBackend.__call__N)r_   r`   ra   r   rk   r   rb   rc   rd   re   rg   r   r   rf   r   r   r%   r   r*   r   r   r   r   r   r   r   e  s$   
 


r   ):r7   dataclassesloggingr1   r?   rU   collections.abcr   
contextlibr   typingr   r   r   r   torch.fxrc   torch._dispatch.pythonr   )sglang.srt.compilation.compilation_configr   *sglang.srt.compilation.compilation_counterr	   )sglang.srt.compilation.compiler_interfacer
   r   -sglang.srt.compilation.cuda_piecewise_backendr   ,sglang.srt.compilation.npu_piecewise_backendr   #sglang.srt.compilation.pass_managerr   sglang.srt.utils.commonr   r   	getLoggerr_   rL   r   rd   r&   rN   rf   re   r$   r%   	dataclassrg   r   r   global_graph_poolrV   Interpreterr   r   rk   r   r   r   r   r   r   <module>   sp   

	

 
/J