o
    پiZ!                     @   s   d dl Z d dlZd dlmZ d dlmZmZmZ d dlm	Z	 d dl
Z
d dlmZ d dlmZ d dlmZ d dlmZmZ d dlmZ eeZe jG d	d
 d
ZG dd dZdS )    N)	ExitStack)AnyCallableOptional)patch)CompilationConfig)compilation_counter)get_pcg_capture_streamis_in_pcg_torch_compile)weak_ref_tensorsc                   @   s   e Zd ZU eed< eed< eed< dZeed< dZeed< dZ	eed	< dZ
eejj ed
< dZee ed< dZeee  ed< dS )ConcreteSizeEntryruntime_shapeneed_to_compileuse_cudagraphFcompiledNrunnabler   num_finished_warmup	cudagraphoutputinput_addresses)__name__
__module____qualname__int__annotations__boolr   r   r   r   r   r   torchcuda	CUDAGraphr   r   r   list r    r    a/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/compilation/cuda_piecewise_backend.pyr      s   
 r   c                   @   sZ   e Zd Zdejdedeeef dede	de	de
e	 defd	d
Zdd ZdefddZdS )CUDAPiecewiseBackendgraphcompile_configinductor_config
graph_poolpiecewise_compile_indextotal_piecewise_compilessym_shape_indices compiled_graph_for_general_shapec
                 C   s   || _ || _|| _|| _|| _|	| _|dk| _||d k| _tg | _	|| _
t| | _d| _|| _|| _i | _| j	 | _| j	| jD ]}
t|
|
| j	v |
| jv d| j|
< qFdS )a  
        The backend for piecewise compilation.
        It mainly handles the compilation and cudagraph capturing.

        We will compile `self.graph` once for the general shape,
        and then compile for different shapes specified in
        `compilation_config.compile_sizes`.

        Independently, we will capture cudagraph for different shapes.

        If a shape needs both compilation and cudagraph, we will
        compile it first, and then capture cudagraph.
        r      F)r   r   r   N)r#   r%   r&   r'   r(   sglang_backendis_first_graphis_last_graphsetcompile_sizesr$   get_capture_sizescudagraph_capture_sizesfirst_run_finishedr*   r)   concrete_size_entriescopyto_be_compiled_sizesunionr   )selfr#   r$   r%   r&   r'   r(   r)   r*   r,   shaper    r    r!   __init__*   s.   

zCUDAPiecewiseBackend.__init__c                 C   s$   | j r| js| jj  d S d S d S N)r.   r6   r,   compiler_managersave_to_file)r8   r    r    r!   check_for_ending_compilatione   s   z1CUDAPiecewiseBackend.check_for_ending_compilationreturnc           
   	   G   sT  | j sd| _ |   | j| S t| jdkr| j| S || jd  }|| jvr,| j| S | j| }|jd u r:| j|_|jre|jsed|_| j	
| | jjj| j|| j| j| j|d|_| jre| j	se|   t rm|j| S |jd u r|jdk r| jd7  _|j| S | j rdd |D }||_tj }t P}| js|tddd	  |td
dd	  t  }|d usJ dtjj|| j!|d |j| }| jrt"|}W d    n1 sw   Y  W d    n1 sw   Y  t"||_#||_t$ j%d7  _%|S | j r"dd |D }	|	|jks"J d|j d|	 |j&  |j#S )NTr   )graph_index
num_graphsr   r+   c                 S       g | ]}t |tjr| qS r    
isinstancer   Tensordata_ptr.0xr    r    r!   
<listcomp>   
    z1CUDAPiecewiseBackend.__call__.<locals>.<listcomp>z
gc.collectc                   S      d S r;   r    r    r    r    r!   <lambda>       z/CUDAPiecewiseBackend.__call__.<locals>.<lambda>ztorch.cuda.empty_cachec                   S   rL   r;   r    r    r    r    r!   rM      rN   zMPCG capture stream is not set, please check if runtime recompilation happened)poolstreamc                 S   rB   r    rC   rG   r    r    r!   rJ      rK   zEInput addresses for cudagraphs are different during replay. Expected z, got )'r3   r>   r*   lenr)   r4   r   r   r   r6   remover,   r<   compiler#   r%   r'   r(   r.   r
   r   r   r$   get_enable_debug_moder   r   r   r   r   r-   enter_contextr   r	   r&   r   r   r   num_cudagraph_capturedreplay)
r8   argsr   entryr   r   stackrP   r   new_input_addressesr    r    r!   __call__k   s   















zCUDAPiecewiseBackend.__call__N)r   r   r   fxGraphModuler   dictstrr   r   r   r   r:   r>   r\   r    r    r    r!   r"   (   s(    
	
;r"   )dataclasseslogging
contextlibr   typingr   r   r   unittest.mockr   r   torch.fxr]   )sglang.srt.compilation.compilation_configr   *sglang.srt.compilation.compilation_counterr   0sglang.srt.compilation.piecewise_context_managerr	   r
   &sglang.srt.compilation.weak_ref_tensorr   	getLoggerr   logger	dataclassr   r"   r    r    r    r!   <module>   s   
