o
    پi6                     @   sx   d dl mZ d dlmZmZ d dlmZ d dlZd dlm	Z	 d dl
mZ d dlmZ d dlmZmZ G dd	 d	eZdS )
    )	ExitStack)AnyCallable)patchN)CompilationConfig)compilation_counter)CUDAPiecewiseBackendweak_ref_tensorsc                       sT   e Zd Zdejdedeeef dede	de	de
e	 def fd	d
Zdd Z  ZS )NPUPiecewiseBackendgraphcompile_configinductor_config
graph_poolpiecewise_compile_indextotal_piecewise_compilessym_shape_indices compiled_graph_for_general_shapec
           
         s    t  |||||||||		 d S N)super__init__)
selfr   r   r   r   r   r   r   r   sglang_backend	__class__ `/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/compilation/npu_piecewise_backend.pyr      s   zNPUPiecewiseBackend.__init__c           	   	   G   s  || j d  }|| jvr| j| S | j| }|jd u r| j|_|jd u r|jdk r5| jd7  _|j| S | j rDdd |D }||_t	j
 }t D}| jsd|tddd  |tdd	d  t	j
j|| jd
 |j| }| jrzt|}W d    n1 sw   Y  W d    n1 sw   Y  t||_||_t jd7  _|S | j rdd |D }||jksJ d|j d| |j  |jS )Nr      c                 S       g | ]}t |tjr| qS r   
isinstancetorchTensordata_ptr.0xr   r   r   
<listcomp>:   
    z0NPUPiecewiseBackend.__call__.<locals>.<listcomp>z
gc.collectc                   S      d S r   r   r   r   r   r   <lambda>H       z.NPUPiecewiseBackend.__call__.<locals>.<lambda>ztorch.npu.empty_cachec                   S   r(   r   r   r   r   r   r   r)   I   r*   )poolc                 S   r   r   r   r#   r   r   r   r&   e   r'   zEInput addresses for cudagraphs are different during replay. Expected z, got )r   concrete_size_entriesr   runnable	cudagraphnum_finished_warmupr   get_enable_debug_modeinput_addressesr    npuNPUGraphr   is_first_graphenter_contextr   r   r   is_last_graphr	   outputr   num_cudagraph_capturedreplay)	r   argsruntime_shapeentryr1   npugraphstackr7   new_input_addressesr   r   r   __call__)   sZ   












zNPUPiecewiseBackend.__call__)__name__
__module____qualname__fxGraphModuler   dictstrr   intlistr   r   r@   __classcell__r   r   r   r   r
      s&    
	r
   )
contextlibr   typingr   r   unittest.mockr   r    torch.fxrD   )sglang.srt.compilation.compilation_configr   *sglang.srt.compilation.compilation_counterr   -sglang.srt.compilation.cuda_piecewise_backendr   r	   r
   r   r   r   r   <module>   s    