o
    پi                     @   s2   d dl Z dadd Zdd ZdddZd	d
 ZdS )    Nc                   C   s   t S )z
    Cache Management Operation(CMO).
    Launch a new stream to prefetch the weight of matmul when running other
    AIV or communication kernels, aiming to overlap the memory access time.
    
cmo_stream r   r   W/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/hardware_backend/npu/cmo.pyget_cmo_stream   s   r   c                 C   s   | a d S Nr   )streamr   r   r   set_cmo_stream   s   r	    ʚ;c                 C   s   ddl }t }|du rtj }t| |tj  tj|* t	|t
r5|D ]	}||| | q*n||| | W d   dS W d   dS 1 sOw   Y  dS )u   
    PREFETCH_MAX_SIZE: maximum size (bytes) for each prefetch operation.
    This affects the time spent in prefetch:
        time ≈ PREFETCH_MAX_SIZE / system_bandwidth
    r   N)	torch_npur   torchnpuStreamr	   wait_streamcurrent_streamr   
isinstancelistnpu_prefetch)handlecachePREFETCH_MAX_SIZEr   r   weightr   r   r   prepare_weight_cache   s.   

"r   c                  C   s*   t  } | d urtj }||  d S d S r   )r   r   r   r   r   )r   
cur_streamr   r   r   wait_cmo_stream2   s
   
r   )r
   )r   r   r   r	   r   r   r   r   r   r   <module>   s    

