o
    پi                     @   s   d dl Z d dlmZ d dlmZmZmZ d dlZG dd de jZ	e	 Z
defddZd	efd
dZedefddZ	ddededeejj deejj d	eeef f
ddZdS )    N)contextmanager)AnyCallableOptionalc                   @   s   e Zd Zdd ZdS )do_multi_stream_localc                 C   s
   d| _ d S )NF)do_multi_stream)self r	   W/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/utils/multi_stream_utils.py__init__      
zdo_multi_stream_local.__init__N)__name__
__module____qualname__r   r	   r	   r	   r
   r   
   s    r   enablec                 C   s
   | t _d S N_localr   )r   r	   r	   r
   set_do_multi_stream   r   r   returnc                   C   s   t jS r   r   r	   r	   r	   r
   r      s   r   c                 c   s0    t j}t|  z
d V  W t| d S t| w r   )r   r   r   )r   prev_do_multi_streamr	   r	   r
   with_multi_stream   s   r   fn0fn1events
aux_streamc                 C   s   t  o|du}|rA|d   |  }tj| |d   | }|d   W d   n1 s2w   Y  |d   ||fS |  }| }||fS )a  Utility function to run two functions in two cuda streams in parallel. Multi-stream is
    only enabled when cuda graph is turned on because switch stream has extra host overhead.

    This design is mainly for low latency use case. It needs to be improved for max throughput
    use case.
    For simplicity, fn0 and fn1 do not support inputs.

    Args:
        fn0 (Callable): callable for the default stream
        fn1 (Callable): callable for the second stream, aux_stream
        events (list[torch.cuda.Event]): cuda events for callables
        aux_stream (Optional[torch.cuda.Stream]): the second cuda stream for fn1.
            Multi-stream is disabled when aux_stream is None.

    Returns:
        tuple[Any, Any]: the return values of fn0() and fn1()
    Nr      )r   recordtorchcudastreamwait)r   r   r   r   multi_streamresult0result1r	   r	   r
   maybe_execute_in_parallel%   s   r%   r   )	threading
contextlibr   typingr   r   r   r   localr   r   boolr   r   r   listr   EventStreamtupler%   r	   r	   r	   r
   <module>   s,   


