o
    
۾il                     @   sZ   d dl Zd dlZd dlmZmZmZ d dlmZ G dd deZ	dej
dejfdd	ZdS )
    N)AsyncModelRunnerOutputLogprobsTensorsModelRunnerOutput)SamplerOutputc                
   @   sB   e Zd Zdededejdejjdejj	f
ddZ
defd	d
ZdS )AsyncOutputmodel_runner_outputsampler_outputnum_sampled_tokenscopy_stream
copy_eventc                 C   s   || _ || _|| _|| _tj }tj|G || t	|j
| _
d | _|jd ur1|j | _d | _|jd ur?t	|j| _t	|| _dd | j j D | _| j| W d    d S 1 saw   Y  d S )Nc                 S   s&   i | ]\}}||d ur|  nd qS N)to_cpu_nonblocking).0kv r   R/home/ubuntu/.local/lib/python3.10/site-packages/vllm/v1/worker/gpu/async_utils.py
<dictcomp>*   s    z(AsyncOutput.__init__.<locals>.<dictcomp>)r   r   r	   r   torchcudacurrent_streamstreamwait_streamasync_copy_to_npsampled_token_idslogprobs_tensorsr   num_nansnum_sampled_tokens_npprompt_logprobs_dictitemsrecord)selfr   r   r	   r
   r   default_streamr   r   r   __init__   s*   





"zAsyncOutput.__init__returnc                 C   s   | j   | j }| j }t||D ]	\}}||d = q|| j_| jd ur5tt| jj	| j | j_
| jd urA| j | j_| j| j_| jS r   )r   synchronizer   tolistr   zipr   r   dictreq_idsnum_nans_in_logitsr   tolistslogprobsr   )r!   r   r	   	token_ids
num_tokensr   r   r   
get_output0   s   





zAsyncOutput.get_outputN)__name__
__module____qualname__r   r   r   Tensorr   StreamEventr#   r/   r   r   r   r   r      s    
$r   xr$   c                 C   s   | j ddd S )NcpuT)non_blocking)tonumpy)r6   r   r   r   r   H   s   r   )r:   npr   vllm.v1.outputsr   r   r    vllm.v1.worker.gpu.sample.outputr   r   r3   ndarrayr   r   r   r   r   <module>   s   =