o
    i"                     @   sV  U d dl mZmZ d dlmZmZ d dlmZmZm	Z	 d dl
Zd dlZd dlmZ d dlmZ er>d dlmZ d dlmZ neZeZG d	d
 d
eZG dd deZejeej B eejdB  B Ze	ed< eG dd dZeG dd dZeG dd dZeG dd dZG dd deZ eG dd dZ!dddefddZ"eg i dZ#dS )     )ABCabstractmethod)	dataclassfield)TYPE_CHECKING
NamedTuple	TypeAliasN)CUDAGraphStat)SchedulerOutput)KVConnectorKVEvents)KVConnectorStatsc                   @   sR   e Zd ZU ejed< ejed< ejed< dZee dB ed< dedefdd	Z	dS )
LogprobsListslogprob_token_idslogprobssampled_token_ranksNcu_num_generated_tokensreq_idxnum_positionsc                 C   sH   | j d ur
| j | }|| }t| j|| | j|| | j|| d S N)r   r   r   r   r   )selfr   r   end_idx r   E/home/ubuntu/vllm_env/lib/python3.10/site-packages/vllm/v1/outputs.pyslice_request#   s   

zLogprobsLists.slice_request)
__name__
__module____qualname__npndarray__annotations__r   listintr   r   r   r   r   r      s   
 


r   c                   @   s   e Zd ZU ejed< ejed< ejed< dZee dB ed< ddee dB fddZ	dd	d
Z
dejdd fddZedededd fddZdS )LogprobsTensorsr   r   selected_token_ranksNr   c                 C   s<   t | j  | j  | j  |d ur|S | jS r   )r   r   cpunumpyr   r#   r   )r   r   r   r   r   tolists9   s   zLogprobsTensors.tolistsreturnc                 C   sF   | j jjdkr	| S t| j jddd| jjddd| jjddd| jS )Nr$   T)non_blocking)r   devicetyper"   tor   r#   r   r   r   r   r   to_cpu_nonblockingC   s   z"LogprobsTensors.to_cpu_nonblockingmaskc                 C   s0   | j du s	J dt| j| | j| | j| S )z5Filter the logprobs tensors with the given bool mask.Nz1filter can't be used with cu_num_generated_tokens)r   r"   r   r   r#   )r   r.   r   r   r   filterM   s   zLogprobsTensors.filterr   num_tokens_per_positionc                 C   sF   t j| |ft jdd}t j|t jd}t j| t jdd}t|||dS )z$Create empty LogprobsTensors on CPU.r$   )dtyper)   )r1   )r   r   r#   )torchemptyint32
empty_likefloat32r"   )r   r0   r   r   r#   r   r   r   	empty_cpuX   s   zLogprobsTensors.empty_cpur   )r'   r"   )r   r   r   r2   Tensorr   r   r    r!   r&   r-   r/   staticmethodr7   r   r   r   r   r"   /   s    
 





r"   PoolerOutputc                   @   s$   e Zd ZU ejed< edB ed< dS )SamplerOutputsampled_token_idsNlogprobs_tensors)r   r   r   r2   r8   r   r"   r   r   r   r   r;   q   s   
 
r;   c                   @   s   e Zd ZU dZee dB ed< dZee dB ed< dZe	dB ed< dZ
edB ed< eedZee ed< dZeed	< d
d ZdS )KVConnectorOutputNfinished_sendingfinished_recvingkv_connector_statskv_cache_eventsdefault_factoryinvalid_block_idsr   expected_finished_countc                 C   s(   | j  o| j o| j o| j o| j S r   )r?   r@   rA   rB   rE   r,   r   r   r   is_empty   s   zKVConnectorOutput.is_empty)r   r   r   r?   setstrr   r@   rA   r   rB   r   r   rE   r!   rF   rG   r   r   r   r   r>   {   s   
 r>   c                   @   s6   e Zd ZU dZee dB ed< dZee dB ed< dS )ECConnectorOutputNr?   r@   )r   r   r   r?   rH   rI   r   r@   r   r   r   r   rJ      s   
 rJ   c                   @   s   e Zd ZU ee ed< eeef ed< eedZ	eee  ed< dZ
edB ed< eedZeeedB f ed< dZeejdB  dB ed< dZedB ed	< dZedB ed
< dZeeef dB ed< dZedB ed< dS )ModelRunnerOutputreq_idsreq_id_to_indexrC   r<   Nr   prompt_logprobs_dictpooler_outputkv_connector_outputec_connector_outputnum_nans_in_logitscudagraph_stats)r   r   r   r    rI   r   dictr!   r   r<   r   r   rN   r"   rO   r2   r8   rP   r>   rQ   rJ   rR   rS   r	   r   r   r   r   rK      s   
 rK   c                   @   s   e Zd ZedefddZdS )AsyncModelRunnerOutputr'   c                 C   s   dS )a  Get the ModelRunnerOutput for this async output.

        This is a blocking call that waits until the results are ready, which
        might involve copying device tensors to the host.
        This method should only be called once per AsyncModelRunnerOutput.
        Nr   r,   r   r   r   
get_output   s   z!AsyncModelRunnerOutput.get_outputN)r   r   r   r   rK   rV   r   r   r   r   rU      s    rU   c                   @   s*   e Zd ZU ee ed< eee  ed< dS )DraftTokenIdsrL   draft_token_idsN)r   r   r   r    rI   r   r!   r   r   r   r   rW      s   
 rW   scheduler_outputr
   r'   c                 C   sV   | j stS t| j  }dd t|D }dd |D }dd |D }t||||dS )zz
    Create a ModelRunnerOutput stub that contains the correct
    per-request bookkeeping but no generated data yet.
    c                 S   s   i | ]\}}||qS r   r   ).0idxridr   r   r   
<dictcomp>   s    z:make_empty_encoder_model_runner_output.<locals>.<dictcomp>c                 S   s   g | ]}d gqS )r   r   rZ   _r   r   r   
<listcomp>   s    z:make_empty_encoder_model_runner_output.<locals>.<listcomp>c                 S   s   g | ]}d qS r   r   r^   r   r   r   r`      s    )rL   rM   r<   rO   )num_scheduled_tokensEMPTY_MODEL_RUNNER_OUTPUTr    keys	enumeraterK   )rY   rL   rM   r<   rO   r   r   r   &make_empty_encoder_model_runner_output   s   re   )rL   rM   )$abcr   r   dataclassesr   r   typingr   r   r   r%   r   r2   vllm.compilation.cuda_graphr	   vllm.v1.core.sched.outputr
   vllm.distributed.kv_eventsr   4vllm.distributed.kv_transfer.kv_connector.v1.metricsr   objectr   r"   r8   r    r:   r   r;   r>   rJ   rK   rU   rW   re   rb   r   r   r   r   <module>   s@   &?	(
