o
    ii                     @   s^   d dl m Z  d dlmZ d dlmZ d dlmZmZ d dlm	Z	 d dl
mZ G dd dZd	S )
    )copy)cast)CompletionOutput)RequestOutputKindSamplingParams)EngineCoreRequest)IterationStatsc                   @   s   e Zd ZU dZeed< eed< eed< ee ed< ee	 ed< e
ed< edB ed	< d
eddfddZde
defddZde
deeef fddZede
fddZdede	deee	 ef fddZde
fddZedddede
fdd ZdS )!ParentRequestzInfo, state & processing for parallel sampling request.

    Store parent request ID and sampling params.
    Facilitate generating child request sampling params.
    
request_idexternal_req_idsampling_paramschild_requestsoutput_aggregatormax_num_generation_tokensNcached_child_sampling_paramsrequestreturnc                 C   sf   |j d usJ |j}|j| _|j | _ || _t | _|jtjkr(t	t
d g|j ng | _d| _d | _d S )Nr   )r   paramsr
   r   setr   output_kindr   
FINAL_ONLYr   r   nr   r   r   )selfr   r    r   V/home/ubuntu/vllm_env/lib/python3.10/site-packages/vllm/v1/engine/parallel_sampling.py__init__$   s   
zParentRequest.__init__indexc                 C   sD   | j j}| jr
| jS t| j }d|_|du r|| _|S || |_|S )aR  Efficiently obtain child `sampling_params`

        If `sampling_params.seed` is not `None` then
        each child request requires a unique clone of
        parent `sampling_params` with a unique seed.

        Args:
          index: index within `n` child requests

        Returns:
          Child `sampling_params` instance.
           N)r   seedr   r   r   )r   r   r   child_sampling_paramsr   r   r   _get_child_sampling_params4   s   

z(ParentRequest._get_child_sampling_paramsc                 C   s*   | d| j  }| j| || |fS )zGet child request ID and sampling params.

        Args:
          index: index within `n` child requests.

        Returns:
          (request ID, sampling_params) tuple
        _)r
   r   addr    )r   r   child_req_idr   r   r   get_child_infoS   s   	zParentRequest.get_child_infoc                 C   s   | j jS N)r   r   )r   r   r   r   r   `   s   zParentRequest.nchild_request_idcompletion_outputc                 C   sr   d}|  r|| jv r| j| nd}| jjtjkr#|rg n|g}n|| j|j< | jr.g n| j}| j }||fS )NFT)	finishedr   remover   r   r   r   r   r   )r   r&   r'   already_finished_and_returnedoutputsr(   r   r   r   get_outputsd   s   
zParentRequest.get_outputsnum_generation_tokensc                 C   s   t || j| _| jS r%   )maxr   )r   r-   r   r   r   observe_num_generation_tokens   s   z+ParentRequest.observe_num_generation_tokens
parent_reqzParentRequest | Noneiteration_statsc                 C   sR   | d ur| j nd}| d ur| |}| d u s| js'|j| |j| d S d S )Nr   )r   r/   r   max_num_generation_tokens_iterappendn_params_iter)r0   r1   r-   n_paramr   r   r   observe_finished_request   s   z&ParentRequest.observe_finished_request)__name__
__module____qualname____doc__str__annotations__r   r   listr   intr   r   r    tupler$   propertyr   boolr,   r/   staticmethodr   r6   r   r   r   r   r	      sD   
 

r	   N)r   typingr   vllm.outputsr   vllm.sampling_paramsr   r   vllm.v1.enginer   vllm.v1.metrics.statsr   r	   r   r   r   r   <module>   s   