o
    
۾iS2                     @   st  d dl mZ d dl mZ d dlmZ d dlmZmZ d dl	Z
d dlZd dlmZ d dlmZ d dlmZmZ d d	lmZ d d
lmZ d dlmZ eeZeG dd dZeG dd dZG dd dZeddddg ddZededZG dd dee Z eG dd dZ!G dd de e! Z"eG dd dZ#G dd  d e e# Z$eG d!d" d"Z%G d#d$ d$e e% Z&dS )%    )MutableSequence)Sequence)	dataclass)AnyGenericN)TypeVar)init_logger)PromptLogprobsSampleLogprobs)LoRARequest)MultiModalPlaceholderDict)RequestStateStatsc                   @   s   e Zd ZU dZeed< eed< ee ed< edB ed< e	dB ed< dZ
ejdB ed< dZedB ed	< dZeeB dB ed
< dZedB ed< defddZdefddZdS )CompletionOutputa!  The output data of one completion output of a request.

    Args:
        index: The index of the output in the request.
        text: The generated output text.
        token_ids: The token IDs of the generated output text.
        cumulative_logprob: The cumulative log probability of the generated
            output text.
        logprobs: The log probabilities of the top probability words at each
            position if the logprobs are requested.
        finish_reason: The reason why the sequence is finished.
        stop_reason: The stop string or token id that caused the completion
            to stop, None if the completion finished for some other reason
            including encountering the EOS token.
        lora_request: The LoRA request that was used to generate the output.
    indextext	token_idsNcumulative_logproblogprobsrouted_expertsfinish_reasonstop_reasonlora_requestreturnc                 C   s
   | j d uS N)r   self r   @/home/ubuntu/.local/lib/python3.10/site-packages/vllm/outputs.pyfinished3   s   
zCompletionOutput.finishedc                 C   sF   d| j  d| jd| j d| j d| j d| j d| j d| j d	S )
NzCompletionOutput(index=z, text=z, token_ids=z, routed_experts=z, cumulative_logprob=z, logprobs=z, finish_reason=z, stop_reason=))r   r   r   r   r   r   r   r   r   r   r   r   __repr__6   s    
zCompletionOutput.__repr__)__name__
__module____qualname____doc__int__annotations__strGenericSequencefloatr
   r   npndarrayr   r   r   r   boolr   r    r   r   r   r   r      s   
 r   c                   @   s<   e Zd ZU dZejed< defddZde	de
fddZd	S )
PoolingOutputznThe output data of one pooling output of a request.

    Args:
        data: The extracted hidden states.
    datar   c                 C      d| j  dS )NzPoolingOutput(data=r   )r.   r   r   r   r   r    M      zPoolingOutput.__repr__otherc                 C   s    t || jot| j|jk S r   )
isinstance	__class__r,   r.   all)r   r1   r   r   r   __eq__P   s   zPoolingOutput.__eq__N)r!   r"   r#   r$   torchTensorr&   r'   r    objectr,   r5   r   r   r   r   r-   C   s
   
 
r-   c                    @   s   e Zd ZdZ					dddddededB dee dB dedB dee d	e	d
e
dB dedB dedB dee dB dedB dedB deeef dB deddfddZdd de	ddfddZdefddZdS )RequestOutputak  The output data of a completion request to the LLM.

    Args:
        request_id: The unique ID of the request.
        prompt: The prompt string of the request.
                For encoder/decoder models, this is the
                decoder input prompt.
        prompt_token_ids: The token IDs of the prompt.
                          For encoder/decoder models, this is the
                          decoder input prompt token ids.
        prompt_logprobs: The log probabilities to return per prompt token.
        outputs: The output sequences of the request.
        finished: Whether the whole request is finished.
        metrics: Metrics associated with the request.
        lora_request: The LoRA request that was used to generate the output.
        encoder_prompt: The encoder prompt string of the request.
                        None if decoder-only.
        encoder_prompt_token_ids: The token IDs of the encoder prompt.
                                  None if decoder-only.
        num_cached_tokens: The number of tokens with prefix cache hit.
        kv_transfer_params: The params for remote K/V transfer.
    N)multi_modal_placeholderskv_transfer_params
request_idpromptprompt_token_idsprompt_logprobsoutputsr   metricsr   encoder_promptencoder_prompt_token_idsnum_cached_tokensr:   r;   kwargsr   c                K   sj   |r
t dt| || _|| _|| _|pi | _|| _|| _|| _	|| _
|| _|	| _|
| _|| _|| _d S )Nz+RequestOutput: Ignoring extra arguments: %s)loggerwarning_oncer'   r<   r=   r>   r:   r?   r@   r   rA   r   rB   rC   rD   r;   )r   r<   r=   r>   r?   r@   r   rA   r   rB   rC   rD   r:   r;   rE   r   r   r   __init__n   s"   

zRequestOutput.__init__next_output	aggregatec                 C   s   |  j |j O  _ |j| _|jD ]Z}t| jD ]L\}}|j|jkrb|r[| j|j7  _t|jts6t	|j|_|j
|j |jrN|jdusGJ |j
|j |j|_|j|_|j|_n|| j|<  nq| j| qdS )z,Merge subsequent RequestOutput into this oneN)r   r;   r@   	enumerater   r   r2   r   r   listextendr   r   r   r   append)r   rI   rJ   next_completioni
completionr   r   r   add   s.   


zRequestOutput.addc                 C   sf   d| j  d| jd| j d| jd| j d| j d| j d| j d	| j d
| j	 d| j
 d| j dS )NzRequestOutput(request_id=z	, prompt=, prompt_token_ids=z, encoder_prompt=z, encoder_prompt_token_ids=z, prompt_logprobs=
, outputs=, finished=z
, metrics=z, lora_request=, num_cached_tokens=z, multi_modal_placeholders=r   )r<   r=   r>   rB   rC   r?   r@   r   rA   r   rD   r:   r   r   r   r   r       s0   
	
zRequestOutput.__repr__)NNNNN)r!   r"   r#   r$   r'   rL   r%   r	   r   r,   r   r   r   dictr   rH   rR   r    r   r   r   r   r9   V   sV    
	


&r9    T)r<   r=   r>   r?   r@   r   _O)defaultc                
   @   s:   e Zd ZdZdededee dedef
ddZ	d	d
 Z
dS )PoolingRequestOutputa  
    The output data of a pooling request to the LLM.

    Args:
        request_id (str): A unique identifier for the pooling request.
        outputs (PoolingOutput): The pooling results for the given input.
        prompt_token_ids (list[int]): A list of token IDs used in the prompt.
        num_cached_tokens: The number of tokens with prefix cache hit.
        finished (bool): A flag indicating whether the pooling is completed.
    r<   r@   r>   rD   r   c                 C   s"   || _ || _|| _|| _|| _d S r   )r<   r>   rD   r   r@   )r   r<   r@   r>   rD   r   r   r   r   rH      s
   
zPoolingRequestOutput.__init__c                 C   s8   t | j d| jd| jd| j d| j d| j dS )Nz(request_id=rT   rS   rV   rU   r   )typer!   r<   r@   r>   rD   r   r   r   r   r   r       s   zPoolingRequestOutput.__repr__N)r!   r"   r#   r$   r'   rY   rL   r%   r,   rH   r    r   r   r   r   r[      s    
r[   c                   @   P   e Zd ZU dZee ed< edefddZ	e
defddZdefd	d
ZdS )EmbeddingOutputzThe output data of one embedding output of a request.

    Args:
        embedding: The embedding vector, which is a list of floats.
            Its length depends on the hidden dimension of the model.
    	embeddingpooling_outputc                 C   $   | j }|jdkrtdt| S )N   z,pooled_data should be a 1-D embedding vector)r.   ndim
ValueErrorr^   tolistr`   pooled_datar   r   r   	from_base   s   
zEmbeddingOutput.from_baser   c                 C   
   t | jS r   )lenr_   r   r   r   r   hidden_size     
zEmbeddingOutput.hidden_sizec                 C   r/   )NzEmbeddingOutput(hidden_size=r   )rk   r   r   r   r   r      r0   zEmbeddingOutput.__repr__N)r!   r"   r#   r$   rL   r)   r&   staticmethodr-   rh   propertyr%   rk   r'   r    r   r   r   r   r^      s   
 r^   c                   @      e Zd ZedefddZdS )EmbeddingRequestOutputrequest_outputc                 C   "   t | jt| j| j| j| jdS N)r<   r@   r>   rD   r   )rp   r<   r^   rh   r@   r>   rD   r   rq   r   r   r   rh        
z EmbeddingRequestOutput.from_baseNr!   r"   r#   rm   r[   rh   r   r   r   r   rp         rp   c                   @   r]   )ClassificationOutputzThe output data of one classification output of a request.

    Args:
        probs: The probability vector, which is a list of floats.
            Its length depends on the number of classes.
    probsr`   c                 C   ra   )Nrb   z.pooled_data should be a 1-D probability vector)r.   rc   rd   rx   re   rf   r   r   r   rh   &  s   
zClassificationOutput.from_baser   c                 C   ri   r   )rj   ry   r   r   r   r   num_classes/  rl   z ClassificationOutput.num_classesc                 C   r/   )Nz!ClassificationOutput(num_classes=r   )rz   r   r   r   r   r    3  r0   zClassificationOutput.__repr__N)r!   r"   r#   r$   rL   r)   r&   rm   r-   rh   rn   r%   rz   r'   r    r   r   r   r   rx     s   
 rx   c                   @   ro   )ClassificationRequestOutputrq   c                 C   rr   rs   )r{   r<   rx   rh   r@   r>   rD   r   rt   r   r   r   rh   8  ru   z%ClassificationRequestOutput.from_baseNrv   r   r   r   r   r{   7  rw   r{   c                   @   s:   e Zd ZU dZeed< edefddZde	fddZ
d	S )
ScoringOutputzThe output data of one scoring output of a request.

    Args:
        score: The similarity score, which is a scalar value.
    scorer`   c                 C   s(   | j  }|jdkrtdt| S )Nr   z$pooled_data should be a scalar score)r.   squeezerc   rd   r|   itemrf   r   r   r   rh   M  s   

zScoringOutput.from_baser   c                 C   r/   )NzScoringOutput(score=r   )r}   r   r   r   r   r    X  r0   zScoringOutput.__repr__N)r!   r"   r#   r$   r)   r&   rm   r-   rh   r'   r    r   r   r   r   r|   C  s   
 
r|   c                   @   ro   )ScoringRequestOutputrq   c                 C   rr   rs   )r   r<   r|   rh   r@   r>   rD   r   rt   r   r   r   rh   ]  ru   zScoringRequestOutput.from_baseNrv   r   r   r   r   r   \  rw   r   )'collections.abcr   r   r(   dataclassesr   typingr   r   numpyr*   r6   typing_extensionsr   vllm.loggerr   vllm.logprobsr	   r
   vllm.lora.requestr   vllm.multimodal.inputsr   vllm.v1.metrics.statsr   r!   rF   r   r-   r9   STREAM_FINISHEDrY   r[   r^   rp   rx   r{   r|   r   r   r   r   r   <module>   sJ   ,n	$