o
    i3                     @   s  d dl mZmZ d dlZd dlmZ d dlmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZ d d	lmZmZ d d
lmZ d dlmZ eeZeejedkZdZG dd dZG dd deeZG dd deZG dd deZde de!de"e  de#de$e e!f dB f
ddZ%dS )    )ABCabstractmethodN)version)	Tokenizer)DecodeStream)PreTrainedTokenizerFast)init_logger)TokenizerLike)convert_prompt_ids_to_tokensdetokenize_incrementally)&length_from_prompt_token_ids_or_embeds)EngineCoreRequestz0.21.1zInvalid prefix encounteredc                   @   s|   e Zd Zdd Zedee fddZdee dede	dB fd	d
Z
dedede	fddZededB dedd fddZdS )IncrementalDetokenizerc                 C   s
   g | _ d S N	token_idsself r   P/home/ubuntu/vllm_env/lib/python3.10/site-packages/vllm/v1/engine/detokenizer.py__init__   s   
zIncrementalDetokenizer.__init__returnc                 C   s   | j S r   r   r   r   r   r   output_token_ids"   s   z'IncrementalDetokenizer.output_token_idsnew_token_idsstop_terminatedNc                 C   s   | j | d S r   )r   extend)r   r   r   r   r   r   update&   s   zIncrementalDetokenizer.updatefinisheddeltac                 C   s   dS N r   )r   r   r   r   r   r   get_next_output_text*   s   z+IncrementalDetokenizer.get_next_output_text	tokenizerrequestc                 C   s>   |j d usJ |d u rt S trt|trt||S t||S r   )sampling_paramsr   USE_FAST_DETOKENIZER
isinstancer   FastIncrementalDetokenizerSlowIncrementalDetokenizer)clsr"   r#   r   r   r   from_new_request-   s   

z'IncrementalDetokenizer.from_new_request)__name__
__module____qualname__r   propertylistintr   boolstrr   r!   classmethodr	   r   r*   r   r   r   r   r      s    r   c                       sl   e Zd Zdef fddZdee dededB fdd	Z	e
d
edefddZdededefddZ  ZS )BaseIncrementalDetokenizerr#   c                    s   t    |j}|d usJ |jd u rg }nt|jtr!|jg}n|j}|| _|j| _|j| _| jrC| jsCtdd | jD d | _	nd| _	d| _
d| _d S )Nc                 s   s    | ]}t |V  qd S r   )len).0sr   r   r   	<genexpr>V   s    z6BaseIncrementalDetokenizer.__init__.<locals>.<genexpr>   r   r    )superr   r$   stopr&   r2   
min_tokensinclude_stop_str_in_outputmaxstop_buffer_length_last_output_text_offsetoutput_text)r   r#   params	stop_list	__class__r   r   r   B   s    



z#BaseIncrementalDetokenizer.__init__r   r   r   Nc           	      C   s   |sdS |r| j s|d }|dd }nd}t| j}|D ]"}| j| |  j| |7  _| jr?t| j| jkr?t| j}q|durJ| j| d}| jr{t| j| jkr{t	| jt| j| | j| j d}|dur{|\}}|dkr{| jd| | _|S )z
        Update RequestState for the request_id by:
            1) Detokenize the new token ids incrementally.
            2) Evaluate stop criteria.

        Return matched stop string or None.
        N)rA   new_char_countr;   include_in_output)
r=   r5   rA   r   appenddecode_nextr<   r   r;   check_stop_strings)	r   r   r   skipped_stop_token_idstop_check_offsetnew_token_idstop_stringr;   truncate_tor   r   r   r   ^   s8   


z!BaseIncrementalDetokenizer.updatenext_token_idc                 C   s   t r   )NotImplementedError)r   rQ   r   r   r   rJ      s   z&BaseIncrementalDetokenizer.decode_nextr   r   c                 C   s`   |rdn| j }|s|r| jd|  S | jS t| j| }| j}||k r.|| _| j|| S dS )zVIf delta is True, only new text since the last call to
        this method is returnedr   Nr    )r?   rA   r5   r@   )r   r   r   buffer_lengthlengthlast_offsetr   r   r   r!      s   z/BaseIncrementalDetokenizer.get_next_output_text)r+   r,   r-   r   r   r/   r0   r1   r2   r   r   rJ   r!   __classcell__r   r   rD   r   r4   A   s    3r4   c                       sN   e Zd Zdedef fddZdedefddZdeded	B fd
dZ	  Z
S )r'   r"   r#   c                    s"  t  | |j}|d usJ |j| _|j| _t| jd| _|j| _|j	p&g }|}t
|}|dkrQtdt|d dD ]}|| d  }d| j|vrP|} nq;|D ]}	| |	 qS|jp`|j| _| jst| jdd  }
d u rdd | j  D  | j_}
|
rd	| _|
| _d S d
| _d S d S )Nskip_special_tokens   r9      u   �added_token_idsc                 S   s   i | ]\}}||j qS r   )content)r6   tidtokr   r   r   
<dictcomp>   s    z7FastIncrementalDetokenizer.__init__.<locals>.<dictcomp>FT)r:   r   r$   
request_idrX   r   stream
_tokenizerr"   prompt_token_idsr5   rangemindecode_protected_stepspaces_between_special_tokensgetattrget_added_tokens_decoderitemsr[   last_special)r   r"   r#   r$   rc   prompt_suffix
prompt_lenisuffixr]   r[   rD   r   r   r      sD   


z#FastIncrementalDetokenizer.__init__rQ   r   c                 C   s@   |  |}| js| j|}|d u}|r| jr|}|| _|pdS r   )rg   rh   r[   getrl   )r   rQ   tokenspecial_token
is_specialr   r   r   rJ      s   

z&FastIncrementalDetokenizer.decode_nextNc              
   C   s   z| j | j|}W |S  ttfy   td| d }Y |S  tyP } z&t|	t
s/|td| j t| jd| _ | j | j|}W Y d }~|S d }~ww )Nz Encountered invalid token id: %rzXEncountered invalid prefix detokenization error for request %s, resetting decode stream.rW   )ra   stepr"   OverflowError	TypeErrorlogger	exception	Exceptionr2   
startswithINVALID_PREFIX_ERR_MSGwarningr`   r   rX   )r   rQ   rr   er   r   r   rg      s&   z*FastIncrementalDetokenizer._protected_step)r+   r,   r-   r   r   r   r0   r2   rJ   rg   rV   r   r   rD   r   r'      s    2r'   c                       sN   e Zd Zdedef fddZedee fddZ	dede
fd	d
Z  ZS )r(   r"   r#   c                    s   t  | || _|j}|d usJ t|j|j| _|jd ur/t||j|j	d\| _
| _| _ndg| j | _
d| _d| _| j|jpGdg| j  |j	| _	|j| _d S )N)r"   
prompt_idsrX   r    r   )r:   r   r"   r$   r   rc   prompt_embedsrn   r
   rX   tokensprefix_offsetread_offsetr   r   rh   )r   r"   r#   rB   rD   r   r   r     s(   
	z#SlowIncrementalDetokenizer.__init__r   c                 C   s   | j s| jS | j| j d  S r   )rn   r   r   r   r   r   r   "  s
   z+SlowIncrementalDetokenizer.output_token_idsrQ   c              	   C   sH   t | j| j| j| j| j| j| jd\}}}}| j| || _|| _|S )N)r"   all_input_idsprev_tokensr   r   rX   rh   )	r   r"   r   r   r   r   rX   rh   r   )r   rQ   
new_tokensdecoded_textr   r   r   r   r   rJ   *  s   
z&SlowIncrementalDetokenizer.decode_next)r+   r,   r-   r	   r   r   r.   r/   r0   r   r2   rJ   rV   r   r   rD   r   r(     s
    r(   rA   rG   r;   rH   r   c                 C   sn   |r|sdS |D ],}t |}| |d| | }|dkrq|r/||7 }|t | kr/|df  S ||f  S dS )a5  Check if any stop strings are matched and truncate sequence
    output text accordingly.

    Returns tuple (stop_string, offset) if matched or else None.

    Where stop_string is the matched stop string and offset is the
    length to which output_text should be truncated, or -1 for no
    truncation.
    Nr9   rF   )r5   find)rA   rG   r;   rH   stop_strstop_string_len
stop_indexr   r   r   rK   <  s   rK   )&abcr   r   
tokenizers	packagingr   r   tokenizers.decodersr   transformersr   vllm.loggerr   vllm.tokenizersr	   !vllm.tokenizers.detokenizer_utilsr
   r   
vllm.utilsr   vllm.v1.enginer   r+   rx   parse__version__r%   r|   r   r4   r'   r(   r2   r0   r/   r1   tuplerK   r   r   r   r   <module>   s:   #hY: