o
    c۷i                  	   @   s   d dl Z d dlZd dlmZ d dlmZmZmZmZ d dl	Z
d dlmZ d dlmZmZmZmZ d dlmZ d dlmZmZmZmZmZmZmZmZ d dlmZmZm Z m!Z!m"Z"m#Z# d d	l$m%Z%m&Z& d d
l'm(Z(m)Z) d dl*m+Z+ d dl,m-Z-m.Z. d dl/m0Z0m1Z1 d dl2m3Z3m4Z4m5Z5m6Z6m7Z7m8Z8m9Z9m:Z:m;Z; d dl<m=Z= d dl>m?Z? G dd de5ee4e3e9e f Z@G dd de@ee4e3e9e f ZAG dd deAee4e3e9e f ZBG dd deBee4e3e9e f ZCG dd deCZDG dd deDZEG dd deEZFG dd  d eFZGdS )!    N)abstractmethod)AnyGenericSequenceoverload)Audio) InvalidAssistantMessageException InvalidMessageStructureExceptionInvalidRequestExceptionTokenizerException)
FIMRequest)
AudioChunkAudioURLChunkContentChunk
ImageChunkImageURLChunk	TextChunk
ThinkChunkUserContentChunk)UATSAssistantMessageAssistantMessageTypeSystemMessageToolMessageUserMessage)InstructRequestModelSettings)ToolToolCall)SpeechRequest)StreamingModeTranscriptionRequest)AudioEncoderTranscriptionFormat)	FIMRequestTypeInstructRequestTypeInstructTokenizerSpecialTokenPolicySpecialTokens	TokenizedTokenizedType	TokenizerUserMessagePosition)ImageEncoder)
Tekkenizerc                
       st  e Zd ZdZ		d*dededB dedB f fddZededB fd	d
Z	de
e fddZededeeef fddZededede
e fddZedededede
e fddZedede
e fddZde
e
e dB  de
e dededdf
ddZede
e ddfd d!Zdeeef defd"d#Ze j!fd$e
e d%e de"fd&d'Z#d$e
e de"fd(d)Z$  Z%S )+InstructTokenizerBasezBase instruct tokenizer.N	tokenizerimage_encoderaudio_encoderc                    s&   || _ || _|| _t ||| dS )zInitialize the instruct tokenizer.

        Args:
            tokenizer: The tokenizer to use.
            image_encoder: The image encoder to use if any.
            audio_encoder: The audio encoder to use.
        N)r0   r1   r2   super__init__selfr0   r1   r2   	__class__ _/home/ubuntu/vllm_env/lib/python3.10/site-packages/mistral_common/tokens/tokenizers/instruct.pyr4   ;   s   zInstructTokenizerBase.__init__returnc                 C   s   | j S N)r1   r6   r9   r9   r:   
mm_encoderM   s   z InstructTokenizerBase.mm_encoderc                 C   s
   | j jgS )zReturn the start tokens.)r0   bos_idr=   r9   r9   r:   startT   s   
zInstructTokenizerBase.startrequestc                 C   sB   d}d}t t| jD ]\}}t|tr|dkr|}|}q||fS )zFind the first and last user message in the request.

        Args:
            request: The request to search for user messages.

        Returns:
            The index of the first and last user message.
        )list	enumeratemessages
isinstancer   )rA   last_user_idxfirst_user_idximsgr9   r9   r:   find_first_last_userX   s   

z*InstructTokenizerBase.find_first_last_usermessageis_before_last_user_messagec                 C      t d)zEncode a tool message.

        Raises:
            NotImplementedError: The tool message is not implemented for the base tokenizer.
        zTool message not implementedNotImplementedErrorr6   rL   rM   r9   r9   r:   encode_tool_messagek      z)InstructTokenizerBase.encode_tool_messagecontinue_messagec                 C   rN   )zEncode an assistant message.

        Raises:
            NotImplementedError: The assistant message is not implemented for the base tokenizer.
        z!Assistant message not implementedrO   r6   rL   rM   rT   r9   r9   r:   encode_assistant_messaget   s   	z.InstructTokenizerBase.encode_assistant_messagechunkc                 C   rN   )zEncode a think chunk.

        Raises:
            NotImplementedError: The think chunk is not implemented for the base tokenizer.
        zThink chunk not implementedrO   r6   rW   r9   r9   r:   encode_think   rS   z"InstructTokenizerBase.encode_think	tokenizedrE   
max_tokenslast_user_message_indexc                 C      d S r<   r9   )r6   rZ   rE   r[   r\   r9   r9   r:   _truncate_for_max_tokens   s   z.InstructTokenizerBase._truncate_for_max_tokensc                 C   r]   r<   r9   clsrE   r9   r9   r:   validate_messages      z'InstructTokenizerBase.validate_messagesc              
   C   s  g }g }d}g }|  |j | |\}}t|jD ]\}}	|jr3|t|jd kr3t|	ts3tdt|	t	rY| j
|	|j||k||k|jd|jd\}
}}|| || nIt|	trg| |	||k }
n;t|	tr|jow|t|jd k}| j|	||k |d}
|t|jd kr|
}nt|	tr| |	}
n	tdt|	 ||
 q|jdur| ||j|j| |  }|D ]}|dur|| qt|| j|tjd|||d	S )
zEncode an instruct request.

        Args:
            request: The request to encode.

        Returns:
            The encoded tokens.
        N   z?Cannot continue final message if it is not an assistant messageT)system_promptforce_img_firstsettings)rT   zUnknown message type special_token_policy)tokenstext
prefix_idsimagesaudios)ra   rE   rK   rD   continue_final_messagelenrF   r   r	   r   encode_user_messageavailable_toolsrd   rf   extendr   rR   rV   r   encode_system_messager   typeappendtruncate_at_max_tokensr^   r@   r)   decoder'   KEEP)r6   rA   rl   rm   rk   tokens_listrH   rG   msg_idxrJ   
new_tokens
new_images
new_audiosrT   ri   tokr9   r9   r:   encode_instruct   sx   

	





z%InstructTokenizerBase.encode_instructri   rh   c                 C   s   | j j||dS )zDecode tokens to a string.

        Args:
            tokens: The tokens to decode.
            special_token_policy: The policy to use for special tokens.

        Returns:
            The decoded string.
        rg   )r0   rw   )r6   ri   rh   r9   r9   r:   rw      s   
zInstructTokenizerBase.decodec                 C   s   | j |S r<   )r0   
_to_string)r6   ri   r9   r9   r:   r      s   z InstructTokenizerBase._to_stringNN)&__name__
__module____qualname____doc__r+   r-   r"   r4   propertyr>   rC   intr@   staticmethodr   tuplerK   r   r   boolrR   r   rV   r   rY   r^   classmethodr   ra   r   r)   r   r'   IGNOREstrrw   r   __classcell__r9   r9   r7   r:   r/   6   sd    




 Rr/   c                   @   sL  e Zd ZdZdde fdedee dB de	de	de
dB d	e	d
edeee eej ee f fddZdedee fddZ		d#de
ee B de	de
dB d	e	deee eej ee f f
ddZdede	dee fddZdede	de	dee fddZdedee fddZdedefddZdedefdd Zde defd!d"Z!dS )$InstructTokenizerV1zrInstruct tokenizer V1.

    This tokenizer has basic for messages. It does not support tools or image inputs.
    NFrL   rq   is_lastis_firstrd   re   rf   r;   c                 C   st   t |jts
J d| jdu sJ dd}|r!|r!|d |j }n|j}d| d}	| j|	ddd	\}
}}|
||fS )
ar  Encode a user message.

        Args:
            message: The message to encode.
            available_tools: Not used.
            is_last: Not used.
            is_first: Whether the message is the first one.
            system_prompt: The system prompt.
            force_img_first: Not used.

        Returns:
            The encoded tokens and empty list.
        "Message content must be normalizedNz(InstructTokenizerV1 cannot encode images 

z[INST] z [/INST]F)contentr   rd   )rF   r   r   r1   encode_user_content)r6   rL   rq   r   r   rd   re   rf   r   message_txtcurr_tokensimageaudior9   r9   r:   rp     s   
z'InstructTokenizerV1.encode_user_messagec                 C      t d| jj )Nz,System message encoding not implemented for )rP   r8   r   r6   rL   r9   r9   r:   rs   %     z)InstructTokenizerV1.encode_system_messager   c                 C   s>   t |tsJ |r|r|d | }| jj|ddd}|g g fS )a*  Encode a user content.

        Args:
            content: The content to encode.
            is_last: Whether the message is the last one.
            system_prompt: The system prompt.
            force_img_first: Not used.

        Returns:
            The encoded tokens and empty list.
        r   Fboseos)rF   r   r0   encode)r6   r   r   rd   re   ri   r9   r9   r:   r   (  s
   
z'InstructTokenizerV1.encode_user_contentrM   c                 C   rN   )zEncode a tool message.

        Raises:
            TokenizerException: The tool message is not implemented for this version.
        &Tools not implemented for tokenizer V1r   rQ   r9   r9   r:   rR   B     z'InstructTokenizerV1.encode_tool_messagerT   c                 C   s   t |ts	J ||jdurt|jdkrtd|r"|jr"td|jr:t |jts/J d| j	j
|jddd}nt|j d|j |jsQ|sQ|| j	j |S )	[  Encode an assistant message.

        Args:
            message: The message to encode.
            is_before_last_user_message: Not used.
            continue_message: Whether to continue the message generation.
                Only use this if the assistant message is the last message.

        Returns:
            The encoded tokens.
        Nr   r   U`continue_message` is only supported for assistant messages that have `prefix=False`.z4Message content must be a string for tokenizer < V13Fr   z // )rF   r   
tool_callsro   r   prefixr   r   r   r0   r   ru   eos_idr6   rL   rM   rT   r   r9   r9   r:   rV   J  s   

z,InstructTokenizerV1.encode_assistant_messagerW   c                 C   rN   )zEncode a think chunk.

        Raises:
            TokenizerException: The think chunk is not implemented for this version.
        z*Think not implemented for tokenizer < V13.r   rX   r9   r9   r:   rY   h  r   z InstructTokenizerV1.encode_thinkrA   c                 C   r   )zEncode a FIM request.

        Raises:
           TokenizerException: The FIM request is not implemented for this version.
        zFIM not available for r   r0   versionr6   rA   r9   r9   r:   
encode_fimp  s   zInstructTokenizerV1.encode_fimc                 C   r   )Nz Transcription not available for r   r   r9   r9   r:   encode_transcriptionx  r   z(InstructTokenizerV1.encode_transcriptionc                 C   s   t d| jjj )Nz+Speech request not available for tokenizer )r   r0   r   valuer   r9   r9   r:   encode_speech_request{  s   z)InstructTokenizerV1.encode_speech_requestNF)"r   r   r   r   r   noner   rC   r   r   r   r   r   npndarrayr   rp   r   rs   r   r   r   rR   r   rV   r   rY   r   r)   r   r!   r   r   r   r9   r9   r9   r:   r      sd    
	
$


r   c                       s  e Zd ZdZejZ		d0dededB de	dB f fddZ
dde fd	ed
ee dB dedededB dededeee eej ee f fddZdedefddZdeee B defddZdedeeef fddZd	ededee fddZde deeef fdd Z!d	e"dee fd!d"Z#d	e"dee fd#d$Z$dedee fd%d&Z%d	e"ded'edee fd(d)Z&d*edee fd+d,Z'd-e(de)fd.d/Z*  Z+S )1InstructTokenizerV2z`Instruct tokenizer V2.

    This tokenizer adds supports to images, tools and FIM requests.
    Nr0   r1   r2   c                    s   t  ||| | jtjj| _| jtjj| _	| jtj
j| _| jtjj| _| jtjj| _| jtjj| _| jtjj| _| jtjj| _| jtjj| _| jtjj| _dS Initialize the tokenizer.

        Args:
            tokenizer: The tokenizer to use.
            image_encoder: The image encoder to use.
            audio_encoder: The audio encoder to use.
        N)r3   r4   r0   get_special_tokenr(   
begin_instr   
BEGIN_INSTend_instEND_INSTbegin_toolsBEGIN_AVAILABLE_TOOLS	end_toolsEND_AVAILABLE_TOOLSbegin_tool_resultsBEGIN_TOOL_RESULTSend_tool_resultsEND_TOOL_RESULTSr   
TOOL_CALLSr   BOSr   PREFIXsuffixSUFFIXr5   r7   r9   r:   r4     s   zInstructTokenizerV2.__init__FrL   rq   r   r   rd   re   rf   r;   c                 C   s   d}||o
| j tjkO }||o| j tjkO }g }	|r=|r=dd |D }
| jjtj|
ddddd}|	| j	g|| j
 |rH|	| j|d | j|j|||d\}}}g |	| j}| jg}|| | }|||fS )a  Encode a user message.

        Args:
            message: The message to encode.
            available_tools: The list of available tools if any.
            is_last: Whether the message is the last one.
            is_first: Not used.
            system_prompt: The system prompt.
            force_img_first: Whether to force the image to be first.

        Returns:
            The encoded tokens and the list of images.
        Fc                 S   s    g | ]}|j d ddiidqS )functionstrictT)exclude)
model_dump).0toolr9   r9   r:   
<listcomp>  s     z;InstructTokenizerV2.encode_user_message.<locals>.<listcomp>ensure_asciir   )rf   )r   r   rd   re   )*_message_position_to_encode_tools_settingsr,   firstlastr0   r   jsondumpsrr   r   r   _encode_settingsr   r   r   r   )r6   rL   rq   r   r   rd   re   rf   do_encode_tools_settingstools_settings_tokenstoolstools_json_tokensri   r   r   prefix_tokenssuffix_tokensr   r9   r9   r:   rp     s>   


z'InstructTokenizerV2.encode_user_messager   c                 C   s&   zt |W S  t jy   | Y S w r<   )r   loadsJSONDecodeErrorr6   r   r9   r9   r:   _parse_json_content  s
   z'InstructTokenizerV2._parse_json_contentc                 C   s(   t |trddd |D }| |S )Nr   c                 s       | ]}|j V  qd S r<   rj   r   rW   r9   r9   r:   	<genexpr>      z:InstructTokenizerV2._parse_tool_content.<locals>.<genexpr>)rF   rC   joinr   r   r9   r9   r:   _parse_tool_content  s   

z'InstructTokenizerV2._parse_tool_contenttool_messagec                 C   s   |j | |jdS )z8Bit of a hack due to the way tool results are tokenized.)namer   )r   r   r   r6   r   r9   r9   r:   _prepare_tool_result  s   
z(InstructTokenizerV2._prepare_tool_resultrM   c                 C   sB   |rg S t j| |gdd}| jg| jj|ddd| j}|S )a  Encode a tool message.

        Args:
            message: The message to encode.
            is_before_last_user_message: Whether the message is before the last user message. If true, the message is
                not encoded.

        Returns:
            The encoded tokens.
        Fr   r   r   r   r   r   r0   r   r   r6   rL   rM   tool_result_strr   r9   r9   r:   rR     s   z'InstructTokenizerV2.encode_tool_message	tool_callc                 C   s   |j j| |j jdS )z:Bit of a hack due to the way function calls are tokenized.r   	arguments)r   r   r   r   )r6   r   r9   r9   r:   _prepare_function_call  s   z*InstructTokenizerV2._prepare_function_callc                 C   sB   |j s
J d| t|j tsJ d| jj|j ddddS )Nz)Assistant message must have content. Got 3Message content must be a string for tokenizer < V7 Fr   )r   rF   r   r0   r   rstripr   r9   r9   r:   (_encode_normal_content_assistant_message  s   z<InstructTokenizerV2._encode_normal_content_assistant_messagec                 C   s`   |j s
J d| g }|j D ]
}|| | qtj|dd}| jg| jj|ddd}|S )N,Assistant message must have tool calls. Got Fr   r   )r   ru   r   r   r   r   r0   r   )r6   rL   prepared_tool_callsr   tool_call_strr   r9   r9   r:   '_encode_tool_calls_in_assistant_message  s   
z;InstructTokenizerV2._encode_tool_calls_in_assistant_messagec                 C   s   | j jdu s
J dg S )z?Encode model settings as tokens. Returns empty list by default.Nz8`model_settings_builder` not supported for this version.)r0   model_settings_builder)r6   rf   r9   r9   r:   r      s   z$InstructTokenizerV2._encode_settingsrT   c                 C   s   |j r|jrtd| |r|jrtd|j r#|rg S | |}n|jr6t|jts0J d| |}nt	d|j |jsJ|sJ|
| jj |S )a  Encode an assistant message.

        Args:
            message: The message to encode.
            is_before_last_user_message: Whether the message is before the last user message. If has tools and true, the
                message is not encoded.
            continue_message: Whether to continue the message generation.
                Only use this if the assistant message is the last message.

        Returns:
            The encoded tokens.
        zICannot have tool calls and content defined in the same assistant message r   r   Invalid assistant message: )r   r   
ValueErrorr   r   r   rF   r   r   r   ru   r0   r   r   r9   r9   r:   rV   (  s"   

z,InstructTokenizerV2.encode_assistant_messagerj   c                 C   s   | j jd| ddddd S )z;Remove prefix space in the case of SentencePieceTokenizers.u   ☺Fr      N)r0   r   )r6   rj   r9   r9   r:   _encode_infillingL  s   z%InstructTokenizerV2._encode_infillingrA   c                 C   s\   | j j|jddd}|jr| |jng }| j| jg|| j|}t|| j	|t
jddS )zEncode a FIM request.

        Args:
            request: The request to encode.

        Returns:
            The encoded tokens.
        Fr   rg   )ri   rj   )r0   r   promptr   r  r   r   r   r)   rw   r'   rx   )r6   rA   r   r   ri   r9   r9   r:   r   Q  s   	zInstructTokenizerV2.encode_fimr   ),r   r   r   r   r,   r   r   r+   r-   r"   r4   r   r   r   rC   r   r   r   r   r   r   r   r   rp   r   r   r   r   r   dictr   rR   r   r   r   r   r   r   rV   r  r   r)   r   r   r9   r9   r7   r:   r     sn    
	
<

$r   c                       s  e Zd ZdZ		d%dededB dedB f fddZded	e	e
ef fd
dZded	e	e
ef fddZdeded	ee fddZdededed	ee f fddZede
eB eB d	eee ddf fddZedeeB d	eee ejdf fddZedeeB d	eee de f fddZde
e!B d	eee ejdB e dB f fddZde"e! d	eee eej ee  f fddZ#		d&de
ee$ B d ed!e
dB d"ed	eee eej ee  f f
 fd#d$Z%  Z&S )'InstructTokenizerV3zxInstruct tokenizer V3.

    The only difference with V2 tokenizer is that it encodes the tool messages differently.
    Nr0   r1   r2   c                    s   t  j|||d dS )r   r1   r2   N)r3   r4   r5   r7   r9   r:   r4   n  s   zInstructTokenizerV3.__init__r   r;   c                 C   s6   |j j| |j jd}|jr|jdkr|j|d< |S )Nr   nullid)r   r   r   r   r  )r6   r   function_callr9   r9   r:   r   }  s   
z*InstructTokenizerV3._prepare_function_callr   c                 C   s&   |j d us	J d| |j|j dS )Nz7Tool message has to have the tool call id defined in v3)r   call_id)tool_call_idr   r   r   r9   r9   r:   r     s   
z(InstructTokenizerV3._prepare_tool_resultrL   rM   c                 C   s8   t j| |dd}| jg| jj|ddd| j}|S )a  Encode a tool message.

        Note:
            Same as [V2][mistral_common.tokens.tokenizers.instruct.InstructTokenizerV2.encode_tool_message] but tools
            are not wrapped in a list and the history is also tokenized.

        Args:
            message: The message to encode.
            is_before_last_user_message: Whether the message is before the last user message. If true, the message is
                not encoded.

        Returns:
            The encoded tokens.
        Fr   r   r   r   r9   r9   r:   rR     s   z'InstructTokenizerV3.encode_tool_messagerT   c                    s   t  |d|S )a  Encode an assistant message.

        Note:
            Same as [V2][mistral_common.tokens.tokenizers.instruct.InstructTokenizerV2.encode_assistant_message] but
            always encode the tool history.
            continue_message: Whether to continue the message generation.
                Only use this if the assistant message is the last message.

        Args:
            message: The message to encode.
            is_before_last_user_message: Not used.

        Returns:
            The encoded tokens.
        F)r3   rV   rU   r7   r9   r:   rV     s   z,InstructTokenizerV3.encode_assistant_messagerW   c                 C   r]   r<   r9   rX   r9   r9   r:   _encode_content_chunk     z)InstructTokenizerV3._encode_content_chunkc                 C   r]   r<   r9   rX   r9   r9   r:   r    r  c                 C   r]   r<   r9   rX   r9   r9   r:   r    r  c                 C   s   t |tr| jj|dddd d fS t |tr#| jj|jdddd d fS t |tr0| |d d fS t |tt	frL| j
d us@J d| 
|}|j|jd fS t |ttfrh| jd us\J d| |}|jd |jfS td| )NFr   z+Make sure to define a image encoder at initz+Make sure to define a audio encoder at initzUnknown chunk type: )rF   r   r0   r   r   rj   r   rY   r   r   r1   ri   r   r   r   r2   r   r   )r6   rW   img_encodingaudio_encodingr9   r9   r:   r    s   




r   c           	      C   s^   g }g }g }|D ]!}|  |\}}}|| |d ur || |d ur)|| q|||fS r<   )r  rr   ru   )	r6   r   ri   rl   r   rW   chunk_tokensmaybe_imagemaybe_audior9   r9   r:   _encode_content_chunks  s   



z*InstructTokenizerV3._encode_content_chunksFr   rd   re   c                    s*  t |trt |||S g }g }g }t|dko!t |d ttf}|r.|r.|d |d g}d}	|D ]]}
d}|	rM|rM|rMd}	|d }|| jj|ddd7 }t |
t	t
frn|r`J d	t|
 d
| |
\}}}|| nt |
ttfr| |
\}}}|| n| |
d }|| q2|||fS )H  Encode a user content.

        Args:
            content: The content to encode.
            is_last: Whether the message is the last one.
            system_prompt: The system prompt.
            force_img_first: Whether to force the image to be first.

        Returns:
            The encoded tokens and the images.
        r  rc   r   Tr   Fr   r   zEIt is not possible that `content` is non-empty when chunk is of type .)rF   r   r3   r   ro   r   r   r0   r   r   r   rt   r  ru   rr   )r6   r   r   rd   re   ri   rl   r   has_one_img_one_text_firstfirst_chunkrW   content_strr  _chunk_audiochunk_imager7   r9   r:   r     s6   

z'InstructTokenizerV3.encode_user_contentr   r   )'r   r   r   r   r+   r-   r"   r4   r   r  r   r   r   r   r   r   rC   r   rR   r   rV   r   r   r   r   r  r   r   r   r   r   r   r   r   r   r  r   r   r   r9   r9   r7   r:   r  f  s`    *(&.

r  c                       s$  e Zd ZdZ		d9dededB dedB ddf fddZd	eee	 dB  d
ee
 de	de	ddf
ddZdedee	 fddZ		d:deee B dededB dedeee	 eej ee f f
 fddZdde fdedee dB dedededB dededeee	 eej ee f f fddZdedefdd Zdedefd!d"Zd;d#eeB d$e dB defd%d&Z!dedefd'd(Z"e#d
ee$ ddfd)d*Z%e&d
ee$ defd+d,Z'de(d-edee	 fd.d/Z)de
d-ed0edee	 fd1d2Z*d3eeB dB d4edB defd5d6Z+de,defd7d8Z-  Z.S )<InstructTokenizerV7a%  Instruct tokenizer V7.

    The difference with V3 tokenizer is that it encodes the system prompts differently:
    - in V7 the system prompts are treated as separate SystemMessages
    - they are no longer prepended to the last user message
    - they are printed between special tokens

    Nr0   r1   r2   r;   c                    sz   t  ||| | jtjj| _| jtjj| _	| jtj
j| _d| _|dur9|jjs;| jtjj| _dS dS dS r   )r3   r4   r0   r   r(   begin_systemr   BEGIN_SYSTEM
end_system
END_SYSTEMbegin_tool_contentBEGIN_TOOL_CONTENT
TRANSCRIBEaudio_configis_streaming
transcriber5   r7   r9   r:   r4   '  s   zInstructTokenizerV7.__init__tokenized_messagesrE   r[   r\   c                    s   t dd D | dtdd f fdd}d}dkrb|tk rb|| |d7 }t|d  trX|tk rXt| tsX|| |d7 }|tk rXt| trCdkrb|tk s%dkrjtd	d S )
Nc                 s   s     | ]}|d urt |V  qd S r<   )ro   )r   tr9   r9   r:   r   J  s    z?InstructTokenizerV7._truncate_for_max_tokens.<locals>.<genexpr>idxr;   c                    sJ   t |  tr	d S |  krd S |  }|d usJ t|8 d | < d S r<   )rF   r   ro   )r)  r~   r\   rE   to_dropr'  r9   r:   dropL  s   z:InstructTokenizerV7._truncate_for_max_tokens.<locals>.dropr   rc   z+Input couldn't fit in truncate_at_max_token)sumr   ro   rF   r   r   )r6   r'  rE   r[   r\   r,  current_idxr9   r*  r:   r^   >  s   
z,InstructTokenizerV7._truncate_for_max_tokensrL   c                 C   sF   | j g}t|j }trt|dg}|| |d 7 }|| j |S )zEncode a system message.

        Args:
            message: The message to encode.

        Returns:
            The encoded tokens.
        r   r   )r  rF   r   r   r   r  ru   r   )r6   rL   ri   r   r9   r9   r:   rs   g  s   
z)InstructTokenizerV7.encode_system_messageFr   r   rd   re   c           	         sz   |du sJ dt |trt |||S t|dko#t |d ttf}|r0|r0|d |d g}| |\}}}|||fS )r  N?in Tokenizer V7 we don't encode system prompts in user messagesr  rc   r   )rF   r   r3   r   ro   r   r   r  )	r6   r   r   rd   re   r  ri   rl   r   r7   r9   r:   r   x  s   

z'InstructTokenizerV7.encode_user_contentrq   r   rf   c              	      s:   |du sJ dt  j||||d||d\}}	}
||	|
fS )a  Encode a user message.

        Args:
            message: The message to encode.
            available_tools: The list of available tools if any.
            is_last: Whether the message is the last one.
            is_first: Whether the message is the first one.
            system_prompt: Not used.
            force_img_first: Whether to force the image to be first.

        Returns:
            The encoded tokens and the list of images.
        Nr/  )r   r   rd   re   rf   )r3   rp   )r6   rL   rq   r   r   rd   re   rf   ri   rl   r   r7   r9   r:   rp     s   

z'InstructTokenizerV7.encode_user_messagerA   c                 C   sd   | j dusJ d| j | j jjtjkr| |S | j jjtjkr'| |S td| j jjd)a  
        Encodes an audio transcription request into a tokenized format.

        This method processes a transcription request containing audio data,
        encodes the user message, and returns the tokenized output.

        Args:
            request: The transcription request object containing
                the audio data to be encoded.

        Returns:
            Tokenized: The tokenized representation of the audio data, including processed audio and tokens
        Nz6Audio encoder must be defined, got self.audio_encoder=zxTranscription format should be one of 'instruct', 'streaming', got self.audio_encoder.audio_config.transcription_format=r  )	r2   r$  transcription_formatr#   INSTRUCT_encode_instruct_transcription	STREAMING_encode_streaming_transcriptionr
   r   r9   r9   r:   r     s   

z(InstructTokenizerV7.encode_transcriptionc                 C   s   |j tjksJ d|j | jd usJ | jj d|  }| jtt	|j
dgdg ddd t d\}}}g ||}|jd urTd|j }|| jj|ddd	7 }|| j t|| j||d
S )Nz=Request must not be in streaming mode, got request.streaming=z! needs to have a TRANSCRIBE token)input_audio)r   T)rq   r   r   rd   rf   zlang:Fr   ri   rj   rm   )	streamingr    DISABLEDr#  r8   r   r@   rp   r   r   r   r   r   languager0   r   ru   r)   r   )r6   rA   r   ri   r  r   language_stringr9   r9   r:   r2    s&   
	
z2InstructTokenizerV7._encode_instruct_transcriptionr   transcription_delay_msc                 C   sj   | j d usJ d| j t|trt|nt|}| j ||}|jd ur,|jgng }t|j	|dS )NFAudio encoder must be defined to encode audio, got self.audio_encoder=ri   rm   )
r2   rF   r   r   from_base64
from_bytesencode_audior   r)   ri   )r6   r   r;  _audio	audio_encrm   r9   r9   r:   _encode_audio  s   
z!InstructTokenizerV7._encode_audioc                 C   s<  |j tjkr| |jj|j}|  |j }|j	}nw|j tj
kr| jd us'J | j|j\}}||g}t|jjdkrhtd|j dt t|jjtsPJ t|jj}tt|j|jf|j|jg}n|jjrpJ d| jd us}J d| j|  | j|j }ntd|j t|| j|tjd|dS )	Nr   z%Passing audio with request.streaming=zf is deprecated. Make sure to not pass any audio to `TranscriptionRequest` when doing online streaming.z~For online streaming, no audio bytes should be passed in the first request. Audio buffering is taken care of directly by vLLM.r<  z9Request must be in streaming mode, got request.streaming=rg   r6  ) r7  r    OFFLINErC  r   datatarget_streaming_delay_msr@   ri   rm   ONLINEr2   get_padding_audioro   warningswarnFutureWarningrF   r   r   r>  r   concatenateaudio_arraysampling_rateformatencode_streaming_tokensr   r)   rw   r'   rx   )r6   rA   rZ   ri   rm   left_pad	right_padrequest_audior9   r9   r:   r4    sD   

z3InstructTokenizerV7._encode_streaming_transcriptionc                 C   s,   |  |rtdd |D rtddS dS )zKValidates that system prompts and audio chunks are not used together in v7.c                 s       | ]}t |tV  qd S r<   )rF   r   r   rL   r9   r9   r:   r   1      z8InstructTokenizerV7.validate_messages.<locals>.<genexpr>z9System messages are not yet allowed when audio is presentN)
_has_audioanyr   r_   r9   r9   r:   ra   -  s
   
z%InstructTokenizerV7.validate_messagesc                 C   s   t dd | D S )Nc                 s   s:    | ]}t |tot |jtotd d |jD V  qdS )c                 s   rT  r<   )rF   r   r   r9   r9   r:   r   9  rV  z;InstructTokenizerV7._has_audio.<locals>.<genexpr>.<genexpr>N)rF   r   r   rC   rX  rU  r9   r9   r:   r   6  s    


z1InstructTokenizerV7._has_audio.<locals>.<genexpr>)rX  )rE   r9   r9   r:   rW  4  s   zInstructTokenizerV7._has_audiorM   c                 C   sr   |j dusJ t|jtsJ d| jj|j ddd}| jj|jddd}| jg|| j}g ||| j}|S )a  Encode a tool message.

        Note:
            Same as [V3][mistral_common.tokens.tokenizers.instruct.InstructTokenizerV3.encode_tool_message]
            but tools are not wrapped in a list and history is also tokenized

        Args:
            message: The message to encode.
            is_before_last_user_message: Not used.

        Returns:
            The encoded tokens.
        Nr   Fr   )	r  rF   r   r   r0   r   r   r"  r   )r6   rL   rM   tool_call_id_tokensri   r   r   r9   r9   r:   rR   =  s$   z'InstructTokenizerV7.encode_tool_messagerT   c                 C   s   |j s|jstd| |r|jrtdg }|j r7t|j tr'| |}nt|j tr7|| 	|j d 7 }|jrA|| 
|7 }|jsM|sM|| jj |S )r   r   r   r   )r   r   r   r   r   rF   r   r   rC   r  r   ru   r0   r   r   r9   r9   r:   rV   \  s"   

z,InstructTokenizerV7.encode_assistant_message	ref_audiovoicec                 C   s   |dus|dusJ d|d|| j dusJ d| j d}|dur4t|tr/t|nt|}| j ||}t|j|j	durI|j	gdS g dS )a7  Encode reference audio or voice preset into a Tokenized object.

        Args:
            ref_audio: Base64-encoded string or raw bytes of reference audio, or None.
            voice: Preset voice name, or None.

        Returns:
            Tokenized object with audio tokens and optional audio data.
        NzIEither ref_audio or voice must be defined to encode audio, got ref_audio=z and voice=r<  r=  )
r2   rF   r   r   r>  r?  encode_audio_for_speech_requestr)   ri   r   )r6   rZ  r[  rA  rB  r9   r9   r:    _encode_audio_for_speech_request~  s    

z4InstructTokenizerV7._encode_audio_for_speech_requestc                 C   s   | j dusJ d| j |  }t|d}| |j|j}|j|j |j|j |j}|	| j j
 || jj|jddd |	| j j |	| j j ||_|S )ax  Encode a speech synthesis request into a tokenized sequence.

        Produces: [BOS] + audio_tokens + [TEXT_TO_AUDIO] + text_tokens + [AUDIO_TO_TEXT] + [BEGIN_AUDIO].

        Args:
            request: The speech request containing input text and voice/audio data.

        Returns:
            Tokenized object with the full token sequence and optional audio data.
        Nr<  )ri   Fr   )r2   r@   r)   r]  rZ  r[  ri   rr   rm   ru   text_to_audio_tokenr0   r   inputaudio_to_text_tokenbegin_audio_token)r6   rA   init_tokensrZ   tokenized_audiori   r9   r9   r:   r     s   

z)InstructTokenizerV7.encode_speech_requestr   r   r<   )/r   r   r   r   r+   r-   r"   r4   rC   r   r   r^   r   rs   r   r   r   r   r   r   r   r   r   r   r   r   rp   r!   r)   r   r2  bytesfloatrC  r4  r   r   ra   r   rW  r   rR   rV   r]  r   r   r   r9   r9   r7   r:   r    s    
)
%
	% 4
""r  c                	       sV   e Zd ZdZ		ddededB dedB ddf fddZd	ede	e
 fd
dZ  ZS )InstructTokenizerV11zInstruct tokenizer V11.

    The difference with V7 tokenizer is that it encodes tool calls differently:
    Tool call results are encoded as :
    - [begin tool call] call_name_tokens [call id] call_id_tokens [args] content tokens
    Nr0   r1   r2   r;   c                    s8   t  ||| | jtjj| _| jtjj| _	d S r<   )
r3   r4   r0   r   r(   argsr   ARGSr
  CALL_IDr5   r7   r9   r:   r4     s   zInstructTokenizerV11.__init__rL   c                 C   s   |j s
J d| g }|j D ]B}| |}g }d|v r+| jg| jj|d ddd}|| jg| jj|d ddd|| j| jjtj|d ddddd7 }q|S )Nr   r  Fr   r   r   r   )	r   r   ri  r0   r   r   rh  r   r   )r6   rL   r   r   preparedidsr9   r9   r:   r     s&   

z<InstructTokenizerV11._encode_tool_calls_in_assistant_messager   )r   r   r   r   r+   r-   r"   r4   r   rC   r   r   r   r9   r9   r7   r:   rf    s    

rf  c                	       s   e Zd ZdZejZ		ddededB de	dB ddf fddZ
d	edee fd
dZd	ededee fddZdedee fddZedee ddfddZ  ZS )InstructTokenizerV13zInstruct tokenizer V13.

    The difference with V11 tokenizer is that it encodes tool calls differently:
        - available tools are tokenized at the first user message.
        - call id is no longer tokenized for tool calls or results.
    Nr0   r1   r2   r;   c                    s|   t  ||| t|tsJ dt| tjj|jv r6tj	j|jv r6|
tjj| _|
tj	j| _d S d | _d | _d S )Nz$Tokenizer must be a Tekkenizer. Got )r3   r4   rF   r.   rt   r(   begin_thinkr   _special_tokens_reverse_vocab	end_thinkr   BEGIN_THINK	END_THINKr5   r7   r9   r:   r4     s   
zInstructTokenizerV13.__init__rL   c                 C   s   |j s
J d| g }|j D ]5}|jr|jdksJ | |}|| jg| jj|d ddd| j| jjtj|d ddddd7 }q|S )Nr   r  r   Fr   r   r   )	r   r  r   r   r0   r   rh  r   r   )r6   rL   r   r   rj  r9   r9   r:   r     s   

z<InstructTokenizerV13._encode_tool_calls_in_assistant_messagerM   c                 C   s^   |j dus	J d|j}t|tsddd |D }| jj|ddd}| jg|| j}|S )zEncode a tool message.

        Args:
            message: The message to encode.
            is_before_last_user_message: Not used.
        Returns:
            The encoded tokens.
        Nz2Tool call id must be provided for tokenizer >= v13r   c                 s   r   r<   r   r   r9   r9   r:   r     r   z;InstructTokenizerV13.encode_tool_message.<locals>.<genexpr>Fr   )	r  r   rF   r   r   r0   r   r   r   )r6   rL   rM   r   ri   r   r9   r9   r:   rR     s   	
z(InstructTokenizerV13.encode_tool_messagerW   c                 C   sZ   | j dus	J d| jdusJ d| jj|jddd}| j g|}|jr+|| j |S )zEncode a thinking chunk.

        Args:
            chunk: The thinking chunk to encode.
        Returns:
            The encoded tokens.
        Nz2think tokens are not available for this tokenizer.Fr   )rp  rq  r0   r   thinkingclosedru   )r6   rW   ri   think_tokensr9   r9   r:   rY     s   z!InstructTokenizerV13.encode_thinkrE   c                 C   s   dS )z9Allows system prompts and audio chunks to coexist in v13.Nr9   r_   r9   r9   r:   ra   -  rb   z&InstructTokenizerV13.validate_messagesr   )r   r   r   r   r,   r   r   r+   r-   r"   r4   r   rC   r   r   r   r   rR   r   rY   r   r   ra   r   r9   r9   r7   r:   rl    s&     rl  c                	       s   e Zd ZdZ		ddededB dedB ddf fddZd	eddfd
dZ	d	ede
e fddZdede
e f fddZ  ZS )InstructTokenizerV15zInstruct tokenizer V15.

    Extends V13 with model settings encoding. Inherits V13's tool call behavior
    (no call ID in tool calls/results, first-position tools, think support).
    Nr0   r1   r2   r;   c                    s:   t  j|||d | jtjj| _| jtjj| _	d S )Nr  )
r3   r4   r0   r   r(   begin_model_settingsr   BEGIN_MODEL_SETTINGSend_model_settingsEND_MODEL_SETTINGSr5   r7   r9   r:   r4   :  s   zInstructTokenizerV15.__init__rf   c                 C   s2   | j j}|du rtd| j j d|| dS )zGValidate model settings against the tokenizer's model settings builder.Nz
Tokenizer z" needs a `model_settings_builder`.)r0   r   r   r   validate_settings)r6   rf   r   r9   r9   r:   _validate_settingsD  s   z'InstructTokenizerV15._validate_settingsc                 C   sZ   |  | |t krg S tj|jddddd}| jj|ddd}| jg|| j	}|S )z=Encode model settings as special-token-delimited JSON tokens.T)exclude_noneF)r   	sort_keysr   )
r{  r   r   r   r   r   r0   r   rw  ry  )r6   rf   dumped_settingssetting_json_tokenssettings_tokensr9   r9   r:   r   K  s   
z%InstructTokenizerV15._encode_settingsrL   c                    s4   t |jtrtdd |jD rtdt |S )z6Encode a system message, rejecting ThinkChunk content.c                 s   rT  r<   )rF   r   r   r9   r9   r:   r   _  rV  z=InstructTokenizerV15.encode_system_message.<locals>.<genexpr>z<ThinkChunk in system message is not supported for this model)rF   r   rC   rX  r   r3   rs   r   r7   r9   r:   rs   \  s   z*InstructTokenizerV15.encode_system_messager   )r   r   r   r   r+   r-   r"   r4   r   r{  rC   r   r   r   rs   r   r9   r9   r7   r:   ru  3  s(    	

"ru  )Hr   rI  abcr   typingr   r   r   r   numpyr   mistral_common.audior   mistral_common.exceptionsr   r	   r
   r   #mistral_common.protocol.fim.requestr   &mistral_common.protocol.instruct.chunkr   r   r   r   r   r   r   r   )mistral_common.protocol.instruct.messagesr   r   r   r   r   r   (mistral_common.protocol.instruct.requestr   r   +mistral_common.protocol.instruct.tool_callsr   r   &mistral_common.protocol.speech.requestr   -mistral_common.protocol.transcription.requestr    r!   &mistral_common.tokens.tokenizers.audior"   r#   %mistral_common.tokens.tokenizers.baser$   r%   r&   r'   r(   r)   r*   r+   r,   &mistral_common.tokens.tokenizers.imager-   'mistral_common.tokens.tokenizers.tekkenr.   r/   r   r   r  r  rf  rl  ru  r9   r9   r9   r:   <module>   sR    ( 
,
 
D 
 
h 8   &X