o
    c۷i`                  	   @   sZ  d dl Z d dlZd dlmZmZmZ d dlmZ d dlm	Z	 d dl
mZmZmZ d dlmZmZmZmZmZmZmZmZmZmZmZ d dlmZmZmZ d dlmZm Z m!Z! d d	l"m#Z#m$Z$ d d
l%m&Z& dZ'G dd deeeeee#f Z(G dd de(Z)G dd de)Z*G dd de*Z+	dde$de&dB de(fddZ,	dde$de&dB de(fddZ-dS )    N)GenericSequenceoverload)assert_never)InvalidRequestException)	TextChunk
ThinkChunkUserContentChunk)UATSAssistantMessageAssistantMessageTypeFinetuningAssistantMessageRolesSystemMessageSystemMessageTypeToolMessageToolMessageTypeUserMessageUserMessageType)ChatCompletionRequestInstructRequestModelSettings)FunctionCallToolToolCall)InstructRequestTypeTokenizerVersion)ModelSettingsBuilderz

c                   @   s*  e Zd ZU dZdZeed< dZeed< dee	 dee
 dee dee d	ee d
edB fddZed2d
edB dd fddZdedefddZdedB defddZedeeeB eB  deeeeB  B fddZededefddZedee defddZdeeeeB eB  B ee B deeeeB  B fddZdee dedB fddZdee dee dee fd d!Zd"edefd#d$Z dee dee fd%d&Z!dee de
fd'd(Z"dee de	fd)d*Z#dee d+e$dB dee de%e fd,d-Z&dee dee fd.d/Z'dee defd0d1Z(dS )3InstructRequestNormalizera  Takes a [ChatCompletionRequest][mistral_common.protocol.instruct.request.ChatCompletionRequest] and normalizes
    it into an [InstructRequest][mistral_common.protocol.instruct.request.InstructRequest].

    The normalization process does several things such as:
    - Aggregate consecutive messages of the same role
    - Aggregate system prompts
    - Normalize json content
    - Normalize tool calls

    Examples:
        >>> normalizer = InstructRequestNormalizer.normalizer()
    F_system_prompt_in_begin_allow_tool_call_and_contentuser_message_classassistant_message_classtool_message_classsystem_message_classinstruct_request_classmodel_settings_builderNc                 C   s(   || _ || _|| _|| _|| _|| _dS )a  Initializes the normalizer with the appropriate message classes.

        Args:
           user_message_class: The class for user messages.
           assistant_message_class: The class for assistant messages.
           tool_message_class: The class for tool messages.
           system_message_class: The class for system messages.
           instruct_request_class: The class for instruct requests.
           model_settings_builder: The builder for model settings, or None if unsupported.
        N)_user_message_class_assistant_message_class_tool_message_class_instruct_request_class_system_message_class_model_settings_builder)selfr!   r"   r#   r$   r%   r&    r.   `/home/ubuntu/vllm_env/lib/python3.10/site-packages/mistral_common/protocol/instruct/normalize.py__init__5   s   
z"InstructRequestNormalizer.__init__returnc                 C   0   | durt d|  ttttttttf dS )a  Returns a normalizer for the default instruct request.

        Args:
            model_settings_builder: Must be None for this normalizer version.

        Returns:
            A normalizer for the default instruct request.

        Raises:
            ValueError: If model_settings_builder is not None.

        Examples:
            >>> normalizer = InstructRequestNormalizer.normalizer()
        NzGmodel_settings_builder must be None for InstructRequestNormalizer, got )	
ValueErrorr   r   r   r   r   r   r
   r   r&   r.   r.   r/   
normalizerP      z$InstructRequestNormalizer.normalizerrequestc                 C   s.   | j durtdt| j d| j  t S )zBuild model settings from a chat completion request.

        For pre-v15 normalizers, model settings are all `None`.

        Args:
            request: The chat completion request.

        Returns:
            Returns `ModelSettings.none()`.
        Nz*model_settings_builder should be None for , got )r,   r   type__name__r   noner-   r7   r.   r.   r/   build_settingsh   s
   
z(InstructRequestNormalizer.build_settingscontentc                 C   sR   |d u s
t |dkrdS zt|}tj|dd}W |S  tjy(   |}Y |S w )Nr   z{}F)ensure_ascii)lenjsonloadsdumpsJSONDecodeError)r-   r>   parsed_jsonnormalized_contentr.   r.   r/   _normalize_json_contenty   s   
z1InstructRequestNormalizer._normalize_json_contentc                 C      d S Nr.   r-   r>   r.   r.   r/   _aggregate_content_chunks   s   z3InstructRequestNormalizer._aggregate_content_chunksc                 C   rH   rI   r.   rJ   r.   r.   r/   rK         c                 C   rH   rI   r.   rJ   r.   r.   r/   rK      rL   c                 C   s   t |tr|S t |tsJ dt| g }|D ]@}t |tr%t|d}t |trF|r@t |d tr@|d  jt|j 7  _q|| qt |trQ|| qt	dt| t
|dkrlt |d trl|d jS |S )NzExpected list, got textzUnsupported chunk type    r   )
isinstancestrlistr9   r   rN   CHUNK_JOIN_STRappendr   r3   r@   )r-   r>   aggregated_contentchunkr.   r.   r/   rK      s"   





messagesc                 C   sL   g }|D ]}|j tjkr|jr| |j}|| qt|r$t|S d S rI   )	roler   systemr>   rK   rU   r@   rT   join)r-   rX   system_promptmessagerV   r.   r.   r/   _aggregate_system_prompts   s   
z3InstructRequestNormalizer._aggregate_system_promptslatest_call_idsc                 C   sn   g }|D ]0}t || jsJ d|j}t |ts"tdd |D }| |}|| j||j|j	d q|S )zm
        We currently do not do any aggregation for tool messages, but we normalize the json content
        zExpected tool messagec                 S   s   g | ]}|j qS r.   rM   ).0rW   r.   r.   r/   
<listcomp>   s    zFInstructRequestNormalizer._aggregate_tool_messages.<locals>.<listcomp>)r>   tool_call_idname)
rQ   r)   r>   rR   rT   r[   rG   rU   rb   rc   )r-   rX   r_   tool_messagesr]   r>   rF   r.   r.   r/   _aggregate_tool_messages   s   


z2InstructRequestNormalizer._aggregate_tool_messages	tool_callc                 C   s(   |  |jj}tt|jj|d|jdS )N)rc   	arguments)functionid)rG   rh   rg   r   r   rc   ri   )r-   rf   normalized_function_arumentsr.   r.   r/   _normalize_tool_call   s
   z.InstructRequestNormalizer._normalize_tool_callc                 C   s   g S rI   r.   r-   rX   r.   r.   r/   _aggregate_system_messages   rL   z4InstructRequestNormalizer._aggregate_system_messagesc                 C   s  g }g }d}d }|D ]]}t || jsJ d| js&|jr&|jr&td| |jr9|jD ]}| |}|| q,|j }	d urM|t |	t	rJ|	gn|	 ||j
O }t |trg|d urd||jksdJ d|j}q
|rp| |}
nd }
| j|
|pxd |d}|d urt|dr||_|S )NFzExpected assistant messagezDTool calls and content cannot be used together in the same message. zEExpected weights of aggregated FinetuningAssistantMessage to be equal)r>   
tool_callsprefixweight)rQ   r(   r    rn   r>   r3   rk   rU   extendrR   ro   r   rp   rK   hasattr)r-   rX   messages_contentsrn   ro   rp   r]   rf   normalized_tool_callr>   rV   aggregated_messager.   r.   r/   _aggregate_assistant_messages   sB   



z7InstructRequestNormalizer._aggregate_assistant_messagesc                 C   s   g }g }|D ]D}t || jsJ dt| t |jtr$||j q|jD ]"}t |tr5||j q'|rD|tt	|d g }|| q'q|rRt	|nd}|s\| j|dS |rf|t|d | j|dS )z:
        Just coalesce neighboring blocks of text
        zExpected user message got rM    r>   )
rQ   r'   r9   r>   rR   rU   r   rN   rT   r[   )r-   rX   all_contenttext_chunksr]   rW   text_contentr.   r.   r/   _aggregate_user_messages   s(   

	z2InstructRequestNormalizer._aggregate_user_messagesrY   c                 C   sL   |t jkr| ||S |t jkr| |gS |t jkr!| |gS | |S rI   )r   toolre   	assistantrv   userr|   rm   r-   rX   rY   r_   r.   r.   r/   _aggregate_role  s   



z)InstructRequestNormalizer._aggregate_rolec                 C   s  g }g }d }d }g }|D ]V}t |dd }||jks||krX|| ||| |tjkrK|d }	t|d ts9J |	jd urJ|	jD ]}
|	|
j
 qAn	|tjkrT|  |  |}|j}|	| q|| ||| t|dks~| js|d jtjkr|d| jdd |S )Nrp   rO   r   rw   rx   )getattrrY   rq   r   r   r~   rQ   r   rn   rU   ri   r}   clearr@   r   r   insertr'   )r-   rX   aggregated_messagesmessages_to_aggregatecurrent_rolecurrent_weightr_   r]   
new_weightassistant_messagerf   r.   r.   r/   _aggregate_messages&  s:   



z-InstructRequestNormalizer._aggregate_messagesc                 C   s`   |  |j}| |j}| |}|t kr$tdt| j d| | j	|||j
|j|dS )a  Converts a chat completion request to an instruct request.

        Args:
            request: The chat completion request to convert.

        Returns:
            The converted instruct request.

        Examples:
            >>> from mistral_common.protocol.instruct.messages import UserMessage, AssistantMessage
            >>> request = ChatCompletionRequest(
            ...     messages=[
            ...         UserMessage(content="Hello"),
            ...         AssistantMessage(content="Hi"),
            ...     ],
            ... )
            >>> normalizer = InstructRequestNormalizer.normalizer()
            >>> instruct_request = normalizer.from_chat_completion_request(request)
        %Model settings are not supported for r8   )rX   r\   available_toolscontinue_final_messagesettings)r^   rX   r   r=   r   r;   r   r9   r:   r*   toolsr   )r-   r7   r\   rX   r   r.   r.   r/   from_chat_completion_requestO  s   
z6InstructRequestNormalizer.from_chat_completion_requestrI   ))r:   
__module____qualname____doc__r   bool__annotations__r    r9   r   r   r   r   r   r   r0   staticmethodr5   r   r   r=   rR   rG   r   rS   r   r   rK   r
   r^   re   r   rk   rm   rv   r|   r   r   r   r   r   r.   r.   r.   r/   r   "   sZ   
 

"
-* 
)r   c                	   @   s   e Zd ZU dZdZeed< dZeed< edde	dB dd fdd	Z
d
ee dee fddZd
ee dedB dee dee fddZd
ee dedB fddZdee defddZdS )InstructRequestNormalizerV7zvNormalizer for the v7 tokenizer.

    Examples:
        >>> normalizer = InstructRequestNormalizerV7.normalizer()
    Tr   r    Nr&   r1   c                 C   r2   )a  Returns a normalizer for the default instruct request.

        Args:
            model_settings_builder: Must be None for this normalizer version.

        Returns:
            A normalizer for the V7 instruct request.

        Raises:
            ValueError: If model_settings_builder is not None.

        Examples:
            >>> normalizer = InstructRequestNormalizerV7.normalizer()
        NzImodel_settings_builder must be None for InstructRequestNormalizerV7, got )	r3   r   r   r   r   r   r   r
   r   r4   r.   r.   r/   r5   }  r6   z&InstructRequestNormalizerV7.normalizerrX   c                    s    fdd|D S )Nc                    s,   g | ]}t | jr j |jd qS )rx   )rQ   r+   rK   r>   )r`   r]   r-   r.   r/   ra     s    
zJInstructRequestNormalizerV7._aggregate_system_messages.<locals>.<listcomp>r.   rl   r.   r   r/   rm     s   
z6InstructRequestNormalizerV7._aggregate_system_messagesrY   r_   c                 C   sr   |t jkr| ||S |t jkr| |gS |t jkr!| |gS |t jkr+| |S |d u r5t	|dks7J g S )Nr   )
r   r}   re   r~   rv   r   r|   rZ   rm   r@   r   r.   r.   r/   r     s   




z+InstructRequestNormalizerV7._aggregate_rolec                 C   s   t d)Nz&We should not aggregate system prompts)NotImplementedErrorrl   r.   r.   r/   r^     s   z5InstructRequestNormalizerV7._aggregate_system_promptsr7   c                 C   sP   |  |j}| |}|t krtdt| j d| | j|d|j	|dS )a  Converts a chat completion request to an instruct request.

        Args:
            request: The chat completion request to convert.

        Returns:
            The converted instruct request.

        Examples:
            >>> from mistral_common.protocol.instruct.messages import UserMessage, AssistantMessage
            >>> request = ChatCompletionRequest(
            ...     messages=[
            ...         UserMessage(content="Hello"),
            ...         AssistantMessage(content="Hi"),
            ...     ],
            ... )
            >>> normalizer = InstructRequestNormalizerV7.normalizer()
            >>> instruct_request = normalizer.from_chat_completion_request(request)
        r   r8   NrX   r\   r   r   )
r   rX   r=   r   r;   r   r9   r:   r*   r   r-   r7   rX   r   r.   r.   r/   r     s   

z8InstructRequestNormalizerV7.from_chat_completion_requestrI   )r:   r   r   r   r   r   r   r    r   r   r5   rS   r
   r   rm   r   rR   r   r   r^   r   r   r   r.   r.   r.   r/   r   s  s   
 *r   c                       sV   e Zd ZdZeddedB dd fddZdee dee	 dee
 f fd	d
Z  ZS )InstructRequestNormalizerV13zNormalizer for the v13 tokenizer.

    It reorders tool messages based on the tool call order.

    Examples:
        >>> normalizer = InstructRequestNormalizerV13.normalizer()
    Nr&   r1   c                 C   r2   )a5  Returns a normalizer for the default instruct request.

        Args:
            model_settings_builder: Must be None for this normalizer version.

        Returns:
            A normalizer for the V13 instruct request.

        Raises:
            ValueError: If model_settings_builder is not None.
        NzJmodel_settings_builder must be None for InstructRequestNormalizerV13, got )	r3   r   r   r   r   r   r   r
   r   r4   r.   r.   r/   r5     s   z'InstructRequestNormalizerV13.normalizerrX   r_   c                    sL   t  ||}dd t|D  dd t|D |j fddd |S )Nc                 S   s   i | ]\}}||qS r.   r.   )r`   idxcall_idr.   r.   r/   
<dictcomp>  s    zIInstructRequestNormalizerV13._aggregate_tool_messages.<locals>.<dictcomp>c                 S   s   i | ]\}}|j |qS r.   )rb   )r`   r   r]   r.   r.   r/   r     s    c                    s      | jpdtd| j fS )Nnullinf)getrb   float)msgid_to_tool_call_idxid_to_tool_result_idxr.   r/   <lambda>  s   zGInstructRequestNormalizerV13._aggregate_tool_messages.<locals>.<lambda>)key)superre   	enumeratesort)r-   rX   r_   rd   	__class__r   r/   re     s   z5InstructRequestNormalizerV13._aggregate_tool_messagesrI   )r:   r   r   r   r   r   r5   rS   r
   rR   r   re   __classcell__r.   r.   r   r/   r     s
    .r   c                   @   sT   e Zd ZdZeddedB dd fddZdedefdd	Z	dee
 defd
dZdS )InstructRequestNormalizerV15zNormalizer for the v15 tokenizer.

    It reorders tool messages based on the tool call order and builds model settings.

    Examples:
        >>> normalizer = InstructRequestNormalizerV15.normalizer()
    Nr&   r1   c                 C   s   t ttttt| S )zReturns a normalizer for the V15 instruct request.

        Args:
            model_settings_builder: The builder for model settings.

        Returns:
            A normalizer for the V15 instruct request.
        )r   r   r   r   r   r   r4   r.   r.   r/   r5     s   
z'InstructRequestNormalizerV15.normalizerr7   c                 C   s*   | j du rtdt| j | j |S )a%  Build model settings using the configured model settings builder.

        Args:
            request: The chat completion request.

        Returns:
            The built model settings.

        Raises:
            InvalidRequestException: If no model settings builder is configured.
        Nz,model_settings_builder must not be None for )r,   r   r9   r:   r=   r<   r.   r.   r/   r=     s   
z+InstructRequestNormalizerV15.build_settingsc                 C   s*   |  |j}| |}| j|d|j|dS )zConverts a chat completion request to an instruct request.

        Args:
            request: The chat completion request to convert.

        Returns:
            The converted instruct request.
        Nr   )r   rX   r=   r*   r   r   r.   r.   r/   r   !  s
   	

z9InstructRequestNormalizerV15.from_chat_completion_requestrI   )r:   r   r   r   r   r   r5   r   r   r=   r
   r   r   r.   r.   r.   r/   r     s    r   versionr&   r1   c                 C   s   t dt t| |dS )zCDeprecated in favor to `get_normalizer`, will be removed in 1.12.0.zu`normalizer_for_tokenizer_version` is deprecated and will be removed in 1.12.0. Please call `get_normalizer` instead.r   r&   )warningswarnFutureWarningget_normalizerr   r.   r.   r/    normalizer_for_tokenizer_version1  s
   r   c                 C   s   |   t jkrn t jkrn t jkrn n  t}n) t jkr$ t}n   t jkr+n t jkr1n n  t	}nt j
kr?t}n	 t|  |j|dS )a  Gets the appropriate normalizer for the given tokenizer version.

    Args:
        version: The tokenizer version to get the normalizer for.
        model_settings_builder: The builder for model settings, or None if unsupported.

    Returns:
        The appropriate normalizer for the given tokenizer version.

    Examples:
        >>> normalizer = get_normalizer(TokenizerVersion.v1)
    r4   )r   v1v2v3r   v7r   v11v13r   v15r   r   r5   )r   r&   normalizer_clsr.   r.   r/   r   =  s   ."r   rI   ).rA   r   typingr   r   r   typing_extensionsr   mistral_common.exceptionsr   &mistral_common.protocol.instruct.chunkr   r   r	   )mistral_common.protocol.instruct.messagesr
   r   r   r   r   r   r   r   r   r   r   (mistral_common.protocol.instruct.requestr   r   r   +mistral_common.protocol.instruct.tool_callsr   r   r   %mistral_common.tokens.tokenizers.baser   r   7mistral_common.tokens.tokenizers.model_settings_builderr   rT   r   r   r   r   r   r   r.   r.   r.   r/   <module>   sH    4
  SV,=
