o
    *icK                  	   @   s  d dl Z d dlmZmZmZ d dlmZmZmZ d dl	m
Z
mZmZmZmZmZmZmZmZmZmZ d dlmZmZ d dlmZmZmZ d dlmZmZ dZG d	d
 d
eeeeeef Z G dd de Z!G dd de!Z"dede fddZ#dede fddZ$dS )    N)GenericSequenceoverload)	TextChunk
ThinkChunkUserContentChunk)UATSAssistantMessageAssistantMessageTypeFinetuningAssistantMessageRolesSystemMessageSystemMessageTypeToolMessageToolMessageTypeUserMessageUserMessageType)ChatCompletionRequestInstructRequest)FunctionCallToolToolCall)InstructRequestTypeTokenizerVersionz

c                   @   s  e Zd ZU dZdZeed< dZeed< dee	 dee
 dee dee d	ee f
d
dZed/ddZdedB defddZedeeeB eB  deeeeB  B fddZededefddZedee defddZdeeeeB eB  B ee B deeeeB  B fddZdee dedB fddZdee dee dee fddZdedefdd Zdee dee fd!d"Zdee de
fd#d$Zdee de	fd%d&Zdee d'e dB dee de!e fd(d)Z"dee dee fd*d+Z#d,e$e defd-d.Z%dS )0InstructRequestNormalizera  Takes a [ChatCompletionRequest][mistral_common.protocol.instruct.request.ChatCompletionRequest] and normalizes
    it into an [InstructRequest][mistral_common.tokens.instruct.request.InstructRequest].

    The normalization process does several things such as:
    - Aggregate consecutive messages of the same role
    - Aggregate system prompts
    - Normalize json content
    - Normalize tool calls

    Examples:
        >>> normalizer = InstructRequestNormalizer.normalizer()
    F_system_prompt_in_begin_allow_tool_call_and_contentuser_message_classassistant_message_classtool_message_classsystem_message_classinstruct_request_classc                 C   s"   || _ || _|| _|| _|| _dS )a  Initializes the normalizer with the appropriate message classes.

        Args:
           user_message_class: The class for user messages.
           assistant_message_class: The class for assistant messages.
           tool_message_class: The class for tool messages.
           system_message_class: The class for system messages.
           instruct_request_class: The class for instruct requests.
        N)_user_message_class_assistant_message_class_tool_message_class_instruct_request_class_system_message_class)selfr   r   r   r    r!    r(   g/home/ubuntu/veenaModal/venv/lib/python3.10/site-packages/mistral_common/protocol/instruct/normalize.py__init__0   s
   
z"InstructRequestNormalizer.__init__returnc                   C      t tttttttf S )zReturns a normalizer for the default instruct request.

        Examples:
            >>> normalizer = InstructRequestNormalizer.normalizer()
        )r   r   r	   r   r   r   r   r   r(   r(   r(   r)   
normalizerH      
z$InstructRequestNormalizer.normalizercontentNc                 C   sR   |d u s
t |dkrdS zt|}tj|dd}W |S  tjy(   |}Y |S w )Nr   z{}F)ensure_ascii)lenjsonloadsdumpsJSONDecodeError)r'   r/   parsed_jsonnormalized_contentr(   r(   r)   _normalize_json_contentW   s   
z1InstructRequestNormalizer._normalize_json_contentc                 C      d S Nr(   r'   r/   r(   r(   r)   _aggregate_content_chunksb   s   z3InstructRequestNormalizer._aggregate_content_chunksc                 C   r9   r:   r(   r;   r(   r(   r)   r<   f      c                 C   r9   r:   r(   r;   r(   r(   r)   r<   h   r=   c                 C   s   t |tr|S t |tsJ dt| g }|D ]@}t |tr%t|d}t |trF|r@t |d tr@|d  jt|j 7  _q|| qt |trQ|| qt	dt| t
|dkrlt |d trl|d jS |S )NzExpected list, got textzUnsupported chunk type    r   )
isinstancestrlisttyper   r?   CHUNK_JOIN_STRappendr   
ValueErrorr1   )r'   r/   aggregated_contentchunkr(   r(   r)   r<   j   s"   





messagesc                 C   sL   g }|D ]}|j tjkr|jr| |j}|| qt|r$t|S d S r:   )	roler   systemr/   r<   rG   r1   rF   join)r'   rK   system_promptmessagerI   r(   r(   r)   _aggregate_system_prompts   s   
z3InstructRequestNormalizer._aggregate_system_promptslatest_call_idsc                 C   sn   g }|D ]0}t || jsJ d|j}t |ts"tdd |D }| |}|| j||j|j	d q|S )zm
        We currently do not do any aggregation for tool messages, but we normalize the json content
        zExpected tool messagec                 S   s   g | ]}|j qS r(   r>   ).0rJ   r(   r(   r)   
<listcomp>   s    zFInstructRequestNormalizer._aggregate_tool_messages.<locals>.<listcomp>)r/   tool_call_idname)
rB   r$   r/   rC   rF   rN   r8   rG   rU   rV   )r'   rK   rR   tool_messagesrP   r/   r7   r(   r(   r)   _aggregate_tool_messages   s   


z2InstructRequestNormalizer._aggregate_tool_messages	tool_callc                 C   s(   |  |jj}tt|jj|d|jdS )N)rV   	arguments)functionid)r8   r[   rZ   r   r   rV   r\   )r'   rY   normalized_function_arumentsr(   r(   r)   _normalize_tool_call   s
   z.InstructRequestNormalizer._normalize_tool_callc                 C   s   g S r:   r(   r'   rK   r(   r(   r)   _aggregate_system_messages   r=   z4InstructRequestNormalizer._aggregate_system_messagesc                 C   s  g }g }d}d }|D ]]}t || jsJ d| js&|jr&|jr&td| |jr9|jD ]}| |}|| q,|j }	d urM|t |	t	rJ|	gn|	 ||j
O }t |trg|d urd||jksdJ d|j}q
|rp| |}
nd }
| j|
|pxd |d}|d urt|dr||_|S )NFzExpected assistant messagezDTool calls and content cannot be used together in the same message. zEExpected weights of aggregated FinetuningAssistantMessage to be equal)r/   
tool_callsprefixweight)rB   r#   r   ra   r/   rH   r^   rG   extendrC   rb   r   rc   r<   hasattr)r'   rK   messages_contentsra   rb   rc   rP   rY   normalized_tool_callr/   rI   aggregated_messager(   r(   r)   _aggregate_assistant_messages   sB   



z7InstructRequestNormalizer._aggregate_assistant_messagesc                 C   s   g }g }|D ]D}t || jsJ dt| t |jtr$||j q|jD ]"}t |tr5||j q'|rD|tt	|d g }|| q'q|rRt	|nd}|s\| j|dS |rf|t|d | j|dS )z:
        Just coalesce neighboring blocks of text
        zExpected user message got r>    r/   )
rB   r"   rE   r/   rC   rG   r   r?   rF   rN   )r'   rK   all_contenttext_chunksrP   rJ   text_contentr(   r(   r)   _aggregate_user_messages   s(   

	z2InstructRequestNormalizer._aggregate_user_messagesrL   c                 C   sL   |t jkr| ||S |t jkr| |gS |t jkr!| |gS | |S r:   )r   toolrX   	assistantri   userro   r`   r'   rK   rL   rR   r(   r(   r)   _aggregate_role   s   



z)InstructRequestNormalizer._aggregate_rolec                 C   s  g }g }d }d }g }|D ]V}t |dd }||jks||krX|| ||| |tjkrK|d }	t|d ts9J |	jd urJ|	jD ]}
|	|
j
 qAn	|tjkrT|  |  |}|j}|	| q|| ||| t|dks~| js|d jtjkr|d| jdd |S )Nrc   r@   r   rj   rk   )getattrrL   rd   rt   r   rq   rB   r	   ra   rG   r\   rp   clearr1   r   rr   insertr"   )r'   rK   aggregated_messagesmessages_to_aggregatecurrent_rolecurrent_weightrR   rP   
new_weightassistant_messagerY   r(   r(   r)   _aggregate_messages  s:   



z-InstructRequestNormalizer._aggregate_messagesrequestc                 C   s.   |  |j}| |j}| j|||j|jdS )a  Converts a chat completion request to an instruct request.

        Args:
            request: The chat completion request to convert.

        Returns:
            The converted instruct request.

        Examples:
            >>> from mistral_common.protocol.instruct.messages import UserMessage, AssistantMessage
            >>> request = ChatCompletionRequest(
            ...     messages=[
            ...         UserMessage(content="Hello"),
            ...         AssistantMessage(content="Hi"),
            ...     ],
            ... )
            >>> normalizer = InstructRequestNormalizer.normalizer()
            >>> instruct_request = normalizer.from_chat_completion_request(request)
        )rK   rO   available_toolscontinue_final_message)rQ   rK   r~   r%   toolsr   )r'   r   rO   rK   r(   r(   r)   from_chat_completion_request-  s   z6InstructRequestNormalizer.from_chat_completion_request)r+   r   )&__name__
__module____qualname____doc__r   bool__annotations__r   rE   r   r
   r   r   r   r*   staticmethodr-   rC   r8   r   rD   r   r   r<   r   rQ   rX   r   r^   r`   ri   ro   r   r   rt   r~   r   r   r(   r(   r(   r)   r      sT   
 

"
-* 
)r   c                	   @   s   e Zd ZU dZdZeed< dZeed< edddZ	de
e de
e fd	d
Zde
e dedB de
e dee fddZde
e dedB fddZdee defddZdS )InstructRequestNormalizerV7zvNormalizer for the v7 tokenizer.

    Examples:
        >>> normalizer = InstructRequestNormalizerV7.normalizer()
    Tr   r   r+   c                   C   r,   )zReturns a normalizer for the default instruct request

        Examples:
            >>> normalizer = InstructRequestNormalizerV7.normalizer()
        )r   r   r	   r   r   r   r   r   r(   r(   r(   r)   r-   V  r.   z&InstructRequestNormalizerV7.normalizerrK   c                    s    fdd|D S )Nc                    s,   g | ]}t | jr j |jd qS )rk   )rB   r&   r<   r/   )rS   rP   r'   r(   r)   rT   f  s    
zJInstructRequestNormalizerV7._aggregate_system_messages.<locals>.<listcomp>r(   r_   r(   r   r)   r`   e  s   
z6InstructRequestNormalizerV7._aggregate_system_messagesrL   NrR   c                 C   sr   |t jkr| ||S |t jkr| |gS |t jkr!| |gS |t jkr+| |S |d u r5t	|dks7J g S )Nr   )
r   rp   rX   rq   ri   rr   ro   rM   r`   r1   rs   r(   r(   r)   rt   l  s   




z+InstructRequestNormalizerV7._aggregate_rolec                 C   s   t d)Nz&We should not aggregate system prompts)NotImplementedErrorr_   r(   r(   r)   rQ   y  s   z5InstructRequestNormalizerV7._aggregate_system_promptsr   c                 C   s   |  |j}| j|d|jdS )a  Converts a chat completion request to an instruct request.

        Args:
            request: The chat completion request to convert.

        Returns:
            The converted instruct request.

        Examples:
            >>> from mistral_common.protocol.instruct.messages import UserMessage, AssistantMessage
            >>> request = ChatCompletionRequest(
            ...     messages=[
            ...         UserMessage(content="Hello"),
            ...         AssistantMessage(content="Hi"),
            ...     ],
            ... )
            >>> normalizer = InstructRequestNormalizerV7.normalizer()
            >>> instruct_request = normalizer.from_chat_completion_request(request)
        N)rK   rO   r   )r~   rK   r%   r   )r'   r   rK   r(   r(   r)   r   |  s   z8InstructRequestNormalizerV7.from_chat_completion_request)r+   r   )r   r   r   r   r   r   r   r   r   r-   rD   r   r   r`   r   rC   r   rt   rQ   r   r   r   r(   r(   r(   r)   r   L  s   
 *r   c                       sH   e Zd ZdZed	ddZdee dee dee	 f fddZ
  ZS )
InstructRequestNormalizerV13zNormalizer for the v13 tokenizer.

    It reorders tool messages based on the tool call order.

    Examples:
        >>> normalizer = InstructRequestNormalizerV13.normalizer()
    r+   c                   C   r,   )z6Returns a normalizer for the default instruct request.)r   r   r	   r   r   r   r   r   r(   r(   r(   r)   r-     s   
z'InstructRequestNormalizerV13.normalizerrK   rR   c                    sL   t  ||}dd t|D  dd t|D |j fddd |S )Nc                 S   s   i | ]\}}||qS r(   r(   )rS   idxcall_idr(   r(   r)   
<dictcomp>  s    zIInstructRequestNormalizerV13._aggregate_tool_messages.<locals>.<dictcomp>c                 S   s   i | ]\}}|j |qS r(   )rU   )rS   r   rP   r(   r(   r)   r     s    c                    s      | jpdtd| j fS )Nnullinf)getrU   float)msgid_to_tool_call_idxid_to_tool_result_idxr(   r)   <lambda>  s   zGInstructRequestNormalizerV13._aggregate_tool_messages.<locals>.<lambda>)key)superrX   	enumeratesort)r'   rK   rR   rW   	__class__r   r)   rX     s   z5InstructRequestNormalizerV13._aggregate_tool_messages)r+   r   )r   r   r   r   r   r-   rD   r   rC   r   rX   __classcell__r(   r(   r   r)   r     s
    .
r   versionr+   c                 C   sT   | t jt jt jhv rt S | t jt jhv rt S | t j	kr#t
 S td|  )aB  Gets the appropriate normalizer for the given tokenizer version.

    Args:
        version: The tokenizer version to get the normalizer for.

    Returns:
        The appropriate normalizer for the given tokenizer version.

    Examples:
        >>> normalizer = normalizer_for_tokenizer_version(TokenizerVersion.v1)
    zUnknown tokenizer version )r   v1v2v3r   r-   v7v11r   v13r   rH   )r   r(   r(   r)    normalizer_for_tokenizer_version  s   
r   c                 C   sP   | t jkrt}| S | t jkrt}| S | t jkr!t}| S td|  )NzUnsupported tokenizer version: )	r   r   r   r   r   r   r   rH   r-   )r   normalizer_clsr(   r(   r)   get_normalizer  s   


r   )%r2   typingr   r   r   &mistral_common.protocol.instruct.chunkr   r   r   )mistral_common.protocol.instruct.messagesr   r	   r
   r   r   r   r   r   r   r   r   (mistral_common.protocol.instruct.requestr   r   +mistral_common.protocol.instruct.tool_callsr   r   r   %mistral_common.tokens.tokenizers.baser   r   rF   r   r   r   r   r   r(   r(   r(   r)   <module>   s"    4
  1H"