o
    i                  
   @   s  d dl Z d dlmZ d dlZd dlmZmZmZmZ d dl	m
Z
 d dlmZ d dlmZmZmZmZ d dlmZ d dlmZmZ d d	lmZmZ d d
lmZ d dlmZ d dlmZm Z m!Z! d dl"m#Z# edgdZ$edddgdZ%edddgdZ&e$'dde
fddZ(e%)ddeeB deeeef de*e+ fddZ,e&)dee*deej-d fdeeeef d!e*e+ d"ede.fd#d$Z/e&)dee*dfdeeeef d!e*e+ defd%d&Z0e$j)d'd(d)gddeeB deeeef defd*d+Z1dS ),    N)	Annotated)	APIRouterBodyDependsHTTPExceptionRedirectResponse)ValidationError)EngineBackendOpenAIChatCompletionRequestSettingsget_settings)_split_content_and_think_chunks)_decode_tool_calls_split_content_and_tool_calls)	TextChunk
ThinkChunk)AssistantMessage)ChatCompletionRequest)SpecialTokenPolicy	TokenizedTokenizerVersion)InstructTokenizerV13app)tagsz/v1/tokenize	tokenizertokenize)prefixr   z/v1/detokenize
detokenize/returnc                      s   t ddS )zRedirect to the documentation.docs)urlr    r#   r#   [/home/ubuntu/.local/lib/python3.10/site-packages/mistral_common/experimental/app/routers.pyredirect_to_docs   s   
r%   requestsettingsc              
      s   t | tr-z|   tjdi |  } W n ttfy, } ztdt	|dd}~ww | j
g kr8tddd|j| }t |tsIJ t||jS )z#Tokenize a chat completion request.  status_codedetailNzMessages list cannot be empty.r#   )
isinstancer   drop_extra_fieldsr   from_openai
model_dumpr	   
ValueErrorr   strmessagesr   encode_chat_completionr   typetokens)r&   r'   e	tokenizedr#   r#   r$   tokenize_request"   s   

r8   z/string)default_factory)defaultr5   special_token_policyc              
      sV   t |dkrtdddz	| jj||dW S  ty* } ztdt|dd}~ww )zDetokenize a list of tokens to a string.

    Args:
        tokens: The tokens to detokenize.
        special_token_policy: The policy to use for special tokens.

    Returns:
        The detokenized string or assistant message.
    r   r(   Tokens list cannot be empty.r)   r;   N)lenr   r   decoder0   r1   )r'   r5   r;   r6   r#   r#   r$   detokenize_to_string7   s   r@   c           
   
      s  t |dkrtdddjjjjtjkr$t|jjjd\}}n|d}}d}jjjjtj	krHt
jjts=J jjj}jjj nd } |dur durzt|| }W n typ } ztdt|dd}~ww jjjj|r fdd	|D }t |d
krt
|d tr|d j}n|rjj|tjd}|rz
t|jjj}W n ttjfy } ztdt|dd}~ww d}|d jjjjk}	t|||	 dS )zDetokenize a list of tokens to an assistant message.

    Parse tool calls from the tokens and extract content before the first tool call.

    Args:
        tokens: The tokens to detokenize.

    Returns:
        The detokenized assistant message.
    r   r(   r<   r)   z[TOOL_CALLS]r#   Nc                    sX   g | ](\}}|gkr|st jj|tjd dntjj|tjd |d  kdqS )r=   )text)thinkingclosed)r   r   r?   r   IGNOREr   ).0chunkis_think	end_thinkeosr'   r#   r$   
<listcomp>z   s    

z3detokenize_to_assistant_message.<locals>.<listcomp>   r=   rB   )content
tool_callsr   )r>   r   r   instruct_tokenizerversionr   v1r   get_special_tokenv13r,   r   BEGIN_THINK	END_THINKr   r0   r1   eos_idr   rA   r?   r   rE   r   jsonJSONDecodeErrorr   )
r'   r5   content_tokenstool_calls_tokensrN   begin_thinkcontent_or_think_tokensr6   rO   has_eosr#   rI   r$   detokenize_to_assistant_messageN   sR   




r_   z/v1/chat/completionschatcompletionsc              
      sL  t | tr|  }tjdi |  } ni }t| |I dH }ddh |  }||  fdd| D }|	ddrCt
dd	d
z#|jtjkr]tj|j d|dd||jd}ntd|j W n$ tjjyu   t
ddd
 tjjy } zt
dt|d
d}~ww |jdkrt
|j|jd
| }t||d I dH S )zGenerate a chat completion.

    Args:
        request: The chat completion request.
        settings: The settings for the Mistral-common API.

    Returns:
        The generated chat completion.
    Nr2   toolsc                    s   i | ]\}}| vr||qS r#   r#   )rF   kvexclude_fieldsr#   r$   
<dictcomp>   s    zgenerate.<locals>.<dictcomp>streamFr(   zStreaming is not supported.r)   z/completionsT)promptreturn_tokens)rX   timeoutzUnsupported engine backend: i  Timeouti     r5   r#   )r,   r   r-   r   r.   r/   r8   updateitemsgetr   engine_backendr
   	llama_cpprequestspost
engine_urlrk   r0   
exceptionsrl   RequestExceptionr1   r*   rA   rX   r_   )r&   r'   extra_fields
tokens_idsrequest_jsonresponser6   response_jsonr#   re   r$   generate   sF   




r}   )2rX   typingr   rs   fastapir   r   r   r   fastapi.responsesr   pydanticr	   &mistral_common.experimental.app.modelsr
   r   r   r   !mistral_common.experimental.thinkr   !mistral_common.experimental.toolsr   r   &mistral_common.protocol.instruct.chunkr   r   )mistral_common.protocol.instruct.messagesr   (mistral_common.protocol.instruct.requestr   %mistral_common.tokens.tokenizers.baser   r   r   )mistral_common.tokens.tokenizers.instructr   main_routertokenize_routerdecode_routerrp   r%   rt   listintr8   rE   r1   r@   r_   r}   r#   r#   r#   r$   <module>   sp    
H