o
    qmiE                     @   s   d dl Z d dlmZ ddlmZ ddlmZmZ ddlm	Z	m
Z
 ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlm Z  ddl!m"Z" e #e j$dZ%G dd dZ&G dd dZ'dS )    N)JSONDecodeError   )ApiError)AsyncClientWrapperSyncClientWrapper)AsyncHttpResponseHttpResponse)parse_obj_as)RequestOptions)'convert_and_respect_annotation_metadata)BadRequestError)ForbiddenError)InternalServerError)TooManyRequestsError)UnprocessableEntityError)"ChatCompletionRequestMessageParams)StopConfigurationParams)CreateChatCompletionResponse)ReasoningEffort.c                   @      e Zd ZdefddZeeeeeeeeeeedddeje dej	e
 dej	e
 d	ej	e d
ej	e dej	e dej	e dej	e dej	e dej	e
 dej	e
 dej	e dej	e dee fddZdS )RawChatClientclient_wrapperc                C   
   || _ d S N_client_wrapperselfr    r   L/home/ubuntu/.local/lib/python3.10/site-packages/sarvamai/chat/raw_client.py__init__      
zRawChatClient.__init__Ntemperaturetop_preasoning_effort
max_tokensstreamstopnseedfrequency_penaltypresence_penaltywiki_groundingrequest_optionsmessagesr#   r$   r%   r&   r'   r(   r)   r*   r+   r,   r-   r.   returnc                C   s*  | j jjd| j  jdt|tjt dd|||||t|t	dd||	|
||ddddi|t
d	}zd
|j  kr;dk rPn ntttt| d}t||dW S |jdkrptt|jttjtj ttjtj | dd|jdkrtt|jttjtj ttjtj | dd|jdkrtt|jttjtj ttjtj | dd|jdkrtt|jttjtj ttjtj | dd|jdkrtt|jttjtj ttjtj | dd| }W n ty	   t|jt|j|jdw t|jt|j|d)a  
        Calls Sarvam LLM API to get the chat completion. Supported model(s): `sarvam-m`.

        Parameters
        ----------
        messages : typing.Sequence[ChatCompletionRequestMessageParams]
            A list of messages comprising the conversation so far.

        temperature : typing.Optional[float]
            What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.
            We generally recommend altering this or `top_p` but not both.

        top_p : typing.Optional[float]
            An alternative to sampling with temperature, called nucleus sampling,
            where the model considers the results of the tokens with top_p probability
            mass. So 0.1 means only the tokens comprising the top 10% probability mass
            are considered.

            We generally recommend altering this or `temperature` but not both.

        reasoning_effort : typing.Optional[ReasoningEffort]

        max_tokens : typing.Optional[int]
            The maximum number of tokens that can be generated in the chat completion.

        stream : typing.Optional[bool]
            If set to true, the model response data will be streamed to the client
            as it is generated using [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format).

        stop : typing.Optional[StopConfigurationParams]

        n : typing.Optional[int]
            How many chat completion choices to generate for each input message. Note that you will be charged based on the number of generated tokens across all of the choices. Keep `n` as `1` to minimize costs.

        seed : typing.Optional[int]
            This feature is in Beta.
            If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same `seed` and parameters should return the same result.

        frequency_penalty : typing.Optional[float]
            Number between -2.0 and 2.0. Positive values penalize new tokens based on
            their existing frequency in the text so far, decreasing the model's
            likelihood to repeat the same line verbatim.

        presence_penalty : typing.Optional[float]
            Number between -2.0 and 2.0. Positive values penalize new tokens based on
            whether they appear in the text so far, increasing the model's likelihood
            to talk about new topics.

        wiki_grounding : typing.Optional[bool]
            If this parameter is enabled, then the model uses a RAG based approach to retrieve relevant chunks from Wikipedia and uses them to answer the question. This is particularly useful for queries seeking factual information.

        request_options : typing.Optional[RequestOptions]
            Request-specific configuration.

        Returns
        -------
        HttpResponse[CreateChatCompletionResponse]
            Successful Response
        v1/chat/completionsPOSTwriteobject_
annotation	directionsarvam-mr/   r#   r$   r%   r&   r'   r(   r)   r*   r+   r,   r-   modelcontent-typeapplication/jsonbase_urlmethodjsonheadersr.   omit   ,  type_r5   responsedata  rA   body        status_coderA   rL   )r   httpx_clientrequestget_environmentbaser   typingSequencer   r   OMITrR   castr   r	   r@   r   r   dictrA   OptionalAnyr   r   r   r   r   r   textr   r/   r#   r$   r%   r&   r'   r(   r)   r*   r+   r,   r-   r.   	_response_data_response_jsonr   r   r   completions   s   L




















zRawChatClient.completions)__name__
__module____qualname__r   r    rY   rW   rX   r   r\   floatr   intboolr   r
   r   r   rc   r   r   r   r   r      V    	
r   c                   @   r   )AsyncRawChatClientr   c                C   r   r   r   r   r   r   r   r       r!   zAsyncRawChatClient.__init__Nr"   r/   r#   r$   r%   r&   r'   r(   r)   r*   r+   r,   r-   r.   r0   c                   s2  | j jjd| j  jdt|tjt dd|||||t|t	dd||	|
||ddddi|t
d	I d
H }zd|j  kr?dk rTn ntttt| d}t||dW S |jdkrttt|jttjtj ttjtj | dd|jdkrtt|jttjtj ttjtj | dd|jdkrtt|jttjtj ttjtj | dd|jdkrtt|jttjtj ttjtj | dd|jdkrtt|jttjtj ttjtj | dd| }W n ty   t|jt|j|jdw t|jt|j|d)a  
        Calls Sarvam LLM API to get the chat completion. Supported model(s): `sarvam-m`.

        Parameters
        ----------
        messages : typing.Sequence[ChatCompletionRequestMessageParams]
            A list of messages comprising the conversation so far.

        temperature : typing.Optional[float]
            What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.
            We generally recommend altering this or `top_p` but not both.

        top_p : typing.Optional[float]
            An alternative to sampling with temperature, called nucleus sampling,
            where the model considers the results of the tokens with top_p probability
            mass. So 0.1 means only the tokens comprising the top 10% probability mass
            are considered.

            We generally recommend altering this or `temperature` but not both.

        reasoning_effort : typing.Optional[ReasoningEffort]

        max_tokens : typing.Optional[int]
            The maximum number of tokens that can be generated in the chat completion.

        stream : typing.Optional[bool]
            If set to true, the model response data will be streamed to the client
            as it is generated using [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format).

        stop : typing.Optional[StopConfigurationParams]

        n : typing.Optional[int]
            How many chat completion choices to generate for each input message. Note that you will be charged based on the number of generated tokens across all of the choices. Keep `n` as `1` to minimize costs.

        seed : typing.Optional[int]
            This feature is in Beta.
            If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same `seed` and parameters should return the same result.

        frequency_penalty : typing.Optional[float]
            Number between -2.0 and 2.0. Positive values penalize new tokens based on
            their existing frequency in the text so far, decreasing the model's
            likelihood to repeat the same line verbatim.

        presence_penalty : typing.Optional[float]
            Number between -2.0 and 2.0. Positive values penalize new tokens based on
            whether they appear in the text so far, increasing the model's likelihood
            to talk about new topics.

        wiki_grounding : typing.Optional[bool]
            If this parameter is enabled, then the model uses a RAG based approach to retrieve relevant chunks from Wikipedia and uses them to answer the question. This is particularly useful for queries seeking factual information.

        request_options : typing.Optional[RequestOptions]
            Request-specific configuration.

        Returns
        -------
        AsyncHttpResponse[CreateChatCompletionResponse]
            Successful Response
        r1   r2   r3   r4   r8   r9   r;   r<   r=   NrC   rD   rE   rG   rJ   rK   rM   rN   rO   rP   rQ   )r   rS   rT   rU   rV   r   rW   rX   r   r   rY   rR   rZ   r   r	   r@   r   r   r[   rA   r\   r]   r   r   r   r   r   r   r^   r_   r   r   r   rc      s   L




















zAsyncRawChatClient.completions)rd   re   rf   r   r    rY   rW   rX   r   r\   rg   r   rh   ri   r   r
   r   r   rc   r   r   r   r   rk      rj   rk   )(rW   json.decoderr   core.api_errorr   core.client_wrapperr   r   core.http_responser   r   core.pydantic_utilitiesr	   core.request_optionsr
   core.serializationr   errors.bad_request_errorr   errors.forbidden_errorr   errors.internal_server_errorr   errors.too_many_requests_errorr   !errors.unprocessable_entity_errorr   (requests.chat_completion_request_messager   requests.stop_configurationr   %types.create_chat_completion_responser   types.reasoning_effortr   rZ   r]   rY   r   rk   r   r   r   r   <module>   s*    5