o
    qmi+                     @   s   d dl Z ddlmZmZ ddlmZ ddlmZ ddlm	Z	 ddl
mZ ddlmZ d	d
lmZmZ e e jdZG dd dZG dd dZdS )    N   )AsyncClientWrapperSyncClientWrapper)RequestOptions)"ChatCompletionRequestMessageParams)StopConfigurationParams)CreateChatCompletionResponse)ReasoningEffort   )AsyncRawChatClientRawChatClient.c                   @      e Zd ZdefddZedefddZeeeeeeeeeeeddd	e	j
e d
e	je de	je de	je de	je de	je de	je de	je de	je de	je de	je de	je de	je defddZdS )
ChatClientclient_wrapperc                C      t |d| _d S N)r   )r   _raw_clientselfr    r   H/home/ubuntu/.local/lib/python3.10/site-packages/sarvamai/chat/client.py__init__      zChatClient.__init__returnc                 C      | j S )z
        Retrieves a raw implementation of this client that returns raw responses.

        Returns
        -------
        RawChatClient
        r   r   r   r   r   with_raw_response      	zChatClient.with_raw_responseNtemperaturetop_preasoning_effort
max_tokensstreamstopnseedfrequency_penaltypresence_penaltywiki_groundingrequest_optionsmessagesr    r!   r"   r#   r$   r%   r&   r'   r(   r)   r*   r+   c                C   s,   | j j|||||||||	|
|||d}|jS )a  
        Calls Sarvam LLM API to get the chat completion. Supported model(s): `sarvam-m`.

        Parameters
        ----------
        messages : typing.Sequence[ChatCompletionRequestMessageParams]
            A list of messages comprising the conversation so far.

        temperature : typing.Optional[float]
            What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.
            We generally recommend altering this or `top_p` but not both.

        top_p : typing.Optional[float]
            An alternative to sampling with temperature, called nucleus sampling,
            where the model considers the results of the tokens with top_p probability
            mass. So 0.1 means only the tokens comprising the top 10% probability mass
            are considered.

            We generally recommend altering this or `temperature` but not both.

        reasoning_effort : typing.Optional[ReasoningEffort]

        max_tokens : typing.Optional[int]
            The maximum number of tokens that can be generated in the chat completion.

        stream : typing.Optional[bool]
            If set to true, the model response data will be streamed to the client
            as it is generated using [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format).

        stop : typing.Optional[StopConfigurationParams]

        n : typing.Optional[int]
            How many chat completion choices to generate for each input message. Note that you will be charged based on the number of generated tokens across all of the choices. Keep `n` as `1` to minimize costs.

        seed : typing.Optional[int]
            This feature is in Beta.
            If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same `seed` and parameters should return the same result.

        frequency_penalty : typing.Optional[float]
            Number between -2.0 and 2.0. Positive values penalize new tokens based on
            their existing frequency in the text so far, decreasing the model's
            likelihood to repeat the same line verbatim.

        presence_penalty : typing.Optional[float]
            Number between -2.0 and 2.0. Positive values penalize new tokens based on
            whether they appear in the text so far, increasing the model's likelihood
            to talk about new topics.

        wiki_grounding : typing.Optional[bool]
            If this parameter is enabled, then the model uses a RAG based approach to retrieve relevant chunks from Wikipedia and uses them to answer the question. This is particularly useful for queries seeking factual information.

        request_options : typing.Optional[RequestOptions]
            Request-specific configuration.

        Returns
        -------
        CreateChatCompletionResponse
            Successful Response

        Examples
        --------
        from sarvamai import SarvamAI

        client = SarvamAI(
            api_subscription_key="YOUR_API_SUBSCRIPTION_KEY",
        )
        client.chat.completions(
            messages=[{"content": "content", "role": "assistant"}],
        )
        r,   r    r!   r"   r#   r$   r%   r&   r'   r(   r)   r*   r+   r   completionsdatar   r,   r    r!   r"   r#   r$   r%   r&   r'   r(   r)   r*   r+   	_responser   r   r   r/       s    WzChatClient.completions)__name__
__module____qualname__r   r   propertyr   r   OMITtypingSequencer   Optionalfloatr	   intboolr   r   r   r/   r   r   r   r   r      Z    	
r   c                   @   r   )AsyncChatClientr   c                C   r   r   )r   r   r   r   r   r   r      r   zAsyncChatClient.__init__r   c                 C   r   )z
        Retrieves a raw implementation of this client that returns raw responses.

        Returns
        -------
        AsyncRawChatClient
        r   r   r   r   r   r      r   z!AsyncChatClient.with_raw_responseNr   r,   r    r!   r"   r#   r$   r%   r&   r'   r(   r)   r*   r+   c                   s4   | j j|||||||||	|
|||dI dH }|jS )a  
        Calls Sarvam LLM API to get the chat completion. Supported model(s): `sarvam-m`.

        Parameters
        ----------
        messages : typing.Sequence[ChatCompletionRequestMessageParams]
            A list of messages comprising the conversation so far.

        temperature : typing.Optional[float]
            What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.
            We generally recommend altering this or `top_p` but not both.

        top_p : typing.Optional[float]
            An alternative to sampling with temperature, called nucleus sampling,
            where the model considers the results of the tokens with top_p probability
            mass. So 0.1 means only the tokens comprising the top 10% probability mass
            are considered.

            We generally recommend altering this or `temperature` but not both.

        reasoning_effort : typing.Optional[ReasoningEffort]

        max_tokens : typing.Optional[int]
            The maximum number of tokens that can be generated in the chat completion.

        stream : typing.Optional[bool]
            If set to true, the model response data will be streamed to the client
            as it is generated using [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format).

        stop : typing.Optional[StopConfigurationParams]

        n : typing.Optional[int]
            How many chat completion choices to generate for each input message. Note that you will be charged based on the number of generated tokens across all of the choices. Keep `n` as `1` to minimize costs.

        seed : typing.Optional[int]
            This feature is in Beta.
            If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same `seed` and parameters should return the same result.

        frequency_penalty : typing.Optional[float]
            Number between -2.0 and 2.0. Positive values penalize new tokens based on
            their existing frequency in the text so far, decreasing the model's
            likelihood to repeat the same line verbatim.

        presence_penalty : typing.Optional[float]
            Number between -2.0 and 2.0. Positive values penalize new tokens based on
            whether they appear in the text so far, increasing the model's likelihood
            to talk about new topics.

        wiki_grounding : typing.Optional[bool]
            If this parameter is enabled, then the model uses a RAG based approach to retrieve relevant chunks from Wikipedia and uses them to answer the question. This is particularly useful for queries seeking factual information.

        request_options : typing.Optional[RequestOptions]
            Request-specific configuration.

        Returns
        -------
        CreateChatCompletionResponse
            Successful Response

        Examples
        --------
        import asyncio

        from sarvamai import AsyncSarvamAI

        client = AsyncSarvamAI(
            api_subscription_key="YOUR_API_SUBSCRIPTION_KEY",
        )


        async def main() -> None:
            await client.chat.completions(
                messages=[{"content": "content", "role": "assistant"}],
            )


        asyncio.run(main())
        r-   Nr.   r1   r   r   r   r/      s"   _zAsyncChatClient.completions)r3   r4   r5   r   r   r6   r   r   r7   r8   r9   r   r:   r;   r	   r<   r=   r   r   r   r/   r   r   r   r   r?      r>   r?   )r8   core.client_wrapperr   r   core.request_optionsr   (requests.chat_completion_request_messager   requests.stop_configurationr   %types.create_chat_completion_responser   types.reasoning_effortr	   
raw_clientr   r   castAnyr7   r   r?   r   r   r   r   <module>   s   x