# This file was auto-generated by Fern from our API Definition.

import typing

from ..core.client_wrapper import AsyncClientWrapper, SyncClientWrapper
from ..core.request_options import RequestOptions
from ..requests.chat_completion_request_message import ChatCompletionRequestMessageParams
from ..requests.stop_configuration import StopConfigurationParams
from ..types.create_chat_completion_response import CreateChatCompletionResponse
from ..types.reasoning_effort import ReasoningEffort
from .raw_client import AsyncRawChatClient, RawChatClient

# this is used as the default value for optional parameters
OMIT = typing.cast(typing.Any, ...)


class ChatClient:
    def __init__(self, *, client_wrapper: SyncClientWrapper):
        self._raw_client = RawChatClient(client_wrapper=client_wrapper)

    @property
    def with_raw_response(self) -> RawChatClient:
        """
        Retrieves a raw implementation of this client that returns raw responses.

        Returns
        -------
        RawChatClient
        """
        return self._raw_client

    def completions(
        self,
        *,
        messages: typing.Sequence[ChatCompletionRequestMessageParams],
        temperature: typing.Optional[float] = OMIT,
        top_p: typing.Optional[float] = OMIT,
        reasoning_effort: typing.Optional[ReasoningEffort] = OMIT,
        max_tokens: typing.Optional[int] = OMIT,
        stream: typing.Optional[bool] = OMIT,
        stop: typing.Optional[StopConfigurationParams] = OMIT,
        n: typing.Optional[int] = OMIT,
        seed: typing.Optional[int] = OMIT,
        frequency_penalty: typing.Optional[float] = OMIT,
        presence_penalty: typing.Optional[float] = OMIT,
        wiki_grounding: typing.Optional[bool] = OMIT,
        request_options: typing.Optional[RequestOptions] = None,
    ) -> CreateChatCompletionResponse:
        """
        Calls Sarvam LLM API to get the chat completion. Supported model(s): `sarvam-m`.

        Parameters
        ----------
        messages : typing.Sequence[ChatCompletionRequestMessageParams]
            A list of messages comprising the conversation so far.

        temperature : typing.Optional[float]
            What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.
            We generally recommend altering this or `top_p` but not both.

        top_p : typing.Optional[float]
            An alternative to sampling with temperature, called nucleus sampling,
            where the model considers the results of the tokens with top_p probability
            mass. So 0.1 means only the tokens comprising the top 10% probability mass
            are considered.

            We generally recommend altering this or `temperature` but not both.

        reasoning_effort : typing.Optional[ReasoningEffort]

        max_tokens : typing.Optional[int]
            The maximum number of tokens that can be generated in the chat completion.

        stream : typing.Optional[bool]
            If set to true, the model response data will be streamed to the client
            as it is generated using [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format).

        stop : typing.Optional[StopConfigurationParams]

        n : typing.Optional[int]
            How many chat completion choices to generate for each input message. Note that you will be charged based on the number of generated tokens across all of the choices. Keep `n` as `1` to minimize costs.

        seed : typing.Optional[int]
            This feature is in Beta.
            If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same `seed` and parameters should return the same result.

        frequency_penalty : typing.Optional[float]
            Number between -2.0 and 2.0. Positive values penalize new tokens based on
            their existing frequency in the text so far, decreasing the model's
            likelihood to repeat the same line verbatim.

        presence_penalty : typing.Optional[float]
            Number between -2.0 and 2.0. Positive values penalize new tokens based on
            whether they appear in the text so far, increasing the model's likelihood
            to talk about new topics.

        wiki_grounding : typing.Optional[bool]
            If this parameter is enabled, then the model uses a RAG based approach to retrieve relevant chunks from Wikipedia and uses them to answer the question. This is particularly useful for queries seeking factual information.

        request_options : typing.Optional[RequestOptions]
            Request-specific configuration.

        Returns
        -------
        CreateChatCompletionResponse
            Successful Response

        Examples
        --------
        from sarvamai import SarvamAI

        client = SarvamAI(
            api_subscription_key="YOUR_API_SUBSCRIPTION_KEY",
        )
        client.chat.completions(
            messages=[{"content": "content", "role": "assistant"}],
        )
        """
        _response = self._raw_client.completions(
            messages=messages,
            temperature=temperature,
            top_p=top_p,
            reasoning_effort=reasoning_effort,
            max_tokens=max_tokens,
            stream=stream,
            stop=stop,
            n=n,
            seed=seed,
            frequency_penalty=frequency_penalty,
            presence_penalty=presence_penalty,
            wiki_grounding=wiki_grounding,
            request_options=request_options,
        )
        return _response.data


class AsyncChatClient:
    def __init__(self, *, client_wrapper: AsyncClientWrapper):
        self._raw_client = AsyncRawChatClient(client_wrapper=client_wrapper)

    @property
    def with_raw_response(self) -> AsyncRawChatClient:
        """
        Retrieves a raw implementation of this client that returns raw responses.

        Returns
        -------
        AsyncRawChatClient
        """
        return self._raw_client

    async def completions(
        self,
        *,
        messages: typing.Sequence[ChatCompletionRequestMessageParams],
        temperature: typing.Optional[float] = OMIT,
        top_p: typing.Optional[float] = OMIT,
        reasoning_effort: typing.Optional[ReasoningEffort] = OMIT,
        max_tokens: typing.Optional[int] = OMIT,
        stream: typing.Optional[bool] = OMIT,
        stop: typing.Optional[StopConfigurationParams] = OMIT,
        n: typing.Optional[int] = OMIT,
        seed: typing.Optional[int] = OMIT,
        frequency_penalty: typing.Optional[float] = OMIT,
        presence_penalty: typing.Optional[float] = OMIT,
        wiki_grounding: typing.Optional[bool] = OMIT,
        request_options: typing.Optional[RequestOptions] = None,
    ) -> CreateChatCompletionResponse:
        """
        Calls Sarvam LLM API to get the chat completion. Supported model(s): `sarvam-m`.

        Parameters
        ----------
        messages : typing.Sequence[ChatCompletionRequestMessageParams]
            A list of messages comprising the conversation so far.

        temperature : typing.Optional[float]
            What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.
            We generally recommend altering this or `top_p` but not both.

        top_p : typing.Optional[float]
            An alternative to sampling with temperature, called nucleus sampling,
            where the model considers the results of the tokens with top_p probability
            mass. So 0.1 means only the tokens comprising the top 10% probability mass
            are considered.

            We generally recommend altering this or `temperature` but not both.

        reasoning_effort : typing.Optional[ReasoningEffort]

        max_tokens : typing.Optional[int]
            The maximum number of tokens that can be generated in the chat completion.

        stream : typing.Optional[bool]
            If set to true, the model response data will be streamed to the client
            as it is generated using [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format).

        stop : typing.Optional[StopConfigurationParams]

        n : typing.Optional[int]
            How many chat completion choices to generate for each input message. Note that you will be charged based on the number of generated tokens across all of the choices. Keep `n` as `1` to minimize costs.

        seed : typing.Optional[int]
            This feature is in Beta.
            If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same `seed` and parameters should return the same result.

        frequency_penalty : typing.Optional[float]
            Number between -2.0 and 2.0. Positive values penalize new tokens based on
            their existing frequency in the text so far, decreasing the model's
            likelihood to repeat the same line verbatim.

        presence_penalty : typing.Optional[float]
            Number between -2.0 and 2.0. Positive values penalize new tokens based on
            whether they appear in the text so far, increasing the model's likelihood
            to talk about new topics.

        wiki_grounding : typing.Optional[bool]
            If this parameter is enabled, then the model uses a RAG based approach to retrieve relevant chunks from Wikipedia and uses them to answer the question. This is particularly useful for queries seeking factual information.

        request_options : typing.Optional[RequestOptions]
            Request-specific configuration.

        Returns
        -------
        CreateChatCompletionResponse
            Successful Response

        Examples
        --------
        import asyncio

        from sarvamai import AsyncSarvamAI

        client = AsyncSarvamAI(
            api_subscription_key="YOUR_API_SUBSCRIPTION_KEY",
        )


        async def main() -> None:
            await client.chat.completions(
                messages=[{"content": "content", "role": "assistant"}],
            )


        asyncio.run(main())
        """
        _response = await self._raw_client.completions(
            messages=messages,
            temperature=temperature,
            top_p=top_p,
            reasoning_effort=reasoning_effort,
            max_tokens=max_tokens,
            stream=stream,
            stop=stop,
            n=n,
            seed=seed,
            frequency_penalty=frequency_penalty,
            presence_penalty=presence_penalty,
            wiki_grounding=wiki_grounding,
            request_options=request_options,
        )
        return _response.data