# This file was auto-generated by Fern from our API Definition.

import typing
from json.decoder import JSONDecodeError

from ..core.api_error import ApiError
from ..core.client_wrapper import AsyncClientWrapper, SyncClientWrapper
from ..core.http_response import AsyncHttpResponse, HttpResponse
from ..core.pydantic_utilities import parse_obj_as
from ..core.request_options import RequestOptions
from ..core.serialization import convert_and_respect_annotation_metadata
from ..errors.bad_request_error import BadRequestError
from ..errors.forbidden_error import ForbiddenError
from ..errors.internal_server_error import InternalServerError
from ..errors.too_many_requests_error import TooManyRequestsError
from ..errors.unprocessable_entity_error import UnprocessableEntityError
from ..requests.chat_completion_request_message import ChatCompletionRequestMessageParams
from ..requests.stop_configuration import StopConfigurationParams
from ..types.create_chat_completion_response import CreateChatCompletionResponse
from ..types.reasoning_effort import ReasoningEffort

# Sentinel used as the default for optional request parameters in this module.
# The same object is handed to the HTTP client through its `omit=` argument
# (presumably so fields left at this default are dropped from the payload;
# confirm in the client wrapper).
OMIT = typing.cast(typing.Any, Ellipsis)


class RawChatClient:
    """
    Synchronous low-level client for the chat completions endpoint.

    Successful calls return the parsed payload wrapped in an ``HttpResponse``
    together with the underlying HTTP response object.
    """

    def __init__(self, *, client_wrapper: SyncClientWrapper):
        # The wrapper supplies both the HTTP client and the target environment.
        self._client_wrapper = client_wrapper

    def completions(
        self,
        *,
        messages: typing.Sequence[ChatCompletionRequestMessageParams],
        temperature: typing.Optional[float] = OMIT,
        top_p: typing.Optional[float] = OMIT,
        reasoning_effort: typing.Optional[ReasoningEffort] = OMIT,
        max_tokens: typing.Optional[int] = OMIT,
        stream: typing.Optional[bool] = OMIT,
        stop: typing.Optional[StopConfigurationParams] = OMIT,
        n: typing.Optional[int] = OMIT,
        seed: typing.Optional[int] = OMIT,
        frequency_penalty: typing.Optional[float] = OMIT,
        presence_penalty: typing.Optional[float] = OMIT,
        wiki_grounding: typing.Optional[bool] = OMIT,
        request_options: typing.Optional[RequestOptions] = None,
    ) -> HttpResponse[CreateChatCompletionResponse]:
        """
        Request a chat completion from the Sarvam LLM API.

        The request is POSTed to `v1/chat/completions` and always names the
        `sarvam-m` model.

        Parameters
        ----------
        messages : typing.Sequence[ChatCompletionRequestMessageParams]
            The conversation so far, as a list of messages.

        temperature : typing.Optional[float]
            Sampling temperature, between 0 and 2. Higher values (e.g. 0.8) give more random output;
            lower values (e.g. 0.2) give more focused, deterministic output.
            It is generally recommended to adjust either this or `top_p`, not both.

        top_p : typing.Optional[float]
            Nucleus sampling, an alternative to temperature sampling: the model
            only considers the tokens that make up the top `top_p` probability
            mass, so 0.1 restricts sampling to the top 10% probability mass.

            It is generally recommended to adjust either this or `temperature`, not both.

        reasoning_effort : typing.Optional[ReasoningEffort]
            Sent unchanged as the `reasoning_effort` field of the request body.

        max_tokens : typing.Optional[int]
            Upper bound on the number of tokens generated in the chat completion.

        stream : typing.Optional[bool]
            When true, the model response data is streamed to the client as it is
            generated using [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format).
            NOTE(review): this method decodes the whole body with `.json()`;
            confirm how a streamed body is meant to be consumed.

        stop : typing.Optional[StopConfigurationParams]
            Converted with `convert_and_respect_annotation_metadata` and sent as the `stop` field.

        n : typing.Optional[int]
            Number of chat completion choices to generate per input message. Billing is based on
            the tokens generated across all choices, so keep `n` at `1` to minimize costs.

        seed : typing.Optional[int]
            This feature is in Beta.
            When given, the system makes a best effort to sample deterministically, so that repeated
            requests with the same `seed` and parameters should return the same result.

        frequency_penalty : typing.Optional[float]
            Number between -2.0 and 2.0. Positive values penalize new tokens by how
            often they already occur in the text so far, making verbatim
            repetition of the same line less likely.

        presence_penalty : typing.Optional[float]
            Number between -2.0 and 2.0. Positive values penalize new tokens that
            already appear in the text so far, making the model more likely
            to move on to new topics.

        wiki_grounding : typing.Optional[bool]
            When enabled, the model uses a RAG based approach to retrieve relevant chunks from
            Wikipedia and answers using them. Particularly useful for queries seeking factual information.

        request_options : typing.Optional[RequestOptions]
            Request-specific configuration.

        Returns
        -------
        HttpResponse[CreateChatCompletionResponse]
            Successful Response

        Raises
        ------
        BadRequestError
            The response status is 400.
        ForbiddenError
            The response status is 403.
        UnprocessableEntityError
            The response status is 422.
        TooManyRequestsError
            The response status is 429.
        InternalServerError
            The response status is 500.
        ApiError
            Any other non-2xx status (body is the decoded JSON), or any response
            whose body cannot be decoded as JSON (body is the raw text).
        """
        # Resolve the bound request callable and base URL before building the
        # payload so evaluation happens in the same order as a single inline call.
        send_request = self._client_wrapper.httpx_client.request
        base_url = self._client_wrapper.get_environment().base
        payload = {
            "messages": convert_and_respect_annotation_metadata(
                object_=messages, annotation=typing.Sequence[ChatCompletionRequestMessageParams], direction="write"
            ),
            "temperature": temperature,
            "top_p": top_p,
            "reasoning_effort": reasoning_effort,
            "max_tokens": max_tokens,
            "stream": stream,
            "stop": convert_and_respect_annotation_metadata(
                object_=stop, annotation=StopConfigurationParams, direction="write"
            ),
            "n": n,
            "seed": seed,
            "frequency_penalty": frequency_penalty,
            "presence_penalty": presence_penalty,
            "wiki_grounding": wiki_grounding,
            "model": "sarvam-m",
        }
        raw = send_request(
            "v1/chat/completions",
            base_url=base_url,
            method="POST",
            json=payload,
            headers={
                "content-type": "application/json",
            },
            request_options=request_options,
            omit=OMIT,
        )

        # Status codes that map onto a dedicated exception type.
        typed_errors: typing.Dict[int, typing.Callable[..., Exception]] = {
            400: BadRequestError,
            403: ForbiddenError,
            422: UnprocessableEntityError,
            429: TooManyRequestsError,
            500: InternalServerError,
        }
        status = raw.status_code
        try:
            if 200 <= status < 300:
                parsed = typing.cast(
                    CreateChatCompletionResponse,
                    parse_obj_as(
                        type_=CreateChatCompletionResponse,  # type: ignore
                        object_=raw.json(),
                    ),
                )
                return HttpResponse(response=raw, data=parsed)

            error_cls = typed_errors.get(status)
            if error_cls is not None:
                raise error_cls(
                    headers=dict(raw.headers),
                    body=typing.cast(
                        typing.Optional[typing.Any],
                        parse_obj_as(
                            type_=typing.Optional[typing.Any],  # type: ignore
                            object_=raw.json(),
                        ),
                    ),
                )

            # Unmapped status: decode the body here so a malformed payload
            # is still routed through the JSONDecodeError handler below.
            fallback_body = raw.json()
        except JSONDecodeError:
            raise ApiError(status_code=status, headers=dict(raw.headers), body=raw.text)
        raise ApiError(status_code=status, headers=dict(raw.headers), body=fallback_body)


class AsyncRawChatClient:
    """
    Asynchronous low-level client for the chat completions endpoint.

    Successful calls return the parsed payload wrapped in an ``AsyncHttpResponse``
    together with the underlying HTTP response object.
    """

    def __init__(self, *, client_wrapper: AsyncClientWrapper):
        # The wrapper supplies both the HTTP client and the target environment.
        self._client_wrapper = client_wrapper

    async def completions(
        self,
        *,
        messages: typing.Sequence[ChatCompletionRequestMessageParams],
        temperature: typing.Optional[float] = OMIT,
        top_p: typing.Optional[float] = OMIT,
        reasoning_effort: typing.Optional[ReasoningEffort] = OMIT,
        max_tokens: typing.Optional[int] = OMIT,
        stream: typing.Optional[bool] = OMIT,
        stop: typing.Optional[StopConfigurationParams] = OMIT,
        n: typing.Optional[int] = OMIT,
        seed: typing.Optional[int] = OMIT,
        frequency_penalty: typing.Optional[float] = OMIT,
        presence_penalty: typing.Optional[float] = OMIT,
        wiki_grounding: typing.Optional[bool] = OMIT,
        request_options: typing.Optional[RequestOptions] = None,
    ) -> AsyncHttpResponse[CreateChatCompletionResponse]:
        """
        Request a chat completion from the Sarvam LLM API.

        The request is POSTed to `v1/chat/completions` and always names the
        `sarvam-m` model.

        Parameters
        ----------
        messages : typing.Sequence[ChatCompletionRequestMessageParams]
            The conversation so far, as a list of messages.

        temperature : typing.Optional[float]
            Sampling temperature, between 0 and 2. Higher values (e.g. 0.8) give more random output;
            lower values (e.g. 0.2) give more focused, deterministic output.
            It is generally recommended to adjust either this or `top_p`, not both.

        top_p : typing.Optional[float]
            Nucleus sampling, an alternative to temperature sampling: the model
            only considers the tokens that make up the top `top_p` probability
            mass, so 0.1 restricts sampling to the top 10% probability mass.

            It is generally recommended to adjust either this or `temperature`, not both.

        reasoning_effort : typing.Optional[ReasoningEffort]
            Sent unchanged as the `reasoning_effort` field of the request body.

        max_tokens : typing.Optional[int]
            Upper bound on the number of tokens generated in the chat completion.

        stream : typing.Optional[bool]
            When true, the model response data is streamed to the client as it is
            generated using [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format).
            NOTE(review): this method decodes the whole body with `.json()`;
            confirm how a streamed body is meant to be consumed.

        stop : typing.Optional[StopConfigurationParams]
            Converted with `convert_and_respect_annotation_metadata` and sent as the `stop` field.

        n : typing.Optional[int]
            Number of chat completion choices to generate per input message. Billing is based on
            the tokens generated across all choices, so keep `n` at `1` to minimize costs.

        seed : typing.Optional[int]
            This feature is in Beta.
            When given, the system makes a best effort to sample deterministically, so that repeated
            requests with the same `seed` and parameters should return the same result.

        frequency_penalty : typing.Optional[float]
            Number between -2.0 and 2.0. Positive values penalize new tokens by how
            often they already occur in the text so far, making verbatim
            repetition of the same line less likely.

        presence_penalty : typing.Optional[float]
            Number between -2.0 and 2.0. Positive values penalize new tokens that
            already appear in the text so far, making the model more likely
            to move on to new topics.

        wiki_grounding : typing.Optional[bool]
            When enabled, the model uses a RAG based approach to retrieve relevant chunks from
            Wikipedia and answers using them. Particularly useful for queries seeking factual information.

        request_options : typing.Optional[RequestOptions]
            Request-specific configuration.

        Returns
        -------
        AsyncHttpResponse[CreateChatCompletionResponse]
            Successful Response

        Raises
        ------
        BadRequestError
            The response status is 400.
        ForbiddenError
            The response status is 403.
        UnprocessableEntityError
            The response status is 422.
        TooManyRequestsError
            The response status is 429.
        InternalServerError
            The response status is 500.
        ApiError
            Any other non-2xx status (body is the decoded JSON), or any response
            whose body cannot be decoded as JSON (body is the raw text).
        """
        # Resolve the bound request callable and base URL before building the
        # payload so evaluation happens in the same order as a single inline call.
        send_request = self._client_wrapper.httpx_client.request
        base_url = self._client_wrapper.get_environment().base
        payload = {
            "messages": convert_and_respect_annotation_metadata(
                object_=messages, annotation=typing.Sequence[ChatCompletionRequestMessageParams], direction="write"
            ),
            "temperature": temperature,
            "top_p": top_p,
            "reasoning_effort": reasoning_effort,
            "max_tokens": max_tokens,
            "stream": stream,
            "stop": convert_and_respect_annotation_metadata(
                object_=stop, annotation=StopConfigurationParams, direction="write"
            ),
            "n": n,
            "seed": seed,
            "frequency_penalty": frequency_penalty,
            "presence_penalty": presence_penalty,
            "wiki_grounding": wiki_grounding,
            "model": "sarvam-m",
        }
        raw = await send_request(
            "v1/chat/completions",
            base_url=base_url,
            method="POST",
            json=payload,
            headers={
                "content-type": "application/json",
            },
            request_options=request_options,
            omit=OMIT,
        )

        # Status codes that map onto a dedicated exception type.
        typed_errors: typing.Dict[int, typing.Callable[..., Exception]] = {
            400: BadRequestError,
            403: ForbiddenError,
            422: UnprocessableEntityError,
            429: TooManyRequestsError,
            500: InternalServerError,
        }
        status = raw.status_code
        try:
            if 200 <= status < 300:
                parsed = typing.cast(
                    CreateChatCompletionResponse,
                    parse_obj_as(
                        type_=CreateChatCompletionResponse,  # type: ignore
                        object_=raw.json(),
                    ),
                )
                return AsyncHttpResponse(response=raw, data=parsed)

            error_cls = typed_errors.get(status)
            if error_cls is not None:
                raise error_cls(
                    headers=dict(raw.headers),
                    body=typing.cast(
                        typing.Optional[typing.Any],
                        parse_obj_as(
                            type_=typing.Optional[typing.Any],  # type: ignore
                            object_=raw.json(),
                        ),
                    ),
                )

            # Unmapped status: decode the body here so a malformed payload
            # is still routed through the JSONDecodeError handler below.
            fallback_body = raw.json()
        except JSONDecodeError:
            raise ApiError(status_code=status, headers=dict(raw.headers), body=raw.text)
        raise ApiError(status_code=status, headers=dict(raw.headers), body=fallback_body)
