# This file was auto-generated by Fern from our API Definition.

import typing
from json.decoder import JSONDecodeError

from ..core.api_error import ApiError
from ..core.client_wrapper import AsyncClientWrapper, SyncClientWrapper
from ..core.http_response import AsyncHttpResponse, HttpResponse
from ..core.pydantic_utilities import parse_obj_as
from ..core.request_options import RequestOptions
from ..errors.bad_request_error import BadRequestError
from ..errors.forbidden_error import ForbiddenError
from ..errors.internal_server_error import InternalServerError
from ..errors.too_many_requests_error import TooManyRequestsError
from ..errors.unprocessable_entity_error import UnprocessableEntityError
from ..types.speech_sample_rate import SpeechSampleRate
from ..types.text_to_speech_language import TextToSpeechLanguage
from ..types.text_to_speech_model import TextToSpeechModel
from ..types.text_to_speech_output_audio_codec import TextToSpeechOutputAudioCodec
from ..types.text_to_speech_response import TextToSpeechResponse
from ..types.text_to_speech_speaker import TextToSpeechSpeaker

# Sentinel used as the default value for optional parameters: the Ellipsis
# object, cast to Any so it is accepted under any parameter annotation.
# The same object is forwarded to the HTTP client as `omit=OMIT`; presumably
# the client drops payload entries still equal to it, which would let an
# explicit None be told apart from "not supplied" (confirm in the client wrapper).
OMIT = typing.cast(typing.Any, ...)


class RawTextToSpeechClient:
    """
    Synchronous low-level client for the text-to-speech endpoint.

    Each call returns an ``HttpResponse`` that carries the underlying HTTP
    response together with the parsed payload.
    """

    def __init__(self, *, client_wrapper: SyncClientWrapper):
        """Keep the client wrapper that provides the HTTP client and the environment base URL."""
        self._client_wrapper = client_wrapper

    def convert(
        self,
        *,
        text: str,
        target_language_code: TextToSpeechLanguage,
        speaker: typing.Optional[TextToSpeechSpeaker] = OMIT,
        pitch: typing.Optional[float] = OMIT,
        pace: typing.Optional[float] = OMIT,
        loudness: typing.Optional[float] = OMIT,
        speech_sample_rate: typing.Optional[SpeechSampleRate] = OMIT,
        enable_preprocessing: typing.Optional[bool] = OMIT,
        model: typing.Optional[TextToSpeechModel] = OMIT,
        output_audio_codec: typing.Optional[TextToSpeechOutputAudioCodec] = OMIT,
        temperature: typing.Optional[float] = OMIT,
        request_options: typing.Optional[RequestOptions] = None,
    ) -> HttpResponse[TextToSpeechResponse]:
        """
        Synthesize speech from text. The audio in the response is a base64-encoded string and must be decoded before it can be used.

        **Models:**
        - **bulbul:v3**: Latest model with improved quality, 30+ voices, pace, and temperature control
        - **bulbul:v2**: Legacy model with pitch, loudness, and pace controls

        **bulbul:v3 specifics:**
        - Pitch and loudness parameters are NOT supported
        - Pace range is 0.5 to 2.0
        - Preprocessing is enabled automatically
        - The default sample rate is 24000 Hz
        - Sample rates 8000, 16000, 22050, 24000 Hz are supported (the REST API additionally supports 32000, 44100, 48000 Hz)

        Parameters
        ----------
        text : str
            The text(s) to convert into speech.

            **Features:**
            - Code-mixed text (English and Indic languages) is supported

            **Per-model length limits:**
            - **bulbul:v3:** at most 2500 characters
            - **bulbul:v2:** at most 1500 characters

            **Number formatting:**
            - Write numbers larger than 4 digits with commas (e.g., '10,000' rather than '10000')
            - Doing so makes the number be pronounced as a single whole number

        target_language_code : TextToSpeechLanguage
            BCP-47 language code. It is used primarily by the pre-TTS text normalization model for language-specific processing such as number formatting, abbreviations, and special characters.

        speaker : typing.Optional[TextToSpeechSpeaker]
            Voice used for the output audio.

            **Default:** Shubh (for bulbul:v3), Anushka (for bulbul:v2)

            **Speakers available per model:**
            - **bulbul:v3:**
              - Shubh (default), Aditya, Ritu, Priya, Neha, Rahul, Pooja, Rohan, Simran, Kavya, Amit, Dev, Ishita, Shreya, Ratan, Varun, Manan, Sumit, Roopa, Kabir, Aayan, Ashutosh, Advait, Amelia, Sophia, Anand, Tanya, Tarun, Sunny, Mani, Gokul, Vijay, Shruti, Suhani, Mohit, Kavitha, Rehan, Soham, Rupali
            - **bulbul:v2:**
              - Female: Anushka, Manisha, Vidya, Arya
              - Male: Abhilash, Karun, Hitesh

            **Note:** The speaker must belong to the selected model version.

        pitch : typing.Optional[float]
            Pitch of the audio: lower values give a deeper voice, higher values a sharper one. Suitable range is -0.75 to 0.75; default is 0.0.

            **Note:** Supported only by bulbul:v2. NOT supported by bulbul:v3.

        pace : typing.Optional[float]
            Speed of the audio: lower values slow the speech down, higher values speed it up. Default is 1.0.

            **Per-model ranges:**
            - **bulbul:v3:** 0.5 to 2.0
            - **bulbul:v2:** 0.3 to 3.0

        loudness : typing.Optional[float]
            Loudness of the audio: lower values are quieter, higher values are louder. Suitable range is 0.3 to 3.0; default is 1.0.

            **Note:** Supported only by bulbul:v2. NOT supported by bulbul:v3.

        speech_sample_rate : typing.Optional[SpeechSampleRate]
            Sample rate of the output audio. Supported values are 8000, 16000, 22050, 24000, 32000, 44100, 48000 Hz.

            **Note:** The higher rates (32000, 44100, 48000 Hz) are available only with bulbul:v3 through the REST API, not in streaming mode.

            **Default:** 24000 Hz

        enable_preprocessing : typing.Optional[bool]
            Whether English words and numeric entities (e.g., numbers, dates) are normalized. Set to true for better handling of mixed-language text.

            **Per-model behavior:**
            - **bulbul:v3:** Not Supported
            - **bulbul:v2:** Default is false

        model : typing.Optional[TextToSpeechModel]
            Model used for the text-to-speech conversion.

            **Available models:**
            - **bulbul:v3:** Latest model with improved quality, 30+ voices, pace, and temperature control
            - **bulbul:v2:** Legacy model with pitch, loudness, and pace controls

        output_audio_codec : typing.Optional[TextToSpeechOutputAudioCodec]
            Audio codec of the output audio file. Codecs differ in their compression and quality characteristics.

        temperature : typing.Optional[float]
            How much randomness and expressiveness the TTS model uses while generating speech.

            Lower values give more stable, consistent output; higher values sound more expressive but may introduce artifacts or errors. Suitable range is 0.01 to 2.0; default is 0.6.

            **Note:** Supported only by bulbul:v3. It has no effect on bulbul:v2.

        request_options : typing.Optional[RequestOptions]
            Request-specific configuration.

        Returns
        -------
        HttpResponse[TextToSpeechResponse]
            The HTTP response paired with the parsed payload of a successful (2xx) call.

        Raises
        ------
        BadRequestError
            The server answered with status 400.
        ForbiddenError
            The server answered with status 403.
        UnprocessableEntityError
            The server answered with status 422.
        TooManyRequestsError
            The server answered with status 429.
        InternalServerError
            The server answered with status 500.
        ApiError
            Any other non-2xx status, or a response body that is not valid JSON.
        """
        payload = {
            "text": text,
            "target_language_code": target_language_code,
            "speaker": speaker,
            "pitch": pitch,
            "pace": pace,
            "loudness": loudness,
            "speech_sample_rate": speech_sample_rate,
            "enable_preprocessing": enable_preprocessing,
            "model": model,
            "output_audio_codec": output_audio_codec,
            "temperature": temperature,
        }
        http_response = self._client_wrapper.httpx_client.request(
            "text-to-speech",
            base_url=self._client_wrapper.get_environment().base,
            method="POST",
            json=payload,
            headers={
                "content-type": "application/json",
            },
            request_options=request_options,
            omit=OMIT,
        )

        # Statuses that map onto a dedicated exception type.
        typed_errors = {
            400: BadRequestError,
            403: ForbiddenError,
            422: UnprocessableEntityError,
            429: TooManyRequestsError,
            500: InternalServerError,
        }
        status = http_response.status_code
        try:
            if 200 <= status < 300:
                parsed = typing.cast(
                    TextToSpeechResponse,
                    parse_obj_as(
                        type_=TextToSpeechResponse,  # type: ignore
                        object_=http_response.json(),
                    ),
                )
                return HttpResponse(response=http_response, data=parsed)

            error_cls = typed_errors.get(status)
            if error_cls is not None:
                # Headers are captured before the body is decoded, so a body
                # that is not JSON still falls through to the handler below.
                error_headers = dict(http_response.headers)
                error_body = typing.cast(
                    typing.Optional[typing.Any],
                    parse_obj_as(
                        type_=typing.Optional[typing.Any],  # type: ignore
                        object_=http_response.json(),
                    ),
                )
                raise error_cls(headers=error_headers, body=error_body)

            fallback_body = http_response.json()
        except JSONDecodeError:
            # The body could not be decoded as JSON: report the raw text instead.
            raise ApiError(
                status_code=http_response.status_code,
                headers=dict(http_response.headers),
                body=http_response.text,
            )
        raise ApiError(
            status_code=http_response.status_code,
            headers=dict(http_response.headers),
            body=fallback_body,
        )


class AsyncRawTextToSpeechClient:
    """
    Asynchronous low-level client for the text-to-speech endpoint.

    Each call returns an ``AsyncHttpResponse`` that carries the underlying HTTP
    response together with the parsed payload.
    """

    def __init__(self, *, client_wrapper: AsyncClientWrapper):
        """Keep the client wrapper that provides the HTTP client and the environment base URL."""
        self._client_wrapper = client_wrapper

    async def convert(
        self,
        *,
        text: str,
        target_language_code: TextToSpeechLanguage,
        speaker: typing.Optional[TextToSpeechSpeaker] = OMIT,
        pitch: typing.Optional[float] = OMIT,
        pace: typing.Optional[float] = OMIT,
        loudness: typing.Optional[float] = OMIT,
        speech_sample_rate: typing.Optional[SpeechSampleRate] = OMIT,
        enable_preprocessing: typing.Optional[bool] = OMIT,
        model: typing.Optional[TextToSpeechModel] = OMIT,
        output_audio_codec: typing.Optional[TextToSpeechOutputAudioCodec] = OMIT,
        temperature: typing.Optional[float] = OMIT,
        request_options: typing.Optional[RequestOptions] = None,
    ) -> AsyncHttpResponse[TextToSpeechResponse]:
        """
        Synthesize speech from text. The audio in the response is a base64-encoded string and must be decoded before it can be used.

        **Models:**
        - **bulbul:v3**: Latest model with improved quality, 30+ voices, pace, and temperature control
        - **bulbul:v2**: Legacy model with pitch, loudness, and pace controls

        **bulbul:v3 specifics:**
        - Pitch and loudness parameters are NOT supported
        - Pace range is 0.5 to 2.0
        - Preprocessing is enabled automatically
        - The default sample rate is 24000 Hz
        - Sample rates 8000, 16000, 22050, 24000 Hz are supported (the REST API additionally supports 32000, 44100, 48000 Hz)

        Parameters
        ----------
        text : str
            The text(s) to convert into speech.

            **Features:**
            - Code-mixed text (English and Indic languages) is supported

            **Per-model length limits:**
            - **bulbul:v3:** at most 2500 characters
            - **bulbul:v2:** at most 1500 characters

            **Number formatting:**
            - Write numbers larger than 4 digits with commas (e.g., '10,000' rather than '10000')
            - Doing so makes the number be pronounced as a single whole number

        target_language_code : TextToSpeechLanguage
            BCP-47 language code. It is used primarily by the pre-TTS text normalization model for language-specific processing such as number formatting, abbreviations, and special characters.

        speaker : typing.Optional[TextToSpeechSpeaker]
            Voice used for the output audio.

            **Default:** Shubh (for bulbul:v3), Anushka (for bulbul:v2)

            **Speakers available per model:**
            - **bulbul:v3:**
              - Shubh (default), Aditya, Ritu, Priya, Neha, Rahul, Pooja, Rohan, Simran, Kavya, Amit, Dev, Ishita, Shreya, Ratan, Varun, Manan, Sumit, Roopa, Kabir, Aayan, Ashutosh, Advait, Amelia, Sophia, Anand, Tanya, Tarun, Sunny, Mani, Gokul, Vijay, Shruti, Suhani, Mohit, Kavitha, Rehan, Soham, Rupali
            - **bulbul:v2:**
              - Female: Anushka, Manisha, Vidya, Arya
              - Male: Abhilash, Karun, Hitesh

            **Note:** The speaker must belong to the selected model version.

        pitch : typing.Optional[float]
            Pitch of the audio: lower values give a deeper voice, higher values a sharper one. Suitable range is -0.75 to 0.75; default is 0.0.

            **Note:** Supported only by bulbul:v2. NOT supported by bulbul:v3.

        pace : typing.Optional[float]
            Speed of the audio: lower values slow the speech down, higher values speed it up. Default is 1.0.

            **Per-model ranges:**
            - **bulbul:v3:** 0.5 to 2.0
            - **bulbul:v2:** 0.3 to 3.0

        loudness : typing.Optional[float]
            Loudness of the audio: lower values are quieter, higher values are louder. Suitable range is 0.3 to 3.0; default is 1.0.

            **Note:** Supported only by bulbul:v2. NOT supported by bulbul:v3.

        speech_sample_rate : typing.Optional[SpeechSampleRate]
            Sample rate of the output audio. Supported values are 8000, 16000, 22050, 24000, 32000, 44100, 48000 Hz.

            **Note:** The higher rates (32000, 44100, 48000 Hz) are available only with bulbul:v3 through the REST API, not in streaming mode.

            **Default:** 24000 Hz

        enable_preprocessing : typing.Optional[bool]
            Whether English words and numeric entities (e.g., numbers, dates) are normalized. Set to true for better handling of mixed-language text.

            **Per-model behavior:**
            - **bulbul:v3:** Not Supported
            - **bulbul:v2:** Default is false

        model : typing.Optional[TextToSpeechModel]
            Model used for the text-to-speech conversion.

            **Available models:**
            - **bulbul:v3:** Latest model with improved quality, 30+ voices, pace, and temperature control
            - **bulbul:v2:** Legacy model with pitch, loudness, and pace controls

        output_audio_codec : typing.Optional[TextToSpeechOutputAudioCodec]
            Audio codec of the output audio file. Codecs differ in their compression and quality characteristics.

        temperature : typing.Optional[float]
            How much randomness and expressiveness the TTS model uses while generating speech.

            Lower values give more stable, consistent output; higher values sound more expressive but may introduce artifacts or errors. Suitable range is 0.01 to 2.0; default is 0.6.

            **Note:** Supported only by bulbul:v3. It has no effect on bulbul:v2.

        request_options : typing.Optional[RequestOptions]
            Request-specific configuration.

        Returns
        -------
        AsyncHttpResponse[TextToSpeechResponse]
            The HTTP response paired with the parsed payload of a successful (2xx) call.

        Raises
        ------
        BadRequestError
            The server answered with status 400.
        ForbiddenError
            The server answered with status 403.
        UnprocessableEntityError
            The server answered with status 422.
        TooManyRequestsError
            The server answered with status 429.
        InternalServerError
            The server answered with status 500.
        ApiError
            Any other non-2xx status, or a response body that is not valid JSON.
        """
        payload = {
            "text": text,
            "target_language_code": target_language_code,
            "speaker": speaker,
            "pitch": pitch,
            "pace": pace,
            "loudness": loudness,
            "speech_sample_rate": speech_sample_rate,
            "enable_preprocessing": enable_preprocessing,
            "model": model,
            "output_audio_codec": output_audio_codec,
            "temperature": temperature,
        }
        http_response = await self._client_wrapper.httpx_client.request(
            "text-to-speech",
            base_url=self._client_wrapper.get_environment().base,
            method="POST",
            json=payload,
            headers={
                "content-type": "application/json",
            },
            request_options=request_options,
            omit=OMIT,
        )

        # Statuses that map onto a dedicated exception type.
        typed_errors = {
            400: BadRequestError,
            403: ForbiddenError,
            422: UnprocessableEntityError,
            429: TooManyRequestsError,
            500: InternalServerError,
        }
        status = http_response.status_code
        try:
            if 200 <= status < 300:
                parsed = typing.cast(
                    TextToSpeechResponse,
                    parse_obj_as(
                        type_=TextToSpeechResponse,  # type: ignore
                        object_=http_response.json(),
                    ),
                )
                return AsyncHttpResponse(response=http_response, data=parsed)

            error_cls = typed_errors.get(status)
            if error_cls is not None:
                # Headers are captured before the body is decoded, so a body
                # that is not JSON still falls through to the handler below.
                error_headers = dict(http_response.headers)
                error_body = typing.cast(
                    typing.Optional[typing.Any],
                    parse_obj_as(
                        type_=typing.Optional[typing.Any],  # type: ignore
                        object_=http_response.json(),
                    ),
                )
                raise error_cls(headers=error_headers, body=error_body)

            fallback_body = http_response.json()
        except JSONDecodeError:
            # The body could not be decoded as JSON: report the raw text instead.
            raise ApiError(
                status_code=http_response.status_code,
                headers=dict(http_response.headers),
                body=http_response.text,
            )
        raise ApiError(
            status_code=http_response.status_code,
            headers=dict(http_response.headers),
            body=fallback_body,
        )
