# This file was auto-generated by Fern from our API Definition.

import typing

from .. import core
from ..core.client_wrapper import AsyncClientWrapper, SyncClientWrapper
from ..core.request_options import RequestOptions
from ..types.input_audio_codec import InputAudioCodec
from ..types.mode import Mode
from ..types.speech_to_text_language import SpeechToTextLanguage
from ..types.speech_to_text_model import SpeechToTextModel
from ..types.speech_to_text_response import SpeechToTextResponse
from ..types.speech_to_text_translate_model import SpeechToTextTranslateModel
from ..types.speech_to_text_translate_response import SpeechToTextTranslateResponse
from .raw_client import AsyncRawSpeechToTextClient, RawSpeechToTextClient

# Sentinel used as the default value for optional parameters: the Ellipsis
# object (`...`) cast to `typing.Any` so it type-checks as the default of a
# parameter with any annotation.
# NOTE(review): presumably the raw client treats OMIT as "argument not supplied",
# distinct from an explicit None — confirm in raw_client.
OMIT = typing.cast(typing.Any, ...)


class SpeechToTextClient:
    """
    Synchronous speech-to-text client.

    Every endpoint method forwards its arguments to a ``RawSpeechToTextClient``
    and hands back the ``data`` attribute of the raw response it receives.
    """

    def __init__(self, *, client_wrapper: SyncClientWrapper):
        """
        Create the raw client this wrapper delegates to.

        Parameters
        ----------
        client_wrapper : SyncClientWrapper
            Passed unchanged to ``RawSpeechToTextClient``.
        """
        self._raw_client = RawSpeechToTextClient(client_wrapper=client_wrapper)

    @property
    def with_raw_response(self) -> RawSpeechToTextClient:
        """
        Expose the raw client behind this instance; its methods return raw responses.

        Returns
        -------
        RawSpeechToTextClient
        """
        return self._raw_client

    def transcribe(
        self,
        *,
        file: core.File,
        model: typing.Optional[SpeechToTextModel] = OMIT,
        mode: typing.Optional[Mode] = OMIT,
        language_code: typing.Optional[SpeechToTextLanguage] = OMIT,
        input_audio_codec: typing.Optional[InputAudioCodec] = OMIT,
        request_options: typing.Optional[RequestOptions] = None,
    ) -> SpeechToTextResponse:
        """
        ## Speech to Text API

        Transcribes speech to text in multiple Indian languages and English. Supports transcription for interactive applications.

        ### Available Options:
        - **REST API** (this endpoint): for quick responses under 30 seconds with immediate results.
        - **Batch API**: for longer audio files; see the [batch API documentation](https://docs.sarvam.ai/api-reference-docs/api-guides-tutorials/speech-to-text/batch-api).
          - Supports diarization (speaker identification).

        ### Note:
        - REST and Batch APIs are priced differently.
        - Diarization is available only in the Batch API, with separate pricing.
        - Detailed pricing information is available [here](https://docs.sarvam.ai/api-reference-docs/getting-started/pricing).

        Parameters
        ----------
        file : core.File
            See core.File for more documentation

        model : typing.Optional[SpeechToTextModel]
            Model to use for speech-to-text conversion.

            - **saarika:v2.5** (default): Transcribes audio in the spoken language.

            - **saaras:v3**: State-of-the-art model with flexible output formats. Supports multiple modes via the `mode` parameter: transcribe, translate, verbatim, translit, codemix.

        mode : typing.Optional[Mode]
            Mode of operation. **Only applicable when using saaras:v3 model.**

            Example audio: 'मेरा फोन नंबर है 9840950950'

            - **transcribe** (default): Standard transcription in the original language with proper formatting and number normalization.
              - Output: `मेरा फोन नंबर है 9840950950`

            - **translate**: Translates speech from any supported Indic language to English.
              - Output: `My phone number is 9840950950`

            - **verbatim**: Exact word-for-word transcription without normalization, preserving filler words and spoken numbers as-is.
              - Output: `मेरा फोन नंबर है नौ आठ चार zero नौ पांच zero नौ पांच zero`

            - **translit**: Romanization - transliterates speech to Latin/Roman script only.
              - Output: `mera phone number hai 9840950950`

            - **codemix**: Code-mixed text with English words in English and Indic words in native script.
              - Output: `मेरा phone number है 9840950950`

        language_code : typing.Optional[SpeechToTextLanguage]
            Language of the input audio in BCP-47 format.

            **Note:** This parameter is optional for `saarika:v2.5` model.

            **Available Options:**
            - `unknown`: Use when the language is not known; the API will auto-detect.
            - `hi-IN`: Hindi
            - `bn-IN`: Bengali
            - `kn-IN`: Kannada
            - `ml-IN`: Malayalam
            - `mr-IN`: Marathi
            - `od-IN`: Odia
            - `pa-IN`: Punjabi
            - `ta-IN`: Tamil
            - `te-IN`: Telugu
            - `en-IN`: English
            - `gu-IN`: Gujarati

            **Additional Options (saaras:v3 only):**
            - `as-IN`: Assamese
            - `ur-IN`: Urdu
            - `ne-IN`: Nepali
            - `kok-IN`: Konkani
            - `ks-IN`: Kashmiri
            - `sd-IN`: Sindhi
            - `sa-IN`: Sanskrit
            - `sat-IN`: Santali
            - `mni-IN`: Manipuri
            - `brx-IN`: Bodo
            - `mai-IN`: Maithili
            - `doi-IN`: Dogri

        input_audio_codec : typing.Optional[InputAudioCodec]
            Input audio codec/format of the input file. PCM files are supported only at 16kHz sample rate.

        request_options : typing.Optional[RequestOptions]
            Request-specific configuration.

        Returns
        -------
        SpeechToTextResponse
            Successful Response

        Examples
        --------
        from sarvamai import SarvamAI

        client = SarvamAI(
            api_subscription_key="YOUR_API_SUBSCRIPTION_KEY",
        )
        # `file` is a required keyword argument. A binary file handle is assumed
        # to be an accepted core.File value here; confirm against core.File.
        with open("audio.wav", "rb") as audio_file:
            client.speech_to_text.transcribe(
                file=audio_file,
            )
        """
        # Delegate to the raw client and unwrap the parsed payload in one step.
        return self._raw_client.transcribe(
            file=file,
            model=model,
            mode=mode,
            language_code=language_code,
            input_audio_codec=input_audio_codec,
            request_options=request_options,
        ).data

    def translate(
        self,
        *,
        file: core.File,
        prompt: typing.Optional[str] = OMIT,
        model: typing.Optional[SpeechToTextTranslateModel] = OMIT,
        input_audio_codec: typing.Optional[InputAudioCodec] = OMIT,
        request_options: typing.Optional[RequestOptions] = None,
    ) -> SpeechToTextTranslateResponse:
        """
        ## Speech to Text Translation API

        Automatically detects the input language, transcribes the speech, and translates the text to English.

        ### Available Options:
        - **REST API** (this endpoint): for quick responses under 30 seconds with immediate results.
        - **Batch API**: for longer audio files; see the [batch API documentation](https://docs.sarvam.ai/api-reference-docs/api-guides-tutorials/speech-to-text/batch-api).
          - Supports diarization (speaker identification).

        ### Note:
        - REST and Batch APIs are priced differently.
        - Diarization is available only in the Batch API, with separate pricing.
        - Detailed pricing information is available [here](https://docs.sarvam.ai/api-reference-docs/getting-started/pricing).

        Parameters
        ----------
        file : core.File
            See core.File for more documentation

        prompt : typing.Optional[str]
            Conversation context that can be passed as a prompt to boost model accuracy. The current system is at an experimentation stage and doesn't match the prompt performance of large language models.

        model : typing.Optional[SpeechToTextTranslateModel]
            Model to be used for speech to text translation.

            - **saaras:v2.5** (default): Translation model that translates audio from any spoken Indic language to English.
              - Example: Hindi audio → English text output

        input_audio_codec : typing.Optional[InputAudioCodec]
            Audio codec/format of the input file. The API automatically detects all codec formats, but for PCM files specifically (pcm_s16le, pcm_l16, pcm_raw) this parameter must be passed. PCM files are supported only at 16kHz sample rate.

        request_options : typing.Optional[RequestOptions]
            Request-specific configuration.

        Returns
        -------
        SpeechToTextTranslateResponse
            Successful Response

        Examples
        --------
        from sarvamai import SarvamAI

        client = SarvamAI(
            api_subscription_key="YOUR_API_SUBSCRIPTION_KEY",
        )
        # `file` is a required keyword argument. A binary file handle is assumed
        # to be an accepted core.File value here; confirm against core.File.
        with open("audio.wav", "rb") as audio_file:
            client.speech_to_text.translate(
                file=audio_file,
            )
        """
        raw_result = self._raw_client.translate(
            file=file,
            prompt=prompt,
            model=model,
            input_audio_codec=input_audio_codec,
            request_options=request_options,
        )
        # Callers of this wrapper only get the parsed payload, not the raw response.
        return raw_result.data


class AsyncSpeechToTextClient:
    """
    Asynchronous speech-to-text client.

    Every endpoint coroutine awaits the matching ``AsyncRawSpeechToTextClient``
    call and hands back the ``data`` attribute of the raw response it receives.
    """

    def __init__(self, *, client_wrapper: AsyncClientWrapper):
        """
        Create the raw async client this wrapper delegates to.

        Parameters
        ----------
        client_wrapper : AsyncClientWrapper
            Passed unchanged to ``AsyncRawSpeechToTextClient``.
        """
        self._raw_client = AsyncRawSpeechToTextClient(client_wrapper=client_wrapper)

    @property
    def with_raw_response(self) -> AsyncRawSpeechToTextClient:
        """
        Expose the raw client behind this instance; its methods return raw responses.

        Returns
        -------
        AsyncRawSpeechToTextClient
        """
        return self._raw_client

    async def transcribe(
        self,
        *,
        file: core.File,
        model: typing.Optional[SpeechToTextModel] = OMIT,
        mode: typing.Optional[Mode] = OMIT,
        language_code: typing.Optional[SpeechToTextLanguage] = OMIT,
        input_audio_codec: typing.Optional[InputAudioCodec] = OMIT,
        request_options: typing.Optional[RequestOptions] = None,
    ) -> SpeechToTextResponse:
        """
        ## Speech to Text API

        Transcribes speech to text in multiple Indian languages and English. Supports transcription for interactive applications.

        ### Available Options:
        - **REST API** (this endpoint): for quick responses under 30 seconds with immediate results.
        - **Batch API**: for longer audio files; see the [batch API documentation](https://docs.sarvam.ai/api-reference-docs/api-guides-tutorials/speech-to-text/batch-api).
          - Supports diarization (speaker identification).

        ### Note:
        - REST and Batch APIs are priced differently.
        - Diarization is available only in the Batch API, with separate pricing.
        - Detailed pricing information is available [here](https://docs.sarvam.ai/api-reference-docs/getting-started/pricing).

        Parameters
        ----------
        file : core.File
            See core.File for more documentation

        model : typing.Optional[SpeechToTextModel]
            Model to use for speech-to-text conversion.

            - **saarika:v2.5** (default): Transcribes audio in the spoken language.

            - **saaras:v3**: State-of-the-art model with flexible output formats. Supports multiple modes via the `mode` parameter: transcribe, translate, verbatim, translit, codemix.

        mode : typing.Optional[Mode]
            Mode of operation. **Only applicable when using saaras:v3 model.**

            Example audio: 'मेरा फोन नंबर है 9840950950'

            - **transcribe** (default): Standard transcription in the original language with proper formatting and number normalization.
              - Output: `मेरा फोन नंबर है 9840950950`

            - **translate**: Translates speech from any supported Indic language to English.
              - Output: `My phone number is 9840950950`

            - **verbatim**: Exact word-for-word transcription without normalization, preserving filler words and spoken numbers as-is.
              - Output: `मेरा फोन नंबर है नौ आठ चार zero नौ पांच zero नौ पांच zero`

            - **translit**: Romanization - transliterates speech to Latin/Roman script only.
              - Output: `mera phone number hai 9840950950`

            - **codemix**: Code-mixed text with English words in English and Indic words in native script.
              - Output: `मेरा phone number है 9840950950`

        language_code : typing.Optional[SpeechToTextLanguage]
            Language of the input audio in BCP-47 format.

            **Note:** This parameter is optional for `saarika:v2.5` model.

            **Available Options:**
            - `unknown`: Use when the language is not known; the API will auto-detect.
            - `hi-IN`: Hindi
            - `bn-IN`: Bengali
            - `kn-IN`: Kannada
            - `ml-IN`: Malayalam
            - `mr-IN`: Marathi
            - `od-IN`: Odia
            - `pa-IN`: Punjabi
            - `ta-IN`: Tamil
            - `te-IN`: Telugu
            - `en-IN`: English
            - `gu-IN`: Gujarati

            **Additional Options (saaras:v3 only):**
            - `as-IN`: Assamese
            - `ur-IN`: Urdu
            - `ne-IN`: Nepali
            - `kok-IN`: Konkani
            - `ks-IN`: Kashmiri
            - `sd-IN`: Sindhi
            - `sa-IN`: Sanskrit
            - `sat-IN`: Santali
            - `mni-IN`: Manipuri
            - `brx-IN`: Bodo
            - `mai-IN`: Maithili
            - `doi-IN`: Dogri

        input_audio_codec : typing.Optional[InputAudioCodec]
            Input audio codec/format of the input file. PCM files are supported only at 16kHz sample rate.

        request_options : typing.Optional[RequestOptions]
            Request-specific configuration.

        Returns
        -------
        SpeechToTextResponse
            Successful Response

        Examples
        --------
        import asyncio

        from sarvamai import AsyncSarvamAI

        client = AsyncSarvamAI(
            api_subscription_key="YOUR_API_SUBSCRIPTION_KEY",
        )


        async def main() -> None:
            # `file` is a required keyword argument. A binary file handle is assumed
            # to be an accepted core.File value here; confirm against core.File.
            with open("audio.wav", "rb") as audio_file:
                await client.speech_to_text.transcribe(
                    file=audio_file,
                )


        asyncio.run(main())
        """
        # Await the raw client call, then unwrap the parsed payload.
        raw_result = await self._raw_client.transcribe(
            file=file,
            model=model,
            mode=mode,
            language_code=language_code,
            input_audio_codec=input_audio_codec,
            request_options=request_options,
        )
        return raw_result.data

    async def translate(
        self,
        *,
        file: core.File,
        prompt: typing.Optional[str] = OMIT,
        model: typing.Optional[SpeechToTextTranslateModel] = OMIT,
        input_audio_codec: typing.Optional[InputAudioCodec] = OMIT,
        request_options: typing.Optional[RequestOptions] = None,
    ) -> SpeechToTextTranslateResponse:
        """
        ## Speech to Text Translation API

        Automatically detects the input language, transcribes the speech, and translates the text to English.

        ### Available Options:
        - **REST API** (this endpoint): for quick responses under 30 seconds with immediate results.
        - **Batch API**: for longer audio files; see the [batch API documentation](https://docs.sarvam.ai/api-reference-docs/api-guides-tutorials/speech-to-text/batch-api).
          - Supports diarization (speaker identification).

        ### Note:
        - REST and Batch APIs are priced differently.
        - Diarization is available only in the Batch API, with separate pricing.
        - Detailed pricing information is available [here](https://docs.sarvam.ai/api-reference-docs/getting-started/pricing).

        Parameters
        ----------
        file : core.File
            See core.File for more documentation

        prompt : typing.Optional[str]
            Conversation context that can be passed as a prompt to boost model accuracy. The current system is at an experimentation stage and doesn't match the prompt performance of large language models.

        model : typing.Optional[SpeechToTextTranslateModel]
            Model to be used for speech to text translation.

            - **saaras:v2.5** (default): Translation model that translates audio from any spoken Indic language to English.
              - Example: Hindi audio → English text output

        input_audio_codec : typing.Optional[InputAudioCodec]
            Audio codec/format of the input file. The API automatically detects all codec formats, but for PCM files specifically (pcm_s16le, pcm_l16, pcm_raw) this parameter must be passed. PCM files are supported only at 16kHz sample rate.

        request_options : typing.Optional[RequestOptions]
            Request-specific configuration.

        Returns
        -------
        SpeechToTextTranslateResponse
            Successful Response

        Examples
        --------
        import asyncio

        from sarvamai import AsyncSarvamAI

        client = AsyncSarvamAI(
            api_subscription_key="YOUR_API_SUBSCRIPTION_KEY",
        )


        async def main() -> None:
            # `file` is a required keyword argument. A binary file handle is assumed
            # to be an accepted core.File value here; confirm against core.File.
            with open("audio.wav", "rb") as audio_file:
                await client.speech_to_text.translate(
                    file=audio_file,
                )


        asyncio.run(main())
        """
        # Parenthesised await so `.data` is read from the resolved response.
        return (
            await self._raw_client.translate(
                file=file,
                prompt=prompt,
                model=model,
                input_audio_codec=input_audio_codec,
                request_options=request_options,
            )
        ).data
