o
    qmœi¹C  ã                   @   sÂ   d dl Z ddlmZ ddlmZmZ ddlmZ ddlm	Z	 ddl
mZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZmZ e  e jd¡ZG dd„ dƒZG dd„ dƒZdS )é    Né   )Úcore)ÚAsyncClientWrapperÚSyncClientWrapper)ÚRequestOptions)ÚInputAudioCodec)ÚMode)ÚSpeechToTextLanguage)ÚSpeechToTextModel)ÚSpeechToTextResponse)ÚSpeechToTextTranslateModel)ÚSpeechToTextTranslateResponseé   )ÚAsyncRawSpeechToTextClientÚRawSpeechToTextClient.c                   @   óÈ   e Zd Zdefdd„Zedefdd„ƒZeeeeddœd	e	j
d
eje deje deje deje deje defdd„Zeeeddœd	e	j
deje d
eje deje deje defdd„ZdS )ÚSpeechToTextClientÚclient_wrapperc                C   ó   t |d| _d S ©N)r   )r   Ú_raw_client©Úselfr   © r   úR/home/ubuntu/.local/lib/python3.10/site-packages/sarvamai/speech_to_text/client.pyÚ__init__   ó   zSpeechToTextClient.__init__Úreturnc                 C   ó   | j S )zš
        Retrieves a raw implementation of this client that returns raw responses.

        Returns
        -------
        RawSpeechToTextClient
        ©r   ©r   r   r   r   Úwith_raw_response   ó   	z$SpeechToTextClient.with_raw_responseN©ÚmodelÚmodeÚlanguage_codeÚinput_audio_codecÚrequest_optionsÚfiler$   r%   r&   r'   r(   c                C   s   | j j||||||d}|jS )uW  
        ## Speech to Text API

        This API transcribes speech to text in multiple Indian languages and English. Supports transcription for interactive applications.

        ### Available Options:
        - **REST API** (Current Endpoint): For quick responses under 30 seconds with immediate results
        - **Batch API**: For longer audio files, [Follow This Documentation](https://docs.sarvam.ai/api-reference-docs/api-guides-tutorials/speech-to-text/batch-api)
          - Supports diarization (speaker identification)

        ### Note:
        - Pricing differs for REST and Batch APIs
        - Diarization is only available in Batch API with separate pricing
        - Please refer to [here](https://docs.sarvam.ai/api-reference-docs/getting-started/pricing) for detailed pricing information

        Parameters
        ----------
        file : core.File
            See core.File for more documentation

        model : typing.Optional[SpeechToTextModel]
            Specifies the model to use for speech-to-text conversion.

            - **saarika:v2.5** (default): Transcribes audio in the spoken language.

            - **saaras:v3**: State-of-the-art model with flexible output formats. Supports multiple modes via the `mode` parameter: transcribe, translate, verbatim, translit, codemix.

        mode : typing.Optional[Mode]
            Mode of operation. **Only applicable when using saaras:v3 model.**

            Example audio: 'à¤®à¥‡à¤°à¤¾ à¤«à¥‹à¤¨ à¤¨à¤‚à¤¬à¤° à¤¹à¥ˆ 9840950950'

            - **transcribe** (default): Standard transcription in the original language with proper formatting and number normalization.
              - Output: `à¤®à¥‡à¤°à¤¾ à¤«à¥‹à¤¨ à¤¨à¤‚à¤¬à¤° à¤¹à¥ˆ 9840950950`

            - **translate**: Translates speech from any supported Indic language to English.
              - Output: `My phone number is 9840950950`

            - **verbatim**: Exact word-for-word transcription without normalization, preserving filler words and spoken numbers as-is.
              - Output: `à¤®à¥‡à¤°à¤¾ à¤«à¥‹à¤¨ à¤¨à¤‚à¤¬à¤° à¤¹à¥ˆ à¤¨à¥Œ à¤†à¤  à¤šà¤¾à¤° zero à¤¨à¥Œ à¤ªà¤¾à¤‚à¤š zero à¤¨à¥Œ à¤ªà¤¾à¤‚à¤š zero`

            - **translit**: Romanization - Transliterates speech to Latin/Roman script only.
              - Output: `mera phone number hai 9840950950`

            - **codemix**: Code-mixed text with English words in English and Indic words in native script.
              - Output: `à¤®à¥‡à¤°à¤¾ phone number à¤¹à¥ˆ 9840950950`

        language_code : typing.Optional[SpeechToTextLanguage]
            Specifies the language of the input audio in BCP-47 format.

            **Note:** This parameter is optional for `saarika:v2.5` model.

            **Available Options:**
            - `unknown`: Use when the language is not known; the API will auto-detect.
            - `hi-IN`: Hindi
            - `bn-IN`: Bengali
            - `kn-IN`: Kannada
            - `ml-IN`: Malayalam
            - `mr-IN`: Marathi
            - `od-IN`: Odia
            - `pa-IN`: Punjabi
            - `ta-IN`: Tamil
            - `te-IN`: Telugu
            - `en-IN`: English
            - `gu-IN`: Gujarati

            **Additional Options (saaras:v3 only):**
            - `as-IN`: Assamese
            - `ur-IN`: Urdu
            - `ne-IN`: Nepali
            - `kok-IN`: Konkani
            - `ks-IN`: Kashmiri
            - `sd-IN`: Sindhi
            - `sa-IN`: Sanskrit
            - `sat-IN`: Santali
            - `mni-IN`: Manipuri
            - `brx-IN`: Bodo
            - `mai-IN`: Maithili
            - `doi-IN`: Dogri

        input_audio_codec : typing.Optional[InputAudioCodec]
            Input Audio codec/format of the input file. PCM files are supported only at 16kHz sample rate.

        request_options : typing.Optional[RequestOptions]
            Request-specific configuration.

        Returns
        -------
        SpeechToTextResponse
            Successful Response

        Examples
        --------
        from sarvamai import SarvamAI

        client = SarvamAI(
            api_subscription_key="YOUR_API_SUBSCRIPTION_KEY",
        )
        client.speech_to_text.transcribe()
        ©r)   r$   r%   r&   r'   r(   ©r   Ú
transcribeÚdata©r   r)   r$   r%   r&   r'   r(   Ú	_responser   r   r   r,   $   s   núzSpeechToTextClient.transcribe©Úpromptr$   r'   r(   r1   c                C   s   | j j|||||d}|jS )u™  
        ## Speech to Text Translation API

        This API automatically detects the input language, transcribes the speech, and translates the text to English.

        ### Available Options:
        - **REST API** (Current Endpoint): For quick responses under 30 seconds with immediate results
        - **Batch API**: For longer audio files [Follow this documentation](https://docs.sarvam.ai/api-reference-docs/api-guides-tutorials/speech-to-text/batch-api)
          - Supports diarization (speaker identification)

        ### Note:
        - Pricing differs for REST and Batch APIs
        - Diarization is only available in Batch API with separate pricing
        - Please refer to [here](https://docs.sarvam.ai/api-reference-docs/getting-started/pricing) for detailed pricing information

        Parameters
        ----------
        file : core.File
            See core.File for more documentation

        prompt : typing.Optional[str]
            Conversation context can be passed as a prompt to boost model accuracy. However, the current system is at an experimentation stage and doesn't match the prompt performance of large language models.

        model : typing.Optional[SpeechToTextTranslateModel]
            Model to be used for speech to text translation.

            - **saaras:v2.5** (default): Translation model that translates audio from any spoken Indic language to English.
              - Example: Hindi audio â†’ English text output

        input_audio_codec : typing.Optional[InputAudioCodec]
            Audio codec/format of the input file. Our API automatically detects all codec formats, but for PCM files specifically (pcm_s16le, pcm_l16, pcm_raw), you must pass this parameter. PCM files are supported only at 16kHz sample rate.

        request_options : typing.Optional[RequestOptions]
            Request-specific configuration.

        Returns
        -------
        SpeechToTextTranslateResponse
            Successful Response

        Examples
        --------
        from sarvamai import SarvamAI

        client = SarvamAI(
            api_subscription_key="YOUR_API_SUBSCRIPTION_KEY",
        )
        client.speech_to_text.translate()
        ©r)   r1   r$   r'   r(   ©r   Ú	translater-   ©r   r)   r1   r$   r'   r(   r/   r   r   r   r4   œ   s   :
ÿzSpeechToTextClient.translate)Ú__name__Ú
__module__Ú__qualname__r   r   Úpropertyr   r!   ÚOMITr   ÚFileÚtypingÚOptionalr
   r   r	   r   r   r   r,   Ústrr   r   r4   r   r   r   r   r      sR    øýüûúùø	
÷|ùýüûúùør   c                   @   r   )ÚAsyncSpeechToTextClientr   c                C   r   r   )r   r   r   r   r   r   r   Ý   r   z AsyncSpeechToTextClient.__init__r   c                 C   r   )zŸ
        Retrieves a raw implementation of this client that returns raw responses.

        Returns
        -------
        AsyncRawSpeechToTextClient
        r   r    r   r   r   r!   à   r"   z)AsyncSpeechToTextClient.with_raw_responseNr#   r)   r$   r%   r&   r'   r(   c                Ã   s&   | j j||||||dI dH }|jS )uÅ  
        ## Speech to Text API

        This API transcribes speech to text in multiple Indian languages and English. Supports transcription for interactive applications.

        ### Available Options:
        - **REST API** (Current Endpoint): For quick responses under 30 seconds with immediate results
        - **Batch API**: For longer audio files, [Follow This Documentation](https://docs.sarvam.ai/api-reference-docs/api-guides-tutorials/speech-to-text/batch-api)
          - Supports diarization (speaker identification)

        ### Note:
        - Pricing differs for REST and Batch APIs
        - Diarization is only available in Batch API with separate pricing
        - Please refer to [here](https://docs.sarvam.ai/api-reference-docs/getting-started/pricing) for detailed pricing information

        Parameters
        ----------
        file : core.File
            See core.File for more documentation

        model : typing.Optional[SpeechToTextModel]
            Specifies the model to use for speech-to-text conversion.

            - **saarika:v2.5** (default): Transcribes audio in the spoken language.

            - **saaras:v3**: State-of-the-art model with flexible output formats. Supports multiple modes via the `mode` parameter: transcribe, translate, verbatim, translit, codemix.

        mode : typing.Optional[Mode]
            Mode of operation. **Only applicable when using saaras:v3 model.**

            Example audio: 'à¤®à¥‡à¤°à¤¾ à¤«à¥‹à¤¨ à¤¨à¤‚à¤¬à¤° à¤¹à¥ˆ 9840950950'

            - **transcribe** (default): Standard transcription in the original language with proper formatting and number normalization.
              - Output: `à¤®à¥‡à¤°à¤¾ à¤«à¥‹à¤¨ à¤¨à¤‚à¤¬à¤° à¤¹à¥ˆ 9840950950`

            - **translate**: Translates speech from any supported Indic language to English.
              - Output: `My phone number is 9840950950`

            - **verbatim**: Exact word-for-word transcription without normalization, preserving filler words and spoken numbers as-is.
              - Output: `à¤®à¥‡à¤°à¤¾ à¤«à¥‹à¤¨ à¤¨à¤‚à¤¬à¤° à¤¹à¥ˆ à¤¨à¥Œ à¤†à¤  à¤šà¤¾à¤° zero à¤¨à¥Œ à¤ªà¤¾à¤‚à¤š zero à¤¨à¥Œ à¤ªà¤¾à¤‚à¤š zero`

            - **translit**: Romanization - Transliterates speech to Latin/Roman script only.
              - Output: `mera phone number hai 9840950950`

            - **codemix**: Code-mixed text with English words in English and Indic words in native script.
              - Output: `à¤®à¥‡à¤°à¤¾ phone number à¤¹à¥ˆ 9840950950`

        language_code : typing.Optional[SpeechToTextLanguage]
            Specifies the language of the input audio in BCP-47 format.

            **Note:** This parameter is optional for `saarika:v2.5` model.

            **Available Options:**
            - `unknown`: Use when the language is not known; the API will auto-detect.
            - `hi-IN`: Hindi
            - `bn-IN`: Bengali
            - `kn-IN`: Kannada
            - `ml-IN`: Malayalam
            - `mr-IN`: Marathi
            - `od-IN`: Odia
            - `pa-IN`: Punjabi
            - `ta-IN`: Tamil
            - `te-IN`: Telugu
            - `en-IN`: English
            - `gu-IN`: Gujarati

            **Additional Options (saaras:v3 only):**
            - `as-IN`: Assamese
            - `ur-IN`: Urdu
            - `ne-IN`: Nepali
            - `kok-IN`: Konkani
            - `ks-IN`: Kashmiri
            - `sd-IN`: Sindhi
            - `sa-IN`: Sanskrit
            - `sat-IN`: Santali
            - `mni-IN`: Manipuri
            - `brx-IN`: Bodo
            - `mai-IN`: Maithili
            - `doi-IN`: Dogri

        input_audio_codec : typing.Optional[InputAudioCodec]
            Input Audio codec/format of the input file. PCM files are supported only at 16kHz sample rate.

        request_options : typing.Optional[RequestOptions]
            Request-specific configuration.

        Returns
        -------
        SpeechToTextResponse
            Successful Response

        Examples
        --------
        import asyncio

        from sarvamai import AsyncSarvamAI

        client = AsyncSarvamAI(
            api_subscription_key="YOUR_API_SUBSCRIPTION_KEY",
        )


        async def main() -> None:
            await client.speech_to_text.transcribe()


        asyncio.run(main())
        r*   Nr+   r.   r   r   r   r,   ë   s   €vúz"AsyncSpeechToTextClient.transcriber0   r1   c                Ã   s$   | j j|||||dI dH }|jS )u	  
        ## Speech to Text Translation API

        This API automatically detects the input language, transcribes the speech, and translates the text to English.

        ### Available Options:
        - **REST API** (Current Endpoint): For quick responses under 30 seconds with immediate results
        - **Batch API**: For longer audio files [Follow this documentation](https://docs.sarvam.ai/api-reference-docs/api-guides-tutorials/speech-to-text/batch-api)
          - Supports diarization (speaker identification)

        ### Note:
        - Pricing differs for REST and Batch APIs
        - Diarization is only available in Batch API with separate pricing
        - Please refer to [here](https://docs.sarvam.ai/api-reference-docs/getting-started/pricing) for detailed pricing information

        Parameters
        ----------
        file : core.File
            See core.File for more documentation

        prompt : typing.Optional[str]
            Conversation context can be passed as a prompt to boost model accuracy. However, the current system is at an experimentation stage and doesn't match the prompt performance of large language models.

        model : typing.Optional[SpeechToTextTranslateModel]
            Model to be used for speech to text translation.

            - **saaras:v2.5** (default): Translation model that translates audio from any spoken Indic language to English.
              - Example: Hindi audio â†’ English text output

        input_audio_codec : typing.Optional[InputAudioCodec]
            Audio codec/format of the input file. Our API automatically detects all codec formats, but for PCM files specifically (pcm_s16le, pcm_l16, pcm_raw), you must pass this parameter. PCM files are supported only at 16kHz sample rate.

        request_options : typing.Optional[RequestOptions]
            Request-specific configuration.

        Returns
        -------
        SpeechToTextTranslateResponse
            Successful Response

        Examples
        --------
        import asyncio

        from sarvamai import AsyncSarvamAI

        client = AsyncSarvamAI(
            api_subscription_key="YOUR_API_SUBSCRIPTION_KEY",
        )


        async def main() -> None:
            await client.speech_to_text.translate()


        asyncio.run(main())
        r2   Nr3   r5   r   r   r   r4   k  s
   €B
ÿz!AsyncSpeechToTextClient.translate)r6   r7   r8   r   r   r9   r   r!   r:   r   r;   r<   r=   r
   r   r	   r   r   r   r,   r>   r   r   r4   r   r   r   r   r?   Ü   sT    øýüûúùø	
÷ ùýüûúùør?   )r<   Ú r   Úcore.client_wrapperr   r   Úcore.request_optionsr   Útypes.input_audio_codecr   Ú
types.moder   Útypes.speech_to_text_languager	   Útypes.speech_to_text_modelr
   Útypes.speech_to_text_responser   Ú$types.speech_to_text_translate_modelr   Ú'types.speech_to_text_translate_responser   Ú
raw_clientr   r   ÚcastÚAnyr:   r   r?   r   r   r   r   Ú<module>   s     H