o
    qmi{                     @   s.  d dl Z d dlmZ ddlmZ ddlmZ ddlmZm	Z	 ddl
mZmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlm Z  ddl!m"Z" ddl#m$Z$ ddl%m&Z& ddl'm(Z( ddl)m*Z* e +e j,dZ-G dd dZ.G dd dZ/dS )    N)JSONDecodeError   )core)ApiError)AsyncClientWrapperSyncClientWrapper)AsyncHttpResponseHttpResponse)parse_obj_as)RequestOptions)BadRequestError)ForbiddenError)InternalServerError)ServiceUnavailableError)TooManyRequestsError)UnprocessableEntityError)InputAudioCodec)Mode)SpeechToTextLanguage)SpeechToTextModel)SpeechToTextResponse)SpeechToTextTranslateModel)SpeechToTextTranslateResponse.c                   @      e Zd ZdefddZeeeedddejdej	e
 dej	e d	ej	e d
ej	e dej	e dee fddZeeedddejdej	e dej	e d
ej	e dej	e dee fddZdS )RawSpeechToTextClientclient_wrapperc                C   
   || _ d S N_client_wrapperselfr    r"   V/home/ubuntu/.local/lib/python3.10/site-packages/sarvamai/speech_to_text/raw_client.py__init__      
zRawSpeechToTextClient.__init__Nmodelmodelanguage_codeinput_audio_codecrequest_optionsfiler'   r(   r)   r*   r+   returnc          
   
   C   s@  | j jjd| j  jd||||dd|i|tdd}zd|j  kr&dk r;n ntt	t
t	| d	}t||d
W S |jdkr[tt|jttjtj t
tjtj | d	d|jdkr{tt|jttjtj t
tjtj | d	d|jdkrtt|jttjtj t
tjtj | d	d|jdkrtt|jttjtj t
tjtj | d	d|jdkrtt|jttjtj t
tjtj | d	d|jdkrtt|jttjtj t
tjtj | d	d| }	W n ty   t|jt|j|jdw t|jt|j|	d)u  
        ## Speech to Text API

        This API transcribes speech to text in multiple Indian languages and English. Supports transcription for interactive applications.

        ### Available Options:
        - **REST API** (Current Endpoint): For quick responses under 30 seconds with immediate results
        - **Batch API**: For longer audio files, [Follow This Documentation](https://docs.sarvam.ai/api-reference-docs/api-guides-tutorials/speech-to-text/batch-api)
          - Supports diarization (speaker identification)

        ### Note:
        - Pricing differs for REST and Batch APIs
        - Diarization is only available in Batch API with separate pricing
        - Please refer to [here](https://docs.sarvam.ai/api-reference-docs/getting-started/pricing) for detailed pricing information

        Parameters
        ----------
        file : core.File
            See core.File for more documentation

        model : typing.Optional[SpeechToTextModel]
            Specifies the model to use for speech-to-text conversion.

            - **saarika:v2.5** (default): Transcribes audio in the spoken language.

            - **saaras:v3**: State-of-the-art model with flexible output formats. Supports multiple modes via the `mode` parameter: transcribe, translate, verbatim, translit, codemix.

        mode : typing.Optional[Mode]
            Mode of operation. **Only applicable when using saaras:v3 model.**

            Example audio: 'मेरा फोन नंबर है 9840950950'

            - **transcribe** (default): Standard transcription in the original language with proper formatting and number normalization.
              - Output: `मेरा फोन नंबर है 9840950950`

            - **translate**: Translates speech from any supported Indic language to English.
              - Output: `My phone number is 9840950950`

            - **verbatim**: Exact word-for-word transcription without normalization, preserving filler words and spoken numbers as-is.
              - Output: `मेरा फोन नंबर है नौ आठ चार zero नौ पांच zero नौ पांच zero`

            - **translit**: Romanization - Transliterates speech to Latin/Roman script only.
              - Output: `mera phone number hai 9840950950`

            - **codemix**: Code-mixed text with English words in English and Indic words in native script.
              - Output: `मेरा phone number है 9840950950`

        language_code : typing.Optional[SpeechToTextLanguage]
            Specifies the language of the input audio in BCP-47 format.

            **Note:** This parameter is optional for `saarika:v2.5` model.

            **Available Options:**
            - `unknown`: Use when the language is not known; the API will auto-detect.
            - `hi-IN`: Hindi
            - `bn-IN`: Bengali
            - `kn-IN`: Kannada
            - `ml-IN`: Malayalam
            - `mr-IN`: Marathi
            - `od-IN`: Odia
            - `pa-IN`: Punjabi
            - `ta-IN`: Tamil
            - `te-IN`: Telugu
            - `en-IN`: English
            - `gu-IN`: Gujarati

            **Additional Options (saaras:v3 only):**
            - `as-IN`: Assamese
            - `ur-IN`: Urdu
            - `ne-IN`: Nepali
            - `kok-IN`: Konkani
            - `ks-IN`: Kashmiri
            - `sd-IN`: Sindhi
            - `sa-IN`: Sanskrit
            - `sat-IN`: Santali
            - `mni-IN`: Manipuri
            - `brx-IN`: Bodo
            - `mai-IN`: Maithili
            - `doi-IN`: Dogri

        input_audio_codec : typing.Optional[InputAudioCodec]
            Input Audio codec/format of the input file. PCM files are supported only at 16kHz sample rate.

        request_options : typing.Optional[RequestOptions]
            Request-specific configuration.

        Returns
        -------
        HttpResponse[SpeechToTextResponse]
            Successful Response
        speech-to-textPOSTr'   r(   r)   r*   r,   Tbase_urlmethoddatafilesr+   omitforce_multipart   ,  type_object_responser4     headersbody          status_coderA   rB   )r   httpx_clientrequestget_environmentbaseOMITrI   typingcastr   r
   jsonr	   r   dictrA   OptionalAnyr   r   r   r   r   r   r   text
r!   r,   r'   r(   r)   r*   r+   	_response_data_response_jsonr"   r"   r#   
transcribe"   s   e
























z RawSpeechToTextClient.transcribepromptr'   r*   r+   r\   c          	   
   C   s>  | j jjd| j  jd|||dd|i|tdd}zd|j  kr%dk r:n ntt	t
t	| d	}t||d
W S |jdkrZtt|jttjtj t
tjtj | d	d|jdkrztt|jttjtj t
tjtj | d	d|jdkrtt|jttjtj t
tjtj | d	d|jdkrtt|jttjtj t
tjtj | d	d|jdkrtt|jttjtj t
tjtj | d	d|jdkrtt|jttjtj t
tjtj | d	d| }W n ty   t|jt|j|jdw t|jt|j|d)u  
        ## Speech to Text Translation API

        This API automatically detects the input language, transcribes the speech, and translates the text to English.

        ### Available Options:
        - **REST API** (Current Endpoint): For quick responses under 30 seconds with immediate results
        - **Batch API**: For longer audio files [Follow this documentation](https://docs.sarvam.ai/api-reference-docs/api-guides-tutorials/speech-to-text/batch-api)
          - Supports diarization (speaker identification)

        ### Note:
        - Pricing differs for REST and Batch APIs
        - Diarization is only available in Batch API with separate pricing
        - Please refer to [here](https://docs.sarvam.ai/api-reference-docs/getting-started/pricing) for detailed pricing information

        Parameters
        ----------
        file : core.File
            See core.File for more documentation

        prompt : typing.Optional[str]
            Conversation context can be passed as a prompt to boost model accuracy. However, the current system is at an experimentation stage and doesn't match the prompt performance of large language models.

        model : typing.Optional[SpeechToTextTranslateModel]
            Model to be used for speech to text translation.

            - **saaras:v2.5** (default): Translation model that translates audio from any spoken Indic language to English.
              - Example: Hindi audio → English text output

        input_audio_codec : typing.Optional[InputAudioCodec]
            Audio codec/format of the input file. Our API automatically detects all codec formats, but for PCM files specifically (pcm_s16le, pcm_l16, pcm_raw), you must pass this parameter. PCM files are supported only at 16kHz sample rate.

        request_options : typing.Optional[RequestOptions]
            Request-specific configuration.

        Returns
        -------
        HttpResponse[SpeechToTextTranslateResponse]
            Successful Response
        speech-to-text-translater/   r\   r'   r*   r,   Tr1   r8   r9   r:   r=   r?   r@   rC   rD   rE   rF   rG   rH   )r   rJ   rK   rL   rM   rN   rI   rO   rP   r   r
   rQ   r	   r   rR   rA   rS   rT   r   r   r   r   r   r   r   rU   	r!   r,   r\   r'   r*   r+   rW   rX   rY   r"   r"   r#   	translate   s   1
























zRawSpeechToTextClient.translate)__name__
__module____qualname__r   r$   rN   r   FilerO   rS   r   r   r   r   r   r	   r   rZ   strr   r   r`   r"   r"   r"   r#   r      P    	
 Lr   c                   @   r   )AsyncRawSpeechToTextClientr   c                C   r   r   r   r    r"   r"   r#   r$   }  r%   z#AsyncRawSpeechToTextClient.__init__Nr&   r,   r'   r(   r)   r*   r+   r-   c          
   
      sH  | j jjd| j  jd||||dd|i|tddI dH }zd|j  kr*d	k r?n ntt	t
t	| d
}t||dW S |jdkr_tt|jttjtj t
tjtj | d
d|jdkrtt|jttjtj t
tjtj | d
d|jdkrtt|jttjtj t
tjtj | d
d|jdkrtt|jttjtj t
tjtj | d
d|jdkrtt|jttjtj t
tjtj | d
d|jdkrtt|jttjtj t
tjtj | d
d| }	W n ty   t|jt|j|jdw t|jt|j|	d)u  
        ## Speech to Text API

        This API transcribes speech to text in multiple Indian languages and English. Supports transcription for interactive applications.

        ### Available Options:
        - **REST API** (Current Endpoint): For quick responses under 30 seconds with immediate results
        - **Batch API**: For longer audio files, [Follow This Documentation](https://docs.sarvam.ai/api-reference-docs/api-guides-tutorials/speech-to-text/batch-api)
          - Supports diarization (speaker identification)

        ### Note:
        - Pricing differs for REST and Batch APIs
        - Diarization is only available in Batch API with separate pricing
        - Please refer to [here](https://docs.sarvam.ai/api-reference-docs/getting-started/pricing) for detailed pricing information

        Parameters
        ----------
        file : core.File
            See core.File for more documentation

        model : typing.Optional[SpeechToTextModel]
            Specifies the model to use for speech-to-text conversion.

            - **saarika:v2.5** (default): Transcribes audio in the spoken language.

            - **saaras:v3**: State-of-the-art model with flexible output formats. Supports multiple modes via the `mode` parameter: transcribe, translate, verbatim, translit, codemix.

        mode : typing.Optional[Mode]
            Mode of operation. **Only applicable when using saaras:v3 model.**

            Example audio: 'मेरा फोन नंबर है 9840950950'

            - **transcribe** (default): Standard transcription in the original language with proper formatting and number normalization.
              - Output: `मेरा फोन नंबर है 9840950950`

            - **translate**: Translates speech from any supported Indic language to English.
              - Output: `My phone number is 9840950950`

            - **verbatim**: Exact word-for-word transcription without normalization, preserving filler words and spoken numbers as-is.
              - Output: `मेरा फोन नंबर है नौ आठ चार zero नौ पांच zero नौ पांच zero`

            - **translit**: Romanization - Transliterates speech to Latin/Roman script only.
              - Output: `mera phone number hai 9840950950`

            - **codemix**: Code-mixed text with English words in English and Indic words in native script.
              - Output: `मेरा phone number है 9840950950`

        language_code : typing.Optional[SpeechToTextLanguage]
            Specifies the language of the input audio in BCP-47 format.

            **Note:** This parameter is optional for `saarika:v2.5` model.

            **Available Options:**
            - `unknown`: Use when the language is not known; the API will auto-detect.
            - `hi-IN`: Hindi
            - `bn-IN`: Bengali
            - `kn-IN`: Kannada
            - `ml-IN`: Malayalam
            - `mr-IN`: Marathi
            - `od-IN`: Odia
            - `pa-IN`: Punjabi
            - `ta-IN`: Tamil
            - `te-IN`: Telugu
            - `en-IN`: English
            - `gu-IN`: Gujarati

            **Additional Options (saaras:v3 only):**
            - `as-IN`: Assamese
            - `ur-IN`: Urdu
            - `ne-IN`: Nepali
            - `kok-IN`: Konkani
            - `ks-IN`: Kashmiri
            - `sd-IN`: Sindhi
            - `sa-IN`: Sanskrit
            - `sat-IN`: Santali
            - `mni-IN`: Manipuri
            - `brx-IN`: Bodo
            - `mai-IN`: Maithili
            - `doi-IN`: Dogri

        input_audio_codec : typing.Optional[InputAudioCodec]
            Input Audio codec/format of the input file. PCM files are supported only at 16kHz sample rate.

        request_options : typing.Optional[RequestOptions]
            Request-specific configuration.

        Returns
        -------
        AsyncHttpResponse[SpeechToTextResponse]
            Successful Response
        r.   r/   r0   r,   Tr1   Nr8   r9   r:   r=   r?   r@   rC   rD   rE   rF   rG   rH   )r   rJ   rK   rL   rM   rN   rI   rO   rP   r   r
   rQ   r   r   rR   rA   rS   rT   r   r   r   r   r   r   r   rU   rV   r"   r"   r#   rZ     s   e
























z%AsyncRawSpeechToTextClient.transcriber[   r\   c          	   
      sF  | j jjd| j  jd|||dd|i|tddI dH }zd|j  kr)d	k r>n ntt	t
t	| d
}t||dW S |jdkr^tt|jttjtj t
tjtj | d
d|jdkr~tt|jttjtj t
tjtj | d
d|jdkrtt|jttjtj t
tjtj | d
d|jdkrtt|jttjtj t
tjtj | d
d|jdkrtt|jttjtj t
tjtj | d
d|jdkrtt|jttjtj t
tjtj | d
d| }W n ty   t|jt|j|jdw t|jt|j|d)u  
        ## Speech to Text Translation API

        This API automatically detects the input language, transcribes the speech, and translates the text to English.

        ### Available Options:
        - **REST API** (Current Endpoint): For quick responses under 30 seconds with immediate results
        - **Batch API**: For longer audio files [Follow this documentation](https://docs.sarvam.ai/api-reference-docs/api-guides-tutorials/speech-to-text/batch-api)
          - Supports diarization (speaker identification)

        ### Note:
        - Pricing differs for REST and Batch APIs
        - Diarization is only available in Batch API with separate pricing
        - Please refer to [here](https://docs.sarvam.ai/api-reference-docs/getting-started/pricing) for detailed pricing information

        Parameters
        ----------
        file : core.File
            See core.File for more documentation

        prompt : typing.Optional[str]
            Conversation context can be passed as a prompt to boost model accuracy. However, the current system is at an experimentation stage and doesn't match the prompt performance of large language models.

        model : typing.Optional[SpeechToTextTranslateModel]
            Model to be used for speech to text translation.

            - **saaras:v2.5** (default): Translation model that translates audio from any spoken Indic language to English.
              - Example: Hindi audio → English text output

        input_audio_codec : typing.Optional[InputAudioCodec]
            Audio codec/format of the input file. Our API automatically detects all codec formats, but for PCM files specifically (pcm_s16le, pcm_l16, pcm_raw), you must pass this parameter. PCM files are supported only at 16kHz sample rate.

        request_options : typing.Optional[RequestOptions]
            Request-specific configuration.

        Returns
        -------
        AsyncHttpResponse[SpeechToTextTranslateResponse]
            Successful Response
        r]   r/   r^   r,   Tr1   Nr8   r9   r:   r=   r?   r@   rC   rD   rE   rF   rG   rH   )r   rJ   rK   rL   rM   rN   rI   rO   rP   r   r
   rQ   r   r   rR   rA   rS   rT   r   r   r   r   r   r   r   rU   r_   r"   r"   r#   r`   G  s   1
























z$AsyncRawSpeechToTextClient.translate)ra   rb   rc   r   r$   rN   r   rd   rO   rS   r   r   r   r   r   r   r   rZ   re   r   r   r`   r"   r"   r"   r#   rg   |  rf   rg   )0rO   json.decoderr    r   core.api_errorr   core.client_wrapperr   r   core.http_responser   r	   core.pydantic_utilitiesr
   core.request_optionsr   errors.bad_request_errorr   errors.forbidden_errorr   errors.internal_server_errorr    errors.service_unavailable_errorr   errors.too_many_requests_errorr   !errors.unprocessable_entity_errorr   types.input_audio_codecr   
types.moder   types.speech_to_text_languager   types.speech_to_text_modelr   types.speech_to_text_responser   $types.speech_to_text_translate_modelr   'types.speech_to_text_translate_responser   rP   rT   rN   r   rg   r"   r"   r"   r#   <module>   s4     `