# This file was auto-generated by Fern from our API Definition.

import typing
from contextlib import asynccontextmanager, contextmanager

import httpx
import websockets.exceptions
import websockets.sync.client as websockets_sync_client
from ..core.api_error import ApiError
from ..core.client_wrapper import AsyncClientWrapper, SyncClientWrapper
from ..core.request_options import RequestOptions
from .socket_client import AsyncSpeechToTextStreamingSocketClient, SpeechToTextStreamingSocketClient
from .types.speech_to_text_streaming_flush_signal import SpeechToTextStreamingFlushSignal
from .types.speech_to_text_streaming_high_vad_sensitivity import SpeechToTextStreamingHighVadSensitivity
from .types.speech_to_text_streaming_input_audio_codec import SpeechToTextStreamingInputAudioCodec
from .types.speech_to_text_streaming_language_code import SpeechToTextStreamingLanguageCode
from .types.speech_to_text_streaming_mode import SpeechToTextStreamingMode
from .types.speech_to_text_streaming_model import SpeechToTextStreamingModel
from .types.speech_to_text_streaming_vad_signals import SpeechToTextStreamingVadSignals

try:
    from websockets.legacy.client import connect as websockets_client_connect  # type: ignore
except ImportError:
    from websockets import connect as websockets_client_connect  # type: ignore


class RawSpeechToTextStreamingClient:
    """Synchronous low-level client that opens the speech-to-text streaming WebSocket."""

    def __init__(self, *, client_wrapper: SyncClientWrapper):
        self._client_wrapper = client_wrapper

    @contextmanager
    def connect(
        self,
        *,
        language_code: SpeechToTextStreamingLanguageCode,
        model: typing.Optional[SpeechToTextStreamingModel] = None,
        mode: typing.Optional[SpeechToTextStreamingMode] = None,
        sample_rate: typing.Optional[str] = None,
        high_vad_sensitivity: typing.Optional[SpeechToTextStreamingHighVadSensitivity] = None,
        vad_signals: typing.Optional[SpeechToTextStreamingVadSignals] = None,
        flush_signal: typing.Optional[SpeechToTextStreamingFlushSignal] = None,
        input_audio_codec: typing.Optional[SpeechToTextStreamingInputAudioCodec] = None,
        api_subscription_key: typing.Optional[str] = None,
        request_options: typing.Optional[RequestOptions] = None,
    ) -> typing.Iterator[SpeechToTextStreamingSocketClient]:
        """
        WebSocket channel for real-time speech to text streaming.

        **Note:** This API Reference page is provided for informational purposes only.
        The Try It playground may not provide the best experience for streaming audio.
        For optimal streaming performance, please use the SDK or implement your own WebSocket client.

        Parameters
        ----------
        language_code : SpeechToTextStreamingLanguageCode
            Specifies the language of the input audio in BCP-47 format.

            **Available Options (saarika:v2.5):**
            - `unknown` (default): Use when the language is not known; the API will auto-detect.
            - `hi-IN`: Hindi
            - `bn-IN`: Bengali
            - `gu-IN`: Gujarati
            - `kn-IN`: Kannada
            - `ml-IN`: Malayalam
            - `mr-IN`: Marathi
            - `od-IN`: Odia
            - `pa-IN`: Punjabi
            - `ta-IN`: Tamil
            - `te-IN`: Telugu
            - `en-IN`: English

            **Additional Options (saaras:v3 only):**
            - `as-IN`: Assamese
            - `ur-IN`: Urdu
            - `ne-IN`: Nepali
            - `kok-IN`: Konkani
            - `ks-IN`: Kashmiri
            - `sd-IN`: Sindhi
            - `sa-IN`: Sanskrit
            - `sat-IN`: Santali
            - `mni-IN`: Manipuri
            - `brx-IN`: Bodo
            - `mai-IN`: Maithili
            - `doi-IN`: Dogri

        model : typing.Optional[SpeechToTextStreamingModel]
            Specifies the model to use for speech-to-text conversion.

            - **saarika:v2.5** (default): Transcribes audio in the spoken language.

            - **saaras:v3**: State-of-the-art model with flexible output formats. Supports multiple modes via the `mode` parameter: transcribe, translate, verbatim, translit, codemix.

        mode : typing.Optional[SpeechToTextStreamingMode]
            Mode of operation. **Only applicable when using saaras:v3 model.**

            Example audio: 'मेरा फोन नंबर है 9840950950'

            - **transcribe** (default): Standard transcription in the original language with proper formatting and number normalization.
              - Output: `मेरा फोन नंबर है 9840950950`

            - **translate**: Translates speech from any supported Indic language to English.
              - Output: `My phone number is 9840950950`

            - **verbatim**: Exact word-for-word transcription without normalization, preserving filler words and spoken numbers as-is.
              - Output: `मेरा फोन नंबर है नौ आठ चार zero नौ पांच zero नौ पांच zero`

            - **translit**: Romanization - Transliterates speech to Latin/Roman script only.
              - Output: `mera phone number hai 9840950950`

            - **codemix**: Code-mixed text with English words in English and Indic words in native script.
              - Output: `मेरा phone number है 9840950950`

        sample_rate : typing.Optional[str]
            Audio sample rate for the WebSocket connection. When specified as a connection parameter, only 16kHz and 8kHz are supported. 8kHz is only available via this connection parameter. If not specified, defaults to 16kHz.

        high_vad_sensitivity : typing.Optional[SpeechToTextStreamingHighVadSensitivity]
            Enable high VAD (Voice Activity Detection) sensitivity

        vad_signals : typing.Optional[SpeechToTextStreamingVadSignals]
            Enable VAD signals in response

        flush_signal : typing.Optional[SpeechToTextStreamingFlushSignal]
            Signal to flush the audio buffer and finalize transcription

        input_audio_codec : typing.Optional[SpeechToTextStreamingInputAudioCodec]
            Audio codec/format of the input stream. Use this when sending raw PCM audio.
            Supported values: wav, pcm_s16le, pcm_l16, pcm_raw.

        api_subscription_key : typing.Optional[str]
            API subscription key for authentication

        request_options : typing.Optional[RequestOptions]
            Request-specific configuration.

        Returns
        -------
        SpeechToTextStreamingSocketClient

        Raises
        ------
        ApiError
            If the server rejects the WebSocket handshake (e.g. 401 for bad credentials).
        """
        ws_url = self._client_wrapper.get_environment().production + "/speech-to-text/ws"
        query_params = httpx.QueryParams()
        # Note: the server expects "language-code" (hyphenated); the remaining
        # parameters use snake_case names.
        if language_code is not None:
            query_params = query_params.add("language-code", language_code)
        if model is not None:
            query_params = query_params.add("model", model)
        if mode is not None:
            query_params = query_params.add("mode", mode)
        if sample_rate is not None:
            query_params = query_params.add("sample_rate", sample_rate)
        if high_vad_sensitivity is not None:
            query_params = query_params.add("high_vad_sensitivity", high_vad_sensitivity)
        if vad_signals is not None:
            query_params = query_params.add("vad_signals", vad_signals)
        if flush_signal is not None:
            query_params = query_params.add("flush_signal", flush_signal)
        if input_audio_codec is not None:
            query_params = query_params.add("input_audio_codec", input_audio_codec)
        ws_url = ws_url + f"?{query_params}"
        headers = self._client_wrapper.get_headers()
        if api_subscription_key is not None:
            headers["Api-Subscription-Key"] = str(api_subscription_key)
        if request_options and "additional_headers" in request_options:
            headers.update(request_options["additional_headers"])
        # The threading-based client (websockets.sync.client, websockets >= 12) raises
        # InvalidStatus on a rejected handshake, while the legacy client raised
        # InvalidStatusCode (removed in websockets 14). Catch whichever classes exist in
        # the installed version so a handshake failure is always surfaced as an ApiError.
        invalid_status_errors = tuple(
            err
            for err in (
                getattr(websockets.exceptions, "InvalidStatus", None),
                getattr(websockets.exceptions, "InvalidStatusCode", None),
            )
            if err is not None
        )
        try:
            with websockets_sync_client.connect(ws_url, additional_headers=headers) as protocol:
                yield SpeechToTextStreamingSocketClient(websocket=protocol)
        except invalid_status_errors as exc:
            # InvalidStatusCode exposes .status_code directly; InvalidStatus exposes
            # the rejected handshake response via .response.status_code.
            status_code = getattr(exc, "status_code", None)
            if status_code is None:
                status_code = exc.response.status_code
            if status_code == 401:
                raise ApiError(
                    status_code=status_code,
                    headers=dict(headers),
                    body="Websocket initialized with invalid credentials.",
                ) from exc
            raise ApiError(
                status_code=status_code,
                headers=dict(headers),
                body="Unexpected error when initializing websocket connection.",
            ) from exc


class AsyncRawSpeechToTextStreamingClient:
    """Asynchronous low-level client that opens the speech-to-text streaming WebSocket."""

    def __init__(self, *, client_wrapper: AsyncClientWrapper):
        self._client_wrapper = client_wrapper

    @asynccontextmanager
    async def connect(
        self,
        *,
        language_code: SpeechToTextStreamingLanguageCode,
        model: typing.Optional[SpeechToTextStreamingModel] = None,
        mode: typing.Optional[SpeechToTextStreamingMode] = None,
        sample_rate: typing.Optional[str] = None,
        high_vad_sensitivity: typing.Optional[SpeechToTextStreamingHighVadSensitivity] = None,
        vad_signals: typing.Optional[SpeechToTextStreamingVadSignals] = None,
        flush_signal: typing.Optional[SpeechToTextStreamingFlushSignal] = None,
        input_audio_codec: typing.Optional[SpeechToTextStreamingInputAudioCodec] = None,
        api_subscription_key: typing.Optional[str] = None,
        request_options: typing.Optional[RequestOptions] = None,
    ) -> typing.AsyncIterator[AsyncSpeechToTextStreamingSocketClient]:
        """
        WebSocket channel for real-time speech to text streaming.

        **Note:** This API Reference page is provided for informational purposes only.
        The Try It playground may not provide the best experience for streaming audio.
        For optimal streaming performance, please use the SDK or implement your own WebSocket client.

        Parameters
        ----------
        language_code : SpeechToTextStreamingLanguageCode
            Specifies the language of the input audio in BCP-47 format.

            **Available Options (saarika:v2.5):**
            - `unknown` (default): Use when the language is not known; the API will auto-detect.
            - `hi-IN`: Hindi
            - `bn-IN`: Bengali
            - `gu-IN`: Gujarati
            - `kn-IN`: Kannada
            - `ml-IN`: Malayalam
            - `mr-IN`: Marathi
            - `od-IN`: Odia
            - `pa-IN`: Punjabi
            - `ta-IN`: Tamil
            - `te-IN`: Telugu
            - `en-IN`: English

            **Additional Options (saaras:v3 only):**
            - `as-IN`: Assamese
            - `ur-IN`: Urdu
            - `ne-IN`: Nepali
            - `kok-IN`: Konkani
            - `ks-IN`: Kashmiri
            - `sd-IN`: Sindhi
            - `sa-IN`: Sanskrit
            - `sat-IN`: Santali
            - `mni-IN`: Manipuri
            - `brx-IN`: Bodo
            - `mai-IN`: Maithili
            - `doi-IN`: Dogri

        model : typing.Optional[SpeechToTextStreamingModel]
            Specifies the model to use for speech-to-text conversion.

            - **saarika:v2.5** (default): Transcribes audio in the spoken language.

            - **saaras:v3**: State-of-the-art model with flexible output formats. Supports multiple modes via the `mode` parameter: transcribe, translate, verbatim, translit, codemix.

        mode : typing.Optional[SpeechToTextStreamingMode]
            Mode of operation. **Only applicable when using saaras:v3 model.**

            Example audio: 'मेरा फोन नंबर है 9840950950'

            - **transcribe** (default): Standard transcription in the original language with proper formatting and number normalization.
              - Output: `मेरा फोन नंबर है 9840950950`

            - **translate**: Translates speech from any supported Indic language to English.
              - Output: `My phone number is 9840950950`

            - **verbatim**: Exact word-for-word transcription without normalization, preserving filler words and spoken numbers as-is.
              - Output: `मेरा फोन नंबर है नौ आठ चार zero नौ पांच zero नौ पांच zero`

            - **translit**: Romanization - Transliterates speech to Latin/Roman script only.
              - Output: `mera phone number hai 9840950950`

            - **codemix**: Code-mixed text with English words in English and Indic words in native script.
              - Output: `मेरा phone number है 9840950950`

        sample_rate : typing.Optional[str]
            Audio sample rate for the WebSocket connection. When specified as a connection parameter, only 16kHz and 8kHz are supported. 8kHz is only available via this connection parameter. If not specified, defaults to 16kHz.

        high_vad_sensitivity : typing.Optional[SpeechToTextStreamingHighVadSensitivity]
            Enable high VAD (Voice Activity Detection) sensitivity

        vad_signals : typing.Optional[SpeechToTextStreamingVadSignals]
            Enable VAD signals in response

        flush_signal : typing.Optional[SpeechToTextStreamingFlushSignal]
            Signal to flush the audio buffer and finalize transcription

        input_audio_codec : typing.Optional[SpeechToTextStreamingInputAudioCodec]
            Audio codec/format of the input stream. Use this when sending raw PCM audio.
            Supported values: wav, pcm_s16le, pcm_l16, pcm_raw.

        api_subscription_key : typing.Optional[str]
            API subscription key for authentication

        request_options : typing.Optional[RequestOptions]
            Request-specific configuration.

        Returns
        -------
        AsyncSpeechToTextStreamingSocketClient

        Raises
        ------
        ApiError
            If the server rejects the WebSocket handshake (e.g. 401 for bad credentials).
        """
        ws_url = self._client_wrapper.get_environment().production + "/speech-to-text/ws"
        query_params = httpx.QueryParams()
        # Note: the server expects "language-code" (hyphenated); the remaining
        # parameters use snake_case names.
        if language_code is not None:
            query_params = query_params.add("language-code", language_code)
        if model is not None:
            query_params = query_params.add("model", model)
        if mode is not None:
            query_params = query_params.add("mode", mode)
        if sample_rate is not None:
            query_params = query_params.add("sample_rate", sample_rate)
        if high_vad_sensitivity is not None:
            query_params = query_params.add("high_vad_sensitivity", high_vad_sensitivity)
        if vad_signals is not None:
            query_params = query_params.add("vad_signals", vad_signals)
        if flush_signal is not None:
            query_params = query_params.add("flush_signal", flush_signal)
        if input_audio_codec is not None:
            query_params = query_params.add("input_audio_codec", input_audio_codec)
        ws_url = ws_url + f"?{query_params}"
        headers = self._client_wrapper.get_headers()
        if api_subscription_key is not None:
            headers["Api-Subscription-Key"] = str(api_subscription_key)
        if request_options and "additional_headers" in request_options:
            headers.update(request_options["additional_headers"])
        # The legacy asyncio client (websockets < 14) takes the handshake headers as
        # `extra_headers`; the modern asyncio client renamed the kwarg to
        # `additional_headers`. The module-level fallback import may resolve to either,
        # so retry with the new name if the old one is rejected.
        try:
            connect_ctx = websockets_client_connect(ws_url, extra_headers=headers)
        except TypeError:
            connect_ctx = websockets_client_connect(ws_url, additional_headers=headers)
        # The modern asyncio client raises InvalidStatus on a rejected handshake, while
        # the legacy client raised InvalidStatusCode (removed in websockets 14). Catch
        # whichever classes exist in the installed version so a handshake failure is
        # always surfaced as an ApiError.
        invalid_status_errors = tuple(
            err
            for err in (
                getattr(websockets.exceptions, "InvalidStatus", None),
                getattr(websockets.exceptions, "InvalidStatusCode", None),
            )
            if err is not None
        )
        try:
            async with connect_ctx as protocol:
                yield AsyncSpeechToTextStreamingSocketClient(websocket=protocol)
        except invalid_status_errors as exc:
            # InvalidStatusCode exposes .status_code directly; InvalidStatus exposes
            # the rejected handshake response via .response.status_code.
            status_code = getattr(exc, "status_code", None)
            if status_code is None:
                status_code = exc.response.status_code
            if status_code == 401:
                raise ApiError(
                    status_code=status_code,
                    headers=dict(headers),
                    body="Websocket initialized with invalid credentials.",
                ) from exc
            raise ApiError(
                status_code=status_code,
                headers=dict(headers),
                body="Unexpected error when initializing websocket connection.",
            ) from exc
