# This file was auto-generated by Fern from our API Definition.

import json
import typing

import websockets
import websockets.sync.connection as websockets_sync_connection
from ..core.events import EventEmitterMixin, EventType
from ..core.pydantic_utilities import parse_obj_as
from ..types.audio_output import AudioOutput
from ..types.flush_signal import FlushSignal
from ..types.error_response import ErrorResponse
from ..types.event_response import EventResponse
from ..types.configure_connection import ConfigureConnection
from ..types.configure_connection_data import ConfigureConnectionData
from ..types.ping_signal import PingSignal
from ..types.send_text import SendText
from ..types.send_text_data import SendTextData

# Union of the message models that payloads received on the text-to-speech
# streaming socket are parsed into (see the `parse_obj_as` calls below).
TextToSpeechStreamingSocketClientResponse = typing.Union[AudioOutput, ErrorResponse, EventResponse]


class AsyncTextToSpeechStreamingSocketClient(EventEmitterMixin):
    """
    Asyncio client for the text-to-speech streaming websocket.

    Wraps a websocket connection supplied by the caller. Outgoing messages are
    built from the request models and sent as JSON text; incoming messages are
    parsed into one of the ``TextToSpeechStreamingSocketClientResponse`` types.
    """

    def __init__(self, *, websocket: websockets.WebSocketClientProtocol) -> None:
        """
        :param websocket: The websocket connection used for all sends and receives.
        """
        super().__init__()
        self._websocket = websocket

    async def __aiter__(self) -> typing.AsyncIterator[TextToSpeechStreamingSocketClientResponse]:
        """
        Iterate over the messages received on the websocket connection.

        Text frames are decoded as JSON before being parsed; any other frame is
        handed to the parser unchanged. Errors are not intercepted here and
        propagate to the caller.
        """
        async for message in self._websocket:
            payload = json.loads(message) if isinstance(message, str) else message
            yield parse_obj_as(TextToSpeechStreamingSocketClientResponse, payload)  # type: ignore

    async def start_listening(self) -> None:
        """
        Start listening for messages on the websocket connection.

        Emits events in the following order:
        - EventType.OPEN when connection is established
        - EventType.MESSAGE for each message received
        - EventType.ERROR if a websocket error occurs or a text frame is not valid JSON
        - EventType.CLOSE when connection is closed

        Any other exception raised while handling a message propagates to the
        caller; EventType.CLOSE is still emitted first.
        """
        self._emit(EventType.OPEN, None)
        try:
            async for raw_message in self._websocket:
                # Only text frames are JSON-decoded; other frames go to the parser as-is.
                payload = json.loads(raw_message) if isinstance(raw_message, str) else raw_message
                parsed = parse_obj_as(TextToSpeechStreamingSocketClientResponse, payload)  # type: ignore
                self._emit(EventType.MESSAGE, parsed)
        except (websockets.WebSocketException, json.JSONDecodeError) as exc:
            # A malformed JSON frame is reported through the ERROR event, just like
            # a transport failure, instead of escaping the listener unannounced.
            self._emit(EventType.ERROR, exc)
        finally:
            # CLOSE is emitted on every exit path, including unexpected exceptions.
            self._emit(EventType.CLOSE, None)

    async def configure(
        self,
        target_language_code: str,
        speaker: str = "anushka",
        pitch: float = 0.0,
        pace: float = 1.0,
        loudness: float = 1.0,
        speech_sample_rate: int = 22050,
        enable_preprocessing: bool = False,
        output_audio_codec: str = "mp3",
        output_audio_bitrate: str = "128k",
        min_buffer_size: int = 50,
        max_chunk_length: int = 150,
    ) -> None:
        """
        Configuration message required as the first message after establishing the WebSocket connection.
        This initializes TTS parameters and can be updated at any time during the WebSocket lifecycle
        by sending a new config message. When a config update is sent, any text currently in the buffer
        will be automatically flushed and processed before applying the new configuration.

        :param target_language_code: The language of the text in BCP-47 format
        :param speaker: The speaker voice to be used for the output audio. Default: Anushka.
            Model Compatibility (bulbul:v2): Female: Anushka, Manisha, Vidya, Arya;
            Male: Abhilash, Karun, Hitesh
        :param pitch: Controls the pitch of the audio. Lower values result in a deeper voice,
            while higher values make it sharper. The suitable range is between -0.75
            and 0.75. Default is 0.0.
        :param pace: Controls the speed of the audio. Lower values result in slower speech,
            while higher values make it faster. The suitable range is between 0.5
            and 2.0. Default is 1.0.
        :param loudness: Controls the loudness of the audio. Lower values result in quieter audio,
            while higher values make it louder. The suitable range is between 0.3
            and 3.0. Default is 1.0.
        :param speech_sample_rate: Specifies the sample rate of the output audio. Supported values are
            8000, 16000, 22050, 24000 Hz. If not provided, the default is 22050 Hz.
        :param enable_preprocessing: Controls whether normalization of English words and numeric entities
            (e.g., numbers, dates) is performed. Set to true for better handling
            of mixed-language text. Default is false.
        :param output_audio_codec: Audio codec (currently supports MP3 only, optimized for real-time playback)
        :param output_audio_bitrate: Audio bitrate (choose from 5 supported bitrate options)
        :param min_buffer_size: Minimum character length that triggers buffer flushing for TTS model processing
        :param max_chunk_length: Maximum length for sentence splitting (adjust based on content length)
        """
        data = ConfigureConnectionData(
            target_language_code=target_language_code,
            speaker=speaker,
            pitch=pitch,
            pace=pace,
            loudness=loudness,
            speech_sample_rate=speech_sample_rate,
            enable_preprocessing=enable_preprocessing,
            output_audio_codec=output_audio_codec,
            output_audio_bitrate=output_audio_bitrate,
            min_buffer_size=min_buffer_size,
            max_chunk_length=max_chunk_length,
        )
        message = ConfigureConnection(data=data)
        await self._send_model(message)

    async def convert(self, text: str) -> None:
        """
        Send text to be converted to speech. Text length should be 1-2500 characters.
        Recommended: <500 characters for optimal streaming performance.
        Real-time endpoints perform better with longer character counts.

        :param text: Text to be synthesized (1-2500 characters, recommended <500)
        """
        data = SendTextData(text=text)
        message = SendText(data=data)
        await self._send_model(message)

    async def flush(self) -> None:
        """
        Forces the text buffer to process immediately, regardless of the min_buffer_size threshold.
        Use this when you need to process remaining text that hasn't reached the minimum buffer size.
        """
        message = FlushSignal()
        await self._send_model(message)

    async def ping(self) -> None:
        """
        Send ping signal to keep the WebSocket connection alive. The connection automatically
        closes after one minute of inactivity.
        """
        message = PingSignal()
        await self._send_model(message)

    async def recv(self) -> TextToSpeechStreamingSocketClientResponse:
        """
        Receive a single message from the websocket connection.

        Text frames are decoded as JSON before being parsed; any other frame is
        handed to the parser unchanged.

        :return: The received message parsed into one of the response types.
        """
        data = await self._websocket.recv()
        data = json.loads(data) if isinstance(data, str) else data
        return parse_obj_as(TextToSpeechStreamingSocketClientResponse, data)  # type: ignore

    async def _send(self, data: typing.Any) -> None:
        """
        Send a message to the websocket connection.

        :param data: The payload to send. A ``dict`` is serialized to a JSON string
            first; anything else is passed to the websocket unchanged.
        """
        if isinstance(data, dict):
            data = json.dumps(data)
        await self._websocket.send(data)

    async def _send_model(self, data: typing.Any) -> None:
        """
        Send a Pydantic model to the websocket connection.

        :param data: The model to send; it is converted with ``.dict()`` and then
            sent as JSON.
        """
        await self._send(data.dict())


class TextToSpeechStreamingSocketClient(EventEmitterMixin):
    """
    Synchronous client for the text-to-speech streaming websocket.

    Wraps a websocket connection supplied by the caller. Outgoing messages are
    built from the request models and sent as JSON text; incoming messages are
    parsed into one of the ``TextToSpeechStreamingSocketClientResponse`` types.
    """

    def __init__(self, *, websocket: websockets_sync_connection.Connection) -> None:
        """
        :param websocket: The websocket connection used for all sends and receives.
        """
        super().__init__()
        self._websocket = websocket

    def __iter__(self) -> typing.Iterator[TextToSpeechStreamingSocketClientResponse]:
        """
        Iterate over the messages received on the websocket connection.

        Text frames are decoded as JSON before being parsed; any other frame is
        handed to the parser unchanged. Errors are not intercepted here and
        propagate to the caller.
        """
        for message in self._websocket:
            payload = json.loads(message) if isinstance(message, str) else message
            yield parse_obj_as(TextToSpeechStreamingSocketClientResponse, payload)  # type: ignore

    def start_listening(self) -> None:
        """
        Start listening for messages on the websocket connection.

        Emits events in the following order:
        - EventType.OPEN when connection is established
        - EventType.MESSAGE for each message received
        - EventType.ERROR if a websocket error occurs or a text frame is not valid JSON
        - EventType.CLOSE when connection is closed

        Any other exception raised while handling a message propagates to the
        caller; EventType.CLOSE is still emitted first.
        """
        self._emit(EventType.OPEN, None)
        try:
            for raw_message in self._websocket:
                # Only text frames are JSON-decoded; other frames go to the parser as-is.
                payload = json.loads(raw_message) if isinstance(raw_message, str) else raw_message
                parsed = parse_obj_as(TextToSpeechStreamingSocketClientResponse, payload)  # type: ignore
                self._emit(EventType.MESSAGE, parsed)
        except (websockets.WebSocketException, json.JSONDecodeError) as exc:
            # A malformed JSON frame is reported through the ERROR event, just like
            # a transport failure, instead of escaping the listener unannounced.
            self._emit(EventType.ERROR, exc)
        finally:
            # CLOSE is emitted on every exit path, including unexpected exceptions.
            self._emit(EventType.CLOSE, None)

    def configure(
        self,
        target_language_code: str,
        speaker: str = "anushka",
        pitch: float = 0.0,
        pace: float = 1.0,
        loudness: float = 1.0,
        speech_sample_rate: int = 22050,
        enable_preprocessing: bool = False,
        output_audio_codec: str = "mp3",
        output_audio_bitrate: str = "128k",
        min_buffer_size: int = 50,
        max_chunk_length: int = 150,
    ) -> None:
        """
        Configuration message required as the first message after establishing the WebSocket connection.
        This initializes TTS parameters and can be updated at any time during the WebSocket lifecycle
        by sending a new config message. When a config update is sent, any text currently in the buffer
        will be automatically flushed and processed before applying the new configuration.

        :param target_language_code: The language of the text in BCP-47 format
        :param speaker: The speaker voice to be used for the output audio. Default: Anushka.
            Model Compatibility (bulbul:v2): Female: Anushka, Manisha, Vidya, Arya;
            Male: Abhilash, Karun, Hitesh
        :param pitch: Controls the pitch of the audio. Lower values result in a deeper voice,
            while higher values make it sharper. The suitable range is between -0.75
            and 0.75. Default is 0.0.
        :param pace: Controls the speed of the audio. Lower values result in slower speech,
            while higher values make it faster. The suitable range is between 0.5
            and 2.0. Default is 1.0.
        :param loudness: Controls the loudness of the audio. Lower values result in quieter audio,
            while higher values make it louder. The suitable range is between 0.3
            and 3.0. Default is 1.0.
        :param speech_sample_rate: Specifies the sample rate of the output audio. Supported values are
            8000, 16000, 22050, 24000 Hz. If not provided, the default is 22050 Hz.
        :param enable_preprocessing: Controls whether normalization of English words and numeric entities
            (e.g., numbers, dates) is performed. Set to true for better handling
            of mixed-language text. Default is false.
        :param output_audio_codec: Audio codec (currently supports MP3 only, optimized for real-time playback)
        :param output_audio_bitrate: Audio bitrate (choose from 5 supported bitrate options)
        :param min_buffer_size: Minimum character length that triggers buffer flushing for TTS model processing
        :param max_chunk_length: Maximum length for sentence splitting (adjust based on content length)
        """
        data = ConfigureConnectionData(
            target_language_code=target_language_code,
            speaker=speaker,
            pitch=pitch,
            pace=pace,
            loudness=loudness,
            speech_sample_rate=speech_sample_rate,
            enable_preprocessing=enable_preprocessing,
            output_audio_codec=output_audio_codec,
            output_audio_bitrate=output_audio_bitrate,
            min_buffer_size=min_buffer_size,
            max_chunk_length=max_chunk_length,
        )
        message = ConfigureConnection(data=data)
        self._send_model(message)

    def convert(self, text: str) -> None:
        """
        Send text to be converted to speech. Text length should be 1-2500 characters.
        Recommended: <500 characters for optimal streaming performance.
        Real-time endpoints perform better with longer character counts.

        :param text: Text to be synthesized (1-2500 characters, recommended <500)
        """
        data = SendTextData(text=text)
        message = SendText(data=data)
        self._send_model(message)

    def flush(self) -> None:
        """
        Forces the text buffer to process immediately, regardless of the min_buffer_size threshold.
        Use this when you need to process remaining text that hasn't reached the minimum buffer size.
        """
        message = FlushSignal()
        self._send_model(message)

    def ping(self) -> None:
        """
        Send ping signal to keep the WebSocket connection alive. The connection automatically
        closes after one minute of inactivity.
        """
        message = PingSignal()
        self._send_model(message)

    def recv(self) -> TextToSpeechStreamingSocketClientResponse:
        """
        Receive a single message from the websocket connection.

        Text frames are decoded as JSON before being parsed; any other frame is
        handed to the parser unchanged.

        :return: The received message parsed into one of the response types.
        """
        data = self._websocket.recv()
        data = json.loads(data) if isinstance(data, str) else data
        return parse_obj_as(TextToSpeechStreamingSocketClientResponse, data)  # type: ignore

    def _send(self, data: typing.Any) -> None:
        """
        Send a message to the websocket connection.

        :param data: The payload to send. A ``dict`` is serialized to a JSON string
            first; anything else is passed to the websocket unchanged.
        """
        if isinstance(data, dict):
            data = json.dumps(data)
        self._websocket.send(data)

    def _send_model(self, data: typing.Any) -> None:
        """
        Send a Pydantic model to the websocket connection.

        :param data: The model to send; it is converted with ``.dict()`` and then
            sent as JSON.
        """
        self._send(data.dict())
