o
    i                     @   s  U d Z ddlZddlZddlZddlmZmZ ddlmZ ddl	m
Z
mZmZmZmZmZmZ ddlZddlmZ ddlmZmZ ddlmZmZmZmZmZmZmZmZ dd	l m!Z! dd
l"m#Z# ddl$m%Z%m&Z&m'Z' ddl(m)Z)m*Z*m+Z+ ddl,m-Z-m.Z. ddl/m0Z0 zddl1m2Z3 ddl4m5Z5 W n  e6y Z7 ze8de7  e8d e9de7 dZ7[7ww G dd de:eZ;G dd de:eZ<G dd de:eZ=eddG dd dZ>e>ddddd d!de?d"d# e<D d$e>dddd%d&d'de?d(d# e=D d$e>dddd%d&d'de?d)d# e=D d$d*Z@ee:e>f eAd+< d,e:d-ee: fd.d/ZBd0e-d-ee: fd1d2ZCeG d3d4 d4e&ZDeG d5d6 d6eDZEG d7d8 d8e+ZFG d9d: d:e)ZGdS );av  Sarvam AI text-to-speech service implementation.

This module provides TTS services using Sarvam AI's API with support for multiple
Indian languages and two model variants:

**Model Variants:**

- **bulbul:v2** (default): Standard TTS model
    - Supports: pitch, loudness, pace (0.3-3.0)
    - Default sample rate: 22050 Hz
    - Speakers: anushka (default), abhilash, manisha, vidya, arya, karun, hitesh

- **bulbul:v3-beta**: Advanced TTS model with temperature control
    - Does NOT support: pitch, loudness
    - Supports: pace (0.5-2.0), temperature (0.01-1.0)
    - Default sample rate: 24000 Hz
    - Preprocessing is always enabled
    - Speakers: aditya (default), ritu, priya, neha, rahul, pooja, rohan, simran,
      kavya, amit, dev, ishita, shreya, ratan, varun, manan, sumit, roopa, kabir,
      aayan, shubh, ashutosh, advait, amelia, sophia

- **bulbul:v3**: Advanced TTS model with temperature control
    - Does NOT support: pitch, loudness
    - Supports: pace (0.5-2.0), temperature (0.01-1.0)
    - Default sample rate: 24000 Hz
    - Preprocessing is always enabled
    - Speakers: aditya (default), ritu, priya, neha, rahul, pooja, rohan, simran,
      kavya, amit, dev, ishita, shreya, ratan, varun, manan, sumit, roopa, kabir,
      aayan, shubh, ashutosh, advait, amelia, sophia

See https://docs.sarvam.ai/api-reference-docs/text-to-speech/stream for full API details.
    N)	dataclassfield)Enum)AnyAsyncGeneratorClassVarDictListOptionalTuple)logger)	BaseModelField)CancelFrameEndFrame
ErrorFrameFrameLLMFullResponseEndFrame
StartFrameTTSAudioRawFrameTTSStoppedFrame)FrameDirection)sdk_headers)	NOT_GIVENTTSSettings	_NotGiven)InterruptibleTTSServiceTextAggregationMode
TTSService)Languageresolve_language)
traced_tts)connect)StatezException: zEIn order to use Sarvam, you need to `pip install pipecat-ai[sarvam]`.zMissing module: c                   @   s   e Zd ZdZdZdZdZdS )SarvamTTSModela  Available Sarvam TTS models.

    Attributes:
        BULBUL_V2: Standard TTS model with pitch/loudness control.
            - Supports pitch, loudness, pace (0.3-3.0)
            - Default sample rate: 22050 Hz
        BULBUL_V3_BETA: Advanced model with temperature control.
            - Does NOT support pitch/loudness
            - Pace range: 0.5-2.0
            - Supports temperature parameter
            - Default sample rate: 24000 Hz
            - Preprocessing is always enabled
    	bulbul:v2bulbul:v3-beta	bulbul:v3N)__name__
__module____qualname____doc__	BULBUL_V2BULBUL_V3_BETA	BULBUL_V3 r/   r/   O/home/ubuntu/.local/lib/python3.10/site-packages/pipecat/services/sarvam/tts.pyr$   M   s
    r$   c                   @   s,   e Zd ZdZdZdZdZdZdZdZ	dZ
d	S )
SarvamTTSSpeakerV2zAvailable speakers for bulbul:v2 model.

    Female voices: anushka, manisha, vidya, arya
    Male voices: abhilash, karun, hitesh
    anushkaabhilashmanishavidyaaryakarunhiteshN)r(   r)   r*   r+   ANUSHKAABHILASHMANISHAVIDYAARYAKARUNHITESHr/   r/   r/   r0   r1   a   s    r1   c                   @   st   e Zd ZdZdZdZdZdZdZdZ	dZ
d	Zd
ZdZdZdZdZdZdZdZdZdZdZdZdZdZdZdZdZdS )SarvamTTSSpeakerV3zyAvailable speakers for bulbul:v3-beta model.

    Includes a wider variety of voices with different characteristics.
    adityaritupriyaneharahulpoojarohansimrankavyaamitdevishitashreyaratanvarunmanansumitroopakabiraayanshubhashutoshadvaitameliasophiaN)r(   r)   r*   r+   ADITYARITUPRIYANEHARAHULPOOJAROHANSIMRANKAVYAAMITDEVISHITASHREYARATANVARUNMANANSUMITROOPAKABIRAAYANSHUBHASHUTOSHADVAITAMELIASOPHIAr/   r/   r/   r0   r@   q   s6    r@   T)frozenc                   @   sb   e Zd ZU dZeed< eed< eed< eed< eed< ee	e	f ed< eed< eed	f ed
< dS )TTSModelConfigab  Immutable configuration for a Sarvam TTS model.

    Attributes:
        supports_pitch: Whether the model accepts pitch parameter.
        supports_loudness: Whether the model accepts loudness parameter.
        supports_temperature: Whether the model accepts temperature parameter.
        default_sample_rate: Default audio sample rate in Hz.
        default_speaker: Default speaker voice ID.
        pace_range: Valid range for pace parameter (min, max).
        preprocessing_always_enabled: Whether preprocessing is always enabled.
        speakers: Tuple of available speaker names for this model.
    supports_pitchsupports_loudnesssupports_temperaturedefault_sample_ratedefault_speaker
pace_rangepreprocessing_always_enabled.speakersN)
r(   r)   r*   r+   bool__annotations__intstrr   floatr/   r/   r/   r0   rt      s   
 rt   Fi"V  r2   )333333?      @c                 c       | ]}|j V  qd S Nvalue.0sr/   r/   r0   	<genexpr>       r   )ru   rv   rw   rx   ry   rz   r{   r|   i]  rU   )g      ?g       @c                 c   r   r   r   r   r/   r/   r0   r      r   c                 c   r   r   r   r   r/   r/   r0   r      r   )r%   r&   r'   TTS_MODEL_CONFIGSmodelreturnc                 C   s$   | t v rtt |  jS tt d jS )zGet the list of available speakers for a given model.

    Args:
        model: The model name (e.g., "bulbul:v2" or "bulbul:v3-beta").

    Returns:
        List of speaker names available for the model.
    r%   )r   listr|   )r   r/   r/   r0   get_speakers_for_model   s   	r   languagec                 C   s   i t jdt jdt jdt jdt jdt jdt jdt jdt j	dt j
dt jdt jdt jdt jdt jdt jdt jd	t jd	t jd
t jd
t jdt jdi}t| |ddS )zConvert Pipecat Language enum to Sarvam AI language codes.

    Args:
        language: The Language enum value to convert.

    Returns:
        The corresponding Sarvam AI language code, or None if not supported.
    zbn-INen-INzgu-INzhi-INzkn-INzml-INzmr-INzod-INzpa-INzta-INzte-INF)use_base_code)r   BNBN_INENEN_INGUGU_INHIHI_INKNKN_INMLML_INMRMR_INOROR_INPAPA_INTATA_INTETE_INr    )r   LANGUAGE_MAPr/   r/   r0   language_to_sarvam_language   sT   		
r   c                   @   s   e Zd ZU dZedd dZedB eB ed< edd dZ	e
dB eB ed< ed	d dZe
dB eB ed
< edd dZe
dB eB ed< edd dZe
dB eB ed< dS )SarvamHttpTTSSettingsa  Settings for SarvamHttpTTSService.

    Parameters:
        enable_preprocessing: Whether to enable text preprocessing. Defaults to False.
            **Note:** Always enabled for bulbul:v3-beta (cannot be disabled).
        pace: Speech pace multiplier. Defaults to 1.0.
            - bulbul:v2: Range 0.3 to 3.0
            - bulbul:v3-beta: Range 0.5 to 2.0
        pitch: Voice pitch adjustment (-0.75 to 0.75). Defaults to 0.0.
            **Note:** Only supported for bulbul:v2. Ignored for v3 models.
        loudness: Volume multiplier (0.3 to 3.0). Defaults to 1.0.
            **Note:** Only supported for bulbul:v2. Ignored for v3 models.
        temperature: Controls output randomness for bulbul:v3-beta (0.01 to 1.0).
            Lower values = more deterministic, higher = more random. Defaults to 0.6.
            **Note:** Only supported for bulbul:v3-beta. Ignored for v2.
    c                   C      t S r   r   r/   r/   r/   r0   <lambda>      zSarvamHttpTTSSettings.<lambda>default_factoryNenable_preprocessingc                   C   r   r   r   r/   r/   r/   r0   r     r   pacec                   C   r   r   r   r/   r/   r/   r0   r     r   pitchc                   C   r   r   r   r/   r/   r/   r0   r     r   loudnessc                   C   r   r   r   r/   r/   r/   r0   r     r   temperature)r(   r)   r*   r+   r   r   r}   r   r~   r   r   r   r   r   r/   r/   r/   r0   r     s   
 "r   c                   @   sj   e Zd ZU dZddiZeeeef  ed< e	dd dZ
edB eB ed	< e	d
d dZedB eB ed< dS )SarvamTTSSettingsa  Settings for SarvamTTSService.

    Extends :class:`SarvamHttpTTSService.Settings` with WebSocket-specific buffering parameters.

    Parameters:
        min_buffer_size: Minimum characters to buffer before generating audio.
            Lower values reduce latency but may affect quality. Defaults to 50.
        max_chunk_length: Maximum characters processed in a single chunk.
            Controls memory usage and processing efficiency. Defaults to 150.
    target_language_coder   _aliasesc                   C   r   r   r   r/   r/   r/   r0   r   *  r   zSarvamTTSSettings.<lambda>r   Nmin_buffer_sizec                   C   r   r   r   r/   r/   r/   r0   r   +  r   max_chunk_length)r(   r)   r*   r+   r   r   r   r   r~   r   r   r   r   r   r/   r/   r/   r0   r     s
   
 "r   c                       s   e Zd ZU dZeZeed< G dd deZdddddddde	d	e
jd
ee	 dee	 de	dee dee dee f fddZdefddZdedee	 fddZdef fddZede	de	deedf fddZ  ZS )SarvamHttpTTSServicea  Text-to-Speech service using Sarvam AI's API.

    Converts text to speech using Sarvam AI's TTS models with support for multiple
    Indian languages. Provides control over voice characteristics.

    **Model Differences:**

    - **bulbul:v2** (default):
        - Supports: pitch (-0.75 to 0.75), loudness (0.3 to 3.0), pace (0.3 to 3.0)
        - Default sample rate: 22050 Hz
        - Speakers: anushka, abhilash, manisha, vidya, arya, karun, hitesh

    - **bulbul:v3-beta**:
        - Does NOT support: pitch, loudness (will be ignored)
        - Supports: pace (0.5 to 2.0), temperature (0.01 to 1.0)
        - Default sample rate: 24000 Hz
        - Preprocessing is always enabled
        - Speakers: aditya, ritu, priya, neha, rahul, pooja, rohan, simran, kavya,
          amit, dev, ishita, shreya, ratan, varun, manan, sumit, roopa, kabir,
          aayan, shubh, ashutosh, advait, amelia, sophia

    Example::

        # Using bulbul:v2 (default)
        tts = SarvamHttpTTSService(
            api_key="your-api-key",
            aiohttp_session=session,
            settings=SarvamHttpTTSService.Settings(
                voice="anushka",
                model="bulbul:v2",
                language=Language.HI,
                pitch=0.1,
                pace=1.2,
                loudness=1.5,
            ),
        )

        # Using bulbul:v3-beta with temperature control
        tts_v3 = SarvamHttpTTSService(
            api_key="your-api-key",
            aiohttp_session=session,
            settings=SarvamHttpTTSService.Settings(
                voice="aditya",  # Use v3 speaker
                model="bulbul:v3-beta",
                language=Language.HI,
                pace=1.2,  # Range: 0.5-2.0 for v3
                temperature=0.8,
            ),
        )
    	_settingsc                   @   s   e Zd ZU dZejZee ed< e	dddddZ
ee ed< e	d	d
dddZee ed< e	d	d
dddZee ed< e	dddZee ed< e	ddd	ddZee ed< dS )z SarvamHttpTTSService.InputParamsa  Input parameters for Sarvam TTS configuration.

        .. deprecated:: 0.0.105
            Use ``SarvamHttpTTSService.Settings`` directly via the ``settings`` parameter instead.

        Parameters:
            language: Language for synthesis. Defaults to English (India).
            pitch: Voice pitch adjustment (-0.75 to 0.75). Defaults to 0.0.
                **Note:** Only supported for bulbul:v2. Ignored for v3 models.
            pace: Speech pace multiplier. Defaults to 1.0.
                - bulbul:v2: Range 0.3 to 3.0
                - bulbul:v3-beta: Range 0.5 to 2.0
            loudness: Volume multiplier (0.3 to 3.0). Defaults to 1.0.
                **Note:** Only supported for bulbul:v2. Ignored for v3 models.
            enable_preprocessing: Whether to enable text preprocessing. Defaults to False.
                **Note:** Always enabled for bulbul:v3-beta (cannot be disabled).
            temperature: Controls output randomness for bulbul:v3-beta (0.01 to 1.0).
                Lower values = more deterministic, higher = more random. Defaults to 0.6.
                **Note:** Only supported for bulbul:v3-beta. Ignored for v2.
        r                       ?+Voice pitch adjustment. Only for bulbul:v2.defaultgeledescriptionr         ?r   r   &Speech pace. v2: 0.3-3.0, v3: 0.5-2.0.r   &Volume multiplier. Only for bulbul:v2.r   Fz<Enable text preprocessing. Always enabled for v3-beta model.r   r   r   333333?{Gz?;Output randomness for bulbul:v3-beta only. Range: 0.01-1.0.r   N)r(   r)   r*   r+   r   r   r   r
   r~   r   r   r   r   r   r   r}   r   r/   r/   r/   r0   InputParamse  s>   
 r   Nzhttps://api.sarvam.ai)voice_idr   base_urlsample_rateparamssettingsapi_keyaiohttp_sessionr   r   r   r   r   r   c             
      sp  | j ddddddddd}
|dur| dd ||
_|dur'| d	d
 ||
_|durh| d |sh|jdur;|j|
_|jdurD|j|
_|jdurM|j|
_|jdurV|j|
_|jdur_|j|
_|j	durh|j	|
_	|durq|

| |
j}|tvrdtt }td| d| dt| | _|du r| jj}|du r|du s|jtu r| jj|
_|
j}| jj\}}|dur||k s||krtd| d| d| d t|t|||
_| jjrd|
_| jjs|
jdvrtd|  d|
_| jjs	|
jdvr	td|  d|
_| jjs|
j	dvrtd|  d|
_	t jd|dd|
d|	 || _|| _ || _!dS )ad  Initialize the Sarvam TTS service.

        Args:
            api_key: Sarvam AI API subscription key.
            aiohttp_session: Shared aiohttp session for making requests.
            voice_id: Speaker voice ID. If None, uses model-appropriate default.

                .. deprecated:: 0.0.105
                    Use ``settings=SarvamHttpTTSService.Settings(voice=...)`` instead.

            model: TTS model to use. Options:
                - "bulbul:v2" (default): Standard model with pitch/loudness support
                - "bulbul:v3-beta": Advanced model with temperature control

                .. deprecated:: 0.0.105
                    Use ``settings=SarvamHttpTTSService.Settings(model=...)`` instead.

            base_url: Sarvam AI API base URL. Defaults to "https://api.sarvam.ai".
            sample_rate: Audio sample rate in Hz (8000, 16000, 22050, 24000).
                If None, uses model-specific default.
            params: Additional voice and preprocessing parameters. If None, uses defaults.

                .. deprecated:: 0.0.105
                    Use ``settings=SarvamHttpTTSService.Settings(...)`` instead.

            settings: Runtime-updatable settings. When provided alongside deprecated
                parameters, ``settings`` values take precedence.
            **kwargs: Additional arguments passed to parent TTSService.
        r%   r2   r   Fr   N)r   voicer   r   r   r   r   r   r   r   r   r   , Unsupported model ''. Allowed values: .Pace  is outside model range (-). Clamping.TNr   pitch parameter is ignored for Nr   "loudness parameter is ignored for Nr   %temperature parameter is ignored for )r   push_stop_framespush_start_framer   r/   )"Settings"_warn_init_param_moved_to_settingsr   r   r   r   r   r   r   r   apply_updater   joinsortedkeys
ValueError_configrx   r   ry   rz   r   warningmaxminr{   ru   rv   rw   super__init___api_key	_base_url_session)selfr   r   r   r   r   r   r   r   kwargsdefault_settingsresolved_modelallowedr   pace_minpace_max	__class__r/   r0   r     s   +










zSarvamHttpTTSService.__init__r   c                 C      dS zCheck if this service can generate processing metrics.

        Returns:
            True, as Sarvam service supports metrics generation.
        Tr/   r   r/   r/   r0   can_generate_metrics!     z)SarvamHttpTTSService.can_generate_metricsr   c                 C      t |S zConvert a Language enum to Sarvam AI language format.

        Args:
            language: The language to convert.

        Returns:
            The Sarvam AI-specific language code, or None if not supported.
        r   r   r   r/   r/   r0   language_to_service_language)     	z1SarvamHttpTTSService.language_to_service_languageframec                    s   t  |I dH  dS z~Start the Sarvam TTS service.

        Args:
            frame: The start frame containing initialization parameters.
        N)r   startr   r  r   r/   r0   r  4  s   zSarvamHttpTTSService.starttext
context_idc              
   C  s  t |  d| d z;z|| jj| jj| j| jj| jj| jjdur)| jjndd}| j	j
r?| jjdur;| jjnd|d< | j	jrQ| jjdurM| jjnd|d< | j	jrc| jjdur_| jjnd	|d
< | jddt }| j d}| jj|||d4 I dH <}|jdkr| I dH }td| dV  	 W d  I dH  W W |  I dH  dS | I dH }W d  I dH  n1 I dH sw   Y  | |I dH  d|vs|d stddV  W W |  I dH  dS |d d }	t|	}
t|
dkr
|
dr
t d |
dd }
t|
| jd|d}|V  W n t y4 } ztd| |dV  W Y d}~nd}~ww W |  I dH  dS W |  I dH  dS |  I dH  w )a  Generate speech from text using Sarvam AI's API.

        Args:
            text: The text to synthesize into speech.
            context_id: The context ID for tracking audio frames.

        Yields:
            Frame: Audio frames containing the synthesized speech.
        z: Generating TTS []Nr   )r  r   speakerr   r   r   r   r   r   r   r   r   zapplication/json)api-subscription-keyzContent-Typez/text-to-speech)jsonheaders   zSarvam API error: erroraudioszNo audio data receivedr   ,   s   RIFFz+Stripping WAV header from Sarvam audio data   )audior   num_channelsr  zError generating TTS: )r  	exception)!r   debugr   r   r   r   r   r   r   r   ru   r   rv   r   rw   r   r   r   r   r   poststatusr  r   stop_ttfb_metricsr  start_tts_usage_metricsbase64	b64decodelen
startswithr   	Exception)r   r  r  payloadr  urlresponse
error_textresponse_database64_audio
audio_datar  er/   r/   r0   run_tts<  sv   
$(


 "zSarvamHttpTTSService.run_tts)r(   r)   r*   r+   r   r   r~   r   r   r   aiohttpClientSessionr
   r   r   r}   r  r   r  r   r  r!   r   r   r3  __classcell__r/   r/   r   r0   r   .  sD   
 39	
 	(r   c                       s  e Zd ZU dZeZeed< G dd deZdddddddddde	d	e
e	 d
e
e	 de	de
e de
e de
e de
e de
e f fddZdefddZdede
e	 fddZdef fddZdef fddZdef fddZdBd e
e	 fd!d"Zejfded#ef fd$d%Zded#ef fd&d'Zd(edee	e f f fd)d*Z! fd+d,Z" fd-d.Z#d/d0 Z$d1d2 Z%d3d4 Z&d5d6 Z'd7d8 Z(d9d: Z)d;d< Z*d=e	fd>d?Z+e,d=e	d e	de-edf fd@dAZ.  Z/S )CSarvamTTSServiceaJ  WebSocket-based text-to-speech service using Sarvam AI.

    Provides streaming TTS with real-time audio generation for multiple Indian languages.
    Uses WebSocket for low-latency streaming audio synthesis.

    **Model Differences:**

    - **bulbul:v2** (default):
        - Supports: pitch (-0.75 to 0.75), loudness (0.3 to 3.0), pace (0.3 to 3.0)
        - Default sample rate: 22050 Hz
        - Speakers: anushka, abhilash, manisha, vidya, arya, karun, hitesh

    - **bulbul:v3-beta** / **bulbul:v3**:
        - Does NOT support: pitch, loudness (will be ignored)
        - Supports: pace (0.5 to 2.0), temperature (0.01 to 1.0)
        - Default sample rate: 24000 Hz
        - Preprocessing is always enabled
        - Speakers: aditya, ritu, priya, neha, rahul, pooja, rohan, simran, kavya,
          amit, dev, ishita, shreya, ratan, varun, manan, sumit, roopa, kabir,
          aayan, shubh, ashutosh, advait, amelia, sophia

    **WebSocket Protocol:**
    The service uses a WebSocket connection for real-time streaming. Messages include:
    - config: Initial configuration with voice settings
    - text: Text chunks for synthesis
    - flush: Signal to process remaining buffered text
    - ping: Keepalive signal

    Example::

        # Using bulbul:v2 (default)
        tts = SarvamTTSService(
            api_key="your-api-key",
            settings=SarvamTTSService.Settings(
                voice="anushka",
                model="bulbul:v2",
                language=Language.HI,
                pitch=0.1,
                pace=1.2,
                loudness=1.5,
            ),
        )

        # Using bulbul:v3-beta with temperature control
        tts_v3 = SarvamTTSService(
            api_key="your-api-key",
            settings=SarvamTTSService.Settings(
                voice="aditya",  # Use v3 speaker
                model="bulbul:v3-beta",
                language=Language.HI,
                pace=1.2,  # Range: 0.5-2.0 for v3
                temperature=0.8,
            ),
        )

    See https://docs.sarvam.ai/api-reference-docs/text-to-speech/stream for API details.
    r   c                   @   s  e Zd ZU dZedddddZee ed< edd	d
ddZ	ee ed< edd	d
ddZ
ee ed< edddZee ed< edddZee ed< edddZee ed< edddZee ed< edddZee ed< ejZee ed< ed d!dd"dZee ed#< d$S )%zSarvamTTSService.InputParamsae  Configuration parameters for Sarvam TTS WebSocket service.

        .. deprecated:: 0.0.105
            Use ``SarvamTTSService.Settings`` directly via the ``settings`` parameter instead.

        Parameters:
            pitch: Voice pitch adjustment (-0.75 to 0.75). Defaults to 0.0.
                **Note:** Only supported for bulbul:v2. Ignored for v3 models.
            pace: Speech pace multiplier. Defaults to 1.0.
                - bulbul:v2: Range 0.3 to 3.0
                - bulbul:v3-beta: Range 0.5 to 2.0
            loudness: Volume multiplier (0.3 to 3.0). Defaults to 1.0.
                **Note:** Only supported for bulbul:v2. Ignored for v3 models.
            enable_preprocessing: Enable text preprocessing. Defaults to False.
                **Note:** Always enabled for bulbul:v3-beta.
            min_buffer_size: Minimum characters to buffer before generating audio.
                Lower values reduce latency but may affect quality. Defaults to 50.
            max_chunk_length: Maximum characters processed in a single chunk.
                Controls memory usage and processing efficiency. Defaults to 150.
            output_audio_codec: Audio codec format. Options: linear16, mulaw, alaw,
                opus, flac, aac, wav, mp3. Defaults to "linear16".
            output_audio_bitrate: Audio bitrate (32k, 64k, 96k, 128k, 192k).
                Defaults to "128k".
            language: Target language for synthesis. Supports Indian languages.
            temperature: Controls output randomness for bulbul:v3-beta (0.01 to 1.0).
                Lower = more deterministic, higher = more random. Defaults to 0.6.
                **Note:** Only supported for bulbul:v3-beta. Ignored for v2.

        **Speakers by Model:**

        bulbul:v2:
            - Female: anushka (default), manisha, vidya, arya
            - Male: abhilash, karun, hitesh

        bulbul:v3-beta:
            - aditya (default), ritu, priya, neha, rahul, pooja, rohan, simran,
              kavya, amit, dev, ishita, shreya, ratan, varun, manan, sumit,
              roopa, kabir, aayan, shubh, ashutosh, advait, amelia, sophia
        r   r   r   r   r   r   r   r   r   r   r   r   r   Fz8Enable text preprocessing. Always enabled for v3 models.r   r   2   z3Minimum characters to buffer before TTS processing.r      z&Maximum length for sentence splitting.r   linear16z>Audio codec: linear16, mulaw, alaw, opus, flac, aac, wav, mp3.output_audio_codec128kz)Audio bitrate: 32k, 64k, 96k, 128k, 192k.output_audio_bitrater   r   r   r   r   N)r(   r)   r*   r+   r   r   r
   r   r~   r   r   r   r}   r   r   r   r;  r   r=  r   r   r   r   r/   r/   r/   r0   r     s^   
 (r   Nz%wss://api.sarvam.ai/text-to-speech/ws)r   r   r,  aggregate_sentencestext_aggregation_moder   r   r   r   r   r   r,  r>  r?  r   r   r   c       	            s  | j ddddddddddd	
}|dur| d
d
 ||_|dur)| dd ||_d}d}|dur| d |	s|jdurA|j|_|jdurJ|j|_|jdurS|j|_|jdur\|j|_|jdurd|j}|j	durl|j	}|j
duru|j
|_
|jdur~|j|_|jdur|j|_|jdur|j|_|	dur||	 |j}|tvrdtt }td| d| dt| | _|du r| jj}|du r|	du s|	jtu r| jj|_|j
}| jj\}}|dur||k s||krtd| d| d| d t|t|||_
| jjr
d|_| jjs |jdvr td|  d|_| jjs6|jdvr6td|  d|_| jj sL|jdvrLtd|  d|_t! j"d!||dddd||d|
 t#|| _$|| _%|| _&| d | | _'|| _(d| _)d| _*dS )"a{  Initialize the Sarvam TTS service with voice and transport configuration.

        Args:
            api_key: Sarvam API key for authenticating TTS requests.
            model: TTS model to use. Options:
                - "bulbul:v2" (default): Standard model with pitch/loudness support
                - "bulbul:v3-beta": Advanced model with temperature control

                .. deprecated:: 0.0.105
                    Use ``settings=SarvamTTSService.Settings(model=...)`` instead.

            voice_id: Speaker voice ID. If None, uses model-appropriate default.

                .. deprecated:: 0.0.105
                    Use ``settings=SarvamTTSService.Settings(voice=...)`` instead.

            url: WebSocket URL for the TTS backend (default production URL).
            aggregate_sentences: Deprecated. Use text_aggregation_mode instead.

                .. deprecated:: 0.0.104
                    Use ``text_aggregation_mode`` instead.

            text_aggregation_mode: How to aggregate text before synthesis.
            sample_rate: Output audio sample rate in Hz (8000, 16000, 22050, 24000).
                If None, uses model-specific default.
            params: Optional input parameters to override defaults.

                .. deprecated:: 0.0.105
                    Use ``settings=SarvamTTSService.Settings(...)`` instead.

            settings: Runtime-updatable settings. When provided alongside deprecated
                parameters, ``settings`` values take precedence.
            **kwargs: Arguments forwarded to InterruptibleTTSService.

        See https://docs.sarvam.ai/api-reference-docs/text-to-speech/stream
        r%   r2   r   Fr8  r9  r   N)
r   r   r   r   r   r   r   r   r   r   r   r   r   r:  r<  r   r   r   r   r   r   r   r   r   Tr   r   r   r   r   r   )r>  r?  push_text_framespause_frame_processingr   r   r   r   z?model=r/   )+r   r   r   r   r   r   r   r   r;  r=  r   r   r   r   r   r   r   r   r   r   r   rx   r   ry   rz   r   r   r   r   r{   ru   rv   rw   r   r   r   _speech_sample_rate_output_audio_codec_output_audio_bitrate_websocket_urlr   _receive_task_keepalive_task)r   r   r   r   r,  r>  r?  r   r   r   r   r   r;  r=  r   r   r   r   r   r   r/   r0   r   %  s   3














	

zSarvamTTSService.__init__r   c                 C   r  r  r/   r  r/   r/   r0   r    r  z%SarvamTTSService.can_generate_metricsr   c                 C   r  r  r	  r
  r/   r/   r0   r    r  z-SarvamTTSService.language_to_service_languager  c                    s2   t  |I dH  t| j| _|  I dH  dS r  )r   r  r   r   rB  _connectr  r   r/   r0   r    s   zSarvamTTSService.startc                    &   t  |I dH  |  I dH  dS )zVStop the Sarvam TTS service.

        Args:
            frame: The end frame.
        N)r   stop_disconnectr  r   r/   r0   rJ       zSarvamTTSService.stopc                    rI  )z[Cancel the Sarvam TTS service.

        Args:
            frame: The cancel frame.
        N)r   cancelrK  r  r   r/   r0   rM    rL  zSarvamTTSService.cancelr  c              
      sv   z| j rddi}| j t|I dH  W dS W dS  ty: } z| jd| |dI dH  W Y d}~dS d}~ww )z;Flush any pending audio synthesis by sending flush command.typeflushNzError sending flush to Sarvam: 	error_msgr   )
_websocketsendr  dumpsr*  
push_error)r   r  msgr2  r/   r/   r0   flush_audio  s   (zSarvamTTSService.flush_audio	directionc                    s   t  ||I dH  dS )zPush a frame downstream with special handling for stop conditions.

        Args:
            frame: The frame to push.
            direction: The direction to push the frame.
        N)r   
push_framer   r  rX  r   r/   r0   rY  
  s   zSarvamTTSService.push_framec                    s:   t  ||I dH  t|ttfr|  I dH  dS dS )zCProcess a frame and flush audio if it's the end of a full response.N)r   process_frame
isinstancer   r   rW  rZ  r   r/   r0   r[    s
   zSarvamTTSService.process_framedeltac                    s*   t  |I dH }|r|  I dH  |S )z:Apply a settings delta and resend config if voice changed.N)r   _update_settings_send_config)r   r]  changedr   r/   r0   r^    s
   z!SarvamTTSService._update_settingsc                    sh   t   I dH  |  I dH  | jr | js | | | j| _| jr0| js2| | 	 | _dS dS dS )z7Connect to Sarvam WebSocket and start background tasks.N)
r   rH  _connect_websocketrR  rF  create_task_receive_task_handler_report_errorrG  _keepalive_task_handlerr  r   r/   r0   rH  $  s   
zSarvamTTSService._connectc                    s`   t   I dH  | jr| | jI dH  d| _| jr'| | jI dH  d| _|  I dH  dS )z4Disconnect from Sarvam WebSocket and clean up tasks.N)r   rK  rF  cancel_taskrG  _disconnect_websocketr  r   r/   r0   rK  2  s   zSarvamTTSService._disconnectc              
      s   z7| j r| j jtju rW dS d| jit }t| j|dI dH | _ t	d | 
 I dH  | dI dH  W dS  tye } z!| jd| |dI dH  d| _ | d| I dH  W Y d}~dS d}~ww )	z-Establish WebSocket connection to Sarvam API.Nr  )additional_headersz!Connected to Sarvam TTS Websocketon_connectedz*Error connecting to Sarvam TTS Websocket: rP  on_connection_error)rR  stater#   OPENr   r   websocket_connectrE  r   r!  r_  _call_event_handlerr*  rU  )r   ws_additional_headersr2  r/   r/   r0   ra  @  s.   

"z#SarvamTTSService._connect_websocketc                    s  | j std| jj| jj| j| jj| jj| jj| j	| j
| jj| jjd
}| jjdur2| jj|d< | jjdur>| jj|d< | jjdurJ| jj|d< td|  d|d	}z| j t|I dH  td
 W dS  ty } z| jd| |dI dH   d}~ww )z#Send initial configuration message.zWebSocket not connected)
r   r  speech_sample_rater   r   r   r;  r=  r   r   Nr   r   r   zConfig being sent is configrN  datazConfiguration sent successfullyUnknown error occurred: rP  )rR  r*  r   r   r   rB  r   r   r   rC  rD  r   r   r   r   r   r   r!  rS  r  rT  rU  )r   config_dataconfig_messager2  r/   r/   r0   r_  Z  s<   
zSarvamTTSService._send_configc              
      s   zUz|   I dH  | jrtd | j I dH  W n ty: } z| jd| |dI dH  W Y d}~nd}~ww W d| _| dI dH  dS W d| _| dI dH  dS d| _| dI dH  w )z.Close WebSocket connection and clean up state.NzDisconnecting from SarvamzError closing websocket: rP  on_disconnected)stop_all_metricsrR  r   r!  closer*  rU  rn  )r   r2  r/   r/   r0   rg  {  s&   
&z&SarvamTTSService._disconnect_websocketc                 C   s   | j r| j S td)NzWebsocket not connected)rR  r*  r  r/   r/   r0   _get_websocket  s   zSarvamTTSService._get_websocketc                    s   |   2 zs3 dH W }t|trxt|}|ddkr@|  I dH  t|d d }t	|| j
d|  d}| |I dH  q|ddkrx|d d }| jd	| d
I dH  d| v sed| v rjtd | td	| dI dH  q6 dS )z3Receive and process messages from Sarvam WebSocket.NrN  r  rs  r  r  r  messagezTTS Error: )rQ  ztoo longtimeoutz5Connection timeout detected, service may need restartr  )rz  r\  r   r  loadsgetr$  r&  r'  r   r   get_active_audio_context_idrY  rU  lowerr   r   r   )r   r|  rV  r  r  rQ  r/   r/   r0   _receive_messages  s&   


z"SarvamTTSService._receive_messagesc                    s(   d}	 t |I dH  |  I dH  q)z;Handle keepalive messages to maintain WebSocket connection.   TN)asynciosleep_send_keepalive)r   KEEPALIVE_SLEEPr/   r/   r0   re    s   z(SarvamTTSService._keepalive_task_handlerc                    sB   | j r| j jtjkrddi}| j t|I dH  dS dS dS )z.Send keepalive message to maintain connection.rN  pingN)rR  rk  r#   rl  rS  r  rT  )r   rV  r/   r/   r0   r    s
   z SarvamTTSService._send_keepaliver  c                    sN   | j r | j jtjkr dd|id}| j t|I dH  dS td dS )z,Send text to Sarvam WebSocket for synthesis.r  rr  Nz%WebSocket not ready, cannot send text)	rR  rk  r#   rl  rS  r  rT  r   r   )r   r  rV  r/   r/   r0   
_send_text  s
   zSarvamTTSService._send_textc              
   C  s  t d| d z[| jr| jjtju r|  I dH  z| |I dH  | |I dH  W n1 t	y_ } z%t
d| dV  t|dV  |  I dH  |  I dH  W Y d}~W dS d}~ww dV  W dS  t	y } zt
d| dV  W Y d}~dS d}~ww )a  Generate speech audio frames from input text using Sarvam TTS.

        Sends text over WebSocket for synthesis and yields corresponding audio or status frames.

        Args:
            text: The text input to synthesize.
            context_id: The context ID for tracking audio frames.

        Yields:
            Frame objects including TTSStartedFrame, TTSAudioRawFrame(s, context_id=context_id), or TTSStoppedFrame.
        zGenerating TTS: [r  Nrt  r  r{  )r   r!  rR  rk  r#   CLOSEDrH  r  r%  r*  r   r   rK  )r   r  r  r2  r/   r/   r0   r3    s*    zSarvamTTSService.run_ttsr   )0r(   r)   r*   r+   r   r   r~   r   r   r   r
   r}   r   r   r   r  r   r  r   r  r   rJ  r   rM  rW  r   
DOWNSTREAMr   rY  r[  r   dictr   r^  rH  rK  ra  r_  rg  rz  r  re  r  r  r!   r   r3  r6  r/   r/   r   r0   r7    sl   
 :[	
 ,					!(r7  )Hr+   r  r&  r  dataclassesr   r   enumr   typingr   r   r   r   r	   r
   r   r4  logurur   pydanticr   r   pipecat.frames.framesr   r   r   r   r   r   r   r   "pipecat.processors.frame_processorr   pipecat.services.sarvam._sdkr   pipecat.services.settingsr   r   r   pipecat.services.tts_servicer   r   r   pipecat.transcriptions.languager   r    (pipecat.utils.tracing.service_decoratorsr!   websockets.asyncio.clientr"   rm  websockets.protocolr#   ModuleNotFoundErrorr2  r  r*  r   r$   r1   r@   rt   tupler   r~   r   r   r   r   r   r7  r/   r/   r/   r0   <module>   s   !$(

!

"%  d