o
    i                  
   @   s  d Z ddlZddlZddlZddlmZ dejd< ddlmZm	Z	 ddl
mZmZmZmZmZ ddlmZ dd	lmZ dd
lmZmZmZmZ ddlmZmZmZmZ ddlmZ ddl m!Z!m"Z" z ddl#m$Z$ ddl%m&Z& ddl'm(Z( ddl)m*Z* ddl+m,Z, W n  e-y Z. ze/de.  e/d e0de. dZ.[.ww de!dee1 fddZ2de!dee1 fddZ3eG dd deZ4eG dd deZ5e5Z6eG d d! d!eZ7G d"d# d#eZ8G d$d% d%eZ9G d&d' d'e9Z:G d(d) d)e9Z;dS )*al  Google Cloud Text-to-Speech service implementations.

This module provides integration with Google Cloud Text-to-Speech API,
offering both HTTP-based synthesis with SSML support and streaming synthesis
for real-time applications.

It also includes GeminiTTSService which uses Gemini's TTS-specific models
for natural voice control and multi-speaker conversations.
    N)
traced_ttsfalseGRPC_ENABLE_FORK_SUPPORT)	dataclassfield)AnyAsyncGeneratorListLiteralOptional)logger)	BaseModel)
ErrorFrameFrame
StartFrameTTSAudioRawFrame)	NOT_GIVENTTSSettings	_NotGivenis_given)
TTSService)Languageresolve_language)ClientOptions)default)GoogleAuthError)texttospeech_v1)service_accountzException: zIn order to use Google AI, you need to `pip install pipecat-ai[google]`. Also, set `GOOGLE_APPLICATION_CREDENTIALS` environment variable.zMissing module: languagereturnc                 C   s  i t jdt jdt jdt jdt jdt jdt jdt jdt j	dt j
dt jdt jdt jdt jdt jd	t jd	t jd
i t jdt jdt jdt jdt jdt jdt jdt jdt jdt jdt jdt jdt jdt jdt j dt j!dt j"di t j#dt j$dt j%dt j&dt j'dt j(dt j)dt j*dt j+dt j,dt j-dt j.dt j/dt j0dt j1dt j2dt j3di t j4dt j5dt j6dt j7dt j8dt j9d t j:d t j;d!t j<d!t j=d!t j>d"t j?d"t j@d#t jAd#t jBd$t jCd$t jDd%i t jEd%t jFd&t jGd&t jHd't jId't jJd(t jKd(t jLd)t jMd)t jNd*t jOd+t jPd+t jQd,t jRd,t jSd-t jTd-t jUd.t jVd.t jWd/t jXd/t jYd0t jZd0t j[d1t j\d1t j]d2t j^d2t j_d3t j`d3i}ta| |d4d5S )6a#  Convert a Language enum to Google TTS language code.

    Source:
    https://docs.cloud.google.com/text-to-speech/docs/chirp3-hd

    Args:
        language: The Language enum value to convert.

    Returns:
        The corresponding Google TTS language code, or None if not supported.
    zar-XAzbn-INbg-BGhr-HRcs-CZda-DKnl-NLznl-BEen-USen-AUen-GBen-INet-EEfi-FIfr-FRfr-CAde-DEel-GRgu-INhe-ILhi-INhu-HUid-IDit-ITja-JPkn-INko-KRlv-LVlt-LTml-INcmn-CNmr-INnb-NOpl-PLpt-BRro-ROru-RUsr-RSsk-SKsl-SIes-ESzes-USsw-KEsv-SEta-INte-INth-THtr-TRuk-UAzur-INvi-VNFuse_base_code)br   ARBNBN_INBGBG_BGHRHR_HRCSCS_CZDADA_DKNLNL_BENL_NLENEN_USEN_AUEN_GBEN_INETET_EEFIFI_FIFRFR_CAFR_FRDEDE_DEELEL_GRGUGU_INHEHE_ILHIHI_INHUHU_HUIDID_IDITIT_ITJAJA_JPKNKN_INKOKO_KRLVLV_LVLTLT_LTMLML_INZHZH_CNMRMR_INNONBNB_NOPLPL_PLPTPT_BRRORO_RORURU_RUSRSR_RSSKSK_SKSLSL_SIESES_ESES_USSWSW_KESVSV_SETATA_INTETE_INTHTH_THTRTR_TRUKUK_UAURUR_INVIVI_VNr   r   LANGUAGE_MAP r   O/home/ubuntu/.local/lib/python3.10/site-packages/pipecat/services/google/tts.pylanguage_to_google_tts_language>   st  
 !#$%'(*+-.0134679:<=?@BCEFHIKLNOQRTUWXY[\^_abdeghjkmnpqrtuwxz{}~  r   c                 C   sL  i t jdt jdt jdt jdt jdt jdt jdt jdt j	dt j
dt jdt jdt jdt jdt jdt jd	t jd	i t jd
t jd
t jdt jdt jdt jdt jdt jdt jdt jdt jdt jdt jdt jdt j dt j!dt j"di t j#dt j$dt j%dt j&dt j'dt j(dt j)dt j*dt j+dt j,dt j-dt j.dt j/dt j0dt j1dt j2dt j3di t j4dt j5dt j6dt j7dt j8dt j9d t j:d t j;d!t j<d!t j=d"t j>d"t j?d#t j@d#t jAd$t jBd$t jCd%t jDd%i t jEd&t jFd&t jGd't jHd't jId(t jJd(t jKd)t jLd)t jMd*t jNd*t jOd+t jPd+t jQd,t jRd,t jSd-t jTd-t jUd.i t jVd.t jWd/t jXd/t jYd0t jZd0t j[d1t j\d1t j]d2t j^d2t j_d3t j`d3t jad4t jbd4t jcd5t jdd5t jed6t jfd6i t jgd7t jhd7t jid8t jjd8t jkd9t jld9t jmd:t jnd:t jod;t jpd;t jqd<t jrd<t jsd<t jtd=t jud=t jvd>t jwd>i t jxd?t jyd?t jzd@t j{d@t j|dAt j}dAt j~dBt jdBt jdCt jdDt jdDt jdEt jdEt jdFt jdFt jdGt jdGi t jdHt jdHt jdIt jdIt jdJt jdJt jdKt jdKt jdLt jdLt jdMt jdNt jdOt jdOt jdPt jdPt jdQt jdQt jdRt jdRt jdSt jdSt jdTt jdTt jdUt jdUt jdVt jdVt jdWt jdWi}t| |dXdYS )Za8  Convert a Language enum to Gemini TTS language code.

    Source:
    https://docs.cloud.google.com/text-to-speech/docs/gemini-tts#available_languages

    Args:
        language: The Language enum value to convert.

    Returns:
        The corresponding Gemini TTS language code, or None if not supported.
    zaf-ZAzsq-ALzam-ETzar-EGzar-001zhy-AMzaz-AZzeu-ESzbe-BYzbn-BDr    zmy-MMzca-ESzceb-PHr;   zcmn-TWr!   r"   r#   r$   r%   r&   r'   r(   r)   zfil-PHr*   r+   r,   zgl-ESzka-GEr-   r.   r/   zht-HTr0   r1   r2   zis-ISr3   r4   r5   zjv-JVr6   zkok-INr7   zlo-LAzla-VAr8   r9   zlb-LUzmk-MKzmai-INzmg-MGzms-MYr:   r<   zmn-MNzne-NPr=   znn-NOzor-INzps-AFzfa-IRr>   r?   zpt-PTzpa-INr@   rA   rB   zsd-INzsi-LKrC   rD   rE   zes-419zes-MXrF   rG   rH   rI   rJ   rK   rL   zur-PKrM   FrN   )r   AFAF_ZASQSQ_ALAMAM_ETrP   AR_EGAR_001HYHY_AMAZAZ_AZEUEU_ESBEBE_BYrQ   BN_BDrS   rT   MYMY_MMCACA_ESCEBCEB_PHr   r   ZH_TWrU   rV   rW   rX   rY   rZ   r[   r]   r^   r_   r`   ra   rb   rc   rd   FILFIL_PHre   rf   rg   ri   rh   GLGL_ESKAKA_GErj   rk   rl   rm   rn   ro   HTHT_HTrp   rq   rr   rs   rt   ru   ISIS_ISrv   rw   rx   ry   rz   r{   JVJV_JVr|   r}   KOKKOK_INr~   r   LOLO_LALALA_VAr   r   r   r   LBLB_LUMKMK_MKMAIMAI_INMGMG_MGMSMS_MYr   r   r   r   MNMN_MNNENE_NPr   r   r   NNNN_NOOROR_INPSPS_AFFAFA_IRr   r   r   r   PT_PTPAPA_INr   r   r   r   r   r   SDSD_INSISI_LKr   r   r   r   r   r   ES_419ES_MXr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   UR_PKr   r   r   r   r   r   r   language_to_gemini_tts_language   s  	!"$%'(*+,./124578:;<=>@ACDFGIJKMNPQSTVWYZ\]_`bcefhiklnoqrtuwxz{}~             
                                 "  #  %  &  (  )  *  +  ,  .  /  1  2  4  5  7  8  :  ;  <  >  ?  A  B  D  E  G  H  J  K  M  N  P  Q  S  T  V  W  X  Y  [  \  ^  _  a  b  wr   c                   @   s   e Zd ZU dZedd dZedB eB ed< edd dZ	edB eB ed< ed	d dZ
edB eB ed
< edd dZedB eB ed< edd dZed dB eB ed< edd dZed dB eB ed< edd dZed dB eB ed< dS )GoogleHttpTTSSettingsa1  Settings for GoogleHttpTTSService.

    Parameters:
        pitch: Voice pitch adjustment (e.g., "+2st", "-50%").
        rate: Speaking rate adjustment (e.g., "slow", "fast", "125%"). Used for
            SSML prosody tags (non-Chirp voices).
        speaking_rate: Speaking rate for AudioConfig (Chirp/Journey voices).
            Range [0.25, 2.0].
        volume: Volume adjustment (e.g., "loud", "soft", "+6dB").
        emphasis: Emphasis level for the text.
        gender: Voice gender preference.
        google_style: Google-specific voice style.
    c                   C      t S Nr   r   r   r   r   <lambda>      zGoogleHttpTTSSettings.<lambda>default_factoryNpitchc                   C   r  r  r  r   r   r   r   r    r  ratec                   C   r  r  r  r   r   r   r   r    r  speaking_ratec                   C   r  r  r  r   r   r   r   r    r  volumec                   C   r  r  r  r   r   r   r   r    r  strongmoderatereducednoneemphasisc                   C   r  r  r  r   r   r   r   r    r  malefemaleneutralgenderc                   C   r  r  r  r   r   r   r   r    r  
apologeticcalm
empatheticfirmlivelygoogle_style)__name__
__module____qualname____doc__r   r  strr   __annotations__r	  r
  floatr  r  r
   r  r  r   r   r   r   r     s    
 
r   c                   @   s0   e Zd ZU dZedd dZedB eB ed< dS )GoogleTTSSettingszxSettings for GoogleTTSService.

    Parameters:
        speaking_rate: The speaking rate, in the range [0.25, 2.0].
    c                   C   r  r  r  r   r   r   r   r  	  r  zGoogleTTSSettings.<lambda>r  Nr
  )	r  r  r   r!  r   r
  r$  r   r#  r   r   r   r   r%    s   
 "r%  c                   @   st   e Zd ZU dZedd dZedB eB ed< edd dZ	e
eB ed< ed	d dZeeeef  dB eB ed
< dS )GeminiTTSSettingsa  Settings for GeminiTTSService.

    Parameters:
        prompt: Optional style instructions for how to synthesize the content.
        multi_speaker: Whether to enable multi-speaker support.
        speaker_configs: List of speaker configurations for multi-speaker mode.
    c                   C   r  r  r  r   r   r   r   r    r  zGeminiTTSSettings.<lambda>r  Npromptc                   C   r  r  r  r   r   r   r   r    r  multi_speakerc                   C   r  r  r  r   r   r   r   r    r  speaker_configs)r  r  r   r!  r   r'  r"  r   r#  r(  boolr)  listdictr   r   r   r   r   r&    s   
 &r&  c                       s   e Zd ZU dZeZeed< G dd deZddddddddde	e
 de	e
 d	e	e
 d
e	e
 de	e de	e de	e f fddZde	e
 de	e
 dejfddZdefddZdede	e
 fddZdedee
ef f fddZde
de
fddZede
de
deedf fdd Z  ZS )!GoogleHttpTTSServicea;  Google Cloud Text-to-Speech HTTP service with SSML support.

    Provides text-to-speech synthesis using Google Cloud's HTTP API with
    comprehensive SSML support for voice customization, prosody control,
    and styling options. Ideal for applications requiring fine-grained
    control over speech output.

    Note:
        Requires Google Cloud credentials via service account JSON, credentials file,
        or default application credentials (GOOGLE_APPLICATION_CREDENTIALS).
        Chirp and Journey voices don't support SSML and will use plain text input.
    	_settingsc                   @   s   e Zd ZU dZdZee ed< dZee ed< dZ	ee
 ed< dZee ed< dZeed  ed< ejZee ed	< dZeed
  ed< dZeed  ed< dS )z GoogleHttpTTSService.InputParamsa  Input parameters for Google HTTP TTS voice customization.

        .. deprecated:: 0.0.105
            Use ``GoogleHttpTTSService.Settings`` directly via the ``settings`` parameter instead.

        Parameters:
            pitch: Voice pitch adjustment (e.g., "+2st", "-50%").
            rate: Speaking rate adjustment (e.g., "slow", "fast", "125%"). Used for SSML prosody tags (non-Chirp voices).
            speaking_rate: Speaking rate for AudioConfig (Chirp/Journey voices). Range [0.25, 2.0].
            volume: Volume adjustment (e.g., "loud", "soft", "+6dB").
            emphasis: Emphasis level for the text.
            language: Language for synthesis. Defaults to English.
            gender: Voice gender preference.
            google_style: Google-specific voice style.
        Nr  r	  r
  r  r  r  r   r  r  r  r  )r  r  r   r!  r  r   r"  r#  r	  r
  r$  r  r  r
   r   r^   r   r  r  r   r   r   r   InputParams3  s   
 r/  N)credentialscredentials_pathlocationvoice_idsample_rateparamssettingsr0  r1  r2  r3  r4  r5  r6  c          
         s$  | j ddddddddddd
}	|dur| dd ||	_|duro| d |so|jdur0|j|	_|jdur9|j|	_|jdurB|j|	_|jdurK|j|	_|jdurT|j|	_|jdur]|j|	_|j	durf|j	|	_	|j
duro|j
|	_
|durx|	| t jd
|dd|	d	| || _| ||| _dS )a	  Initializes the Google HTTP TTS service.

        Args:
            credentials: JSON string containing Google Cloud service account credentials.
            credentials_path: Path to Google Cloud service account JSON file.
            location: Google Cloud location for regional endpoint (e.g., "us-central1").
            voice_id: Google TTS voice identifier (e.g., "en-US-Standard-A").

                .. deprecated:: 0.0.105
                    Use ``settings=GoogleHttpTTSService.Settings(voice=...)`` instead.

            sample_rate: Audio sample rate in Hz. If None, uses default.
            params: Voice customization parameters including pitch, rate, volume, etc.

                .. deprecated:: 0.0.105
                    Use ``settings=GoogleHttpTTSService.Settings(...)`` instead.

            settings: Runtime-updatable settings. When provided alongside deprecated
                parameters, ``settings`` values take precedence.
            **kwargs: Additional arguments passed to parent TTSService.
        Nen-US-Chirp3-HD-Charonr%   )
modelvoicer   r  r	  r
  r  r  r  r  r3  r9  r5  Tr4  push_start_framepush_stop_framesr6  r   )Settings"_warn_init_param_moved_to_settingsr9  r  r	  r
  r  r  r   r  r  apply_updatesuper__init__	_location_create_client_client)
selfr0  r1  r2  r3  r4  r5  r6  kwargsdefault_settings	__class__r   r   rA  M  s`   "










zGoogleHttpTTSService.__init__r   c                 C      d}|rt |}tj|}n|rtj|}nz
tdgd\}}W n	 ty,   Y nw |s3tdd}| j	rAt
| j	 dd}tj||dS ao  Create authenticated Google Text-to-Speech client.

        Args:
            credentials: JSON string with service account credentials.
            credentials_path: Path to service account JSON file.

        Returns:
            Authenticated TextToSpeechAsyncClient instance.

        Raises:
            ValueError: If no valid credentials are provided.
        Nz.https://www.googleapis.com/auth/cloud-platform)scopeszNo valid credentials provided.z-texttospeech.googleapis.com)api_endpoint)r0  client_optionsjsonloadsr   Credentialsfrom_service_account_infofrom_service_account_filer   r   
ValueErrorrB  r   r   TextToSpeechAsyncClientrE  r0  r1  credsjson_account_info
project_idrN  r   r   r   rC    .   

z#GoogleHttpTTSService._create_clientc                 C      dS )zCheck if this service can generate processing metrics.

        Returns:
            True, as Google HTTP TTS service supports metrics generation.
        Tr   rE  r   r   r   can_generate_metrics     z)GoogleHttpTTSService.can_generate_metricsr   c                 C      t |S zConvert a Language enum to Google TTS language format.

        Args:
            language: The language to convert.

        Returns:
            The Google TTS-specific language code, or None if not supported.
        r   rE  r   r   r   r   language_to_service_language     	z1GoogleHttpTTSService.language_to_service_languagedeltac                    b   t || jr(t|jr(t|j}d|  krdks(n td| d t|_t 	|I dH S zOverride to handle speaking_rate validation.

        Args:
            delta: Settings delta. Can include 'speaking_rate' (float).
        g      ?g       @zInvalid speaking_rate value: z. Must be between 0.25 and 2.0N

isinstancer=  r   r
  r$  r   warningr   r@  _update_settingsrE  rf  
rate_valuerH  r   r   rl       

z%GoogleHttpTTSService._update_settingstextc                 C   sT  d}d| j j dg}| j j}|d| d | j jr'|d| j j d |dd| d7 }g }| j jrC|d	| j j d | j jrR|d
| j j d | j jra|d| j j d |rn|dd| d7 }| j j	r||d| j j	 d7 }| j j
r|d| j j
 d7 }||7 }| j j
r|d7 }| j j	r|d7 }|r|d7 }|d7 }|S )Nz<speak>zname=''z
language='zgender='z<voice  >zpitch='zrate='zvolume='z	<prosody z<emphasis level='z'>z<google:style name='z</google:style>z</emphasis>z
</prosody>z</voice></speak>)r.  r9  r   appendr  joinr  r	  r  r  r  )rE  rp  ssmlvoice_attrsr   prosody_attrsr   r   r   _construct_ssml  s:   z$GoogleHttpTTSService._construct_ssml
context_idc              
   C  s  t |  d| d zd| jj v }d| jj v }|s!|r(tj|d}n| |}tj|d}tj| jj	| jjd}tj
j| jd}|sJ|rV| jjd	urV| jj|d
< tjdi |}	tj|||	d}
| jj|
dI d	H }| |I d	H  |jdd	 }| j}tdt||D ]#}||||  }|s W d	S |  I d	H  t|| jd|d}|V  qW d	S  ty } zdt| }t|dV  W Y d	}~d	S d	}~ww )a  Generate speech from text using Google's HTTP TTS API.

        Args:
            text: The text to synthesize into speech.
            context_id: The context ID for tracking audio frames.

        Yields:
            Frame: Audio frames containing the synthesized speech.
        : Generating TTS []chirpjourney)rp  )rv  language_codenameaudio_encodingsample_rate_hertzNr
  )inputr9  audio_config)request,   r      rz  TTS generation error: errorr   )r   debugr.  r9  lowerr   SynthesisInputry  VoiceSelectionParamsr   AudioEncodingLINEAR16r4  r
  AudioConfigSynthesizeSpeechRequestrD  synthesize_speechstart_tts_usage_metricsaudio_content
chunk_sizerangelenstop_ttfb_metricsr   	Exceptionr"  r   )rE  rp  rz  is_chirp_voiceis_journey_voicesynthesis_inputrv  r9  audio_config_paramsr  r  responser  
CHUNK_SIZEichunkframeeerror_messager   r   r   run_tts$  sL   
zGoogleHttpTTSService.run_tts)r  r  r   r!  r   r=  r#  r   r/  r   r"  intrA  r   rV  rC  r*  r^  r   rd  r   r,  r   rl  ry  r   r   r   r  __classcell__r   r   rH  r   r-  "  sP   
 	Z
-.(r-  c                   @   s   e Zd ZdZdee dee dejfddZde	fddZ
d	edee fd
dZ	ddejdededee deedf f
ddZdS )GoogleBaseTTSServicezBase class for Google Cloud Text-to-Speech streaming services.

    Provides shared streaming synthesis logic for Google TTS services.
    This is an abstract base class. Use GoogleTTSService or GeminiTTSService instead.
    r0  r1  r   c                 C   rJ  rK  rO  rW  r   r   r   rC  n  r[  z#GoogleBaseTTSService._create_clientc                 C   r\  )zCheck if this service can generate processing metrics.

        Returns:
            True, as Google streaming TTS services support metrics generation.
        Tr   r]  r   r   r   r^    r_  z)GoogleBaseTTSService.can_generate_metricsr   c                 C   r`  ra  rb  rc  r   r   r   rd    re  z1GoogleBaseTTSService.language_to_service_languageNstreaming_configrp  rz  r'  c                   s   t j|d  fdd}| j| I dH }| I dH  d}d}| j}	|2 z=3 dH W }
|
j}|s6q*|sA|  I dH  d}||7 }t||	krg|d|	 }||	d }t	|| j
d|d	V  t||	ksKq*6 |rwt	|| j
d|d	V  dS dS )
a|  Shared streaming synthesis logic.

        Args:
            streaming_config: The streaming configuration.
            text: The text to synthesize.
            context_id: Unique identifier for this TTS context.
            prompt: Optional prompt for style instructions (Gemini only).

        Yields:
            Frame: Audio frames containing the synthesized speech.
        )r  c                    s>    V  di} d ur| d< t jt jdi | dV  d S )Nrp  r'  )r  r   )r   StreamingSynthesizeRequestStreamingSynthesisInput)synthesis_input_paramsconfig_requestr'  rp  r   r   request_generator  s   z;GoogleBaseTTSService._stream_tts.<locals>.request_generatorN    FTr  r  )r   r  rD  streaming_synthesizer  r  r  r  r  r   r4  )rE  r  rp  rz  r'  r  streaming_responsesaudio_bufferfirst_chunk_for_ttfbr  r  r  piecer   r  r   _stream_tts  s8   	z GoogleBaseTTSService._stream_ttsr  )r  r  r   r!  r   r"  r   rV  rC  r*  r^  r   rd  StreamingSynthesizeConfigr   r   r  r   r   r   r   r  g  s.    
-
r  c                       s   e Zd ZU dZeZeed< G dd deZdddddddddde	e
 de	e
 d	e	e
 d
e	e
 de	e
 de	e de	e de	e f fddZdedee
ef f fddZede
de
deedf fddZ  ZS )GoogleTTSServicea*  Google Cloud Text-to-Speech streaming service.

    Provides real-time text-to-speech synthesis using Google Cloud's streaming API
    for low-latency applications. Optimized for Chirp 3 HD and Journey voices
    with continuous audio streaming capabilities.

    Note:
        Requires Google Cloud credentials via service account JSON, file path, or
        default application credentials (GOOGLE_APPLICATION_CREDENTIALS env var).
        Only Chirp 3 HD and Journey voices are supported. Use GoogleHttpTTSService for other voices.

    Example::

        tts = GoogleTTSService(
            credentials_path="/path/to/service-account.json",
            settings=GoogleTTSService.Settings(
                voice="en-US-Chirp3-HD-Charon",
                language=Language.EN_US,
            )
        )
    r.  c                   @   s4   e Zd ZU dZejZee ed< dZ	ee
 ed< dS )zGoogleTTSService.InputParamsaa  Input parameters for Google streaming TTS configuration.

        .. deprecated:: 0.0.105
            Use ``GoogleTTSService.Settings`` directly via the ``settings`` parameter instead.

        Parameters:
            language: Language for synthesis. Defaults to English.
            speaking_rate: The speaking rate, in the range [0.25, 2.0].
        r   Nr
  )r  r  r   r!  r   r^   r   r   r#  r
  r$  r   r   r   r   r/    s   
 
r/  N)r0  r1  r2  r3  voice_cloning_keyr4  r5  r6  r0  r1  r2  r3  r  r4  r5  r6  c                   s   | j ddddd}
|dur| dd ||
_|dur3| d |s3|jdur*|j|
_|jdur3|j|
_|dur<|
| t jd
|dd|
d	|	 || _|| _	| 
||| _dS )a<  Initializes the Google streaming TTS service.

        Args:
            credentials: JSON string containing Google Cloud service account credentials.
            credentials_path: Path to Google Cloud service account JSON file.
            location: Google Cloud location for regional endpoint (e.g., "us-central1").
            voice_id: Google TTS voice identifier (e.g., "en-US-Chirp3-HD-Charon").

                .. deprecated:: 0.0.105
                    Use ``settings=GoogleTTSService.Settings(voice=...)`` instead.

            voice_cloning_key: The voice cloning key for Chirp 3 custom voices.
            sample_rate: Audio sample rate in Hz. If None, uses default.
            params: Language configuration parameters.

                .. deprecated:: 0.0.105
                    Use ``settings=GoogleTTSService.Settings(...)`` instead.

            settings: Runtime-updatable settings. When provided alongside deprecated
                parameters, ``settings`` values take precedence.
            **kwargs: Additional arguments passed to parent TTSService.
        Nr7  r%   )r8  r9  r   r
  r3  r9  r5  Tr:  r   )r=  r>  r9  r   r
  r?  r@  rA  rB  _voice_cloning_keyrC  rD  )rE  r0  r1  r2  r3  r  r4  r5  r6  rF  rG  rH  r   r   rA    s>   $




zGoogleTTSService.__init__rf  r   c                    rg  rh  ri  rm  rH  r   r   rl  [  ro  z!GoogleTTSService._update_settingsrp  rz  c              
   C  s   t |  d| d zF| jr!tj| jd}tj| jj|d}ntj| jj| jjd}tj	|tj
tjj| j| jjdd}| |||2 z	3 dH W }|V  qE6 W dS  tyt } z| jd	t| |d
I dH  W Y d}~dS d}~ww )a7  Generate streaming speech from text using Google's streaming API.

        Args:
            text: The text to synthesize into speech.
            context_id: The context ID for tracking audio frames.

        Yields:
            Frame: Audio frames containing the synthesized speech as it's generated.
        r{  r|  )r  )r  voice_cloner  )r  r  r
  r9  streaming_audio_configNr  )	error_msg	exception)r   r  r  r   VoiceCloneParamsr  r.  r   r9  r  StreamingAudioConfigr  PCMr4  r
  r  r  
push_errorr"  )rE  rp  rz  voice_clone_paramsr9  r  r  r  r   r   r   r  j  s8   
,zGoogleTTSService.run_tts)r  r  r   r!  r%  r=  r#  r   r/  r   r"  r  rA  r   r,  r   rl  r   r   r   r  r  r   r   rH  r   r    sB   
 	
K(r  c                       s  e Zd ZU dZeZeed< dZg dZG dd de	Z
dddddddddd	d	ee d
ee dee dee dee dee dee dee
 dee f fddZdedee fddZdef fddZdedeeef f fddZedededeedf fd d!Z  ZS )"GeminiTTSServicea  Gemini Text-to-Speech streaming service using Gemini TTS models.

    Provides real-time text-to-speech synthesis using Gemini's TTS-specific models
    (gemini-2.5-flash-tts and gemini-2.5-pro-tts) with support for natural
    voice control, prompts for style instructions, expressive markup tags,
    and multi-speaker conversations.

    Note:
        Requires Google Cloud credentials via service account JSON, credentials file,
        or default application credentials (GOOGLE_APPLICATION_CREDENTIALS).

        Uses the Google Cloud Text-to-Speech streaming API for low-latency synthesis.

    Example::

        tts = GeminiTTSService(
            credentials_path="/path/to/service-account.json",
            settings=GeminiTTSService.Settings(
                model="gemini-2.5-flash-tts",
                voice="Kore",
                language=Language.EN_US,
                prompt="Say this in a friendly and helpful tone"
            )
        )
    r.  i]  )AchernarAchirdAlgenibAlgiebaAlnilamAoedeAutonoe	CallirhoeCharonDespina	EnceladusErinomeFenrirGacruxIapetusKore	LaomedeiaLedaOrusPuckPulcherrima
Rasalgethi	Sadachbia
SadaltagerSchedarSulafarUmbrielVindemiatrixZephyrZubenelgenubic                   @   sT   e Zd ZU dZejZee ed< dZ	ee
 ed< dZeed< dZeee  ed< dS )zGeminiTTSService.InputParamsa  Input parameters for Gemini TTS configuration.

        .. deprecated:: 0.0.105
            Use ``GeminiTTSService.Settings`` directly via the ``settings`` parameter instead.

        Parameters:
            language: Language for synthesis. Defaults to English.
            prompt: Optional style instructions for how to synthesize the content.
            multi_speaker: Whether to enable multi-speaker support.
            speaker_configs: List of speaker configurations for multi-speaker mode.
        r   Nr'  Fr(  r)  )r  r  r   r!  r   r^   r   r   r#  r'  r"  r(  r*  r)  r	   r,  r   r   r   r   r/    s   
 r/  N)	api_keyr8  r0  r1  r2  r3  r4  r5  r6  r  r8  r0  r1  r2  r3  r4  r5  r6  c       	            sN  |durt jdtdd |r || jkr td| j d| d | jdd	d
dddd}|dur8| dd ||_|durE| dd ||_	|j	| j
vrUtd|j	 d |dur| d |	s|jduri|j|_|jdurr|j|_|jdur{|j|_|jdur|j|_|	dur||	 t jd|dd|d|
 || _| ||| _dS )a  Initializes the Gemini TTS service.

        Args:
            api_key:

                .. deprecated:: 0.0.95
                    The `api_key` parameter is deprecated. Use `credentials` or
                    `credentials_path` instead for Google Cloud authentication.

            model: Gemini TTS model to use. Must be a TTS model like
                   "gemini-2.5-flash-tts" or "gemini-2.5-pro-tts".

                .. deprecated:: 0.0.105
                    Use ``settings=GeminiTTSService.Settings(model=...)`` instead.

            credentials: JSON string containing Google Cloud service account credentials.
            credentials_path: Path to Google Cloud service account JSON file.
            location: Google Cloud location for regional endpoint (e.g., "us-central1").
            voice_id: Voice name from the available Gemini voices.

                .. deprecated:: 0.0.105
                    Use ``settings=GeminiTTSService.Settings(voice=...)`` instead.

            sample_rate: Audio sample rate in Hz. If None, uses Google's default 24kHz.
            params: TTS configuration parameters.

                .. deprecated:: 0.0.105
                    Use ``settings=GeminiTTSService.Settings(...)`` instead.

            settings: Runtime-updatable settings. When provided alongside deprecated
                parameters, ``settings`` values take precedence.
            **kwargs: Additional arguments passed to parent TTSService.
        NzThe 'api_key' parameter is deprecated and will be removed in a future version. Use 'credentials' or 'credentials_path' instead for Google Cloud authentication.   )
stacklevelzGoogle TTS only supports  Hz sample rate. Current rate of Hz may cause issues.zgemini-2.5-flash-ttsr  r%   F)r8  r9  r   r'  r(  r)  r8  r3  r9  Voice ')' not in known voices list. Using anyway.r5  Tr:  r   )warningswarnDeprecationWarningGOOGLE_SAMPLE_RATEr   rk  r=  r>  r8  r9  AVAILABLE_VOICESr   r'  r(  r)  r?  r@  rA  rB  rC  rD  )rE  r  r8  r0  r1  r2  r3  r4  r5  r6  rF  rG  rH  r   r   rA    sn   0








zGeminiTTSService.__init__r   r   c                 C   r`  )zConvert a Language enum to Gemini TTS language format.

        Args:
            language: The language to convert.

        Returns:
            The Gemini TTS-specific language code, or None if not supported.
        )r   rc  r   r   r   rd  ^  re  z-GeminiTTSService.language_to_service_languager  c                    sD   t  |I dH  | j| jkr td| j d| j d dS dS )z~Start the Gemini TTS service.

        Args:
            frame: The start frame containing initialization parameters.
        NzGoogle TTS requires r  r  )r@  startr4  r  r   rk  )rE  r  rH  r   r   r  i  s   
zGeminiTTSService.startrf  c                    s>   t |jr|j| jvrtd|j d t |I dH S )zApply a settings delta with voice validation.

        Args:
            delta: Settings delta. Can include 'voice', 'prompt', etc.

        Returns:
            Dict mapping changed field names to their previous values.
        r  r  N)r   r9  r  r   rk  r@  rl  )rE  rf  rH  r   r   rl  v  s   	z!GeminiTTSService._update_settingsrp  rz  c              
   C  s0  t |  d| d zk| jjrC| jjrCg }| jjD ]}|tj|d |d| jj	d qtj
|d}tj| jj| jj|d}ntj| jj| jj	| jjd}tj|tjtjj| jd	d
}| |||| jj2 z	3 dH W }|V  qj6 W dS  ty }	 zdt|	 }
t|
dV  W Y d}	~	dS d}	~	ww )a  Generate streaming speech from text using Gemini TTS models.

        Args:
            text: The text to synthesize into speech.
            context_id: The context ID for tracking audio frames. Can include markup tags
                  like [sigh], [laughing], [whispering] for expressive control.

        Yields:
            Frame: Audio frames containing the synthesized speech as it's generated.
        r{  r|  speaker_alias
speaker_id)r  r  )speaker_voice_configs)r  
model_namemulti_speaker_voice_config)r  r  r  r  r  NzGemini TTS generation error: r  )r   r  r.  r(  r)  rt  r   MultispeakerPrebuiltVoicegetr9  MultiSpeakerVoiceConfigr  r   r8  r  r  r  r  r4  r  r'  r  r"  r   )rE  rp  rz  r  speaker_configr  r9  r  r  r  r  r   r   r   r    sT   	zGeminiTTSService.run_tts)r  r  r   r!  r&  r=  r#  r  r  r   r/  r   r"  r  rA  r   rd  r   r  r   r,  r   rl  r   r   r   r  r  r   r   rH  r   r    sP   
 !	
s(r  )<r!  rP  osr  (pipecat.utils.tracing.service_decoratorsr   environdataclassesr   r   typingr   r   r	   r
   r   logurur   pydanticr   pipecat.frames.framesr   r   r   r   pipecat.services.settingsr   r   r   r   pipecat.services.tts_servicer   pipecat.transcriptions.languager   r   google.api_core.client_optionsr   google.authr   google.auth.exceptionsr   google.cloudr   google.oauth2r   ModuleNotFoundErrorr  r  r  r"  r   r   r   r%  GoogleStreamTTSSettingsr&  r-  r  r  r  r   r   r   r   <module>   s`   

      G  0