o
    iJ                  
   @   s  d Z ddlZddlZddlmZmZ ddlmZ ddlm	Z	m
Z
mZ ddlZddlmZ ddlmZ ddlmZmZmZmZmZmZmZ dd	lmZmZmZ dd
lmZmZm Z  ddl!m"Z"m#Z# ddl$m%Z% ddl&m'Z' ddl(m)Z) zddl*m+Z, ddl-m.Z. W n  e/y Z0 ze1de0  e1d e2de0 dZ0[0ww G dd deZ3de"dee4 fddZ5G dd de4eZ6eG dd deZ7G dd de Z8G d d! d!eZ9dS )"z0Cartesia text-to-speech service implementations.    N)	dataclassfield)Enum)AsyncGeneratorListOptional)logger)	BaseModel)CancelFrameEndFrame
ErrorFrameFrame
StartFrameTTSAudioRawFrameTTSStoppedFrame)	NOT_GIVENTTSSettings	_NotGiven)TextAggregationMode
TTSServiceWebsocketTTSService)Languageresolve_language)BaseTextAggregator)SkipTagsAggregator)
traced_tts)connect)StatezException: zIIn order to use Cartesia, you need to `pip install pipecat-ai[cartesia]`.zMissing module: c                   @   sB   e Zd ZU dZdZee ed< dZee ed< dZ	ee
 ed< dS )GenerationConfiga  Configuration for Cartesia Sonic-3 generation parameters.

    Sonic-3 interprets these parameters as guidance to ensure natural speech.
    Test against your content for best results.

    Parameters:
        volume: Volume multiplier for generated speech. Valid range: [0.5, 2.0]. Default is 1.0.
        speed: Speed multiplier for generated speech. Valid range: [0.6, 1.5]. Default is 1.0.
        emotion: Single emotion string to guide the emotional tone. Examples include neutral,
            angry, excited, content, sad, scared. Over 60 emotions are supported. For best
            results, use with recommended voices: Leo, Jace, Kyle, Gavin, Maya, Tessa, Dana,
            and Marian.
    Nvolumespeedemotion)__name__
__module____qualname____doc__r   r   float__annotations__r    r!   str r)   r)   Q/home/ubuntu/.local/lib/python3.10/site-packages/pipecat/services/cartesia/tts.pyr   -   s
   
 r   languagereturnc                 C   sZ  i t jdt jdt jdt jdt jdt jdt jdt jdt j	d	t j
d
t jdt jdt jdt jdt jdt jdt jdi t jdt jdt jdt jdt jdt jdt jdt jdt jdt jdt jdt jdt jdt jdt j d t j!d!t j"d"t j#d#t j$d$t j%d%t j&d&t j'd't j(d(t j)d)t j*d*i}t+| |d+d,S )-zConvert a Language enum to Cartesia language code.

    Args:
        language: The Language enum value to convert.

    Returns:
        The corresponding Cartesia language code, or None if not supported.
    arbgbncsdadeenelesfifrguhehihrhuiditjakaknkomlmrmsnlnopaplptrorusksvtatethtltrukvizhT)use_base_code),r   ARBGBNCSDADEENELESFIFRGUHEHIHRHUIDITJAKAKNKOMLMRMSNLNOPAPLPTRORUSKSVTATETHTLTRUKVIZHr   )r+   LANGUAGE_MAPr)   r)   r*   language_to_cartesia_languageA   s   		
 !"#-r   c                   @   s   e Zd ZdZdZdZdZdZdZdZ	dZ
d	Zd
ZdZdZdZdZdZdZdZdZdZdZdZdZdZdZdZdZdZdZdZdZ dZ!d Z"d!Z#d"Z$d#Z%d$Z&d%Z'd&Z(d'Z)d(Z*d)Z+d*Z,d+Z-d,Z.d-Z/d.Z0d/Z1d0Z2d1Z3d2Z4d3Z5d4Z6d5Z7d6Z8d7Z9d8Z:d9Z;d:Z<d;Z=d<Z>d=S )>CartesiaEmotionz*Predefined Emotions supported by Cartesia.neutralangryexcitedcontentsadscaredhappyenthusiasticelatedeuphoric
triumphantamazed	surprisedflirtatiouszjoking/comediccuriouspeacefulserenecalmgratefulaffectionatetrustsympatheticanticipation
mysteriousmadoutraged
frustratedagitated
threatened	disgustedcontemptenvious	sarcasticironicdejectedmelancholicdisappointedhurtguiltyboredtiredrejected	nostalgicwistful
apologetichesitantinsecureconfusedresignedanxiouspanickedalarmedproud	confidentdistant	skepticalcontemplative
determinedN)?r"   r#   r$   r%   NEUTRALANGRYEXCITEDCONTENTSADSCAREDHAPPYENTHUSIASTICELATEDEUPHORIC
TRIUMPHANTAMAZED	SURPRISEDFLIRTATIOUSJOKING_COMEDICCURIOUSPEACEFULSERENECALMGRATEFULAFFECTIONATETRUSTSYMPATHETICANTICIPATION
MYSTERIOUSMADOUTRAGED
FRUSTRATEDAGITATED
THREATENED	DISGUSTEDCONTEMPTENVIOUS	SARCASTICIRONICDEJECTEDMELANCHOLICDISAPPOINTEDHURTGUILTYBOREDTIREDREJECTED	NOSTALGICWISTFUL
APOLOGETICHESITANTINSECURECONFUSEDRESIGNEDANXIOUSPANICKEDALARMEDPROUD	CONFIDENTDISTANT	SKEPTICALCONTEMPLATIVE
DETERMINEDr)   r)   r)   r*   r   z   sz    r   c                   @   sN   e Zd ZU dZedd dZedB eB ed< edd dZ	e
dB eB ed< dS )	CartesiaTTSSettingsa\  Settings for CartesiaTTSService and CartesiaHttpTTSService.

    Parameters:
        generation_config: Generation configuration for Sonic-3 models. Includes volume,
            speed (numeric), and emotion (string) parameters.
        pronunciation_dict_id: The ID of the pronunciation dictionary to use for
            custom pronunciations.
    c                   C      t S Nr   r)   r)   r)   r*   <lambda>       zCartesiaTTSSettings.<lambda>)default_factoryNgeneration_configc                   C   r   r   r   r)   r)   r)   r*   r      r   pronunciation_dict_id)r"   r#   r$   r%   r   r  r   r   r'   r  r(   r)   r)   r)   r*   r      s   
 	"r   c                       sV  e Zd ZU dZeZeed< G dd deZddddddd	dddddd
de	de
e	 de	de	de
e	 de
e de	de	de
e de
e de
e de
e de
e f fddZdefddZdede
e	 fddZd e	de	fd!d"Zd#ede	fd$d%Zd&ede	fd'd(Zd)ede	fd*d+Zd,ede	fd-d.Zde	defd/d0Zd1ee	 d2ee deee	ef  fd3d4Z	5	6	6	5dYd e	d7ed8ed9e	fd:d;Zd<ef fd=d>Z d<e!f fd?d@Z"d<e#f fdAdBZ$ fdCdDZ% fdEdFZ&dGdH Z'dIdJ Z(dKdL Z)d9e	fdMdNZ*d9e	fdOdPZ+dZd9e
e	 fdQdRZ,dSdT Z-dUdV Z.e/d e	d9e	de0e1df fdWdXZ2  Z3S )[CartesiaTTSServicea  Cartesia TTS service with WebSocket streaming and word timestamps.

    Provides text-to-speech using Cartesia's streaming WebSocket API.
    Supports word-level timestamps, audio context management, and various voice
    customization options including generation configuration.
    	_settingsc                   @   D   e Zd ZU dZejZee ed< dZ	ee
 ed< dZee ed< dS )zCartesiaTTSService.InputParamsa  Input parameters for Cartesia TTS configuration.

        Parameters:
            language: Language to use for synthesis.
            generation_config: Generation configuration for Sonic-3 models. Includes volume,
                speed (numeric), and emotion (string) parameters.
            pronunciation_dict_id: The ID of the pronunciation dictionary to use for custom pronunciations.
        r+   Nr  r  r"   r#   r$   r%   r   r^   r+   r   r'   r  r   r  r(   r)   r)   r)   r*   InputParams   
   
 	r  Nz
2025-04-16z#wss://api.cartesia.ai/tts/websocket	pcm_s16leraw)voice_idcartesia_versionurlmodelsample_rateencoding	containerparamssettingstext_aggregatortext_aggregation_modeaggregate_sentencesapi_keyr  r  r  r  r  r  r  r  r  r  r  r  c                   s  | j ddtjddd}|dur| dd ||_|dur%| dd ||_|	durK| d |
sK|	jdur9|	j|_|	jdurB|	j|_|	jdurK|	j|_|
durT|	|
 t
 jd||dd|d	||d
| |sqtdg| jd| _|| _|| _|| _|| _|| _d| _d| _dS )a  Initialize the Cartesia TTS service.

        Args:
            api_key: Cartesia API key for authentication.
            voice_id: ID of the voice to use for synthesis.

                .. deprecated:: 0.0.105
                    Use ``settings=CartesiaTTSService.Settings(voice=...)`` instead.

            cartesia_version: API version string for Cartesia service.
            url: WebSocket URL for Cartesia TTS API.
            model: TTS model to use (e.g., "sonic-3").

                .. deprecated:: 0.0.105
                    Use ``settings=CartesiaTTSService.Settings(model=...)`` instead.

            sample_rate: Audio sample rate. If None, uses default.
            encoding: Audio encoding format.
            container: Audio container format.
            params: Additional input parameters for voice customization.

                .. deprecated:: 0.0.105
                    Use ``settings=CartesiaTTSService.Settings(...)`` instead.

            settings: Runtime-updatable settings. When provided alongside deprecated
                parameters, ``settings`` values take precedence.
            text_aggregator: Custom text aggregator for processing input text.

                .. deprecated:: 0.0.95
                    Use an LLMTextProcessor before the TTSService for custom text aggregation.

            text_aggregation_mode: How to aggregate incoming text before synthesis.
            aggregate_sentences: Whether to aggregate sentences within the TTSService.

                .. deprecated:: 0.0.104
                    Use ``text_aggregation_mode`` instead.

            **kwargs: Additional arguments passed to the parent service.
        sonic-3Nr  voicer+   r  r  r  r  r  r  FT)r  r  push_text_framespause_frame_processingr  push_start_framer  r  )<spell></spell>)aggregation_typer   r)   )Settingsr   r^   "_warn_init_param_moved_to_settingsr  r  r+   r  r  apply_updatesuper__init__r   _text_aggregation_mode_text_aggregator_api_key_cartesia_version_url_output_container_output_encoding_output_sample_rate_receive_task)selfr  r  r  r  r  r  r  r  r  r  r  r  r  kwargsdefault_settings	__class__r)   r*   r%     s^   H	




	
zCartesiaTTSService.__init__r,   c                 C      dS )zCheck if this service can generate processing metrics.

        Returns:
            True, as Cartesia service supports metrics generation.
        Tr)   r/  r)   r)   r*   can_generate_metricsn     z'CartesiaTTSService.can_generate_metricsr+   c                 C      t |S zConvert a Language enum to Cartesia language format.

        Args:
            language: The language to convert.

        Returns:
            The Cartesia-specific language code, or None if not supported.
        r   r/  r+   r)   r)   r*   language_to_service_languagev     	z/CartesiaTTSService.language_to_service_languagetextc                 C      d|  dS )z Wrap text in Cartesia spell tag.r  r  r)   )r>  r)   r)   r*   SPELL     zCartesiaTTSService.SPELLr!   c                 C   r?  )z,Convenience method to create an emotion tag.z<emotion value="" />r)   )r!   r)   r)   r*   EMOTION_TAG  rA  zCartesiaTTSService.EMOTION_TAGsecondsc                 C   r?  )z)Convenience method to create a pause tag.z<break time="zs" />r)   )rD  r)   r)   r*   	PAUSE_TAG  rA  zCartesiaTTSService.PAUSE_TAGr   c                 C   r?  )z*Convenience method to create a volume tag.z<volume ratio="rB  r)   )r   r)   r)   r*   
VOLUME_TAG  rA  zCartesiaTTSService.VOLUME_TAGr    c                 C   r?  )z)Convenience method to create a speed tag.z<speed ratio="rB  r)   )r    r)   r)   r*   	SPEED_TAG  rA  zCartesiaTTSService.SPEED_TAGc                 C   s"   h d}| dd  }||v S )zCheck if the given language is CJK (Chinese, Japanese, Korean).

        Args:
            language: The language code to check.

        Returns:
            True if the language is Chinese, Japanese, or Korean.
        >   r?   rB   rV   -r   )splitlower)r/  r+   cjk_languages	base_langr)   r)   r*   _is_cjk_language  s   	z#CartesiaTTSService._is_cjk_languagewordsstartsc                 C   sL   | j j}|r| |r|r|rd|}|d }||fgS g S tt||S )u  Process word timestamps based on the current language.

        For CJK languages, Cartesia groups related characters in the same timestamp message.
        For example, in Japanese a single message might be `['こ', 'ん', 'に', 'ち', 'は', '。']`.
        We combine these into single words so the downstream aggregator can add natural
        spacing between meaningful units rather than individual characters.

        For non-CJK languages, words are already properly separated and are used as-is.

        Args:
            words: List of words/characters from Cartesia.
            starts: List of start timestamps for each word/character.

        Returns:
            List of (word, start_time) tuples processed for the language.
         r   )r  r+   rM  joinlistzip)r/  rN  rO  current_languagecombined_wordfirst_startr)   r)   r*   %_process_word_timestamps_for_language  s   

z8CartesiaTTSService._process_word_timestamps_for_languagerP  Tcontinue_transcriptadd_timestamps
context_idc              	   C   s   i }d|d< | j j|d< |||| j j|| j| j| jd|| j jdkr$dndd}| j jr2| j j|d< | j jr@| j jjdd	|d
< | j j	rJ| j j	|d< t
|S )Nr=   moder  r  r  sonicFT)
transcriptcontinuerZ  model_idr  output_formatrY  use_original_timestampsr+   exclude_noner  r  )r  r  r  r+  r,  r-  r+   r  
model_dumpr  jsondumps)r/  r>  rX  rY  rZ  voice_configmsgr)   r)   r*   
_build_msg  s0   

zCartesiaTTSService._build_msgframec                    s.   t  |I dH  | j| _|  I dH  dS )zStart the Cartesia TTS service.

        Args:
            frame: The start frame containing initialization parameters.
        N)r$  startr  r-  _connectr/  rk  r2  r)   r*   rl    s   zCartesiaTTSService.startc                    &   t  |I dH  |  I dH  dS zXStop the Cartesia TTS service.

        Args:
            frame: The end frame.
        N)r$  stop_disconnectrn  r2  r)   r*   rq       zCartesiaTTSService.stopc                    ro  rp  )r$  cancelrr  rn  r2  r)   r*   rt     rs  zCartesiaTTSService.cancelc                    sL   t   I d H  |  I d H  | jr"| js$| | | j| _d S d S d S r   )r$  rm  _connect_websocket
_websocketr.  create_task_receive_task_handler_report_errorr5  r2  r)   r*   rm  	  s   zCartesiaTTSService._connectc                    sB   t   I d H  | jr| | jI d H  d | _|  I d H  d S r   )r$  rr  r.  cancel_task_disconnect_websocketr5  r2  r)   r*   rr    s   zCartesiaTTSService._disconnectc              
      s   z0| j r| j jtju rW d S td t| j d| j d| j	 I d H | _ | 
dI d H  W d S  ty^ } z!| jd| |dI d H  d | _ | 
d| I d H  W Y d }~d S d }~ww )NzConnecting to Cartesia TTSz	?api_key=z&cartesia_version=on_connectedUnknown error occurred: 	error_msg	exceptionon_connection_error)rv  stater   OPENr   debugwebsocket_connectr*  r(  r)  _call_event_handler	Exception
push_errorr/  er)   r)   r*   ru    s   
"z%CartesiaTTSService._connect_websocketc              
      s   zcz|   I d H  | jrtd | j I d H  W n ty: } z| jd| |dI d H  W Y d }~nd }~ww W |  I d H  d | _| dI d H  d S W |  I d H  d | _| dI d H  d S |  I d H  d | _| dI d H  w )NzDisconnecting from Cartesiar}  r~  on_disconnected)	stop_all_metricsrv  r   r  closer  r  remove_active_audio_contextr  r  r)   r)   r*   r{  (  s,   
&z(CartesiaTTSService._disconnect_websocketc                 C   s   | j r| j S td)NzWebsocket not connected)rv  r  r5  r)   r)   r*   _get_websocket6  s   z!CartesiaTTSService._get_websocketc                    s@   |   I dH  |rt|dd}|  |I dH  dS dS )z?Cancel the active Cartesia context when the bot is interrupted.NT)rZ  rt  )r  rf  rg  r  send)r/  rZ  
cancel_msgr)   r)   r*   on_audio_context_interrupted;  s   z/CartesiaTTSService.on_audio_context_interruptedc                    s   dS )zClose the Cartesia context after all audio has been played.

        No close message is needed: the server already considers the context
        done once it has sent its ``done`` message, which is handled in
        ``_process_messages``.
        Nr)   )r/  rZ  r)   r)   r*   on_audio_context_completedB  s   z-CartesiaTTSService.on_audio_context_completedc                    sR   |p|   }|r| jsdS t|  d | jdd|d}| j|I dH  dS )zFlush any pending audio and finalize the current context.

        Args:
            context_id: The specific context to flush. If None, falls back to the
                currently active context.
        Nz: flushing audiorP  F)r>  rX  rZ  )get_active_audio_context_idrv  r   tracerj  r  )r/  rZ  flush_idri  r)   r)   r*   flush_audioK  s   
zCartesiaTTSService.flush_audioc                    sh  |   2 z3 d H W }t|}|r| |d sq|d }|d dkr?|  I d H  | ddg|I d H  | |I d H  q|d dkr]| |d d |d d	 }| ||I d H  q|d d
kr{tt	
|d | jd|d}| ||I d H  q|d dkr| t|dI d H  |  I d H  | jd| dI d H  |   q| jd| dI d H  q6 d S )NrZ  typedone)r   r   )Resetr   
timestampsword_timestampsrN  rl  chunkdata   audior  num_channelsrZ  errorrZ  zError: )r  zError, unknown message type: )r  rf  loadsaudio_context_availablestop_ttfb_metricsadd_word_timestampsremove_audio_contextrW  r   base64	b64decoder  append_to_audio_context
push_framer   r  r  reset_active_audio_context)r/  messageri  ctx_idprocessed_timestampsrk  r)   r)   r*   _process_messagesY  s<   

z$CartesiaTTSService._process_messagesc                    s2   	 |   I d H  t|  d |  I d H  q)NTz> Cartesia connection was disconnected (timeout?), reconnecting)r  r   r  ru  r5  r)   r)   r*   _receive_messagesy  s   z$CartesiaTTSService._receive_messagesc              
   C  s8  | j st|  d| d nt|  d| d zd| jr&| jjtju r-|  I dH  | j	||d}z| 
 |I dH  | |I dH  W n1 tyy } z%td| dV  t|dV  |  I dH  |  I dH  W Y d}~W dS d}~ww dV  W dS  ty } ztd| dV  W Y d}~dS d}~ww )a  Generate speech from text using Cartesia's streaming API.

        Args:
            text: The text to synthesize into speech.
            context_id: The context ID for tracking audio frames.

        Yields:
            Frame: Audio frames containing the synthesized speech.
        : Generating TTS []N)r>  rZ  r}  r  r  )_is_streaming_tokensr   r  r  rv  r  r   CLOSEDrm  rj  r  r  start_tts_usage_metricsr  r   r   rr  )r/  r>  rZ  ri  r  r)   r)   r*   run_tts  s0    zCartesiaTTSService.run_tts)rP  TTrP  r   )4r"   r#   r$   r%   r   r!  r'   r	   r  r(   r   intr   r   boolr%  r6  r   r<  r@  r   rC  r&   rE  rF  rG  rM  r   tuplerW  rj  r   rl  r   rq  r
   rt  rm  rr  ru  r{  r  r  r  r  r  r  r   r   r   r  __classcell__r)   r)   r2  r*   r     s   
 	
 	
%
'
				 (r  c                       s"  e Zd ZU dZeZeed< G dd deZdddddddd	ddd

de	de
e	 de
e	 de	de	de
ej de
e de	de	de
e de
e f fddZdefddZdede
e	 fddZdef fdd Zd!d" Zdef fd#d$Zdef fd%d&Zed'e	d(e	deedf fd)d*Z  ZS )+CartesiaHttpTTSServicezCartesia HTTP-based TTS service.

    Provides text-to-speech using Cartesia's HTTP API for simpler, non-streaming
    synthesis. Suitable for use cases where streaming is not required and simpler
    integration is preferred.
    r  c                   @   r  )z"CartesiaHttpTTSService.InputParamsa  Input parameters for Cartesia HTTP TTS configuration.

        Parameters:
            language: Language to use for synthesis.
            generation_config: Generation configuration for Sonic-3 models. Includes volume,
                speed (numeric), and emotion (string) parameters.
            pronunciation_dict_id: The ID of the pronunciation dictionary to use for custom pronunciations.
        r+   Nr  r  r  r)   r)   r)   r*   r    r  r  Nzhttps://api.cartesia.aiz
2026-03-01r	  r
  )
r  r  base_urlr  aiohttp_sessionr  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  c                   s   | j ddtjddd}|dur| dd ||_|dur%| dd ||_|
durK| d |sK|
jdur9|
j|_|
jdurB|
j|_|
jdurK|
j|_|durT|	| t
 jd|dd|d	| || _|| _|| _|	| _|| _d
| _|| _|du | _dS )aZ  Initialize the Cartesia HTTP TTS service.

        Args:
            api_key: Cartesia API key for authentication.
            voice_id: ID of the voice to use for synthesis.

                .. deprecated:: 0.0.105
                    Use ``settings=CartesiaHttpTTSService.Settings(voice=...)`` instead.

            model: TTS model to use (e.g., "sonic-3").

                .. deprecated:: 0.0.105
                    Use ``settings=CartesiaHttpTTSService.Settings(model=...)`` instead.

            base_url: Base URL for Cartesia HTTP API.
            cartesia_version: API version string for Cartesia service.
            aiohttp_session: Optional aiohttp ClientSession for HTTP requests.
                If not provided, a session will be created and managed internally.
            sample_rate: Audio sample rate. If None, uses default.
            encoding: Audio encoding format.
            container: Audio container format.
            params: Additional input parameters for voice customization.

                .. deprecated:: 0.0.105
                    Use ``settings=CartesiaHttpTTSService.Settings(...)`` instead.

            settings: Runtime-updatable settings. When provided alongside deprecated
                parameters, ``settings`` values take precedence.
            **kwargs: Additional arguments passed to the parent TTSService.
        r  Nr  r  r  r  r  T)r  r  push_stop_framesr  r   r)   )r!  r   r^   r"  r  r  r+   r  r  r#  r$  r%  r(  	_base_urlr)  r+  r,  r-  _session_owns_session)r/  r  r  r  r  r  r  r  r  r  r  r  r0  r1  r2  r)   r*   r%    sP   /	




zCartesiaHttpTTSService.__init__r,   c                 C   r4  )zCheck if this service can generate processing metrics.

        Returns:
            True, as Cartesia HTTP service supports metrics generation.
        Tr)   r5  r)   r)   r*   r6     r7  z+CartesiaHttpTTSService.can_generate_metricsr+   c                 C   r8  r9  r:  r;  r)   r)   r*   r<  (  r=  z3CartesiaHttpTTSService.language_to_service_languagerk  c                    s4   t  |I dH  | j| _| jrt | _dS dS )zStart the Cartesia HTTP TTS service.

        Args:
            frame: The start frame containing initialization parameters.
        N)r$  rl  r  r-  r  aiohttpClientSessionr  rn  r2  r)   r*   rl  3  s   zCartesiaHttpTTSService.startc                    s0   | j r| jr| j I dH  d| _dS dS dS )z$Close the HTTP session if we own it.N)r  r  r  r5  r)   r)   r*   _close_session>  s
   
z%CartesiaHttpTTSService._close_sessionc                    ro  )z]Stop the Cartesia HTTP TTS service.

        Args:
            frame: The end frame.
        N)r$  rq  r  rn  r2  r)   r*   rq  D  rs  zCartesiaHttpTTSService.stopc                    ro  )zbCancel the Cartesia HTTP TTS service.

        Args:
            frame: The cancel frame.
        N)r$  rt  r  rn  r2  r)   r*   rt  M  rs  zCartesiaHttpTTSService.cancelr>  rZ  c              
   C  s  t |  d| d zzd| jjd}| j| j| jd}| jj|||d}| jjr1| jj|d< | jj	r?| jj	j
dd	|d
< | jjrI| jj|d< | j| jdd}| j d}| jj|||d4 I dH 2}|jdkr| I dH }	td|	 dV  td|j d|	 | I dH }
W d  I dH  n1 I dH sw   Y  | |I dH  t|
| jd|d}|V  W n ty } ztd| dV  W Y d}~nd}~ww W |  I dH  dS W |  I dH  dS |  I dH  w )a  Generate speech from text using Cartesia's HTTP API.

        Args:
            text: The text to synthesize into speech.
            context_id: The context ID for tracking audio frames.

        Yields:
            Frame: Audio frames containing the synthesized speech.
        r  r  r=   )r[  r=   r\  )r`  r^  r  ra  r+   Trc  r  r  zapplication/json)zCartesia-Versionz	X-API-KeyzContent-Typez
/tts/bytes)rf  headersN   zCartesia API error: r  zCartesia API returned status z: r  r  r}  )r   r  r  r  r+  r,  r-  r  r+   r  re  r  r)  r(  r  r  poststatusr>  r   r  readr  r   r  r  )r/  r>  rZ  rh  ra  payloadr  r  response
error_text
audio_datark  r  r)   r)   r*   r  V  sb   

(
"zCartesiaHttpTTSService.run_tts)r"   r#   r$   r%   r   r!  r'   r	   r  r(   r   r  r  r  r%  r  r6  r   r<  r   rl  r  r   rq  r
   rt  r   r   r   r  r  r)   r)   r2  r*   r    s\   
 	
b		(r  ):r%   r  rf  dataclassesr   r   enumr   typingr   r   r   r  logurur   pydanticr	   pipecat.frames.framesr
   r   r   r   r   r   r   pipecat.services.settingsr   r   r   pipecat.services.tts_servicer   r   r   pipecat.transcriptions.languager   r   'pipecat.utils.text.base_text_aggregatorr   'pipecat.utils.text.skip_tags_aggregatorr   (pipecat.utils.tracing.service_decoratorsr   websockets.asyncio.clientr   r  websockets.protocolr   ModuleNotFoundErrorr  r  r  r   r(   r   r   r   r  r  r)   r)   r)   r*   <module>   sF   $	
9B   [