o
    iF                  
   @   s"  d Z ddlZddlmZ ddlmZmZmZ ddlZddl	m
Z
 ddlmZmZmZmZmZmZmZ ddlmZ ddlmZmZ dd	lmZ zdd
lmZ ddlmZ W n  eyt Z  ze
!de   e
!d e"de  dZ [ ww eG dd deZ#G dd deZ$G dd deZ%dS )zDeepgram text-to-speech service implementation.

This module provides integration with Deepgram's text-to-speech API
for generating speech from text using various voice models.
    N)	dataclass)AnyAsyncGeneratorOptional)logger)CancelFrameEndFrame
ErrorFrameFrame
StartFrameTTSAudioRawFrameTTSStoppedFrame)TTSSettings)
TTSServiceWebsocketTTSService)
traced_tts)connect)StatezException: z\In order to use DeepgramWebsocketTTSService, you need to `pip install pipecat-ai[deepgram]`.zMissing module: c                   @   s   e Zd ZdZdS )DeepgramTTSSettingsz;Settings for DeepgramTTSService and DeepgramHttpTTSService.N)__name__
__module____qualname____doc__ r   r   Q/home/ubuntu/.local/lib/python3.10/site-packages/pipecat/services/deepgram/tts.pyr   ,   s    r   c                       sD  e Zd ZU dZeZeed< dZddddddded	e	e d
ede	e
 dede	e f fddZdefddZdef fddZdef fddZdef fddZ fddZ fddZdedeeef f fdd Zd!d" Zd#d$ Zd%d& Zd'efd(d)Zd*d+ Zd1d'e	e fd,d-Ze d.ed'ede!e"df fd/d0Z#  Z$S )2DeepgramTTSServicea  Deepgram WebSocket-based text-to-speech service.

    Provides real-time text-to-speech synthesis using Deepgram's WebSocket API.
    Supports streaming audio generation with interruption handling via the Clear
    message for conversational AI use cases.
    	_settings)linear16mulawalawNzwss://api.deepgram.comr   voicebase_urlsample_rateencodingsettingsapi_keyr!   r"   r#   r$   r%   c          	   	      s   |  | jvrtd| dd| j d| jdddd}|dur.| dd ||_||_|dur7|| t	 j
d|d	d
d	d	|d| || _|| _|| _d| _dS )a  Initialize the Deepgram WebSocket TTS service.

        Args:
            api_key: Deepgram API key for authentication.
            voice: Voice model to use for synthesis.

                .. deprecated:: 0.0.105
                    Use ``settings=DeepgramTTSService.Settings(voice=...)`` instead.

            base_url: WebSocket base URL for Deepgram API. Defaults to "wss://api.deepgram.com".
            sample_rate: Audio sample rate in Hz. If None, uses service default.
            encoding: Audio encoding format. Defaults to "linear16". Must be one of SUPPORTED_ENCODINGS.
            settings: Runtime-updatable settings. When provided alongside deprecated
                parameters, ``settings`` values take precedence.
            **kwargs: Additional arguments passed to parent InterruptibleTTSService class.

        Raises:
            ValueError: If encoding is not in SUPPORTED_ENCODINGS.
        zUnsupported encoding 'z'. Must be one of z, z for WebSocket TTS.Naura-2-helena-enmodelr!   languager!   TF)r#   pause_frame_processingpush_stop_framespush_start_frameappend_trailing_spacer%   r   )lowerSUPPORTED_ENCODINGS
ValueErrorjoinSettings"_warn_init_param_moved_to_settingsr)   r!   apply_updatesuper__init___api_key	_base_url	_encoding_receive_task)	selfr&   r!   r"   r#   r$   r%   kwargsdefault_settings	__class__r   r   r7   @   s:   


zDeepgramTTSService.__init__returnc                 C      dS )zCheck if the service can generate metrics.

        Returns:
            True, as Deepgram WebSocket TTS service supports metrics generation.
        Tr   r<   r   r   r   can_generate_metrics      z'DeepgramTTSService.can_generate_metricsframec                    &   t  |I dH  |  I dH  dS )zStart the Deepgram WebSocket TTS service.

        Args:
            frame: The start frame containing initialization parameters.
        N)r6   start_connectr<   rF   r?   r   r   rH         zDeepgramTTSService.startc                    rG   )zbStop the Deepgram WebSocket TTS service.

        Args:
            frame: The end frame.
        N)r6   stop_disconnectrJ   r?   r   r   rL      rK   zDeepgramTTSService.stopc                    rG   )zgCancel the Deepgram WebSocket TTS service.

        Args:
            frame: The cancel frame.
        N)r6   cancelrM   rJ   r?   r   r   rN      rK   zDeepgramTTSService.cancelc                    sL   t   I dH  |  I dH  | jr"| js$| | | j| _dS dS dS )z5Connect to Deepgram WebSocket and start receive task.N)r6   rI   _connect_websocket
_websocketr;   create_task_receive_task_handler_report_errorrC   r?   r   r   rI      s   zDeepgramTTSService._connectc                    sB   t   I dH  | jr| | jI dH  d| _|  I dH  dS )z6Disconnect from Deepgram WebSocket and clean up tasks.N)r6   rM   r;   cancel_task_disconnect_websocketrC   r?   r   r   rM      s   zDeepgramTTSService._disconnectdeltac                    sT   t  |I dH }d|v r| jj| j_|   |r(|  I dH  |  I dH  |S )zApply a settings delta.

        Args:
            delta: A :class:`TTSSettings` (or ``DeepgramTTSService.Settings``) delta.

        Returns:
            Dict mapping changed field names to their previous values.
        Nr!   )r6   _update_settingsr   r!   r)   _sync_model_name_to_metricsrM   rI   )r<   rV   changedr?   r   r   rW      s   	z#DeepgramTTSService._update_settingsc              
      sV  zo| j r| j jtju rW dS td g }|d| jj  |d| j	  |d| j
  | j dd| }dd	| j i}t||d
I dH | _ dd | j jj D }t|  d| d | dI dH  W dS  ty } z.t|  d|  | t|  d| dI dH  d| _ | d| I dH  W Y d}~dS d}~ww )z;Connect to Deepgram WebSocket API with configured settings.Nz Connecting to Deepgram WebSocketzmodel=z	encoding=zsample_rate=z
/v1/speak?&AuthorizationToken )additional_headersc                 S   s    i | ]\}}| d r||qS )zdg-)
startswith).0kvr   r   r   
<dictcomp>   s
    z9DeepgramTTSService._connect_websocket.<locals>.<dictcomp>z0: Websocket connection initialized: {"headers": }on_connected exception:  error: erroron_connection_error)rP   stater   OPENr   debugappendr   r!   r:   r#   r9   r2   r8   websocket_connectresponseheadersitems_call_event_handler	Exceptionrh   
push_errorr	   )r<   paramsurlrp   er   r   r   rO      s0   
 "z%DeepgramTTSService._connect_websocketc              
      s   zpz'|   I dH  | jr(td | jtddiI dH  | j I dH  W n, tyU } z t	|  d|  | 
t|  d| dI dH  W Y d}~nd}~ww W d| _| dI dH  dS W d| _| dI dH  dS d| _| dI dH  w )	z+Close WebSocket connection and reset state.Nz%Disconnecting from Deepgram WebSockettypeClosere   rf   rg   on_disconnected)stop_all_metricsrP   r   rl   sendjsondumpsclosers   rh   rt   r	   rr   )r<   rw   r   r   r   rU      s*   
,z(DeepgramTTSService._disconnect_websocketc                 C   s   | j r| j S td)z3Get active websocket connection or raise exception.zWebsocket not connected)rP   rs   rC   r   r   r   _get_websocket  s   z!DeepgramTTSService._get_websocket
context_idc              
      sx   |   I dH  | jr:z| jtddiI dH  W dS  ty9 } zt|  d|  W Y d}~dS d}~ww dS )a9  Send Clear message to Deepgram when an audio context is interrupted.

        The Clear message will clear Deepgram's internal text buffer and stop
        sending audio, allowing for a new response to be generated.

        Args:
            context_id: The ID of the audio context that was interrupted.
        Nrx   Clearz error sending Clear message: )r{   rP   r|   r}   r~   rs   r   rh   )r<   r   rw   r   r   r   on_audio_context_interrupted	  s   	""z/DeepgramTTSService.on_audio_context_interruptedc              	      sR  |   2 z3 dH W }t|tr'|  }t|| jd|d}| ||I dH  qt|trzft	|}|
d}|dkrDtd|  nM|dkritd|  |  }| |t|dI dH  | |I dH  n(|d	krvtd
|  n|dkrt|  d|
dd  ntd|  W q tjy   td|  Y qw q6 dS )z5Receive and process messages from Deepgram WebSocket.N   )r   rx   MetadatazReceived metadata: FlushedzReceived Flushed: ClearedzReceived Cleared: Warningz
 warning: descriptionzUnknown warningzReceived unknown message type: zInvalid JSON message: )r   
isinstancebytesget_active_audio_context_idr   r#   append_to_audio_contextstrr}   loadsgetr   tracer   remove_audio_contextwarningrl   JSONDecodeErrorrh   )r<   messagectx_idrF   msgmsg_typer   r   r   _receive_messages  s@   





z$DeepgramTTSService._receive_messagesc              
      sn   | j r5zddi}| j t|I dH  W dS  ty4 } zt|  d|  W Y d}~dS d}~ww dS )zFlush any pending audio synthesis by sending Flush command.

        This should be called when the LLM finishes a complete response to force
        generation of audio from Deepgram's internal text buffer.
        rx   FlushNz error sending Flush message: )rP   r|   r}   r~   rs   r   rh   )r<   r   	flush_msgrw   r   r   r   flush_audio=  s   "zDeepgramTTSService.flush_audiotextc              
   C  s   t |  d| d z)| jr| jjtju r|  I dH  d|d}|  t	
|I dH  dV  W dS  tyQ } ztd| dV  W Y d}~dS d}~ww )a9  Generate speech from text using Deepgram's WebSocket TTS API.

        Args:
            text: The text to synthesize into speech.
            context_id: The context ID for tracking audio frames.

        Yields:
            Frame: Audio frames containing the synthesized speech, plus start/stop frames.
        : Generating TTS []NSpeak)rx   r   zUnknown error occurred: rg   )r   rl   rP   rj   r   CLOSEDrI   r   r|   r}   r~   rs   r	   )r<   r   r   	speak_msgrw   r   r   r   run_ttsJ  s   
 zDeepgramTTSService.run_tts)N)%r   r   r   r   r   r3   __annotations__r0   r   r   intr7   boolrD   r   rH   r   rL   r   rN   rI   rM   r   dictr   rW   rO   rU   r   r   r   r   r   r   r
   r   __classcell__r   r   r?   r   r   3   sL   
 F				
 $(r   c                       s   e Zd ZU dZeZeed< dddddddedee d	e	j
d
edee dedee f fddZdefddZedededeedf fddZ  ZS )DeepgramHttpTTSServicezDeepgram HTTP text-to-speech service.

    Provides text-to-speech synthesis using Deepgram's HTTP TTS API.
    Supports various voice models and audio encoding formats with
    configurable sample rates and quality settings.
    r   Nzhttps://api.deepgram.comr   r    r&   r!   aiohttp_sessionr"   r#   r$   r%   c          
         sz   | j dddd}	|dur| dd ||	_||	_|dur!|	| t jd|dd|	d| || _|| _|| _	|| _
dS )ag  Initialize the Deepgram TTS service.

        Args:
            api_key: Deepgram API key for authentication.
            voice: Voice model to use for synthesis.

                .. deprecated:: 0.0.105
                    Use ``settings=DeepgramHttpTTSService.Settings(voice=...)`` instead.

            aiohttp_session: Shared aiohttp session for HTTP requests with connection pooling.
            base_url: Custom base URL for Deepgram API. Defaults to "https://api.deepgram.com".
            sample_rate: Audio sample rate in Hz. If None, uses service default.
            encoding: Audio encoding format. Defaults to "linear16".
            settings: Runtime-updatable settings. When provided alongside deprecated
                parameters, ``settings`` values take precedence.
            **kwargs: Additional arguments passed to parent TTSService class.
        Nr'   r(   r!   T)r#   r-   r,   r%   r   )r3   r4   r)   r!   r5   r6   r7   r8   _sessionr9   r:   )
r<   r&   r!   r   r"   r#   r$   r%   r=   r>   r?   r   r   r7   t  s.   

zDeepgramHttpTTSService.__init__rA   c                 C   rB   )zCheck if the service can generate metrics.

        Returns:
            True, as Deepgram TTS service supports metrics generation.
        Tr   rC   r   r   r   rD     rE   z+DeepgramHttpTTSService.can_generate_metricsr   r   c              
   C  s  t |  d| d | j d}d| j dd}| jj| j| jdd}d	|i}zy|  I d
H  | j	j
||||d4 I d
H V}|jdkrX| I d
H }td|j d| | |I d
H  | j}	d}
|j|	2 z3 d
H W }|
r||  I d
H  d}
|rt|| jd|dV  qk6 W d
  I d
H  W d
S 1 I d
H sw   Y  W d
S  ty } ztdt| V  W Y d
}~d
S d
}~ww )a/  Generate speech from text using Deepgram's TTS API.

        Args:
            text: The text to synthesize into speech.
            context_id: The context ID for tracking audio frames.

        Yields:
            Frame: Audio frames containing the synthesized speech, plus start/stop frames.
        r   r   z	/v1/speakr\   zapplication/json)r[   zContent-Typenone)r)   r$   r#   	containerr   N)rp   r}   ru      zHTTP z: TFr   )audior#   num_channelsr   zError getting audio: )r   rl   r9   r8   r   r!   r:   r#   start_ttfb_metricsr   poststatusr   rs   start_tts_usage_metrics
chunk_sizecontentiter_chunkedstop_ttfb_metricsr   r	   r   )r<   r   r   rv   rp   ru   payloadro   
error_text
CHUNK_SIZEfirst_chunkchunkrw   r   r   r   r     sR   
2"zDeepgramHttpTTSService.run_tts)r   r   r   r   r   r3   r   r   r   aiohttpClientSessionr   r7   r   rD   r   r   r
   r   r   r   r   r?   r   r   i  s6   
 	=(r   )&r   r}   dataclassesr   typingr   r   r   r   logurur   pipecat.frames.framesr   r   r	   r
   r   r   r   pipecat.services.settingsr   pipecat.services.tts_servicer   r   (pipecat.utils.tracing.service_decoratorsr   websockets.asyncio.clientr   rn   websockets.protocolr   ModuleNotFoundErrorrw   rh   rs   r   r   r   r   r   r   r   <module>   s6   $	  8