o
    i+;                  
   @   sP  d Z ddlZddlZddlmZ ddlmZmZm	Z	 ddl
mZ ddlmZmZmZmZmZmZmZmZ ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlm Z  ddl!m"Z" zddl#m$Z% ddl&m'Z' W n  e(y Z) ze*de)  e*d e+de) dZ)[)ww eG dd deZ,G dd dZ-G dd deZ.dS )zCartesia Speech-to-Text service implementation.

This module provides a WebSocket-based STT service that integrates with
the Cartesia Live transcription API for real-time speech recognition.
    N)	dataclass)AnyAsyncGeneratorOptional)logger)CancelFrameEndFrameFrameInterimTranscriptionFrame
StartFrameTranscriptionFrameVADUserStartedSpeakingFrameVADUserStoppedSpeakingFrame)FrameDirection)STTSettings)CARTESIA_TTFS_P99)WebsocketSTTService)Language)time_now_iso8601)
traced_stt)connect)StatezException: zIIn order to use Cartesia, you need to `pip install pipecat-ai[cartesia]`.zMissing module: c                   @   s   e Zd ZdZdS )CartesiaSTTSettingsz Settings for CartesiaSTTService.N)__name__
__module____qualname____doc__ r   r   Q/home/ubuntu/.local/lib/python3.10/site-packages/pipecat/services/cartesia/stt.pyr   /   s    r   c                	   @   sj   e Zd ZdZdejjddddededed	efd
dZ	dd Z
dd ZdddZededd fddZdS )CartesiaLiveOptionszConfiguration options for Cartesia Live STT service.

    .. deprecated:: 0.0.105
        Use ``settings=CartesiaSTTService.Settings(...)`` for model/language and
        direct ``__init__`` parameters for encoding/sample_rate instead.
    ink-whisper	pcm_s16lei>  modellanguageencodingsample_rater#   r$   r%   r&   c                K   s"   || _ || _|| _|| _|| _dS )a  Initialize CartesiaLiveOptions with default or provided parameters.

        Args:
            model: The transcription model to use. Defaults to "ink-whisper".
            language: Target language for transcription. Defaults to English.
            encoding: Audio encoding format. Defaults to "pcm_s16le".
            sample_rate: Audio sample rate in Hz. Defaults to 16000.
            **kwargs: Additional parameters for the transcription service.
        N)r#   r$   r%   r&   additional_params)selfr#   r$   r%   r&   kwargsr   r   r   __init__>   s
   
zCartesiaLiveOptions.__init__c                 C   s2   | j t| jtr| jn| jj| jt| jd}|S )zConvert options to dictionary format.

        Returns:
            Dictionary containing all configuration parameters.
        r"   )r#   
isinstancer$   strvaluer%   r&   )r(   paramsr   r   r   to_dictV   s   zCartesiaLiveOptions.to_dictc                 C   s   |    S )zGet configuration items as key-value pairs.

        Returns:
            Iterator of (key, value) tuples for all configuration parameters.
        )r/   itemsr(   r   r   r   r0   e   s   zCartesiaLiveOptions.itemsNc                 C   s"   t | |r
t| |S | j||S )zGet a configuration value by key.

        Args:
            key: The configuration parameter name to retrieve.
            default: Default value if key is not found.

        Returns:
            The configuration value or default if not found.
        )hasattrgetattrr'   get)r(   keydefaultr   r   r   r4   m   s   


zCartesiaLiveOptions.getjson_strreturnc                 C   s   | di t |S )zCreate options from JSON string.

        Args:
            json_str: JSON string containing configuration parameters.

        Returns:
            New CartesiaLiveOptions instance with parsed parameters.
        Nr   )jsonloads)clsr7   r   r   r   	from_json{   s   
zCartesiaLiveOptions.from_jsonN)r   r   r   r   r   ENr-   r,   intr*   r/   r0   r4   classmethodr<   r   r   r   r   r   6   s(    


r   c                       sp  e Zd ZU dZeZeed< dddddeddeded	ed
e	e
 de	e de	e de	e f fddZdefddZdef fddZdef fddZdef fddZdd Zdedef fddZdedeedf fd d!Z fd"d#Z fd$d%Zd&ede ee!f f fd'd(Z"d)d* Z#d+d, Z$d-d. Z%d/d0 Z&d1d2 Z'e(	d:d3ed4ed5e	e) fd6d7Z*d8d9 Z+  Z,S );CartesiaSTTServicea+  Speech-to-text service using Cartesia Live API.

    Provides real-time speech transcription through WebSocket connection
    to Cartesia's Live transcription service. Supports both interim and
    final transcriptions with configurable models and languages.

    Cartesia disconnects WebSocket connections after 3 minutes of inactivity.
    The timeout resets with each message (audio data or text command) sent to
    the server. Silence-based keepalive is enabled by default to prevent this.
    See: https://docs.cartesia.ai/api-reference/stt/stt
    	_settings r!   N)base_urlr%   r&   live_optionssettingsttfs_p99_latencyapi_keyrD   r%   r&   rE   rF   rG   c                   s   | j dtjjd}	|dur<| d |s<|jr|du r|j}|jr$|j}|jr+|j|	_|jr<|j}
t	|
tr9|
jn|
|	_|durE|	
| t jd	||dd|	d| || _|pZd| _d| _|| _dS )
a5  Initialize CartesiaSTTService with API key and options.

        Args:
            api_key: Authentication key for Cartesia API.
            base_url: Custom API endpoint URL. If empty, uses default.
            encoding: Audio encoding format. Defaults to "pcm_s16le".
            sample_rate: Audio sample rate in Hz. If None, uses the pipeline
                sample rate.
            live_options: Configuration options for transcription service.

                .. deprecated:: 0.0.105
                    Use ``settings=CartesiaSTTService.Settings(...)`` for model/language
                    and direct init parameters for encoding/sample_rate instead.

            settings: Runtime-updatable settings. When provided alongside deprecated
                parameters, ``settings`` values take precedence.
            ttfs_p99_latency: P99 latency from speech end to final transcript in seconds.
                Override for your deployment. See https://github.com/pipecat-ai/stt-benchmark
            **kwargs: Additional arguments passed to parent STTService.
        r    )r#   r$   NrE   x      )r&   rG   keepalive_timeoutkeepalive_intervalrF   zapi.cartesia.air   )Settingsr   r>   r-   "_warn_init_param_moved_to_settingsr&   r%   r#   r$   r+   apply_updatesuperr*   _api_key	_base_url_receive_task	_encoding)r(   rH   rD   r%   r&   rE   rF   rG   r)   default_settingslang	__class__r   r   r*      s>   !

	

zCartesiaSTTService.__init__r8   c                 C   s   dS )zCheck if the service can generate processing metrics.

        Returns:
            True, indicating metrics are supported.
        Tr   r1   r   r   r   can_generate_metrics   s   z'CartesiaSTTService.can_generate_metricsframec                    &   t  |I dH  |  I dH  dS )zStart the STT service and establish connection.

        Args:
            frame: Frame indicating service should start.
        N)rP   start_connectr(   rZ   rW   r   r   r\         zCartesiaSTTService.startc                    r[   )z{Stop the STT service and close connection.

        Args:
            frame: Frame indicating service should stop.
        N)rP   stop_disconnectr^   rW   r   r   r`      r_   zCartesiaSTTService.stopc                    r[   )zCancel the STT service and close connection.

        Args:
            frame: Frame indicating service should be cancelled.
        N)rP   cancelra   r^   rW   r   r   rb      r_   zCartesiaSTTService.cancelc                    s   |   I dH  dS )zBStart performance metrics collection for transcription processing.N)start_processing_metricsr1   r   r   r   _start_metrics  s   z!CartesiaSTTService._start_metrics	directionc                    sr   t  ||I dH  t|tr|  I dH  dS t|tr3| jr5| jjtj	u r7| j
dI dH  dS dS dS dS )zProcess incoming frames and handle speech events.

        Args:
            frame: The frame to process.
            direction: Direction of frame flow in the pipeline.
        Nfinalize)rP   process_framer+   r   rd   r   
_websocketstater   OPENsend)r(   rZ   re   rW   r   r   rg     s   

z CartesiaSTTService.process_frameaudioc                 C  s@   | j r| j jtju r|  I dH  | j |I dH  dV  dS )zProcess audio data for speech-to-text transcription.

        Args:
            audio: Raw audio bytes to transcribe.

        Yields:
            None - transcription results are handled via WebSocket responses.
        N)rh   ri   r   CLOSEDr]   rk   )r(   rl   r   r   r   run_stt  s
   

zCartesiaSTTService.run_sttc                    sL   |   I d H  t  I d H  | jr"| js$| | | j| _d S d S d S r=   )_connect_websocketrP   r]   rh   rS   create_task_receive_task_handler_report_errorr1   rW   r   r   r]   '  s   zCartesiaSTTService._connectc                    sB   t   I d H  | jr| | jI d H  d | _|  I d H  d S r=   )rP   ra   rS   cancel_task_disconnect_websocketr1   rW   r   r   ra   /  s   zCartesiaSTTService._disconnectdeltac                    s"   t  |I dH }| | |S )zApply a settings delta.

        Args:
            delta: A :class:`STTSettings` (or ``CartesiaSTTService.Settings``) delta.

        Returns:
            Dict mapping changed field names to their previous values.
        N)rP   _update_settings _warn_unhandled_updated_settings)r(   ru   changedrW   r   r   rv   8  s   	
z#CartesiaSTTService._update_settingsc              
      s   zI| j r| j jtju rW d S td | jj| jj| j	t
| jd}d| j dtj| }d| jd}t||dI d H | _ | dI d H  W d S  tyj } z| jd	| |d
I d H  W Y d }~d S d }~ww )NzConnecting to Cartesia STTr"   zwss://z/stt/websocket?z
2025-04-16)zCartesia-Versionz	X-API-Key)additional_headerson_connectedzUnknown error occurred: 	error_msg	exception)rh   ri   r   rj   r   debugrB   r#   r$   rT   r,   r&   rR   urllibparse	urlencoderQ   websocket_connect_call_event_handler	Exception
push_error)r(   r.   ws_urlheaderser   r   r   ro   M  s$   
(z%CartesiaSTTService._connect_websocketc              
      s   zUz| j r| j jtju rtd | j  I d H  W n ty: } z| jd| |dI d H  W Y d }~nd }~ww W d | _ | 	dI d H  d S W d | _ | 	dI d H  d S d | _ | 	dI d H  w )NzDisconnecting from Cartesia STTzError closing websocket: r{   on_disconnected)
rh   ri   r   rj   r   r~   closer   r   r   )r(   r   r   r   r   rt   a  s$   
&z(CartesiaSTTService._disconnect_websocketc                 C   s   | j r| j S td)NzWebsocket not connected)rh   r   r1   r   r   r   _get_websocketl  s   z!CartesiaSTTService._get_websocketc                    s   |   2 z@3 dH W }zt|}| |I dH  W q tjy,   td|  Y q tyE } ztd|  W Y d}~qd}~ww 6 dS )z$Process incoming WebSocket messages.NzReceived non-JSON message: zError processing message: )	r   r9   r:   _process_responseJSONDecodeErrorr   warningr   error)r(   messagedatar   r   r   r   _receive_messagesq  s   
z$CartesiaSTTService._receive_messagesc                    s`   d|v r,|d dkr|  |I d H  d S |d dkr.|dd}| j|dI d H  d S d S d S )Ntype
transcriptr   r   zUnknown error)r|   )_on_transcriptr4   r   )r(   r   r|   r   r   r   r   |  s   z$CartesiaSTTService._process_responser   is_finalr$   c                    s   dS )z+Handle a transcription result with tracing.Nr   )r(   r   r   r$   r   r   r   _handle_transcription  s   z(CartesiaSTTService._handle_transcriptionc              	      s   d|vrd S | dd}| dd}d }d|v r-zt|d }W n ttfy,   Y nw t|dkrl|rY| t|| jt ||dI d H  | 	|||I d H  | 
 I d H  d S | t|| jt ||dI d H  d S d S )NtextrC   r   Fr$   r   )result)r4   r   
ValueErrorKeyErrorlen
push_framer   _user_idr   r   stop_processing_metricsr
   )r(   r   r   r   r$   r   r   r   r     sF   
	z!CartesiaSTTService._on_transcriptr=   )-r   r   r   r   r   rM   __annotations__r   r,   r   r?   r   floatr*   boolrY   r   r\   r   r`   r   rb   rd   r	   r   rg   bytesr   rn   r]   ra   r   dictr   rv   ro   rt   r   r   r   r   r   r   r   __classcell__r   r   rW   r   rA      sd   
 	H					rA   )/r   r9   urllib.parser   dataclassesr   typingr   r   r   logurur   pipecat.frames.framesr   r   r	   r
   r   r   r   r   "pipecat.processors.frame_processorr   pipecat.services.settingsr   pipecat.services.stt_latencyr   pipecat.services.stt_servicer   pipecat.transcriptions.languager   pipecat.utils.timer   (pipecat.utils.tracing.service_decoratorsr   websockets.asyncio.clientr   r   websockets.protocolr   ModuleNotFoundErrorr   r   r   r   r   rA   r   r   r   r   <module>   s6   (

R