o
    i+q                  
   @   s  d Z ddlZddlZddlZddlZddlmZmZ ddlm	Z	m
Z
mZmZ ddlZddlmZ ddlmZ ddlmZmZmZmZmZmZmZmZmZ ddlmZmZm Z m!Z!m"Z" dd	l#m$Z$m%Z%m&Z& dd
l'm(Z( ddl)m*Z* ddl+m,Z,m-Z- ddl.m/Z/ ddl0m1Z1 zddl2Z2ddl3m4Z5 ddl6m7Z7 W n  e8y Z9 ze:de9  e:d e;de9 dZ9[9ww de,dee< fddZ=G dd dZ>eG dd de%Z?G dd de*Z@dS )zGladia Speech-to-Text (STT) service implementation.

This module provides a Speech-to-Text service using Gladia's real-time WebSocket API,
supporting multiple languages, custom vocabulary, and various audio processing options.
    N)	dataclassfield)AnyAsyncGeneratorLiteralOptional)logger)version)	CancelFrameEndFrameFrameInterimTranscriptionFrame
StartFrameTranscriptionFrameTranslationFrameUserStartedSpeakingFrameUserStoppedSpeakingFrame)GladiaInputParamsLanguageConfigMessagesConfigPreProcessingConfigRealtimeProcessingConfig)	NOT_GIVENSTTSettings	_NotGiven)GLADIA_TTFS_P99)WebsocketSTTService)Languageresolve_language)time_now_iso8601)
traced_stt)connect)StatezException: zEIn order to use Gladia, you need to `pip install pipecat-ai[gladia]`.zMissing module: languagereturnc                 C   s"  i t jdt jdt jdt jdt jdt jdt jdt jdt j	d	t j
d
t jdt jdt jdt jdt jdt jdt jdi t jdt jdt jdt jdt jdt jdt jdt jdt jdt jdt jdt jdt jdt jdt j d t j!d!t j"d"i t j#d#t j$d$t j%d%t j&d&t j'd't j(d(t j)d)t j*d*t j+d+t j,d,t j-d-t j.d.t j/d/t j0d0t j1d1t j2d2t j3d3i t j4d4t j5d5t j6d6t j7d7t j8d8t j9d9t j:d:t j;d;t j<d<t j=d=t j>d>t j?d?t j@d@t jAdAt jBdBt jCdCt jDdDi t jEdEt jFdFt jGdGt jHdHt jIdIt jJdJt jKdKt jLdLt jMdMt jNdNt jOdOt jPdPt jQdQt jRdRt jSdSt jTdTt jUdUt jVdVt jWdWt jXdXt jYdYt jZdZt j[d[t j\d\t j]d]t j^d^t j_d_t j`d`t jadat jbdbt jcdci}td| |dddeS )fzConvert a Language enum to Gladia's language code format.

    Args:
        language: The Language enum value to convert.

    Returns:
        The Gladia language code string or None if not supported.
    afamarasazbabebgbnbobrbscacscydadeeleneseteufafifofrglguhahawhehihrhthuhyidisitjajvkakkkmknkolalblnloltlvmgmimkmlmnmrmsmtmymrnenlnnnoocpaplpsptrorusasdsiskslsnsosqsrsusvswtatetgthtktltrttukuruzviyiyozhT)use_base_code)er   AFAMARASAZBABEBGBNBOBRBSCACSCYDADEELENESETEUFAFIFOFRGLGUHAHAWHEHIHRHTHUHYIDISITJAJVKAKKKMKNKOLALBLNLOLTLVMGMIMKMLMNMRMSMTMY_MRNENLNNNOOCPAPLPSPTRORUSASDSISKSLSNSOSQSRSUSVSWTATETGTHTKTLTRTTUKURUZVIYIYOZHr   )r#   LANGUAGE_MAP r   O/home/ubuntu/.local/lib/python3.10/site-packages/pipecat/services/gladia/stt.pylanguage_to_gladia_language;   sv  		
 !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVfr   c                   @   s   e Zd ZdZdddZdS )_InputParamsDescriptorz?Descriptor for backward compatibility with deprecation warning.Nc                 C   sH   t   t d t jdtdd W d    tS 1 sw   Y  tS )NalwayszGladiaSTTService.InputParams is deprecated and will be removed in a future version. Import and use GladiaInputParams directly instead.   
stacklevel)warningscatch_warningssimplefilterwarnDeprecationWarningr   )selfobjobjtyper   r   r   __get__   s   


z_InputParamsDescriptor.__get__N)__name__
__module____qualname____doc__r   r   r   r   r   r      s    r   c                   @   s
  e Zd ZU dZedd dZedB eB ed< edd dZ	e
eef dB eB ed< ed	d dZedB eB ed
< edd dZedB eB ed< edd dZedB eB ed< edd dZedB eB ed< edd dZedB eB ed< edd dZedB eB ed< dS )GladiaSTTSettingsa[  Settings for GladiaSTTService.

    Parameters:
        language_config: Language detection and handling configuration.
        custom_metadata: Additional metadata to include with requests.
        endpointing: Silence duration in seconds to mark end of speech.
        maximum_duration_without_endpointing: Maximum utterance duration without silence.
        pre_processing: Audio pre-processing options.
        realtime_processing: Real-time processing features.
        messages_config: WebSocket message filtering options.
        enable_vad: Enable VAD to trigger end of utterance detection.
    c                   C      t S r   r   r   r   r   r   <lambda>       zGladiaSTTSettings.<lambda>)default_factoryNlanguage_configc                   C   r  r   r  r   r   r   r   r     r  custom_metadatac                   C   r  r   r  r   r   r   r   r     r  endpointingc                   C   r  r   r  r   r   r   r   r     r  $maximum_duration_without_endpointingc                   C   r  r   r  r   r   r   r   r     r  pre_processingc                   C   r  r   r  r   r   r   r   r     r  realtime_processingc                   C   r  r   r  r   r   r   r   r     r  messages_configc                   C   r  r   r  r   r   r   r   r     r  
enable_vad)r   r   r  r  r   r	  r   r   __annotations__r
  dictstrr   r  floatr  intr  r   r  r   r  r   r  boolr   r   r   r   r     s    
 &"r  c                       s  e Zd ZU dZeZeed< e Zddddddddddd	de	d
de
ded dB de
de
dededee dee dee
 dee dededee dee f fddZdd Zdefdd Zd!edee
 fd"d#Zdee
ef fd$d%Zd&ef fd'd(Zd)edee
ef f fd*d+Zd&ef fd,d-Zd&ef fd.d/Zd0e de!e"df fd1d2Z# fd3d4Z$ fd5d6Z%d7d8 Z&d9d: Z'dee
ef fd;d<Z(e)	dRd=e
d>ed!ee
 fd?d@Z*dAdB Z+dCdD Z,d0e fdEdFZ-dGdH Z.dIdJ Z/dKdL Z0dMdN Z1dOe fdPdQZ2  Z3S )SGladiaSTTServicea  Speech-to-Text service using Gladia's API.

    This service connects to Gladia's WebSocket API for real-time transcription
    with support for multiple languages, custom vocabulary, and various processing options.
    Provides automatic reconnection, audio buffering, and comprehensive error handling.

    For complete API documentation, see: https://docs.gladia.io/api-reference/v2/live/init

    .. deprecated:: 0.0.62
        Use :class:`~pipecat.services.gladia.config.GladiaInputParams` directly instead.
    	_settingsNzhttps://api.gladia.io/v2/livewav/pcm      i  @T)regionurlencoding	bit_depthchannels
confidencesample_ratemodelparamsmax_buffer_sizeshould_interruptsettingsttfs_p99_latencyapi_keyr  )zus-westzeu-westr  r  r  r   r!  r"  r#  r$  r%  r&  r'  r(  c                   s  |r#t   t d t jdtdd W d   n1 sw   Y  | jddddddddddd	
}|	dur?| d
d
 |	|_|
dur| d |
jdurnt   t d t jdtdd W d   n1 siw   Y  |s|
j	durx|
j	}|
j
dur|
j
}|
jdur|
j}|
j|_|
j|_|
j|_|
j|_|
j|_|
j|_|
j|_|
jr|
j|_n|
jr| |
j}|rt|gdd|_|dur|| t jd||dd|d| || _|| _|| _d| _|| _|| _|| _d| _ d| _!d| _"t# | _$d| _%|| _&t'( | _)d| _*|| _+dS )a  Initialize the Gladia STT service.

        Args:
            api_key: Gladia API key for authentication.
            region: Region used to process audio. eu-west or us-west. Defaults to eu-west.
            url: Gladia API URL. Defaults to "https://api.gladia.io/v2/live".
            encoding: Audio encoding format. Defaults to ``"wav/pcm"``.
            bit_depth: Audio bit depth. Defaults to 16.
            channels: Number of audio channels. Defaults to 1.
            confidence: Minimum confidence threshold for transcriptions (0.0-1.0).

                .. deprecated:: 0.0.86
                    The 'confidence' parameter is deprecated and will be removed in a future version.
                    No confidence threshold is applied.

            sample_rate: Audio sample rate in Hz. If None, uses service default.
            model: Model to use for transcription.

                .. deprecated:: 0.0.105
                    Use ``settings=GladiaSTTService.Settings(model=...)`` instead.

            params: Additional configuration parameters for Gladia service.

                .. deprecated:: 0.0.105
                    Use ``settings=GladiaSTTService.Settings(...)`` for runtime-updatable
                    fields and direct init parameters for encoding/bit_depth/channels.

            max_buffer_size: Maximum size of audio buffer in bytes. Defaults to 20MB.
            should_interrupt: Determine whether the bot should be interrupted when
                Gladia VAD detects user speech. Defaults to True.
            settings: Runtime-updatable settings. When provided alongside deprecated
                parameters, ``settings`` values take precedence.
            ttfs_p99_latency: P99 latency from speech end to final transcript in seconds.
                Override for your deployment. See https://github.com/pipecat-ai/stt-benchmark
            **kwargs: Additional arguments passed to the STTService parent class.
        r   zuThe 'confidence' parameter is deprecated and will be removed in a future version. No confidence threshold is applied.r   r   Nz	solaria-1   F)
r#  r#   r	  r
  r  r  r  r  r  r  r#  r$  znThe 'language' parameter is deprecated and will be removed in a future version. Use 'language_config' instead.)	languagescode_switching   )r"  r(  keepalive_timeoutkeepalive_intervalr'  r   r   ),r   r   r   r   r   Settings"_warn_init_param_moved_to_settingsr#  r#   r  r  r   r
  r  r  r  r  r  r  r	  language_to_service_languager   apply_updatesuper__init___api_key_region_url_receive_task	_encoding
_bit_depth	_channels_session_url_session_id_connection_active	bytearray_audio_buffer_bytes_sent_max_buffer_sizeasyncioLock_buffer_lock_is_speaking_should_interrupt)r   r)  r  r  r  r  r   r!  r"  r#  r$  r%  r&  r'  r(  kwargsdefault_settingslanguage_code	__class__r   r   r5     s   7











	

zGladiaSTTService.__init__c                 C   s   | j  d| j dS )Nz [])namer>  r   r   r   r   __str__  s   zGladiaSTTService.__str__r$   c                 C   s   dS )zCheck if the service can generate performance metrics.

        Returns:
            True, indicating this service supports metrics generation.
        Tr   rP  r   r   r   can_generate_metrics  s   z%GladiaSTTService.can_generate_metricsr#   c                 C   s   t |S )zConvert pipecat Language enum to Gladia's language code.

        Args:
            language: The Language enum value to convert.

        Returns:
            The Gladia language code string or None if not supported.
        )r   )r   r#   r   r   r   r2    s   	z-GladiaSTTService.language_to_service_languagec                 C   s   | j }| jpd| jpd| j| jpd|jd}t|jpi |d< t |d d< |j	d ur0|j	|d< |j
d ur:|j
|d< |jrF|jjd	d
|d< |jrR|jjd	d
|d< |jr^|jjd	d
|d< |jrj|jjd	d
|d< |S )Nr  r  r  )r  r  r"  r   r#  r
  pipecatr  r  T)exclude_noner	  r  r  r  )r  r:  r;  r"  r<  r#  r  r
  pipecat_versionr  r  r	  
model_dumpr  r  r  )r   sr'  r   r   r   _prepare_settings  s.   	


z"GladiaSTTService._prepare_settingsframec                    &   t  |I dH  |  I dH  dS )zStart the Gladia STT websocket connection.

        Args:
            frame: The start frame triggering service startup.
        N)r4  start_connectr   rY  rL  r   r   r[       zGladiaSTTService.startdeltac                    s*   t  |I dH }|s|S | | |S )zApply settings delta.

        Settings are stored but not applied to the active session.

        Args:
            delta: A settings delta.

        Returns:
            Dict mapping changed field names to their previous values.
        N)r4  _update_settings _warn_unhandled_updated_settings)r   r_  changedrL  r   r   r`    s   
	z!GladiaSTTService._update_settingsc                    s4   t  |I dH  |  I dH  |  I dH  dS )zStop the Gladia STT websocket connection.

        Args:
            frame: The end frame triggering service shutdown.
        N)r4  stop_send_stop_recording_disconnectr]  rL  r   r   rc    s   zGladiaSTTService.stopc                    rZ  )zCancel the Gladia STT websocket connection.

        Args:
            frame: The cancel frame triggering service cancellation.
        N)r4  cancelre  r]  rL  r   r   rf    r^  zGladiaSTTService.cancelaudioc              
   C  s*  |   I dH  | j4 I dH = | j| t| j| jkrBt| j| j }| j|d | _td| j| | _t	|  d| d W d  I dH  n1 I dH sRw   Y  | j
r| jr| jjtju rz
| |I dH  W n! tjjy } zt	|  d|  d| _
W Y d}~nd}~ww dV  dS )zRun speech-to-text on audio data.

        Args:
            audio: Raw audio bytes to transcribe.

        Yields:
            None (processing is handled asynchronously via WebSocket).
        Nr   z) Audio buffer exceeded max size, trimmed z bytesz- Websocket closed while sending audio chunk: F)start_processing_metricsrF  rA  extendlenrC  maxrB  r   warningr?  
_websocketstater"   OPEN_send_audio
websockets
exceptionsConnectionClosed)r   rg  	trim_sizeer   r   r   run_stt  s(   	(

zGladiaSTTService.run_sttc                    s   | j s%|  }| |I dH }|d | _ |d | _t|  d| j   |  I dH  t  I dH  | j	rF| j
sH| | | j| _
dS dS dS )zxConnect to the Gladia service.

        Initializes the session if needed and establishes websocket connection.
        Nr  rI   z Session URL: )r=  rX  _setup_gladiar>  r   info_connect_websocketr4  r\  rm  r9  create_task_receive_task_handler_report_error)r   r'  responserL  r   r   r\  %  s   

zGladiaSTTService._connectc                    sH   t   I dH  d| _| jr| | jI dH  d| _|  I dH  dS )zfDisconnect from the Gladia service.

        Cleans up tasks and closes websocket connection.
        NF)r4  re  r?  r9  cancel_task_disconnect_websocketrP  rL  r   r   re  9  s   zGladiaSTTService._disconnectc              
      s   z[| j r| j jtju rW dS t|  d t| jI dH | _ d| _| j	4 I dH  d| _
W d  I dH  n1 I dH s>w   Y  | dI dH  |  I dH  t|  d W dS  tyv } z| jd| |dI dH   d}~ww )	z-Establish the websocket connection to Gladia.NzConnecting to Gladia WebSocketTr   on_connectedz Connected to Gladia WebSocketzUnable to connect to Gladia: 	error_msg	exception)rm  rn  r"   ro  r   debugwebsocket_connectr=  r?  rF  rB  _call_event_handler_send_buffered_audio	Exception
push_errorr   ru  r   r   r   ry  H  s$   (z#GladiaSTTService._connect_websocketc              
      s   zXz| j r| j jtju rt|  d | j  I dH  W n ty= } z| jd| |dI dH  W Y d}~nd}~ww W d| _ | 	dI dH  dS W d| _ | 	dI dH  dS d| _ | 	dI dH  w )z)Close the websocket connection to Gladia.z$ Disconnecting from Gladia WebSocketNzError closing websocket: r  on_disconnected)
rm  rn  r"   ro  r   r  closer  r  r  r  r   r   r   r  a  s$   &z&GladiaSTTService._disconnect_websocketc              
      s  t  4 I d H v}i }| jr| j|d< |j| jd| ji||d4 I d H D}|jrD| I d H W  d   I d H  W  d   I d H  S | I d H }t	
|  d|j d|pX|j  t|  d|j d| 1 I d H sow   Y  W d   I d H  d S 1 I d H sw   Y  d S )Nr  zX-Gladia-Key)headersjsonr$  z Gladia error: z: z& Failed to initialize Gladia session: z - )aiohttpClientSessionr7  postr8  r6  okr  textr   errorstatusreasonr  )r   r'  sessionr$  r}  
error_textr   r   r   rw  m  s2   
.zGladiaSTTService._setup_gladia
transcriptis_finalc                    s   |   I d H  d S r   )stop_processing_metrics)r   r  r  r#   r   r   r   _handle_transcription  s   z&GladiaSTTService._handle_transcriptionc                    sV   | j jr| jr
dS t|  d d| _| tI dH  | jr)|  I dH  dS dS )zHandle speech start event from Gladia.

        Broadcasts UserStartedSpeakingFrame and optionally triggers interruption
        when VAD is enabled.
        Nz User started speakingT)	r  r  rG  r   r  broadcast_framer   rH  broadcast_interruptionrP  r   r   r   _on_speech_started  s   z#GladiaSTTService._on_speech_startedc                    s>   | j jr| js
dS d| _| tI dH  t|  d dS )zoHandle speech end event from Gladia.

        Broadcasts UserStoppedSpeakingFrame when VAD is enabled.
        NFz User stopped speaking)r  r  rG  r  r   r   r  rP  r   r   r   _on_speech_ended  s   z!GladiaSTTService._on_speech_endedc                    sX   | j r(| j jtju r*t|d}dd|id}| j t	|I dH  dS dS dS )z,Send audio chunk with proper message format.zutf-8audio_chunkchunk)typedataN)
rm  rn  r"   ro  base64	b64encodedecodesendr  dumps)r   rg  r  messager   r   r   rp    s   zGladiaSTTService._send_audioc              	      s   | j 4 I dH 3 | jr0t|  dt| j d | t| jI dH  W d  I dH  dS W d  I dH  dS 1 I dH sAw   Y  dS )z+Send any buffered audio after reconnection.Nz	 Sending z bytes of buffered audio)rF  rA  r   r  rj  rp  bytesrP  r   r   r   r    s   .z%GladiaSTTService._send_buffered_audioc                    s>   | j r| j jtju r| j tddiI d H  d S d S d S )Nr  stop_recording)rm  rn  r"   ro  r  r  r  rP  r   r   r   rd    s    z%GladiaSTTService._send_stop_recordingc                 C   s   | j r| j S td)zGet the current WebSocket connection.

        Returns:
            The WebSocket connection.

        Raises:
            Exception: If WebSocket is not connected.
        zWebsocket not connected)rm  r  rP  r   r   r   _get_websocket  s   	zGladiaSTTService._get_websocketc              
      s  |   2 z3 dH W }zt|}|d dkrY|drY|d d }| j4 I dH $ |d }|| jkrC|| j }| j|d | _|| _W d  I dH  n1 I dH sSw   Y  n|d dkr|d d	 }|d
 }|d }|d d }	|	r| t|| j	t
 ||dI dH  | j||	|dI dH  nZ| t|| j	t
 ||dI dH  nH|d dkr|d d }
|d d }|
d
 }|
d }||kr| t|dt
 |I dH  n|d dkr|  I dH  n|d dkr|  I dH  W q tjy   t|  d|  Y qw 6 dS )zxReceive and process websocket messages.

        Continuously processes messages from the websocket connection.
        Nr  r  acknowledgedr  
byte_ranger  r  	utterancer#   r  r  )result)r  r  r#   translationtranslated_utteranceoriginal_language speech_start
speech_endz Received non-JSON message: )r  r  loadsgetrF  rB  rA  
push_framer   _user_idr   r  r   r   r  r  JSONDecodeErrorr   rl  )r   r  contentr  end_bytert  r  r#   r  r  r  r  translated_languager  r   r   r   _receive_messages  s   


(
		

z"GladiaSTTService._receive_messagessilencec                    s   |  dI dH  dS )zSend an empty audio chunk to keep the Gladia connection alive.

        Args:
            silence: Silent PCM audio bytes (unused, Gladia accepts empty chunks).
            N)rp  )r   r  r   r   r   _send_keepalive  s   z GladiaSTTService._send_keepaliver   )4r   r   r  r  r  r0  r  r   InputParamsr   r  r   r  r   r  r   r  r5  rQ  rR  r   r2  r  r   rX  r   r[  r`  r   rc  r
   rf  r  r   r   rv  r\  re  ry  r  rw  r    r  r  r  rp  r  rd  r  r  r  __classcell__r   r   rL  r   r     s   
 
	
 ")	
	Dr  )Ar  rD  r  r  r   dataclassesr   r   typingr   r   r   r   r  logurur   rS  r	   rU  pipecat.frames.framesr
   r   r   r   r   r   r   r   r   pipecat.services.gladia.configr   r   r   r   r   pipecat.services.settingsr   r   r   pipecat.services.stt_latencyr   pipecat.services.stt_servicer   pipecat.transcriptions.languager   r   pipecat.utils.timer   (pipecat.utils.tracing.service_decoratorsr    rq  websockets.asyncio.clientr!   r  websockets.protocolr"   ModuleNotFoundErrorru  r  r  r  r   r   r  r  r   r   r   r   <module>   sB   ,
s