o
    iw                     @   s   U d Z ddlZddlmZmZ ddlmZmZmZm	Z	m
Z
 ddlmZ ddlmZ ddlmZmZmZmZmZmZmZmZmZmZ ddlmZ dd	lmZ dd
lmZm Z m!Z!m"Z" ddl#m$Z$ ddl%m&Z& ddl'm(Z(m)Z) ddl*m+Z+ ddl,m-Z- zddl.m/Z/ ddl0m1Z1 ddl2m3Z3 W n  e4y Z5 ze6de5  e6d e7de5 dZ5[5ww de(de8fddZ9eddG dd dZ:e:dddddddd e:dddddddd e:ddddd!ddd d"Z;ee8e:f e<d#< eG d$d% d%e Z=G d&d' d'e&Z>dS )(a'  Sarvam AI Speech-to-Text service implementation.

This module provides a streaming Speech-to-Text service using Sarvam AI's WebSocket-based
API. It supports real-time transcription with Voice Activity Detection (VAD) and
can handle multiple audio formats for Indian language speech recognition.
    N)	dataclassfield)AnyAsyncGeneratorDictLiteralOptional)logger)	BaseModel)
CancelFrameEndFrame
ErrorFrameFrame
StartFrameTranscriptionFrameUserStartedSpeakingFrameUserStoppedSpeakingFrameVADUserStartedSpeakingFrameVADUserStoppedSpeakingFrame)FrameDirection)sdk_headers)	NOT_GIVENSTTSettings	_NotGivenis_given)SARVAM_TTFS_P99)
STTService)Languageresolve_language)time_now_iso8601)
traced_stt)AsyncSarvamAI)ApiError)	EventTypezException: zEIn order to use Sarvam, you need to `pip install pipecat-ai[sarvam]`.zMissing module: languagereturnc                 C   sZ   t jdt jdt jdt jdt jdt jdt jdt jdt j	d	t j
d
t jdt jdi}t| |ddS )zConvert a Language enum to Sarvam's language code format.

    Args:
        language: The Language enum value to convert.

    Returns:
        The Sarvam language code string.
    bn-INgu-INhi-INkn-INml-INmr-INta-INte-INpa-INod-INen-INas-INF)use_base_code)r   BN_INGU_INHI_INKN_INML_INMR_INTA_INTE_INPA_INOR_INEN_INAS_INr   )r$   LANGUAGE_MAP r@   O/home/ubuntu/.local/lib/python3.10/site-packages/pipecat/services/sarvam/stt.pylanguage_to_sarvam_language9   s   rB   T)frozenc                   @   sR   e Zd ZU dZeed< eed< eed< ee ed< ee ed< eed< eed< d	S )
ModelConfigaK  Immutable configuration for a Sarvam STT model.

    Attributes:
        supports_prompt: Whether the model accepts prompt parameter.
        supports_mode: Whether the model accepts mode parameter.
        supports_language: Whether the model accepts language parameter.
        default_language: Default language code (None = auto-detect).
        default_mode: Default mode (None = not applicable).
        use_translate_endpoint: Whether to use speech_to_text_translate_streaming endpoint.
        use_translate_method: Whether to use translate() method instead of transcribe().
    supports_promptsupports_modesupports_languagedefault_languagedefault_modeuse_translate_endpointuse_translate_methodN)__name__
__module____qualname____doc__bool__annotations__r   strr@   r@   r@   rA   rD   U   s   
 rD   Funknown)rE   rF   rG   rH   rI   rJ   rK   
transcribe)saarika:v2.5zsaaras:v2.5z	saaras:v3MODEL_CONFIGSc                   @   sl   e Zd ZU dZedd dZedB eB ed< edd dZ	e
dB eB ed< ed	d dZe
dB eB ed
< dS )SarvamSTTSettingsaF  Settings for SarvamSTTService.

    Parameters:
        prompt: Optional prompt to guide transcription/translation style/context.
            Only applicable to models that support prompts (e.g., saaras:v2.5).
        vad_signals: Enable VAD signals in response.
        high_vad_sensitivity: Enable high VAD sensitivity.
    c                   C      t S Nr   r@   r@   r@   rA   <lambda>       zSarvamSTTSettings.<lambda>)default_factoryNpromptc                   C   rX   rY   rZ   r@   r@   r@   rA   r[      r\   vad_signalsc                   C   rX   rY   rZ   r@   r@   r@   rA   r[      r\   high_vad_sensitivity)rL   rM   rN   rO   r   r^   rR   r   rQ   r_   rP   r`   r@   r@   r@   rA   rW      s
   
 	"rW   c                       s  e Zd ZU dZeZeed< G dd deZdddddde	ddd	d	e
d
ee
 deed  dee de
dee dee dee dee def fddZdede
fddZdee
 fddZdefddZdedef fd d!Zd"edee
ef f fd#d$Zd%ee
 fd&d'Zdef fd(d)Zdef fd*d+Z de!f fd,d-Z"d.e#de$edf fd/d0Z%d1d2 Z&d3d4 Z'd5d6 Z(d7d8 Z)e*	dGd9e
d:edee fd;d<Z+d=e
defd>d?Z,defd@dAZ-dBe#fdCdDZ.dEdF Z/  Z0S )HSarvamSTTServicea  Sarvam speech-to-text service.

    Provides real-time speech recognition using Sarvam's WebSocket API.

    Event handlers available (in addition to STTService events):

    - on_connected(service): Connected to Sarvam WebSocket
    - on_disconnected(service): Disconnected from Sarvam WebSocket
    - on_connection_error(service, error): Connection error occurred

    Example::

        @stt.event_handler("on_connected")
        async def on_connected(service):
            ...
    	_settingsc                   @   sf   e Zd ZU dZdZee ed< dZee	 ed< dZ
eed  ed< dZee ed< dZee ed< dS )	zSarvamSTTService.InputParamsa  Configuration parameters for Sarvam STT service.

        .. deprecated:: 0.0.105
            Use ``settings=SarvamSTTService.Settings(...)`` instead.

        Parameters:
            language: Target language for transcription.
                - saarika:v2.5: Defaults to "unknown" (auto-detect supported)
                - saaras:v2.5: Not used (auto-detects language)
                - saaras:v3: Defaults to "unknown" (auto-detect supported)
            prompt: Optional prompt to guide transcription/translation style/context.
                Only applicable to saaras:v2.5. Defaults to None.
            mode: Mode of operation for saaras:v3 models only. Options: transcribe, translate,
                verbatim, translit, codemix. Defaults to "transcribe" for saaras:v3.
            vad_signals: Enable VAD signals in response. Defaults to None.
            high_vad_sensitivity: Enable high VAD (Voice Activity Detection) sensitivity. Defaults to None.
        Nr$   r^   rT   	translateverbatimtranslitcodemixmoder_   r`   )rL   rM   rN   rO   r$   r   r   rQ   r^   rR   rh   r   r_   rP   r`   r@   r@   r@   rA   InputParams   s   
 ri   Nwavg      @)	modelrh   sample_rateinput_audio_codecparamssettingsttfs_p99_latencykeepalive_timeoutkeepalive_intervalapi_keyrk   rh   rc   rl   rm   rn   ro   rp   rq   rr   c       
            s  | j dddddd}|dur| dd ||_|dur:| d |s:|j|_|j|_|jdur2|j}|j|_|j|_|durC|| |j}|t	vr^d
tt	 }td| d| d	t	| | _|jdurt| jjsttd
| d|dur| jjstd
| d|jdur| jjstd
| d|du r| jj}t jd|||	|
|d| || _|| _|| _t | _t|| jd| _d| _d| _d| _|jr| d | d | d t !d| j  dS )a  Initialize the Sarvam STT service.

        Args:
            api_key: Sarvam API key for authentication.
            model: Sarvam model to use for transcription.

                .. deprecated:: 0.0.105
                    Use ``settings=SarvamSTTService.Settings(model=...)`` instead.

            mode: Mode of operation. Options: transcribe, translate, verbatim,
                translit, codemix. Only applicable to models that support it
                (e.g., saaras:v3). Defaults to the model's default mode.
            sample_rate: Audio sample rate. Defaults to 16000 if not specified.
            input_audio_codec: Audio codec/format of the input file. Defaults to "wav".
            params: Configuration parameters for Sarvam STT service.

                .. deprecated:: 0.0.105
                    Use ``settings=SarvamSTTService.Settings(...)`` instead.

            settings: Runtime-updatable settings. When provided alongside deprecated
                parameters, ``settings`` values take precedence.
            ttfs_p99_latency: P99 latency from speech end to final transcript in seconds.
                Override for your deployment. See https://github.com/pipecat-ai/stt-benchmark
            keepalive_timeout: Seconds of no audio before sending silence to keep the
                connection alive. None disables keepalive.
            keepalive_interval: Seconds between idle checks when keepalive is enabled.
            **kwargs: Additional arguments passed to the parent STTService.
        rU   N)rk   r$   r^   r_   r`   rk   rn   z, zUnsupported model 'z'. Allowed values: .Model '$' does not support prompt parameter.z"' does not support mode parameter.>' does not support language parameter (auto-detects language).)rl   rp   rq   rr   ro   )api_subscription_keyheaderson_speech_startedon_speech_stoppedon_utterance_endz)Sarvam STT initialized with SDK headers: r@   )"Settings"_warn_init_param_moved_to_settingsrk   r$   r^   rh   r_   r`   apply_updaterV   joinsortedkeys
ValueError_configrE   rF   rG   rI   super__init___api_key_mode_input_audio_codecr   _sdk_headersr!   _sarvam_client_websocket_context_socket_client_receive_task_register_event_handlerr	   info)selfrs   rk   rh   rl   rm   rn   ro   rp   rq   rr   kwargsdefault_settingsresolved_modelallowed	__class__r@   rA   r      st   .	




	


zSarvamSTTService.__init__r$   r%   c                 C   s   t |S )zConvert pipecat Language enum to Sarvam's language code.

        Args:
            language: The Language enum value to convert.

        Returns:
            The Sarvam language code string.
        )rB   )r   r$   r@   r@   rA   language_to_service_languageK  s   	z-SarvamSTTService.language_to_service_languagec                 C   s   | j jr
t| j jS | jjS )zFResolve the current language setting to a Sarvam language code string.)rb   r$   rB   r   rH   r   r@   r@   rA   _get_language_stringV  s   z%SarvamSTTService._get_language_stringc                 C   s   dS )zCheck if this service can generate processing metrics.

        Returns:
            True, as Sarvam service supports metrics generation.
        Tr@   r   r@   r@   rA   can_generate_metrics\  s   z%SarvamSTTService.can_generate_metricsframe	directionc                    sj   t  ||I dH  | jjs/t|tr|  I dH  dS t|tr1| jr3| j	 I dH  dS dS dS dS )zProcess incoming frames.

        Handles VAD frames for TTFB tracking when using Pipecat's VAD
        instead of Sarvam's built-in VAD.
        N)
r   process_framerb   r_   
isinstancer   _start_metricsr   r   flush)r   r   r   r   r@   rA   r   d  s   

zSarvamSTTService.process_framedeltac                    s   t |jr|jdur| jjstd| jj dt|| jr7t |j	r7|j	dur7| jj
s7td| jj dt |I dH }ddh |  @ rX|  I dH  |  I dH   fdd| D }|rj| | |S )	a^  Apply a settings delta, validate, sync state, and reconnect.

        Args:
            delta: A :class:`STTSettings` (or ``SarvamSTTService.Settings``) delta.

        Returns:
            Dict mapping changed field names to their previous values.

        Raises:
            ValueError: If a setting is not supported by the current model.
        Nru   rw   rv   r$   r^   c                    s   i | ]\}}| vr||qS r@   r@   .0kvreconnect_fieldsr@   rA   
<dictcomp>      z5SarvamSTTService._update_settings.<locals>.<dictcomp>)r   r$   r   rG   r   rb   rk   r   r}   r^   rE   r   _update_settingsr   _disconnect_connectitems _warn_unhandled_updated_settings)r   r   changed	unhandledr   r   rA   r   t  s(    
z!SarvamSTTService._update_settingsr^   c                    s   ddl }|  |d |j| jj dtdd W d   n1 s&w   Y  | jjs?|dur=t	d| j
j ddS td	| j
j d
 || j
_|  I dH  |  I dH  dS )a  Set the transcription/translation prompt and reconnect.

        .. deprecated:: 0.0.104
            Use ``STTUpdateSettingsFrame(SarvamSTTService.Settings(prompt=...))`` instead.

        Args:
            prompt: Prompt text to guide transcription/translation style/context.
                   Pass None to clear/disable prompt.
                   Only applicable to models that support prompts.
        r   Nalwaysz[.set_prompt() is deprecated. Use STTUpdateSettingsFrame(self.Settings(prompt=...)) instead.   )
stacklevelru   rv   z	Updating z prompt.)warningscatch_warningssimplefilterwarnr   rL   DeprecationWarningr   rE   r   rb   rk   r	   r   r^   r   r   )r   r^   r   r@   r@   rA   
set_prompt  s(   

	zSarvamSTTService.set_promptc                    &   t  |I dH  |  I dH  dS )z~Start the Sarvam STT service.

        Args:
            frame: The start frame containing initialization parameters.
        N)r   startr   r   r   r   r@   rA   r        zSarvamSTTService.startc                    r   )zVStop the Sarvam STT service.

        Args:
            frame: The end frame.
        N)r   stopr   r   r   r@   rA   r     r   zSarvamSTTService.stopc                    r   )z[Cancel the Sarvam STT service.

        Args:
            frame: The cancel frame.
        N)r   cancelr   r   r   r@   rA   r     r   zSarvamSTTService.cancelaudioc              
   C  s   | j s	dV  dS z=t|d}| jdr| jnd| j }||| jd}| jjr9| j j	di |I dH  n| j j
di |I dH  W n tyb } ztd| |dV  W Y d}~nd}~ww dV  dS )zSend audio data to Sarvam for transcription.

        Args:
            audio: Raw audio bytes to transcribe.

        Yields:
            Frame: None (transcription results come via WebSocket callbacks).
        Nutf-8audio/r   encodingrl   zError sending audio to Sarvam: )error	exceptionr@   )r   base64	b64encodedecoder   
startswithrl   r   rK   rd   rT   	Exceptionr   )r   r   audio_base64r   method_kwargser@   r@   rA   run_stt  s.   	

 
zSarvamSTTService.run_sttc              
      s6  t d zȈ jjt jd} jjsd|d<  jjdur) jjr%dnd|d<  jjdur9 jjr5dnd|d<   }|durE||d	<  j	j
rS jdurS j|d
<  jjdurc j	jrc jj|d<  fdd} j	jrz| jjjfi | _n| jjjfi | _ j I dH  _ jjdur j	jrt jdd}t|r| jjI dH   fdd} jtj|     _   t d W dS  t y } zd _d _ j!d| |dI dH  W Y d}~dS d}~w t"y } zd _d _ j!d| |dI dH  W Y d}~dS d}~ww )z.Connect to Sarvam WebSocket API using the SDK.zConnecting to Sarvam)rk   rl   trueflush_signalNfalser_   r`   language_coderh   r^   c                    s   d j i}|g}d|v r|dd | D  d }|D ]C}z| di |d|iW   S  ty@ } z|}W Y d }~nd }~ww z
| di |W   S  ty_ } z|}W Y d }~qd }~ww |d urf|| di |S )Nadditional_headersr^   c                 S   s   i | ]\}}|d kr||qS )r^   r@   r   r@   r@   rA   r   1  r   zPSarvamSTTService._connect.<locals>._connect_with_sdk_headers.<locals>.<dictcomp>request_optionsr@   )r   appendr   	TypeError)
connect_fnr   r   attemptslast_type_errorattempt_kwargsr   r   r@   rA   _connect_with_sdk_headers)  s2   
z<SarvamSTTService._connect.<locals>._connect_with_sdk_headersr   c                    s      |  dS )z)Wrapper to handle async response handler.N)create_task_handle_message)messager   r@   rA   _message_handler\  s   z3SarvamSTTService._connect.<locals>._message_handlerz Connected to Sarvam successfullyzSarvam API error: 	error_msgr   zFailed to connect to Sarvam: )#r	   debugrb   rk   rR   rl   r_   r`   r   r   rF   r   r^   rE   rJ   r   "speech_to_text_translate_streamingconnectr   speech_to_text_streaming
__aenter__r   getattrcallableonr#   MESSAGEr   _receive_task_handlerr   _create_keepalive_taskr   r"   
push_errorr   )r   connect_kwargslanguage_stringr   prompt_setterr   r   r@   r   rA   r     sj   


((zSarvamSTTService._connectc              
      s   |   I dH  | jr| | jI dH  d| _| j}| j}d| _d| _|rj|rlz<z|dddI dH  W n tyS } z| jd| |dI dH  W Y d}~nd}~ww W t	d dS W t	d dS t	d w dS dS )z/Disconnect from Sarvam WebSocket API using SDK.Nz$Error closing WebSocket connection: r   z"Disconnected from Sarvam WebSocket)
_cancel_keepalive_taskr   cancel_taskr   r   	__aexit__r   r   r	   r   )r   socket_clientwebsocket_contextr   r@   r@   rA   r   s  s.   
zSarvamSTTService._disconnectc              
      sd   | j sdS z| j  I dH  W dS  ty1 } z| jd| |dI dH  W Y d}~dS d}~ww )zHandle incoming messages from Sarvam WebSocket.

        This task wraps the SDK's start_listening() method which processes
        messages via the registered event handler callback.
        NzSarvam receive task error: r   )r   start_listeningr   r   )r   r   r@   r@   rA   r     s   (z&SarvamSTTService._receive_task_handlerc           	   
      s  t d|  z|jdkrk|jj}|jj}t d| d|  |dkrL|  I dH  t d | dI dH  | t	I dH  | 
 I dH  W dS |d	krht d
 | dI dH  | tI dH  W dS W dS |jdkr|jj}|jj}|r| |}n|  }|r| |}ntj}| dI dH  |r| r| |d|I dH  | t|| jt |t|dr| nt|dI dH  |  I dH  W dS W dS  ty } z| jd| |dI dH  |  I dH  W Y d}~dS d}~ww )zHandle incoming WebSocket message from Sarvam SDK.

        Processes transcription data and VAD events from the Sarvam service.

        Args:
            message: The parsed response object from Sarvam WebSocket.
        zReceived response: eventszVAD Signal: z, Occurred at: START_SPEECHNzUser started speakingrz   
END_SPEECHzUser stopped speakingr{   datar|   Tdict)resultzFailed to handle message: r   )r	   r   typer   signal_type
occured_atr   _call_event_handlerbroadcast_framer   broadcast_interruptionr   
transcriptr   _map_language_code_to_enumr   r   r5   strip_handle_transcription
push_framer   _user_idr   hasattrr   rR   stop_processing_metricsr   r   stop_all_metrics)	r   r   signal	timestampr  r   r$   r   r   r@   r@   rA   r     s\   





z SarvamSTTService._handle_messager  is_finalc                    s   dS )zzHandle a transcription result with tracing.

        This method is decorated with @traced_stt for observability.
        Nr@   )r   r  r  r$   r@   r@   rA   r    s   z&SarvamSTTService._handle_transcriptionr   c                 C   sH   t jt jt jt jt jt jt jt jt j	t j
t jt jt jd}||t jS )z2Map Sarvam language code to pipecat Language enum.)r&   r'   r(   r)   r*   r+   r,   r-   r.   r/   zen-USr0   r1   )r   r3   r4   r5   r6   r7   r8   r9   r:   r;   r<   EN_USr=   r>   get)r   r   mappingr@   r@   rA   r    s   z+SarvamSTTService._map_language_code_to_enumc                 C   s
   | j duS )z6Check if the Sarvam SDK websocket client is connected.N)r   r   r@   r@   rA   _is_keepalive_ready  s   
z$SarvamSTTService._is_keepalive_readysilencec                    s~   t |d}| jdr| jnd| j }||| jd}| jjr1| jj	di |I dH  dS | jj
di |I dH  dS )zSend silent audio via the Sarvam SDK to keep the connection alive.

        Args:
            silence: Silent 16-bit mono PCM audio bytes.
        r   r   r   Nr@   )r   r   r   r   r   rl   r   rK   r   rd   rT   )r   r  r   r   r   r@   r@   rA   _send_keepalive  s   

z SarvamSTTService._send_keepalivec                    s   |   I dH  dS )z$Start processing metrics collection.N)start_processing_metricsr   r@   r@   rA   r     s   zSarvamSTTService._start_metricsrY   )1rL   rM   rN   rO   rW   r}   rQ   r
   ri   r   rR   r   r   intfloatr   r   r   r   rP   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   bytesr   r   r   r   r   r   r    r  r  r  r  r   __classcell__r@   r@   r   rA   ra      s   
 	
 '#			*p@	ra   )?rO   r   dataclassesr   r   typingr   r   r   r   r   logurur	   pydanticr
   pipecat.frames.framesr   r   r   r   r   r   r   r   r   r   "pipecat.processors.frame_processorr   pipecat.services.sarvam._sdkr   pipecat.services.settingsr   r   r   r   pipecat.services.stt_latencyr   pipecat.services.stt_servicer   pipecat.transcriptions.languager   r   pipecat.utils.timer   (pipecat.utils.tracing.service_decoratorsr    sarvamair!   sarvamai.core.api_errorr"   sarvamai.core.eventsr#   ModuleNotFoundErrorr   r   r   rR   rB   rD   rV   rQ   rW   ra   r@   r@   r@   rA   <module>   sv   0
		