o
    i .                     @   s   d Z ddlZddlZddlmZ ddlmZmZ ddlZddl	m
Z
 ddlmZ ddlmZmZmZ ddlmZ dd	lmZ dd
lmZ ddlmZmZ ddlmZ ddlmZ dedee fddZeG dd deZ G dd deZ!dS )zFal speech-to-text service implementation.

This module provides integration with Fal's Wizper API for speech-to-text
transcription using segmented audio processing.
    N)	dataclass)AsyncGeneratorOptional)logger)	BaseModel)
ErrorFrameFrameTranscriptionFrame)STTSettings)FAL_TTFS_P99)SegmentedSTTService)Languageresolve_language)time_now_iso8601)
traced_sttlanguagereturnc                 C   s  i t jdt jdt jdt jdt jdt jdt jdt jdt j	d	t j
d
t jdt jdt jdt jdt jdt jdt jdi t jdt jdt jdt jdt jdt jdt jdt jdt jdt jdt jdt jdt jdt jdt j d t j!d!t j"d"i t j#d#t j$d$t j%d%t j&d&t j'd't j(d(t j)d)t j*d*t j+d+t j,d,t j-d-t j.d.t j/d/t j0d0t j1d1t j2d2t j3d3i t j4d4t j5d5t j6d6t j7d7t j8d8t j9d9t j:d:t j;d;t j<d<t j=d=t j>d>t j?d?t j@d@t jAdAt jBdBt jCdCt jDdDi t jEdEt jFdFt jGdGt jHdHt jIdIt jJdJt jKdKt jLdLt jMdMt jNdNt jOdOt jPdPt jQdQt jRdRt jSdSt jTdTt jUdUt jVdVt jWdWt jXdXt jYdYt jZdZt j[d[t j\d\t j]d]t j^d^t j_d_t j`d`t jadat jbdbi}tc| |dcddS )ezConvert a Language enum to Fal's Wizper language code.

    Args:
        language: The Language enum value to convert.

    Returns:
        The corresponding Fal Wizper language code, or None if not supported.
    afamarasazbabebgbnbobrbscacscydadeeleneseteufafifofrglguhahehihrhthuhyidisitjajwkakkkmknkolalblnloltlvmgmimkmlmnmrmsmtmynenlnnnoocpaplpsptrorusasdsiskslsnsosqsrsusvswtatetgthtktltrttukuruzviyiyozhT)use_base_code)dr   AFAMARASAZBABEBGBNBOBRBSCACSCYDADEELENESETEUFAFIFOFRGLGUHAHEHIHRHTHUHYIDISITJAJWKAKKKMKNKOLALBLNLOLTLVMGMIMKMLMNMRMSMTMYNENLNNNOOCPAPLPSPTRORUSASDSISKSLSNSOSQSRSUSVSWTATETGTHTKTLTRTTUKURUZVIYIYOZHr   )r   LANGUAGE_MAP r   L/home/ubuntu/.local/lib/python3.10/site-packages/pipecat/services/fal/stt.pylanguage_to_fal_language   st  		
 !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVer   c                   @   s   e Zd ZdZdS )FalSTTSettingszSettings for FalSTTService.N)__name__
__module____qualname____doc__r   r   r   r   r      s    r   c                       s   e Zd ZU dZeZeed< G dd deZdddddddde	d		d
e
e de
ej dededede
e de
e de
e de
e f fddZdefddZdede
e fddZe	d"dedede
e fddZdedeedf fd d!Z  ZS )#FalSTTServicezSpeech-to-text service using Fal's Wizper API.

    This service uses Fal's Wizper API to perform speech-to-text transcription on audio
    segments. It inherits from SegmentedSTTService to handle audio buffering and speech detection.
    	_settingsc                   @   sH   e Zd ZU dZejZee ed< dZ	e
ed< dZe
ed< dZe
ed< d	S )
zFalSTTService.InputParamsa  Configuration parameters for Fal's Wizper API.

        .. deprecated:: 0.0.105
            Use ``settings=FalSTTService.Settings(...)`` instead.

        Parameters:
            language: Language of the audio input. Defaults to English.
            task: Task to perform ('transcribe' or 'translate'). Defaults to 'transcribe'.
            chunk_level: Level of chunking ('segment'). Defaults to 'segment'.
            version: Version of Wizper model to use. Defaults to '3'.
        r   
transcribetasksegmentchunk_level3versionN)r   r   r   r   r   r   r   r   __annotations__r   strr   r   r   r   r   r   InputParams   s   
 r   Nr   r   r   )	api_keyaiohttp_sessionr   r   r   sample_rateparamssettingsttfs_p99_latencyr   r   r   r   r   r   r   r   r   c       	            s   | j dtjd}|dur4| d |s4|jdur|j|_|jdkr$|j}|jdkr,|j}|jdkr4|j}|dur=|| t	 j
d||	|d|
 || _|| _|| _|pZtdd	| _| jsctd
|| _|du | _dS )ap  Initialize the FalSTTService with API key and parameters.

        Args:
            api_key: Fal API key. If not provided, will check FAL_KEY environment variable.
            aiohttp_session: Optional aiohttp ClientSession for HTTP requests.
                If not provided, a session will be created and managed internally.
            task: Task to perform (``"transcribe"`` or ``"translate"``).
                Defaults to ``"transcribe"``.
            chunk_level: Level of chunking (``"segment"``). Defaults to ``"segment"``.
            version: Version of Wizper model to use. Defaults to ``"3"``.
            sample_rate: Audio sample rate in Hz. If not provided, uses the pipeline's rate.
            params: Configuration parameters for the Wizper API.

                .. deprecated:: 0.0.105
                    Use ``settings=FalSTTService.Settings(...)`` for model/language and
                    direct init parameters for task/chunk_level/version instead.

            settings: Runtime-updatable settings. When provided alongside deprecated
                parameters, ``settings`` values take precedence.
            ttfs_p99_latency: P99 latency from speech end to final transcript in seconds.
                Override for your deployment. See https://github.com/pipecat-ai/stt-benchmark
            **kwargs: Additional arguments passed to SegmentedSTTService.
        N)modelr   r   r   r   r   )r   r   r   FAL_KEY zQFAL_KEY must be provided either through api_key parameter or environment variabler   )Settingsr   r   "_warn_init_param_moved_to_settingsr   r   r   r   apply_updatesuper__init___task_chunk_level_versionosgetenv_api_key
ValueError_session_owns_session)selfr   r   r   r   r   r   r   r   r   kwargsdefault_settings	__class__r   r   r      sD   &





zFalSTTService.__init__r   c                 C   s   dS )zCheck if the service can generate processing metrics.

        Returns:
            True, as Fal STT service supports metrics generation.
        Tr   )r  r   r   r   can_generate_metrics  s   z"FalSTTService.can_generate_metricsr   c                 C   s   t |S )zConvert a Language enum to Fal's service-specific language code.

        Args:
            language: The language to convert.

        Returns:
            The Fal-specific language code, or None if not supported.
        )r   )r  r   r   r   r   language_to_service_language  s   	z*FalSTTService.language_to_service_language
transcriptis_finalc                    s   |   I dH  dS )z+Handle a transcription result with tracing.N)stop_processing_metrics)r  r
  r  r   r   r   r   _handle_transcription  s   z#FalSTTService._handle_transcriptionaudioc           
   
   C  s  z|   I dH  | jst | _dt|  }d|i}| jjdur+| jj|d< | j	dur5| j	|d< | j
dur?| j
|d< | jdurI| j|d< d| j d	d
}| jjd||d4 I dH 8}|jdkr| I dH }td|j d| dV  	 W d  I dH  W dS | I dH }W d  I dH  n1 I dH sw   Y  |rd|v r|d  }|r| |d| jjI dH  td| d t|| jt t| jj|dV  W dS W dS W dS W dS  ty }	 ztd|	 dV  W Y d}	~	dS d}	~	ww )a  Transcribes an audio segment using Fal's Wizper API.

        Args:
            audio: Raw audio bytes in WAV format (already converted by base class).

        Yields:
            Frame: TranscriptionFrame containing the transcribed text, or ErrorFrame on failure.

        Note:
            The audio is already in WAV format from the SegmentedSTTService.
            Only non-empty transcriptions are yielded.
        Nzdata:audio/x-wav;base64,	audio_urlr   r   r   r   zKey zapplication/json)AuthorizationzContent-Typezhttps://fal.run/fal-ai/wizper)jsonheaders   zFal API error (z): )errortextTzTranscription: [])resultzUnknown error occurred: )start_processing_metricsr  aiohttpClientSessionbase64	b64encodedecoder   r   r   r   r   r   poststatusr  r   r  stripr  r   debugr	   _user_idr   r   	Exception)
r  r  data_uripayloadr  resp
error_textresponser  er   r   r   run_stt  sb   








	(
 zFalSTTService.run_stt)N)r   r   r   r   r   r   r   r   r   r   r   r   r  r  intfloatr   boolr  r   r	  r   r  bytesr   r   r*  __classcell__r   r   r  r   r      sZ   
 	
R"r   )"r   r  r   dataclassesr   typingr   r   r  logurur   pydanticr   pipecat.frames.framesr   r   r	   pipecat.services.settingsr
   pipecat.services.stt_latencyr   pipecat.services.stt_servicer   pipecat.transcriptions.languager   r   pipecat.utils.timer   (pipecat.utils.tracing.service_decoratorsr   r   r   r   r   r   r   r   r   <module>   s&   q