o
    i,                  
   @   sR  d Z ddlZddlmZ ddlmZmZmZ ddlm	Z	 ddl
mZmZmZmZmZmZmZ ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZ zddl m!Z!m"Z"m#Z#m$Z$ ddl%m&Z&m'Z' ddl(m)Z) W n  e*y Z+ ze	,de+  e	,d e-de+ dZ+[+ww eG dd deZ.G dd deZ/dS )zAzure Speech-to-Text service implementation for Pipecat.

This module provides speech-to-text functionality using Azure Cognitive Services
Speech SDK for real-time audio transcription.
    N)	dataclass)AnyAsyncGeneratorOptional)logger)CancelFrameEndFrame
ErrorFrameFrameInterimTranscriptionFrame
StartFrameTranscriptionFramelanguage_to_azure_language)STTSettings)AZURE_TTFS_P99)
STTService)Language)time_now_iso8601)
traced_stt)CancellationReasonResultReasonSpeechConfigSpeechRecognizer)AudioStreamFormatPushAudioInputStream)AudioConfigzException: zCIn order to use Azure, you need to `pip install pipecat-ai[azure]`.zMissing module: c                   @   s   e Zd ZdZdS )AzureSTTSettingszSettings for AzureSTTService.N)__name__
__module____qualname____doc__ r"   r"   N/home/ubuntu/.local/lib/python3.10/site-packages/pipecat/services/azure/stt.pyr   6   s    r   c                       s\  e Zd ZU dZeZeed< dejdddde	dde
dee
 dee dee d	ee
 d
ee
 dee dee f fddZdefddZdedee
 fddZdedee
ef f fddZdedeedf fddZdef fddZdef fddZdef fdd Zd!d" Z d#d$ Z!e"	d/d%e
d&edee fd'd(Z#d)d* Z$d+d, Z%d-d. Z&  Z'S )0AzureSTTServicea"  Azure Speech-to-Text service for real-time audio transcription.

    This service uses Azure Cognitive Services Speech SDK to convert speech
    audio into text transcriptions. It supports continuous recognition and
    provides real-time transcription results with timing information.
    	_settingsN)regionlanguagesample_rateprivate_endpointendpoint_idsettingsttfs_p99_latencyapi_keyr&   r'   r(   r)   r*   r+   r,   c                   s   | j dtjd}
|dur|tjkr| dd ||
_|dur#|
| t jd	|||
d|	 |
jp7ttj}|s@|s@t	d|rR|rIt
d t|||d| _nt|||d| _|r`|| j_d| _d| _dS )
a  Initialize the Azure STT service.

        Args:
            api_key: Azure Cognitive Services subscription key.
            region: Azure region for the Speech service (e.g., 'eastus').
                Required unless ``private_endpoint`` is provided.
            language: Language for speech recognition. Defaults to English (US).

                .. deprecated:: 0.0.105
                    Use ``settings=AzureSTTService.Settings(language=...)`` instead.

            sample_rate: Audio sample rate in Hz. If None, uses service default.
            private_endpoint: Private endpoint for STT behind firewall.
                See https://learn.microsoft.com/en-us/azure/ai-services/speech-service/speech-services-private-link?tabs=portal
            endpoint_id: Custom model endpoint id.
            settings: Runtime-updatable settings. When provided alongside deprecated
                parameters, ``settings`` values take precedence.
            ttfs_p99_latency: P99 latency from speech end to final transcript in seconds.
                Override for your deployment. See https://github.com/pipecat-ai/stt-benchmark
            **kwargs: Additional arguments passed to parent STTService.
        N)modelr'   r'   )r(   r,   r+   z7Either 'region' or 'private_endpoint' must be provided.zHBoth 'region' and 'private_endpoint' provided; 'region' will be ignored.)subscriptionendpointspeech_recognition_language)r/   r&   r1   r"   )Settingsr   EN_US"_warn_init_param_moved_to_settingsr'   apply_updatesuper__init__r   
ValueErrorr   warningr   _speech_configr*   _audio_stream_speech_recognizer)selfr-   r&   r'   r(   r)   r*   r+   r,   kwargsdefault_settingsrecognition_language	__class__r"   r#   r7   H   sP   #


zAzureSTTService.__init__returnc                 C   s   dS )zCheck if this service can generate performance metrics.

        Returns:
            True as this service supports metrics generation.
        Tr"   r=   r"   r"   r#   can_generate_metrics   s   z$AzureSTTService.can_generate_metricsc                 C   s   t |S )zConvert a Language enum to Azure service-specific language code.

        Args:
            language: The language to convert.

        Returns:
            The Azure-specific language identifier, or None if not supported.
        r   )r=   r'   r"   r"   r#   language_to_service_language   s   	z,AzureSTTService.language_to_service_languagedeltac                    sX   t  |I dH }d|v r*| jjpttj| j_| j	r*| 
 I dH  |  I dH  |S )z9Apply a settings delta and reconnect if language changed.Nr'   )r6   _update_settingsr%   r'   r   r   r3   r:   r1   r;   _disconnect_connect)r=   rG   changedrA   r"   r#   rH      s   z AzureSTTService._update_settingsaudioc              
   C  sh   z|   I dH  | jr| j| dV  W dS  ty3 } ztd| dV  W Y d}~dS d}~ww )ao  Process audio data for speech-to-text conversion.

        Feeds audio data to the Azure speech recognizer for processing.
        Recognition results are handled asynchronously through callbacks.

        Args:
            audio: Raw audio bytes to process.

        Yields:
            Frame: Either None for successful processing or ErrorFrame on failure.
        NzUnknown error occurred: )error)start_processing_metricsr;   write	Exceptionr	   )r=   rL   er"   r"   r#   run_stt   s    zAzureSTTService.run_sttframec                    &   t  |I dH  |  I dH  dS )zzStart the speech recognition service.

        Args:
            frame: Frame indicating the start of processing.
        N)r6   startrJ   r=   rS   rA   r"   r#   rU         zAzureSTTService.startc                    rT   )zwStop the speech recognition service.

        Args:
            frame: Frame indicating the end of processing.
        N)r6   stoprI   rV   rA   r"   r#   rX      rW   zAzureSTTService.stopc                    rT   )zpCancel the speech recognition service.

        Args:
            frame: Frame indicating cancellation.
        N)r6   cancelrI   rV   rA   r"   r#   rY      rW   zAzureSTTService.cancelc              
      s   | j rdS z:t| jdd}t|| _ t| j d}t| j|d| _| jj	| j
 | jj	| j | jj	| j | j  W dS  ty` } z| jd| |dI dH  W Y d}~dS d}~ww )zHInitialize the Azure speech recognizer and begin continuous recognition.N   )samples_per_secondchannels)stream)speech_configaudio_configz*Uncaught exception during initialization: )	error_msg	exception)r;   r   r(   r   r   r   r:   r<   recognizingconnect_on_handle_recognizing
recognized_on_handle_recognizedcanceled_on_handle_canceled"start_continuous_recognition_asyncrP   
push_error)r=   stream_formatr_   rQ   r"   r"   r#   rJ      s(   

zAzureSTTService._connectc                    s6   | j r| j   d| _ | jr| j  d| _dS dS )z)Stop recognition and close audio streams.N)r<   !stop_continuous_recognition_asyncr;   closerD   r"   r"   r#   rI     s   


zAzureSTTService._disconnect
transcriptis_finalc                    s   |   I dH  dS )z+Handle a transcription result with tracing.N)stop_processing_metrics)r=   rn   ro   r'   r"   r"   r#   _handle_transcription  s   z%AzureSTTService._handle_transcriptionc                 C   s   |j jtjkrCt|j jdkrEt|j dd p| jj}t	|j j| j
t ||d}t| |j jd||   t| ||   d S d S d S )Nr   r'   resultT)rs   reasonr   RecognizedSpeechlentextgetattrr%   r'   r   _user_idr   asynciorun_coroutine_threadsaferq   get_event_loop
push_framer=   eventr'   rS   r"   r"   r#   rf     s   z%AzureSTTService._on_handle_recognizedc                 C   sp   |j jtjkr4t|j jdkr6t|j dd p| jj}t	|j j| j
t ||d}t| ||   d S d S d S )Nr   r'   rr   )rs   rt   r   RecognizingSpeechrv   rw   rx   r%   r'   r   ry   r   rz   r{   r}   r|   r~   r"   r"   r#   rd   '  s   z&AzureSTTService._on_handle_recognizingc                 C   sV   |j j}|jtjkr)d|j }|jr|d|j 7 }t| j|d| 	  d S d S )Nz Azure STT recognition canceled: z - )r`   )
rs   cancellation_detailsrt   r   Errorerror_detailsrz   r{   rj   r|   )r=   r   detailsr`   r"   r"   r#   rh   3  s   z#AzureSTTService._on_handle_canceled)N)(r   r   r    r!   r   r2   __annotations__r   r3   r   strr   intfloatr7   boolrE   rF   r   dictr   rH   bytesr   r
   rR   r   rU   r   rX   r   rY   rJ   rI   r   rq   rf   rd   rh   __classcell__r"   r"   rA   r#   r$   =   sb   
 	
X			
r$   )0r!   rz   dataclassesr   typingr   r   r   logurur   pipecat.frames.framesr   r   r	   r
   r   r   r   pipecat.services.azure.commonr   pipecat.services.settingsr   pipecat.services.stt_latencyr   pipecat.services.stt_servicer   pipecat.transcriptions.languager   pipecat.utils.timer   (pipecat.utils.tracing.service_decoratorsr   azure.cognitiveservices.speechr   r   r   r   $azure.cognitiveservices.speech.audior   r   %azure.cognitiveservices.speech.dialogr   ModuleNotFoundErrorrQ   rM   rP   r   r$   r"   r"   r"   r#   <module>   s4   $	
