o
    i/:                  
   @   s   d Z ddlZddlZddlmZmZ ddlZddlm	Z	 ddl
mZmZmZ ddlmZmZmZ ddlmZmZ zddlZW n  ey^ Z ze	de  e	d	 ed
e dZ[ww G dd deZG dd deZdS )a  Krisp turn analyzer for end-of-turn detection using Krisp VIVA SDK.

This module provides a turn analyzer implementation using Krisp's turn detection
(Tt) API to determine when a user has finished speaking in a conversation.

Note: This analyzer uses a different model than KrispVivaFilter. The model path
can be specified via the KRISP_VIVA_TURN_MODEL_PATH environment variable or
passed directly to the constructor.
    N)OptionalTuple)logger)KrispVivaSDKManagerint_to_krisp_frame_durationint_to_krisp_sample_rate)BaseTurnAnalyzerBaseTurnParamsEndOfTurnState)MetricsDataTurnMetricsDatazException: z?In order to use KrispVivaTurn, you need to install krisp_audio.zMissing module: c                   @   s*   e Zd ZU dZdZeed< dZeed< dS )KrispTurnParamsae  Configuration parameters for Krisp turn analysis.

    Parameters:
        threshold: Probability threshold for turn completion (0.0 to 1.0).
            Higher values require more confidence before marking turn as complete.
        frame_duration_ms: Frame duration in milliseconds for turn detection.
            Supported values: 10, 15, 20, 30, 32.
    g      ?	threshold   frame_duration_msN)	__name__
__module____qualname____doc__r   float__annotations__r   int r   r   V/home/ubuntu/.local/lib/python3.10/site-packages/pipecat/audio/turn/krisp_viva_turn.pyr   (   s   
 	r   c                       s   e Zd ZdZddddddee dee dee ded	df
 fd
dZdd Z	defddZ
def fddZed	efddZed	ee fddZed	efddZed	efddZdeded	efddZd	eeee f fddZd d! Z  ZS )"KrispVivaTurnzTurn analyzer using Krisp VIVA SDK for end-of-turn detection.

    Uses Krisp's turn detection (Tt) API to determine when a user has finished
    speaking. This analyzer requires a valid Krisp model file to operate.
    N )
model_pathsample_rateparamsapi_keyr   r   r   r   returnc             
      s  t  j|d ztj|d d| _W n ty( } z
d| _td| d}~ww z|p0td| _	| j	s>t
d td	| j	d
sHtdtj| j	sWtd| j	 |p[t | _d| _d| _d| _t | _d| _d| _g | _tj| _d| _d| _d| _ |r|nd}z
| !|| _W W dS  ty } zt
jd| dd d| _td| |d}~ww  ty   | jrt"  d| _ w )ah  Initialize the Krisp turn analyzer.

        Args:
            model_path: Path to the Krisp turn detection model file (.kef extension).
                If None, uses KRISP_VIVA_TURN_MODEL_PATH environment variable.
            sample_rate: Optional initial sample rate for audio processing.
                If provided, this will be used as the fixed sample rate.
            params: Configuration parameters for turn analysis behavior.
            api_key: Krisp SDK API key. If empty, falls back to
                the KRISP_VIVA_API_KEY environment variable.

        Raises:
            ValueError: If model_path is not provided and KRISP_VIVA_TURN_MODEL_PATH is not set.
            Exception: If model file doesn't have .kef extension.
            FileNotFoundError: If model file doesn't exist.
            RuntimeError: If Krisp SDK initialization fails.
        )r   )r   TFz Failed to initialize Krisp SDK: NKRISP_VIVA_TURN_MODEL_PATHzEModel path is not provided and KRISP_VIVA_TURN_MODEL_PATH is not set.z.Model path for KrispVivaTurn must be provided.z.kefz%Model is expected with .kef extensionzModel file not found: i>  )Failed to create turn detection session: exc_info)#super__init__r   acquire_sdk_acquired	ExceptionRuntimeErrorosgetenv_model_pathr   error
ValueErrorendswithpathisfileFileNotFoundErrorr   _params_tt_session_preload_tt_session_samples_per_frame	bytearray_audio_buffer_speech_triggered_last_probability_frame_probabilitiesr
   
INCOMPLETE_last_state_speech_stopped_time_e2e_processing_time_ms_last_metrics_create_tt_sessionrelease)selfr   r   r   r   epreload_sample_rate	__class__r   r   r&   =   s^   
zKrispVivaTurn.__init__c              
      s   | j rFz$t| dr| jdurd| _t| dr| jdurd| _t  d| _ W dS  tyE } ztjd| dd W Y d}~dS d}~ww dS )z1Release SDK reference when analyzer is destroyed.r5   Nr6   FzError in __del__: Tr#   )	r(   hasattrr5   r6   r   rC   r)   r   r.   )rD   rE   r   r   r   cleanup   s   "zKrispVivaTurn.cleanupc              
   C   s   z/t  }| j|_t  }t||_t| jj	|_
||_t|| jj	 d | _t j|}|W S  tyM } ztjd| dd td| |d}~ww )aV  Create a turn detection session with the specified sample rate.

        Args:
            sample_rate: Sample rate for the session

        Returns:
            krisp_audio.TtFloat instance

        Raises:
            ValueError: If sample rate or frame duration is not supported
            RuntimeError: If session creation fails
          z/Failed to create Krisp turn detection session: Tr#   N)krisp_audio	ModelInfor-   r1   TtSessionConfigr   inputSampleRater   r4   r   inputFrameDuration	modelInfor   r7   TtFloatcreater)   r   r.   r*   )rD   r   
model_infott_cfgtt_instancerE   r   r   r   rB      s   
z KrispVivaTurn._create_tt_sessionc              
      sx   | j |krdS t | z| | j | _|   W dS  ty; } ztjd| dd d| _W Y d}~dS d}~ww )zSet the sample rate and create/update the turn detection session.

        Args:
            sample_rate: The sample rate to set.
        Nr"   Tr#   )	_sample_rater%   set_sample_raterB   r5   clearr)   r   r.   )rD   r   rE   rG   r   r   rX      s   
zKrispVivaTurn.set_sample_ratec                 C      | j S )zGet all probabilities from the last append_audio call.

        Returns:
            List of probability values for each frame processed in the last append_audio call.
        )r<   rD   r   r   r   frame_probabilities      z!KrispVivaTurn.frame_probabilitiesc                 C   rZ   )zGet the last turn probability value computed.

        Returns:
            Last probability value, or None if no frames have been processed yet.
        )r;   r[   r   r   r   last_probability   r]   zKrispVivaTurn.last_probabilityc                 C   rZ   )zCheck if speech has been detected and triggered analysis.

        Returns:
            True if speech has been detected and turn analysis is active.
        )r:   r[   r   r   r   speech_triggered   r]   zKrispVivaTurn.speech_triggeredc                 C   rZ   )zGet the current turn analyzer parameters.

        Returns:
            Current turn analyzer configuration parameters.
        )r4   r[   r   r   r   r      r]   zKrispVivaTurn.paramsbuffer	is_speechc              
   C   s  | j du rtd tj| _tjS | jdu r"td tj| _tjS z| j| g | _	t
| jd }|| j }|dkrDtj| _tjW S || j }|d }t| jd| }| j|d | _tj|tjd}|tjd }	|	d| j}
tj}|
D ]i}|r| jstd	 d| _d
| _d| _n| jr| jdu rt | _| j | }|dk rqz|| _| j	| | jr|| jjkr| jdurt | j d | _tdd
|| jd| _t d tj!}| "   nqz|| _|W S  t#y } ztj$d| d
d tj}|| _|W  Y d}~S d}~ww )a  Append audio data for turn analysis.

        Args:
            buffer: Raw audio data bytes to append for analysis.
            is_speech: Whether the audio buffer contains detected speech.

        Returns:
            Current end-of-turn state after processing the audio.
        Nz<Turn detection session not initialized, returning INCOMPLETEz7Samples per frame not initialized, returning INCOMPLETE   r   )dtypeg      @z&Speech detected, turn analysis startedTrK   r   )	processoris_completeprobabilitye2e_processing_time_mszKrisp turn completez#Error during Krisp turn detection: r#   )%r5   r   warningr
   r=   r>   r7   r9   extendr<   lenbytesnp
frombufferint16astypefloat32reshaper:   tracer@   r?   timeperf_counterprocesstolistr;   appendr4   r   r   rA   debugCOMPLETErY   r)   r.   )rD   r`   ra   total_samplesnum_complete_framescomplete_samples_countbytes_to_processaudio_to_processaudio_int16audio_float32framesstateframeprobrE   error_stater   r   r   append_audio   s~   










zKrispVivaTurn.append_audioc                    s   | j }d| _ | j|fS )zAnalyze the current audio state to determine if turn has ended.

        Returns:
            Tuple containing the end-of-turn state and optional metrics data.
            Returns the last state determined by append_audio().
        N)rA   r>   )rD   metricsr   r   r   analyze_end_of_turn_  s   	
z!KrispVivaTurn.analyze_end_of_turnc                 C   s"   d| _ | j  tj| _d| _dS )z-Reset the turn analyzer to its initial state.FN)r:   r9   rY   r
   r=   r>   r?   r[   r   r   r   rY   l  s   

zKrispVivaTurn.clear)r   r   r   r   r   strr   r   r&   rJ   rB   rX   propertylistr\   r   r^   boolr_   r   rl   r
   r   r   r   r   rY   __classcell__r   r   rG   r   r   6   s>    	Rlr   )r   r+   rt   typingr   r   numpyrm   logurur   pipecat.audio.krisp_instancer   r   r   %pipecat.audio.turn.base_turn_analyzerr   r	   r
   pipecat.metrics.metricsr   r   rL   ModuleNotFoundErrorrE   r.   r)   r   r   r   r   r   r   <module>   s&   

