o
    i)                  
   @   sT  d Z ddlZddlZddlZddlmZ ddlm	Z	 ddl
mZ dZdefdd	Zdefd
dZdefddZdededefddZdededefddZdd ZdededefddZdedededefdd Zd!ed"ed#ed$efd%d&Zd'ed"ed#ed$efd(d)Zd*ed"ed#ed$edef
d+d,Zd'ed"ed#ed$efd-d.Zd'edefd/d0ZdS )1zAudio utility functions for Pipecat.

This module provides common audio processing utilities including mixing,
format conversion, volume calculation, and codec transformations for
various audio formats used in Pipecat pipelines.
    N)BaseAudioResamplerSOXRAudioResamplerSOXRStreamAudioResampler   returnc                  K   sX   ddl }|  |d |jdtdd W d   n1 s w   Y  tdi | S )a  Create a default audio resampler instance.

    .. deprecated:: 0.0.74
        This function is deprecated and will be removed in a future version.
        Use `create_stream_resampler` for real-time processing scenarios or
        `create_file_resampler` for batch processing of complete audio files.

    Args:
        **kwargs: Additional keyword arguments passed to the resampler constructor.

    Returns:
        A configured SOXRAudioResampler instance.
    r   Nalwaysz`create_default_resampler` is deprecated. Use `create_stream_resampler` for real-time processing scenarios or `create_file_resampler` for batch processing of complete audio files.   )
stacklevel )warningscatch_warningssimplefilterwarnDeprecationWarningr   )kwargsr   r   r   G/home/ubuntu/.local/lib/python3.10/site-packages/pipecat/audio/utils.pycreate_default_resampler   s   

	r   c                  K      t di | S )zCreate an audio resampler instance for batch processing of complete audio files.

    Args:
        **kwargs: Additional keyword arguments passed to the resampler constructor.

    Returns:
        A configured SOXRAudioResampler instance.
    Nr   r   r   r   r   r   create_file_resampler8      	r   c                  K   r   )zCreate a stream audio resampler instance.

    Args:
        **kwargs: Additional keyword arguments passed to the resampler constructor.

    Returns:
        A configured SOXRStreamAudioResampler instance.
    Nr   r   r   r   r   r   create_stream_resamplerD   r   r   audio1audio2c                 C   s   t j| t jd}t j|t jd}tt|t|}t j|d|t| fdd}t j|d|t| fdd}|t j|t j }t |ddt j}|t j	 S )a  Mix two audio streams together by adding their samples.

    Both audio streams are assumed to be 16-bit signed integer PCM data.
    If the streams have different lengths, the shorter one is zero-padded
    to match the longer stream.

    Args:
        audio1: First audio stream as raw bytes (16-bit signed integers).
        audio2: Second audio stream as raw bytes (16-bit signed integers).

    Returns:
        Mixed audio data as raw bytes with samples clipped to 16-bit range.
    dtyper   constant)modei i  )
np
frombufferint16maxlenpadastypeint32cliptobytes)r   r   data1data2
max_lengthpadded1padded2mixed_audior   r   r   	mix_audioP   s   r0   
left_audioright_audioc                 C   sh   t j| t jd}t j|t jd}tt|t|}|d| }|d| }t ||f}|t j S )a	  Interleave left and right mono audio channels into stereo audio.

    Takes two mono audio streams and combines them into a single stereo
    stream by interleaving the samples (L, R, L, R, ...). If the channels
    have different lengths, both are truncated to the shorter length.

    Args:
        left_audio: Left channel audio as raw bytes (16-bit signed integers).
        right_audio: Right channel audio as raw bytes (16-bit signed integers).

    Returns:
        Interleaved stereo audio data as raw bytes.
    r   N)r    r!   r"   minr$   column_stackr&   r)   )r1   r2   leftright
min_lengthstereor   r   r   interleave_stereo_audioo   s   r9   c                 C   s$   | | ||  }t dtd|}|S )a*  Normalize a value to the range [0, 1] and clamp it to bounds.

    Args:
        value: The value to normalize.
        min_value: The minimum value of the input range.
        max_value: The maximum value of the input range.

    Returns:
        Normalized value clamped to the range [0, 1].
    r      )r#   r3   )value	min_value	max_value
normalizednormalized_clampedr   r   r   normalize_value   s   r@   audiosample_ratec                 C   sN   t j| t jd}|t j}|j| }tj||d}||}t	|dd}|S )a  Calculate the loudness level of audio data using EBU R128 standard.

    Uses the pyloudnorm library to calculate integrated loudness according
    to the EBU R128 recommendation, then normalizes the result to [0, 1].

    Args:
        audio: Audio data as raw bytes (16-bit signed integers).
        sample_rate: Sample rate of the audio in Hz.

    Returns:
        Normalized loudness value between 0 (quiet) and 1 (loud).
    r   )
block_sizeiP   )
r    r!   r"   r&   float64sizepylnMeterintegrated_loudnessr@   )rA   rB   audio_npaudio_floatrC   meterloudnessr   r   r   calculate_audio_volume   s   

rN   r;   
prev_valuefactorc                 C   s   ||| |   S )a  Apply exponential smoothing to a value.

    Exponential smoothing is used to reduce noise in time-series data by
    giving more weight to recent values while still considering historical data.

    Args:
        value: The new value to incorporate.
        prev_value: The previous smoothed value.
        factor: Smoothing factor between 0 and 1. Higher values give more
                weight to the new value.

    Returns:
        The exponentially smoothed value.
    r   )r;   rO   rP   r   r   r   exp_smoothing   s   rQ   
ulaw_bytesin_rateout_rate	resamplerc                    &   t | d}||||I dH }|S )u  Convert μ-law encoded audio to PCM and optionally resample.

    Args:
        ulaw_bytes: μ-law encoded audio data as raw bytes.
        in_rate: Original sample rate of the μ-law audio in Hz.
        out_rate: Desired output sample rate in Hz.
        resampler: Audio resampler instance for rate conversion.

    Returns:
        PCM audio data as raw bytes at the specified output rate.
    r
   N)audioopulaw2linresample)rR   rS   rT   rU   in_pcm_bytesout_pcm_bytesr   r   r   ulaw_to_pcm      r\   	pcm_bytesc                    &   | | ||I dH }t|d}|S )u  Convert PCM audio to μ-law encoding and optionally resample.

    Args:
        pcm_bytes: PCM audio data as raw bytes (16-bit signed integers).
        in_rate: Original sample rate of the PCM audio in Hz.
        out_rate: Desired output sample rate in Hz.
        resampler: Audio resampler instance for rate conversion.

    Returns:
        μ-law encoded audio data as raw bytes at the specified output rate.
    Nr
   )rY   rW   lin2ulaw)r^   rS   rT   rU   rZ   out_ulaw_bytesr   r   r   pcm_to_ulaw      rb   
alaw_bytesc                    rV   )a  Convert A-law encoded audio to PCM and optionally resample.

    Args:
        alaw_bytes: A-law encoded audio data as raw bytes.
        in_rate: Original sample rate of the A-law audio in Hz.
        out_rate: Desired output sample rate in Hz.
        resampler: Audio resampler instance for rate conversion.

    Returns:
        PCM audio data as raw bytes at the specified output rate.
    r
   N)rW   alaw2linrY   )rd   rS   rT   rU   rZ   r[   r   r   r   alaw_to_pcm   r]   rf   c                    r_   )a  Convert PCM audio to A-law encoding and optionally resample.

    Args:
        pcm_bytes: PCM audio data as raw bytes (16-bit signed integers).
        in_rate: Original sample rate of the PCM audio in Hz.
        out_rate: Desired output sample rate in Hz.
        resampler: Audio resampler instance for rate conversion.

    Returns:
        A-law encoded audio data as raw bytes at the specified output rate.
    Nr
   )rY   rW   lin2alaw)r^   rS   rT   rU   rZ   out_alaw_bytesr   r   r   pcm_to_alaw	  rc   ri   c                 C   s&   t j| t jd}t | }|tkS )u-  Determine if an audio sample contains silence by checking amplitude levels.

    This function analyzes raw PCM audio data to detect silence by comparing
    the maximum absolute amplitude against a predefined threshold. The audio
    is expected to be clean speech or complete silence without background noise.

    Args:
        pcm_bytes: Raw PCM audio data as bytes (16-bit signed integers).

    Returns:
        bool: True if the audio sample is considered silence (below threshold),
              False otherwise.

    Note:
        Normal speech typically produces amplitude values between ±500 to ±5000,
        depending on factors like loudness and microphone gain. The threshold
        (SPEAKING_THRESHOLD) is set well below typical speech levels to
        reliably detect silence vs. speech.
    r   )r    r!   r"   absr#   SPEAKING_THRESHOLD)r^   
audio_datar=   r   r   r   
is_silence  s   rm   )__doc__rW   numpyr    
pyloudnormrG   -pipecat.audio.resamplers.base_audio_resamplerr   'pipecat.audio.resamplers.soxr_resamplerr   .pipecat.audio.resamplers.soxr_stream_resamplerr   rk   r   r   r   bytesr0   r9   r@   intfloatrN   rQ   r\   rb   rf   ri   boolrm   r   r   r   r   <module>   sN   

