o
    Si8                     @   s  U d dl Z d dlZd dlZd dlZd dlmZ d dlmZ d dlmZm	Z	m
Z
mZmZ d dlZd dlZd dlmZ d dlmZ d dlmZ d dlmZ d d	lmZmZmZmZmZmZ eG d
d deZ i a!eee"e"f ej#j$f e%d< de"de"dej#j$fddZ&eG dd deZeG dd deZ'G dd dZ(G dd de(Z)d dl*m+Z+m,Z,m-Z-m.Z.m/Z/m0Z0 dZ1dZ2dd Z3G dd  d e(Z4e4e)d!Z5eG d"d# d#eZ6eG d$d% d%eZ7d&d' Z8d(d) Z9dS )*    N)	dataclass)ROUND_HALF_UP)DictListLiteralOptionalTuple)get_current_resampling_backend)Resample)AudioTransform)libsox_rate)Secondscompute_num_samplesduring_docs_buildis_module_availableis_torchaudio_availableperturb_num_samplesc                   @   ^   e Zd ZU dZeed< dejdedejfddZ	de
d	ee
 dedee
ee
 f fd
dZdS )Speedz
    Speed perturbation effect, the same one as invoked with `sox speed` in the command line.

    It resamples the signal back to the input sampling rate, so the number of output samples will
    be smaller or greater, depending on the speed factor.
    factorsamplessampling_ratereturnc                 C   s*   t t|| j |}|t|}| S N)get_or_create_resamplerroundr   torch
from_numpynumpy)selfr   r   	resampler	augmented r"   R/home/ubuntu/.local/lib/python3.10/site-packages/lhotse/augmentation/torchaudio.py__call__%   s
   zSpeed.__call__offsetdurationc                 C   j   t ||}|durt ||nd}t|d| j }|dur$t|d| j nd}|| |dur2|| fS dfS )a	  
        This method helps estimate the original offset and duration for a recording
        before speed perturbation was applied.
        We need this estimate to know how much audio to actually load from disk during the
        call to ``load_audio()``.
        N   r   r   r   r   r%   r&   r   start_samplenum_samplesr"   r"   r#   reverse_timestamps,   s   
	
zSpeed.reverse_timestampsN__name__
__module____qualname____doc__float__annotations__npndarrayintr$   r   r   r   r-   r"   r"   r"   r#   r      s   
 r   _precompiled_resamplerssource_sampling_ratetarget_sampling_rater   c                 C   s&   | |f}|t vrt| |t |< t | S r   )r8   ResampleTensor)r9   r:   tplr"   r"   r#   r   J   s   r   c                   @   s   e Zd ZU dZeed< eed< dd Zedee	j
j fddZd	ejdejfd
dZdedee dedeeee f fddZdS )r
   zY
    Resampling effect, the same one as invoked with `sox rate` in the command line.
    r9   r:   c                 C   s   t | j| _t | j| _d S r   )r7   r9   r:   r   r"   r"   r#   __post_init__`   s   zResample.__post_init__r   c                 C   s   t  dkrd S t| j| jS )Nsox)r	   r   r9   r:   r=   r"   r"   r#   r    d   s
   
zResample.resamplerr   c                 O   s   | j | jkr|S t dkr7|j\}}g }t|D ]}t||d d f | j | j\}}|| qtj|ddS t	 rNt
|tjrEt|}| |}	|	 S dd l}
t| j | j}|
jj|| j| | j | dd}	|	S )Nr?   r   )axis)updownr@   )r9   r:   r	   shaperanger   appendr5   stackr   
isinstancer6   r   r   r    r   scipygcdsignalresample_poly)r   r   argskwargschannels_resampled_by_channelchannelresampled_samplesr!   rI   rJ   r"   r"   r#   r$   l   s6   



zResample.__call__r%   r&   r   c                 C   sd   | j | jkr
||fS t|| j td}|| j  }|dur,t|| j td}|| j  }||fS d}||fS )a  
        This method helps estimate the original offset and duration for a recording
        before resampling was applied.
        We need this estimate to know how much audio to actually load from disk during the
        call to ``load_audio()``.

        In case of resampling, the timestamps might change slightly when using non-trivial
        pairs of sampling rates, e.g. 16kHz -> 22.05kHz, because the number of samples in
        the resampled audio might actually correspond to incrementally larger/smaller duration.
        E.g. 16kHz, 235636 samples correspond to 14.72725s duration; after resampling to 22.05kHz,
        it is 324736 samples which correspond to 14.727256235827664s duration.
        )roundingN)r9   r:   r   r   )r   r%   r&   r   old_num_samples
old_offsetold_durationr"   r"   r#   r-      s   

zResample.reverse_timestampsN)r/   r0   r1   r2   r7   r4   r>   propertyr   r   nnModuler    r5   r6   r$   r   r   r-   r"   r"   r"   r#   r
   W   s"   
 !r
   c                   @   r   )Tempoa5  Tempo perturbation effect, the same one as invoked with `sox tempo` in the command line.

    Compared to speed perturbation, tempo preserves pitch.
    It resamples the signal back to the input sampling rate, so the number of output samples will
    be smaller or greater, depending on the tempo factor.
    r   r   r   r   c                 C   sZ   t   t  dd l}t|}t|tjrt|}|j	
||dt| jgg\}}| S )Nr   tempo)check_for_torchaudiocheck_torchaudio_version
torchaudior7   rH   r5   r6   r   r   sox_effectsapply_effects_tensorstrr   r   )r   r   r   r_   r!   new_sampling_rater"   r"   r#   r$      s   
zTempo.__call__r%   r&   c                 C   r'   )a	  
        This method helps estimate the original offset and duration for a recording
        before tempo perturbation was applied.
        We need this estimate to know how much audio to actually load from disk during the
        call to ``load_audio()``.
        Nr(   r)   r*   r"   r"   r#   r-      s   

zTempo.reverse_timestampsNr.   r"   r"   r"   r#   r[      s   
 r[   c                   @   s"   e Zd ZdejdejfddZdS )Codecr   r   c                 C   s   t )z\
        Apply encoder then decoder.

        To be implemented in derived classes.
        )NotImplementedErrorr   r   r"   r"   r#   r$      s   zCodec.__call__N)r/   r0   r1   r5   r6   r$   r"   r"   r"   r#   rd      s    rd   c                   @      e Zd Zdd Zdd ZdS )
MuLawCodecc                 C   s$   dd l }|j | _|j | _d S )Nr   )r_   
transformsMuLawEncodingencoderMuLawDecodingdecoderr   r_   r"   r"   r#   __init__   s   zMuLawCodec.__init__c                 C   s   |  | |S r   )rm   rk   rf   r"   r"   r#   r$      s   zMuLawCodec.__call__Nr/   r0   r1   ro   r$   r"   r"   r"   r#   rh      s    rh   )CDLLPOINTERc_intc_shortc_uint8c_void_p      c               
   C   s   zt d} W n ty } ztdd }~ww t| j_ttg| j_t| j_tt	t
t	ttg| j_tg| j_t| j_ttg| j_t| j_tt	tt	t
tg| j_tg| j_| S )Nzlibspandsp.sozWe cannot apply the narrowband transformation using the LPC10 codec as the SpanDSP library cannot be found. To install use `apt-get install libspandsp-dev` or visit <https://github.com/freeswitch/spandsp>.)rq   OSErrorRuntimeErrorrv   lpc10_encode_initrestypers   argtypeslpc10_encoderr   ru   rt   lpc10_encode_freelpc10_decode_initlpc10_decodelpc10_decode_free)apier"   r"   r#   libspandsp_api   s&   

r   c                   @   rg   )
LPC10Codecc                 C   s$   t  | _tt  | _tt  | _d S r   )r   r   ru   LPC10_FRAME_BYTESc_datart   LPC10_FRAME_SAMPLES	c_samplesr=   r"   r"   r#   ro     s   zLPC10Codec.__init__c           
      C   s.  | j d d}| j d d}|d t}d}tdt|t g}|D ]b}|d tj	}t
d|jd D ]	}	||	 | j|	< q6t
|jd tD ]}	d| j|	< qH| j || j| jt| jtksbJ | j || j| jttksqJ t
dtD ]}	| j|	 |d |< |d }qvq$| j | | j | |d S )Nr   r(   i   )r   r{   r   splitr   r   zeroslentoint16rE   rD   r   r~   r   r   r   r   r   )
r   r   rk   rm   framesidxoutframesamples_intir"   r"   r#   r$   !  s8   
zLPC10Codec.__call__Nrp   r"   r"   r"   r#   r     s    r   )lpc10mulawc                   @   sz   e Zd ZU dZeed< eed< eed< dd Zde	j
ded	e	j
fd
dZdedee dee d	eeee f fddZdS )
Narrowbandz
    Narrowband effect.

    Resample input audio to 8000 Hz, apply codec (encode then immediately decode), then (optionally) resample back to the original sampling rate.
    codecr9   restore_orig_src                 C   s:   t   dd l}| jtv rt| j  | _d S td| j )Nr   zunsupported codec: )r^   r_   r   CODECScodec_instance
ValueErrorrn   r"   r"   r#   r>   ]  s
   
zNarrowband.__post_init__r   r   r   c                 C   s   |j }t|}| jdkrt| jd}||}| |}| jr.| jdkr.td| j}||}| }| jrB||j krBt	|d|f}|S )Ni@  r(   )
sizer   r   r9   r   r   r   r   r5   resize)r   r   r   	orig_sizeresampler_downresampler_upr"   r"   r#   r$   f  s   


zNarrowband.__call__r%   r&   c                 C      ||fS )z
        This method just returnes the original offset and duration as the narrowband effect
        doesn't change any these audio properies.
        r"   r   r%   r&   r   r"   r"   r#   r-   |  s   zNarrowband.reverse_timestampsN)r/   r0   r1   r2   rb   r4   r7   boolr>   r5   r6   r$   r   r   r   r-   r"   r"   r"   r#   r   Q  s    
 	r   c                   @   sb   e Zd ZU dZeed< dejdedejfddZ	de
d	ee
 dee dee
ee
 f fd
dZdS )Volumez
    Volume perturbation effect, the same one as invoked with `sox vol` in the command line.
    It applies given gain (factor) to the input, without any postprocessing (such as a limiter).
    r   r   r   r   c                 C   s
   || j  S r   )r   )r   r   r   r"   r"   r#   r$     s   
zVolume.__call__r%   r&   c                 C   r   )z
        This method just returnes the original offset and duration as volume perturbation
        doesn't change any these audio properies.
        r"   r   r"   r"   r#   r-     s   
zVolume.reverse_timestampsNr.   r"   r"   r"   r#   r     s   
 r   c                  C   sB   dd l } ddlm} t s|| j|dk rtd d S d S d S )Nr   )parsez0.7zTorchaudio SoX effects chains are only introduced in version 0.7 - please upgrade your PyTorch to 1.7.1 and torchaudio to 0.7.2 (or higher) to use them.)r_   packaging.versionr   r   __version__warningswarn)r_   _versionr"   r"   r#   r^     s   r^   c                   C   s   t  stdd S )NzpThis transform is not supported in torchaudio-free Lhotse installation. Please install torchaudio and try again.)r   rz   r"   r"   r"   r#   r]     s
   r]   ):
contextlibostypingr   dataclassesr   decimalr   r   r   r   r   r   r   r5   r   lhotse.audio.resampling_backendr	   lhotse.augmentation.resampler
   r;   lhotse.augmentation.transformr   lhotse.tools.libsoxr   lhotse.utilsr   r   r   r   r   r   r   r8   r7   rY   rZ   r4   r   r[   rd   rh   ctypesrq   rr   rs   rt   ru   rv   r   r   r   r   r   r   r   r^   r]   r"   r"   r"   r#   <module>   sX   
  
 ,
U6
 18