o
    i#,                     @   s   d dl Z d dlZd dlZd dlZd dlmZ G dd deZG dd deZG dd deZ	G d	d
 d
eZ
G dd deZdS )    N)SoundHDF5Filec                   @   s8   e Zd ZdZ						dddZd	d
 ZdddZdS )SpeedPerturbationa  SpeedPerturbation

    The speed perturbation in kaldi uses sox-speed instead of sox-tempo,
    and sox-speed just to resample the input,
    i.e pitch and tempo are changed both.

    "Why use speed option instead of tempo -s in SoX for speed perturbation"
    https://groups.google.com/forum/#!topic/kaldi-help/8OOG7eE4sZ8

    Warning:
        This function is very slow because of resampling.
        I recommmend to apply speed-perturb outside the training using sox.

    ?皙?NTkaiser_bestc                 C   s   || _ || _tj|| _|d urPi | _|| _d | _d | _	d| _
t|d!}|D ]}| d d\}	}
t|
}
|
| j|	< q(W d    d S 1 sIw   Y  d S d | _|| _|| _	d S NTr   )res_typekeep_lengthnumpyrandomRandomStatestate	utt2ratioutt2ratio_filelowerupperaccept_uttidopenrstripsplitfloat)selfr   r   r   r   r
   seedflineuttratio r   L/home/ubuntu/.local/lib/python3.10/site-packages/espnet/transform/perturb.py__init__   s&   	"
zSpeedPerturbation.__init__c                 C   s>   | j d u rd| jj| j| j| j| jS d| jj| j| jS )Nz3{}(lower={}, upper={}, keep_length={}, res_type={})z{}({}, res_type={}))	r   format	__class____name__r   r   r   r
   r   r   r   r   r    __repr__9   s   
zSpeedPerturbation.__repr__c                 C   s   |s|S | tj}| jr| j| }n	| j| j| j}t	j
||d| jd}| jrrtt|t| }t|t|krJ||d |d d   }|S t|t|k rr|d |d d fgdd t|jd D  }tj||ddd}|S )	Nr	   )r
      c                 S   s   g | ]}d qS ))r   r   r   ).0_r   r   r    
<listcomp>\   s    z.SpeedPerturbation.__call__.<locals>.<listcomp>r   constant)	pad_widthconstant_valuesmode)astyper   float32r   r   r   uniformr   r   librosaresampler
   r   abslenrangendimpad)r   xuttidtrainr   ydiffr,   r   r   r    __call__G   s(   	zSpeedPerturbation.__call__)r   r   NTr   NNTr$   
__module____qualname____doc__r!   r&   r>   r   r   r   r    r   	   s    
 r   c                   @   s,   e Zd ZdZdddZdd	 ZdddZdS )BandpassPerturbationa  BandpassPerturbation

    Randomly dropout along the frequency axis.

    The original idea comes from the following:
        "randomly-selected frequency band was cut off under the constraint of
         leaving at least 1,000 Hz band within the range of less than 4,000Hz."
        (The Hitachi/JHU CHiME-5 system: Advances in speech recognition for
         everyday home environments using multiple microphone arrays;
         http://spandh.dcs.shef.ac.uk/chime_workshop/papers/CHiME_2018_paper_kanda.pdf)

                  ?Nc                 C   s$   || _ || _tj|| _|| _d S )N)r   r   r   r   r   r   axes)r   r   r   r   rI   r   r   r    r!   s   s   
zBandpassPerturbation.__init__c                 C   s   d | jj| j| jS )Nz{}(lower={}, upper={}))r"   r#   r$   r   r   r%   r   r   r    r&   z   s   zBandpassPerturbation.__repr__Tc                    st   |sS j dkrtd| j| j| j}fdd| jD   fddtjD }| jj	| |k}|9 S )Nr	   z@Input in time-freq domain: (Time, Channel, Freq) or (Time, Freq)c                    s"   g | ]}|d kr
|n j | qS )r   )r7   )r(   i)x_stftr   r    r*      s   " z1BandpassPerturbation.__call__.<locals>.<listcomp>c                    s    g | ]\}}| v r|nd qS )r	   r   )r(   rJ   s)rI   r   r    r*      s     )
r7   RuntimeErrorr   r1   r   r   rI   	enumerateshaperandn)r   rK   r:   r;   r   rO   maskr   )rI   rK   r    r>      s   
zBandpassPerturbation.__call__)rE   rF   NrG   r?   r@   r   r   r   r    rD   e   s
    
rD   c                   @   s(   e Zd ZdddZdd Zdd	d
ZdS )VolumePerturbation皙皙?NTc           
      C   s   || _ || _|| _|| _tj|| _|d urSi | _d | _d | _d| _	t
|d!}|D ]}| d d\}}	t|	}	|	| j|< q+W d    d S 1 sLw   Y  d S d | _d S r   )dbunitr   r   r   r   r   r   r   r   r   r   r   r   r   )
r   r   r   r   rU   r   r   r   r   r   r   r   r    r!      s$   "
zVolumePerturbation.__init__c                 C   :   | j d u rd| jj| j| j| jS d| jj| j| jS Nz!{}(lower={}, upper={}, dbunit={})z{}("{}", dbunit={})r   r"   r#   r$   r   r   rU   r   r%   r   r   r    r&         
zVolumePerturbation.__repr__c                 C   sR   |s|S | tj}| jr| j| }n	| j| j| j}| j	r%d|d  }|| S )N
      )
r/   r   r0   r   r   r   r1   r   r   rU   )r   r9   r:   r;   r   r   r   r    r>      s   zVolumePerturbation.__call__)rS   rT   NTNr?   r$   rA   rB   r!   r&   r>   r   r   r   r    rR      s    

rR   c                   @   s:   e Zd ZdZ							dddZd	d
 ZdddZdS )NoiseInjectionzAdd isotropic noiseNlistTc                 C   s  || _ || _|| _|| _|| _|| _tj|| _	|d urNi | _
t|d }|D ]}	|	 d d\}
}t|}|| j
|
< q(W d    n1 sHw   Y  nd | _
|d uri | _|dkrt|d'}|D ]}	|	 d d\}
}tj|dd\}}||f| j|
< qdW d    n1 sw   Y  n|dkrt|d| _nt|d | _|d ur|d urt| j
t| jkrtd||d S d S d S )Nr   r	   r`   int16dtype
sound.hdf5z%The uttids mismatch between {} and {})utt2noise_filer   filetyperU   r   r   r   r   r   r   r   r   r   r   r   	utt2noise	soundfilereadr   
ValueErrorsetrM   r"   )r   rg   r   r   r   rf   rU   r   r   r   r   snrfilenamesignalrater   r   r    r!      sP   

zNoiseInjection.__init__c                 C   rV   rW   rX   r%   r   r   r    r&      rY   zNoiseInjection.__repr__c           
      C   s8  |s|S | tj}|d ur| jd ur| j| }n	| j| j| j}| jr+d|d  }|t	|d 
  }| jd ur|d urG| j| \}}n| jt| j }|t	|d 
  }tt|t| }| jd|}	t|t|kr||	||	   }ntj||	||	 gdd}n	| jdd|j}|||  S )NrZ   r[   r'   r   wrap)r,   r.   r	   )r/   r   r0   r   r   r1   r   r   rU   sqrtmeanrg   choicer`   valuesr4   r5   randintr8   normalrO   )
r   r9   r:   r;   r   scalenoisero   r=   offsetr   r   r    r>     s*   
zNoiseInjection.__call__)Nr^   r_   Nr`   TNr?   r@   r   r   r   r    r]      s    
5
r]   c                   @   s(   e Zd Zd
ddZdd Zddd	ZdS )RIRConvolver`   c           	      C   s   || _ || _i | _|dkrDt|d(}|D ]}| d d\}}tj|dd\}}||f| j|< qW d    d S 1 s=w   Y  d S |dkrPt|d| _d S t	|)Nr`   r   r	   ra   rb   rd   )
utt2rir_filerf   utt2rirr   r   r   rh   ri   r   NotImplementedError)	r   r|   rf   r   r   r   rm   rn   ro   r   r   r    r!   0  s   "zRIRConvolve.__init__c                 C   s   d | jj| jS )Nz{}("{}"))r"   r#   r$   r{   r%   r   r   r    r&   A  s   zRIRConvolve.__repr__NTc                    sr   |s S   tj  jdkrtd j| j| \}}|jdkr1tj fdd|D ddS t	j
 |dd	S )
Nr	   z1Input x must be one dimensional array, but got {}r'   c                    s   g | ]
}t j |d dqS )samer.   )scipyconvolve)r(   r   r9   r   r    r*   U  s    z(RIRConvolve.__call__.<locals>.<listcomp>rH   )axisr~   r   )r/   r   r0   r7   rM   r"   rO   r|   stackr   r   )r   r9   r:   r;   rirro   r   r   r    r>   D  s   


zRIRConvolve.__call__)r`   r?   r\   r   r   r   r    rz   /  s    
rz   )r2   r   r   rh   espnet.utils.io_utilsr   objectr   rD   rR   r]   rz   r   r   r   r    <module>   s    \,1m