o
    }oiI3                     @   sb   d dl Z d dlZd dlmZ d dlZd dlZd dlZd dl	Z	d dl
mZmZmZ G dd dZdS )    N)Path)EnergyFeaturizerMelSpectrogramFeaturizerPitchFeaturizerc                   @   s&  e Zd Zdd Zdd Zejdd Zej	
dej	jdd	 Zej	
dej	jd
d Zej	
dej	jdd Zej	
dej	jdd Zej	
dej	jdd Zej	
dej	jdd Zej	
dej	jdd Zej	
dej	jdd Zej	
dej	jdd ZdS )TestTTSFeaturesc                 C   s@   d| _ d| _d| _d| _d| _d| j| j  | _d| j i| _d S )Nztest.wavP   d   iP  i'     audio_filepath)audio_filenamespec_dimhop_len	audio_lensample_ratespec_lenmanifest_entry)self r   k/home/ubuntu/.local/lib/python3.10/site-packages/tests/collections/tts/parts/preprocessing/test_features.pysetup_class!   s   zTestTTSFeatures.setup_classc                 C   s8   t | j| | j }dt | j||  | j  }||fS )Nr	   )intr   r   )r   offsetdurationstart_frame	end_framer   r   r   _compute_start_end_frames*   s   z)TestTTSFeatures._compute_start_end_framesc                 c   s`    t jj| jgd}t }zt|j}|| j }t	
||| j |V  W |  d S |  w )N)size)nprandomuniformr   tempfileTemporaryDirectoryr   namer   sfwriter   cleanup)r   
test_audiotemp_dirtest_dir
audio_pathr   r   r   _create_test_dir/   s   

z TestTTSFeatures._create_test_dirCPUc                 C   s   t | j| j| jd}|  }|j| j|d}W d    n1 s!w   Y  t|jdks/J |j	t
jks7J |jd | jksAJ |jd | jksKJ d S )Nmel_dim
hop_lengthr   r   	audio_dir   r   r	   )r   r   r   r   r*   compute_mel_specr   lenshapedtyper   float32r   )r   mel_featurizerr(   specr   r   r   test_compute_mel_spectrogram;   s   
z,TestTTSFeatures.test_compute_mel_spectrogramc                 C   s   d}t || j| j| jd}|  }|d }|j| j||d |j| j||d}W d    n1 s2w   Y  || }t|j	dksDJ |j
tjksLJ |j	d | jksVJ |j	d | jks`J d S )Nmel_test)feature_namer-   r.   r   featurer   r0   feature_dirr1   r   r	   )r   r   r   r   r*   saver   loadr3   r4   r5   torchr6   r   )r   mel_namer7   r(   r>   mel_dictmel_specr   r   r   "test_save_and_load_mel_spectrogramJ   s    
z2TestTTSFeatures.test_save_and_load_mel_spectrogramc                 C   s   t | j| jd}|  }|j| j|d\}}}W d    n1 s"w   Y  t|jdks0J |jd | jks:J |j	t
jksBJ t|jdksKJ |jd | jksUJ |j	tks\J t|jdkseJ |jd | jksoJ |j	t
jkswJ d S )N)r.   r   r/   r	   r   )r   r   r   r*   compute_pitchr   r3   r4   r   r5   r   r6   bool)r   pitch_featurizerr(   pitchvoicedvoiced_probr   r   r   test_compute_pitch_   s   
z"TestTTSFeatures.test_compute_pitchc                 C   s   t |d d d d }d|i}t| j| jd d}|j||d\}}}t| j| jdd}|j||d\}	}
}tj|	| tj|
| tj|| d S )	Nttsmini_ljspeechwavsLJ003-0182.wavr
   )r.   r   batch_secondsr/         ?)r   r   r   r   rF   rA   testingassert_close)r   test_data_dirr
   r   rH   rI   rJ   rK   pitch_featurizer_batchpitch_batchvoiced_batchvoiced_prob_batchr   r   r   test_compute_pitch_batchedu   s   
z*TestTTSFeatures.test_compute_pitch_batchedc                 C   s4  d}d}d}t |||| j| jd}|  }|d }|j| j||d |j| j||d}W d    n1 s6w   Y  || }|| }	|| }
t|jdksPJ |jd | j	ksZJ |j
tjksbJ t|	jdkskJ |	jd | j	ksuJ |	j
tjks}J t|
jdksJ |
jd | j	ksJ |
j
tjksJ d S )	N
pitch_testvoiced_mask_testvoiced_prob_test
pitch_namevoiced_mask_namevoiced_prob_namer.   r   r<   r=   r	   r   )r   r   r   r*   r?   r   r@   r3   r4   r   r5   rA   r6   rG   )r   r_   r`   ra   rH   r(   r>   
pitch_dictrI   voiced_maskrK   r   r   r   test_save_and_load_pitch   s:   
z(TestTTSFeatures.test_save_and_load_pitchc                  C   s  d}d}d}t |d d d d }d|i}d	}d	}d
}	d}
| j||d\}}| j|	|
d\}}|||d}||	|
d}t|||| j| jd}|  ,}|d }|j|||d |j|||d}|j|||d}|j|||d}W d    n1 szw   Y  || }|| }|| }|| }|| }|| }|| }|| }|| }tj	
||||  tj	
||||  tj	
||||  tj	
||||  tj	
||||  tj	
||||  d S )Nr[   r\   r]   rM   rN   rO   rP   r
   rR   g      @g333333?r   r   r
   r   r   r^   r<   r=   )r   r   r   r   r   r*   r?   r@   rA   rS   rT   ) r   rU   r_   r`   ra   r
   r   offset1	duration1offset2	duration2start1end1start2end2manifest_entry_segment1manifest_entry_segment2rH   r(   r>   rb   pitch_dict_segment1pitch_dict_segment2rI   rc   rK   pitch_segment1voiced_mask_segment1voiced_prob_segment1pitch_segment2voiced_mask_segment2voiced_prob_segment2r   r   r   !test_save_and_load_pitch_segments   s`   
z1TestTTSFeatures.test_save_and_load_pitch_segmentsc                 C   s   t | j| j| jd}t|d}|  }|j| j|d}W d    n1 s&w   Y  t|j	dks4J |j	d | j
ks>J |jtjksFJ d S )Nr,   )spec_featurizerr/   r	   r   )r   r   r   r   r   r*   compute_energyr   r3   r4   r   r5   r   r6   )r   r7   energy_featurizerr(   energyr   r   r   test_compute_energy   s   

z#TestTTSFeatures.test_compute_energyc                 C   s   d}t | j| j| jd}t||d}|  }|d }|j| j||d |j| j||d}W d    n1 s7w   Y  || }t	|j
dksIJ |j
d | jksSJ |jtjks[J d S )Nenergy_testr,   r;   rz   r<   r=   r	   r   )r   r   r   r   r   r*   r?   r   r@   r3   r4   r   r5   rA   r6   )r   energy_namer7   r|   r(   r>   energy_dictr}   r   r   r   test_save_and_load_energy   s    
z)TestTTSFeatures.test_save_and_load_energyc                 C   s0  d}d}d}d}d}| j ||d\}}| j ||d\}	}
| j||d}| j||d}t| j| j| jd}t||d}|  .}|d	 }|j| j	||d
 |j
| j	||d
}|j
|||d
}|j
|||d
}W d    n1 sow   Y  || }|| }|| }tj||||  tj|||	|
  d S )Nr   g        rR   g      ?re   rf   r,   r   r<   r=   )r   r   r   r   r   r   r   r*   r?   r   r@   rA   rS   rT   )r   rU   r   rg   rh   ri   rj   rk   rl   rm   rn   ro   rp   r7   r|   r(   r>   r   energy_dict_segment1energy_dict_segment2r}   energy_segment1energy_segment2r   r   r   "test_save_and_load_energy_segments  s>   
z2TestTTSFeatures.test_save_and_load_energy_segmentsN)__name__
__module____qualname__r   r   
contextlibcontextmanagerr*   pytestmarkrun_only_onunitr9   rE   rL   rZ   rd   ry   r~   r   r   r   r   r   r   r       s@    	






#
9

r   )r   r    pathlibr   numpyr   r   	soundfiler#   rA   1nemo.collections.tts.parts.preprocessing.featuresr   r   r   r   r   r   r   r   <module>   s   