o
    Si                     @   s   d Z ddlZddlmZ ddlmZmZmZmZm	Z	 ddl
mZ ddlmZmZmZmZ ddlmZmZmZ dZd	ed
fdedee dee defddZ	ddedee deeeee	eef f f fddZdedefddZdS )a;  
WHAM noise recordings preparation for Lhotse.

This recipe prepares the noise component of the WHAM dataset, which consists
of real-world ambient noise recordings collected in various environments
(cafes, restaurants, bars, etc.). These noise recordings are commonly used
in combination with clean speech datasets to create noisy mixtures for
speech separation and enhancement tasks.

For more details about WHAM, see:
- Paper: "WHAM!: Extending Speech Separation to Noisy Environments"
  https://arxiv.org/abs/1907.01160
- Original dataset: https://wham.csail.mit.edu/
    N)Path)DictIterableListOptionalUnion)ZipFile)	RecordingRecordingSetSupervisionSetvalidate)Pathlikeresumable_downloadsafe_extractzRhttps://my-bucket-a8b4b49c25c811ee9a7e8bba05fa24c7.s3.amazonaws.com/wham_noise.zip.F
target_dirurlforce_downloadreturnc                 C   s   t | } | jddd d}| | }| d }| d }| r+td| d| d |S t|||d	 td
 t|}|j| d |  W d   |S 1 sQw   Y  |S )a  
    Download and untar the WHAM corpus.

    :param target_dir: Pathlike, the path of the dir to store the dataset.
    :param url: str, the url that downloads file called "wham_noise.zip".
    :param force_download: bool, if True, download the archive even if it already exists.
    Tparentsexist_okzwham_noise.zip
wham_noisez.wham_noise_completedz	Skipping z	 because z exists.)filenamer   zExtracting files...)pathN)	r   mkdiris_filelogginginfor   r   
extractalltouch)r   r   r   zip_namezip_path
corpus_dircompleted_detectorzf r&   G/home/ubuntu/.local/lib/python3.10/site-packages/lhotse/recipes/wham.pydownload_wham   s$   



r(   r#   
output_dirc              
   C   s   t | } |  sJ d|  i }g d}|D ]&}td| d dt| | i||< td| d t|| d  q|d urkt |}|jddd |D ]}||  D ]\}}||d	| d
| d  qWqO|S )NzNo such directory: )trcvttz	Scanning z	 split...
recordingszValidating Tr   wham__z	.jsonl.gz)	r   is_dirr   r   scan_recordingsr   r   itemsto_file)r#   r)   	manifestssplitssplitkeymanifestr&   r&   r'   prepare_wham:   s"   r9   c                 C   s   t dd | dD S )Nc                 s   s    | ]}t |V  qd S N)r	   	from_file).0filer&   r&   r'   	<genexpr>U   s    

z"scan_recordings.<locals>.<genexpr>z*.wav)r
   from_recordingsrglob)r#   r&   r&   r'   r1   T   s   
r1   r:   )__doc__r   pathlibr   typingr   r   r   r   r   zipfiler   lhotser	   r
   r   r   lhotse.utilsr   r   r   WHAM_URLstrboolr(   r9   r1   r&   r&   r&   r'   <module>   s<    
 
