o
    2wi*q                  !   @   s  d Z ddlZddlZddlZddlZddlZddlZddlZddlm	Z	 ddl
mZmZmZ ddlmZ ddlmZ ddlmZmZmZmZmZmZ ddlZddlmZ dd	lmZmZ dd
lm Z m!Z!m"Z" ddl#m$Z$m%Z% ddl&m'Z'm(Z( ddl)m*Z*m+Z+m,Z, g dddgddgdZ-g dddgg ddZ.dZ/dZ0	d7de*defddZ1							 	 	!	"	d8d#e*d$ee* d%eee2ee2 f  d&e2d'e3d(e3d)e3d*e4d+e4d,e*d-e2d.e3dee2ee2ee"e(f f f fd/d0Z5	d9d#e*d1e4d2eee2  de3fd3d4Z6G d5d6 d6Z7dS ):a  
The CHiME-6 dataset is a collection of over 50 hours of conversational speech recordings
collected from twenty real dinner parties that have taken place in real homes. The
recordings have been made using multiple 4-channel microphone arrays and have been
fully transcribed.

The dataset features:

- simultaneous recordings from multiple microphone arrays;
- real conversation, i.e. talkers speaking in a relaxed and unscripted fashion;
- a range of room acoustics from 20 different homes each with two or three separate
  recording areas;
- real domestic noise backgrounds, e.g., kitchen appliances, air conditioning,
  movement, etc.

Fully-transcribed utterances are provided in continuous audio with ground truth speaker
labels and start/end time annotations for segmentation.

The dataset was used for the 5th and 6th CHiME Speech Separation and Recognition
Challenge. Further information and an open source baseline speech recognition system
are available online (http://spandh.dcs.shef.ac.uk/chime_challenge/chime2018).

NOTE: The CHiME-5 and CHiME-6 datasets are the same, with the only difference that
additional software was provided in CHiME-6 to perform array synchronization. We expect
that users have already downloaded the CHiME-5 dataset here:
https://licensing.sheffield.ac.uk/product/chime5

NOTE: Users can also additionally perform array synchronization as described here:
https://github.com/kaldi-asr/kaldi/blob/master/egs/chime6/s5_track1/local/generate_chime6_data.sh
We also provide this option in the `prepare_chime6` function.
    N)defaultdict)ProcessPoolExecutorThreadPoolExecutorwait)datetime)Path)AnyDictListOptionalSequenceUnion)tqdm)fix_manifests$validate_recordings_and_supervisions)AudioSource	RecordingRecordingSet)TimeFormatConverternormalize_text_chime6)SupervisionSegmentSupervisionSet)Pathlikeadd_durationsresumable_download)S03S04S05S06S07S08S12S13S16S17S18S19S20S22S23S24S02S09S01S21)traindeveval)r   r   r   r   r   r    r!   r"   r#   r$   r%   r(   r)   r*   )r-   r&   r'   r.   zfhttps://raw.githubusercontent.com/chimechallenge/chime6-synchronisation/master/chime6_audio_edits.jsonz`https://raw.githubusercontent.com/chimechallenge/chime6-synchronisation/master/audio_md5sums.txt.
target_dirreturnc                 C   s2   t d t d t d t d t |   t| S )a{  
    Download the original dataset. This cannot be done automatically because of the
    license agreement. Please visit the following URL and download the dataset manually:
    https://licensing.sheffield.ac.uk/product/chime5
    :param target_dir: Pathlike, the path of the dir to storage the dataset.
    :return: the path to downloaded and extracted directory with data.
    z5We cannot download the CHiME-6 dataset automatically.zAPlease visit the following URL and download the dataset manually:z0https://licensing.sheffield.ac.uk/product/chime5z>Then, please extract the tar files to the following directory:)printr   )r3    r6   R/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/lhotse/recipes/chime6.pydownload_chime6O   s   

r8   allmdmF   /usr/bin/soxkaldi
corpus_dir
output_dirdataset_partsmicuse_reference_arrayperform_array_syncverify_md5_checksumsnum_jobsnum_threads_per_jobsox_pathnormalize_textuse_chime7_splitc           '         s  ddl }dv sJ d|rt|}|jddd d|v r$tt }nt|tr,|g}t|	tt sEJ dtt  d	| tt
jd
d |D }|r|sZtd|d }|jddd |D ]}|d | jddd |d | jddd qgt|	dgd dd }|dksJ dt| ||	|d t|t|}td| d t|d fdd|D }t|}W d   n1 sw   Y  nt| }|rt|||drtd ntd|odk}tt}|D ]}g g }|rt| t|< dkri t| D ]}|r|d v rd!n|}d"d |d |  | d#D }t|dkrCt!d$| d%| d&g }t"|D ]/\}}|#t$d'd(| d(| d) gt|d* |j%d+d) }d(| d(| d) g||f< qI|&t|d }#t'||t(|j)|j*|j*|j) d, qn]t| D ]X}|r|d v rd!n|}d-d |d |  | d.D }g }t"t+|D ]\}}|#t$d'|gt|d* q|&t|d }#t'||t(|j)|j*|j*|j) d, qt,-dEfd/d0	}t| D ]}|r|d v rd!n|}t.|d | | d1 t}t/0|}t"|D ]b\}} | d2 }||||rD|d!krD| d3 ndd4}!t12| d5 }"t12| d6 }#|"|#kr]q-|#t3| d7| ||"t4|#|" d8d9|!t5| d: |
d;d<||d!krd=| v rd=| d= indd>	 q-W d   n	1 sw   Y  qt67|}t8|d?\}$}%t9|$|% |dur|r d@n}&|%:|dA|& dB| dC  |$:|dA dD| dC  |$|%d?||< q|S )Fa	  
    Returns the manifests which consist of the Recordings and Supervisions
    :param corpus_dir: Pathlike, the path of the data dir, either the original CHiME-5
        data or the synchronized CHiME-6 data. If former, the `perform_array_sync`
        must be True.
    :param output_dir: Pathlike, the path where to write the manifests.
    :param mic: str, the microphone type to use, choose from "ihm" (close-talk) or "mdm"
        (multi-microphone array) settings. For MDM, there are 6 array devices with 4
        channels each, so the resulting recordings will have 24 channels.
    :param use_reference_array: bool, if True, use the reference array for MDM setting.
        Only the supervision segments have the reference array information in the
        `channel` field. The recordings will still have all the channels in the array.
        Note that the train set does not have the reference array information.
    :param perform_array_sync: Bool, if True, perform array synchronization based on:
        https://github.com/chimechallenge/chime6-synchronisation
    :param num_jobs: int, the number of jobs to run in parallel for array synchronization.
    :param num_threads_per_job: int, number of threads to use per job for clock drift
        correction. Large values may require more memory, so we recommend using a job
        scheduler.
    :param sox_path: Pathlike, the path to the sox v14.4.2 binary. Note that different
        versions of sox may produce different results.
    :param normalize_text: str, the text normalization method, choose from "none", "upper",
        "kaldi". The "kaldi" method is the same as Kaldi's text normalization method for
        CHiME-6.
    :param verify_md5_checksums: bool, if True, verify the md5 checksums of the audio files.
        Note that this step is slow so we recommend only doing it once. It can be sped up
        by using the `num_jobs` argument.
    :param use_chime7_split: bool, if True, use the new split for CHiME-7 challenge.
    :return: a Dict whose key is the dataset part ("train", "dev" and "eval"), and the
        value is Dicts with the keys 'recordings' and 'supervisions'.

    NOTE: If `perform_array_sync` is True, the synchronized data will be written to
        `output_dir`/CHiME6. This may take a long time and the output will occupy
        approximately 160G of storage. We will also create a temporary directory for
        processing, so the required storage in total will be approximately 300G.
    r   N)ihmr:   z"mic must be either 'ihm' or 'mdm'.T)parentsexist_okr9   zdataset_parts must be one of z. Found c                 S   s   g | ]}t | qS r6   )DATASET_PARTS).0partr6   r6   r7   
<listcomp>   s    z"prepare_chime6.<locals>.<listcomp>z@If `perform_array_sync` is True, `output_dir` must be specified.CHiME6audiotranscriptionsz	--versionzutf-8 zv14.4.2a5  The sox version must be 14.4.2. Please download the sox v14.4.2 binary from https://sourceforge.net/projects/sox/files/sox/14.4.2/ and specify the path to the binary with the `sox_path` argument.You can also install it in a Conda environment with the following command: conda install -c conda-forge sox=14.4.2)r>   r?   rG   num_workersz&Performing array synchronization with z! jobs. This may take a long time.max_workersc                    s   g | ]
}j  j|d qS ))session)submitsynchronize_session)rN   rY   )chime6_array_synchronizerexr6   r7   rP      s    )rV   sessionszMD5 checksums verified. All OK.zXMD5 checksums do not match. Please prepare the array-synchronized CHiME-6 dataset again.r:   rJ   )r&   r'   r/   c                 S      g | ]}|qS r6   r6   rN   pr6   r6   r7   rP          _P*.wavzNo audio found for session z in z set.file   r;   )typechannelssource_)idsourcessampling_ratenum_samplesdurationc                 S   r_   r6   r6   r`   r6   r6   r7   rP   %  rb   _U*.wavc                    sF   dkr
|| f S | } st t|jS  fddt|jD S )NrJ   c                    s   g | ]\}} |j v r|qS r6   )rh   )rN   isrefr6   r7   rP   E  s    z8prepare_chime6.<locals>._get_channel.<locals>.<listcomp>)listrangenum_channels	enumeraterk   )spk_idrY   rs   	recording)global_spk_channel_maprA   
recordingsrr   r7   _get_channel=  s   z$prepare_chime6.<locals>._get_channel.jsonspeakerrs   rr   
start_timeend_time->  )rl   words)	normalizeEnglishlocation)	rj   recording_idstartrn   channeltextlanguager~   custom)r{   supervisionsz-refzchime6-_supervisions_z	.jsonl.gz_recordings_N);	soundfiler   mkdirrt   rM   keys
isinstancestrsetissubset	itertoolschainfrom_iterable
ValueError
subprocesscheck_outputdecodestripsplitChime6ArraySynchronizerminlenlogginginfor   r   _verify_md5_checksumsr5   RuntimeErrorr   dictDATASET_PARTS_CHIME7rglobFileNotFoundErrorrw   appendr   stem	SoundFiler   int
samplerateframessortedr   from_recordingsopenjsonloadr   hms_to_secondsr   r   r   r   from_segmentsr   r   to_file)'r>   r?   r@   rA   rB   rC   rD   rE   rF   rG   rH   rI   sfr^   
chime6_dirrO   sox_versionfuturesri   	manifestsr   rY   part_audio_pathsrk   idx
audio_pathrx   audio_sfr|   f
transcriptsegmentr   r   endrecording_setsupervision_set	mic_affixr6   )r\   r]   rz   rA   r{   r7   prepare_chime6a   sj  2


	





 
(






#


r   rV   r^   c                    s.  dd l tt }|d }ttt|dd i  t|d}|D ]}| j	ddd\}}| t|j
< q!W d    n1 s@w   Y  d	td
tf fdd}	t| d}
d urefdd|
D }
td| d t|d}tt||	|
t|
d}W d    t|S 1 sw   Y  t|S )Nr   zmd5sums.txtzDownloading checksum file)descrrT   r;   )maxsplitrd   r4   c                    sB    tt| d  }t| j}| v r| | krdS dS )NrbFT)md5r   r   read	hexdigestr   )rd   checksumfilename)	checksumshashlibr6   r7   _verify_checksum  s
   
z/_verify_md5_checksums.<locals>._verify_checksumz*.wavc                    s$   g | ]}|j d d  v r|qS )ri   r   )r   r   )rN   r   )r^   r6   r7   rP     s   $ z)_verify_md5_checksums.<locals>.<listcomp>zVerifying checksum with z workers...rW   )total)r   r   tempfilemkdtempr   CHIME6_MD5SUM_FILEr   r   r   r   r   r   boolrt   r   r5   r   r   mapr   r9   )r>   rV   r^   temp_dirchecksum_filer   liner   r   r   	all_filesr]   resultsr6   )r   r   r^   r7   r     s0   

r   c                   @   s  e Zd ZdZ		d!dededededd	f
d
dZdedd	fddZ		d"dedede	e
eef  dd	fddZdededeee  dd	fddZ			d#dedede	e
eef  dedd	f
ddZdedede
eeeeef f dd	fddZ		d"dede	e
eef  dd	fdd Zd	S )$r   z:
    Class for synchronizing CHiME6 array recordings.
    soxr;   r>   r?   rG   rV   r4   Nc                 C   s   t || _t || _t || _|| _| jjddd | jd }ttt| t	|}t
t|| _W d    n1 s=w   Y  t| jdsR| jd | _d S d S )NTrL   rK   zaudio_edits.jsonr   )r   r>   r?   rG   rV   r   r   CHIME6_AUDIO_EDITS_JSONr   r   r   r   r   audio_editsendswith)selfr>   r?   rG   rV   audio_edits_jsonr   r6   r6   r7   __init__  s   




z Chime6ArraySynchronizer.__init__rY   c                 C   s   t tjd| d| jd}|| jvrtd|  dS | j| }td| d | j|||d td| d	 | j	|||| j
d
 td| d | j||d t| dS )z6
        Synchronize a single CHiME6 session.
        chime6_ri   )prefixdirz!No audio edits found for session NzCorrecting z for frame drops...)frame_dropsz for clock drift...)
linear_fitnum_threadszAdjusting timestamps in z JSON files...)r   )r   r   r   r?   r   r   warningr5   correct_frame_dropscorrect_clock_driftrV   adjust_json_timestampsshutilrmtree)r   rY   r   session_audio_editsr6   r6   r7   r[     s(   


z+Chime6ArraySynchronizer.synchronize_sessionr   c                 C   s2  t | jd | d}|D ]}|| j}|| }|jjddd t|| qt | jd | d}tt	}	|D ]}|j
dd dd	 }
|	|
 | q=|	 D ]>\}
}|
|vrmtd
|
 d| d qXg g }}|D ]}|| j}|| }|| || qt| ||||
 d  qXd S )NrR   rc   Tr   ro   r2   r   ri   rU   zArray  in session z  has no frame drops information.edits)r   r>   r   relative_toparentr   ossymlinkr   rt   r   r   r   itemsr   r   _apply_edits_to_wav)r   r?   rY   r   session_binaural_wavswavwav_relative_pathwav_output_pathsession_array_wavs
array_wavsarray_idwavsin_wavsout_wavsr6   r6   r7   r     s:   

z+Chime6ArraySynchronizer.correct_frame_dropsr  r	  r   c                 C   s"  dd l }tjdd |D dd}|d d |d d  |d d  }tj|jd |f|jd}|jd }|D ]3}	|	d }
t|	d |}|	d }|| |
 }|
|krS n|d d |
d |f |d d |d |f< q6|d d d|f }t|D ]\}}|jt	|tj
|| ddd	d
d qxd S )Nr   c                 S   s   g | ]	}t | qS r6   )r   	from_file
load_audio)rN   r  r6   r6   r7   rP   '  s    z?Chime6ArraySynchronizer._apply_edits_to_wav.<locals>.<listcomp>)axisrU   re   r;   )shapedtyper   WAV)rd   datar   format)r   npconcatenatezerosr  r  r   rw   writer   expand_dims)r   r  r	  r   r   x	max_spacex_newlength_xeditin_fromin_toout_fromout_torp   r  r6   r6   r7   r   !  s0   $
.z+Chime6ArraySynchronizer._apply_edits_to_wavr   r   c                 C   s   t |d | d}t|dQ}g }|D ]8}||}	| j|	 }
|jdd dd }||vr@td| d	| d
 q|	|
| j||
||  qt|D ]}|  qSW d    d S 1 sew   Y  d S )NrR   z_*.wavrW   r2   r   ri   rU   zChannel r   z  has no clock drift information.)r   r   r   r   r?   namer   r   r   r   rZ   _apply_clock_drift_correctionr   result)r   r>   rY   r   r   session_wavsr]   r   r  r  r  mic_idfuturer6   r6   r7   r   D  s6   

	

z+Chime6ArraySynchronizer.correct_clock_driftin_wavout_wavc              	   C   s&  |d }|d }|j }t| j}t|}t|}t|tr|}|dd  dg }	|g}
d}tj| jd}tt	||	|D ]\}\}}}|d | d t| d	 }|d | d
 t| d	 }|dd||g}|dkr|dk ry| }d| dg}nd| ddddg}n||7 }dt
| dg}||7 }|dkr|| }|| dg7 }|dk r| | }q<d}|dd||dt|g}t| t| |
| q<|
| t|
 t| d S |dd||dt|g}|dkr|d| ddg7 }n
|d|  dg7 }tdd|  t| d S )Nspeedpaddingr;   rU   r   )r   /r2   z.wavz.xz-Dz-Rtrimrq   pad0szRunning command: rT   )r   r   rG   r   rt   r   r   r?   rw   zipr   r   callr   r   r   r   r   join)r   r&  r'  r   speedsr)  r   sox_cmdstartsendscommand_concatsamples_to_losetmpdirsegr   r   r(  of1of2command1r+  rn   command2commandr6   r6   r7   r!  h  sX   






z5Chime6ArraySynchronizer._apply_clock_drift_correctionc              	   C   sN  t | jd | d}|| j}| j| }g }t|d~}t|d`}t|}	|	D ]H}
d|
vr4q-|
d }|| d }|| d }|d }t	|
d	 d
 | | }t	|
d d
 | | }t
||
d	< t
||
d< ||
 q-tj||dd W d    n1 sw   Y  W d    d S W d    d S 1 sw   Y  d S )NrS   r}   r   wr~   r(  r)  r   r   originalr   re   )indent)nextr>   r   r   r?   r   r   r   r   r   seconds_to_hmsr   dump)r   rY   r   in_jsonrelative_pathout_jsoncorrected_uttsfinfoutr  r   pidr(  r)  delta_tr   r   r6   r6   r7   r     sJ   


(z.Chime6ArraySynchronizer.adjust_json_timestamps)r   r;   r   )Nr;   )__name__
__module____qualname____doc__r   r   r   r   r[   r   r	   r   r   r
   r   r   r   floatr!  r   r6   r6   r6   r7   r     s    
%
/

'
$
Qr   )r2   )Nr9   r:   FFFr;   r;   r<   r=   F)r;   N)8rO  r   r   r   r   r   r   r   collectionsr   concurrent.futuresr   r   r   r   dtpathlibr   typingr   r	   r
   r   r   r   numpyr  r   lhotser   r   lhotse.audior   r   r   lhotse.recipes.utilsr   r   lhotse.supervisionr   r   lhotse.utilsr   r   r   rM   r   r   r   r8   r   r   r   r   r   r   r6   r6   r6   r7   <module>   s      


	

  *

+