o
    Si|#                     @   sN  d Z ddlZddlmZ ddlmZ ddlmZ ddlm	Z	m
Z
mZ ddlmZ ddlmZmZ dd	lmZmZmZ dd
lmZ ddlmZmZ ddlmZmZmZmZ dZg dg ddZ		d$dede
e  defddZ!d%de"de de"fddZ#				d&dede
e d e
e" d!e
e" de
e  de	e"e	e"eeef f f fd"d#Z$dS )'u  
DiPCo is a speech data corpus that simulates a “dinner party” scenario taking place in
an everyday home environment. The corpus was created by recording multiple groups of
four Amazon employee volunteers having a natural conversation in English around a dining
table. The participants were recorded by a single-channel close-talk microphone and
by five far-field 7-microphone array devices positioned at different locations in the
recording room. The dataset contains the audio recordings and human labeled transcripts
of a total of 10 sessions with a duration between 15 and 45 minutes. The corpus was
created to advance in the field of noise robust and distant speech processing and is
intended to serve as a public research and benchmarking data set.

The corpus is made availabe under the CDLA-Permissive license.

More details and download link: https://www.amazon.science/publications/dipco-dinner-party-corpus
    N)defaultdict)datetime)Path)DictOptionalUnion)tqdm)fix_manifests$validate_recordings_and_supervisions)AudioSource	RecordingRecordingSet)normalize_text_chime6)SupervisionSegmentSupervisionSet)Pathlikeadd_durationsresumable_downloadsafe_extractz(https://s3.amazonaws.com/dipco/DiPCo.tgz)S02S04S05S09S10)S01S03S06S07S08deveval.F
target_dirforce_downloadreturnc                 C   sh   t | } | jddd | d }tt||d t|}t|| d W d   | S 1 s-w   Y  | S )a  
    Download and untar the dataset
    :param target_dir: Pathlike, the path of the dir to storage the dataset.
    :param force_download: Bool, if True, download the tars no matter if the tars exist.
    :return: the path to downloaded and extracted directory with data.
    Tparentsexist_okz	DiPCo.tgz)filenamer$   )pathN)r   mkdirr   
CORPUS_URLtarfileopenr   )r#   r$   tar_pathtar r1   H/home/ubuntu/.local/lib/python3.10/site-packages/lhotse/recipes/dipco.pydownload_dipco'   s   

r3   sessionuse_chime7_offsetc                 C   s(   |rt | dd }dd| dS | S )z
    Returns the session ID
    :param session: str, the session ID.
    :param use_chime7_offset: bool, if True, offset session IDs (from CHiME-7 challenge).
    :return: str, the session ID.
       NS   02d)int)r4   r5   session_numberr1   r1   r2   get_session_id;   s   	r<   mdmkaldi
corpus_dir
output_dirmicnormalize_textc                 C   sx  ddl }ddl}|dv sJ dt| } |  sJ d|  tt}|dur2t|}|jddd dD ]}g }	g }
|d	kri }t| D ]Y}d
d | d | | dD }g }t	|D ]\}}|
td|gt|d |jdd }||||f< q^|t|d }|	
tt|||t|j|j|j|j d qEnRt| D ]M}dd | d | | dD }g }t	t|D ]\}}|
td|gt|d q|t|d }|	
tt|||t|j|j|j|j d qdd }t| D ]}t| d | | d l}||}t	|D ]Z\}}|d }|d	kr&|||f nttd}||d d }||d d }t||}|

t| d| ||t|| dd |t|d! |d"d#||d$ |d% |d& d'd(
 qW d   n	1 syw   Y  qtt|	t !|
\}}t"|| |dur|#|d)| d*| d+  |#|d)| d,| d+  ||d-||< q4|S ).a  
    Returns the manifests which consist of the Recordings and Supervisions
    :param corpus_dir: Pathlike, the path of the data dir.
    :param output_dir: Pathlike, the path where to write the manifests.
    :param mic: str, the microphone type to use, choose from "ihm" (close-talk) or "mdm"
        (multi-microphone array) settings. For MDM, there are 5 array devices with 7
        channels each, so the resulting recordings will have 35 channels.
    :param normalize_text: str, the text normalization to apply. Choose from "none",
        "upper", or "kaldi". "kaldi" is the default and is the same normalization
        used in Kaldi's CHiME-6 recipe.
    :param use_chime7_offset: bool, if True, offset session IDs (from CHiME-7 challenge).
    :return: a Dict whose key is the dataset part ("dev" and "eval"), and the value is
        Dicts with the keys 'recordings' and 'supervisions'.
    r   N)ihmr=   z!mic must be one of 'ihm' or 'mdm'zNo such directory: Tr&   r   rC   c                 S      g | ]}|qS r1   r1   .0pr1   r1   r2   
<listcomp>v       z!prepare_dipco.<locals>.<listcomp>audioz_P*.wavfile)typechannelssource_r6   )idsourcessampling_ratenum_samplesdurationc                 S   rD   r1   r1   rE   r1   r1   r2   rH      rI   z_U*.wavc                 S   s   t | dt ddd  S )Nz%H:%M:%S.%fil  r6   )dtstrptimetotal_seconds)xr1   r1   r2   <lambda>   s   zprepare_dipco.<locals>.<lambda>transcriptionsz.json
speaker_id#   
start_timez
close-talkend_time-i>  )rR   words)	normalizeEnglishgender
nativenessmother_tongue)rd   re   )
rP   recording_idstartrT   channeltextlanguagespeakerrc   customzdipco-_supervisions_z	.jsonl.gz_recordings_)
recordingssupervisions)$json	soundfiler   is_dirr   dictr+   DATASET_PARTSrglob	enumerateappendr   strstemsplit	SoundFiler   r<   r:   
samplerateframessortedr.   loadlistranger   r   r   r	   r   from_recordingsr   from_segmentsr
   to_file)r?   r@   rA   rB   r5   rq   sf	manifestspartro   rp   global_spk_channel_mapr4   audio_pathsrQ   idx
audio_pathspk_idaudio_sf	_get_timef
transcriptsegmentrh   rg   end
session_idrecording_setsupervision_setr1   r1   r2   prepare_dipcoK   s   







 

r   )r"   F)F)Nr=   r>   F)%__doc__r-   collectionsr   r   rU   pathlibr   typingr   r   r   r   lhotser	   r
   lhotse.audior   r   r   lhotse.recipes.utilsr   lhotse.supervisionr   r   lhotse.utilsr   r   r   r   r,   ru   boolr3   ry   r<   r   r1   r1   r1   r2   <module>   sV    
