o
    2wi                     @   s  d Z ddlZddlZddlmZmZ ddlmZ ddlm	Z	m
Z
mZ ddlmZ ddlmZmZmZmZmZ ddlmZmZ dd	lmZmZ d
Z		ddededefddZ			d dede
e dedede	eeeef f f
ddZdedededefddZ dS )!a  
The Grid Corpus is a large multitalker audiovisual sentence corpus designed to support joint
computational-behavioral studies in speech perception. In brief, the corpus consists of high-quality
audio and video (facial) recordings of 1000 sentences spoken by each of 34 talkers (18 male, 16 female),
for a total of 34000 sentences. Sentences are of the form "put red at G9 now".

Source: https://zenodo.org/record/3625687
    N)ProcessPoolExecutoras_completed)Path)DictOptionalUnion)tqdm)	RecordingRecordingSetSupervisionSetfix_manifests$validate_recordings_and_supervisions)AlignmentItemSupervisionSegment)Pathlikeis_module_availablez10.5281/zenodo.3625687.F
target_dirforce_downloadreturnc              	   C   s   t dstdt| } | d }|jddd |d }| r!|r1tjdt dd|d |  t	|
d	d
dD ]}t|}|| W d   n1 sQw   Y  q:|S )a  
    Download and untar the dataset, supporting both LibriSpeech and MiniLibrispeech

    :param target_dir: Pathlike, the path of the dir to storage the dataset.
    :param dataset_parts: "librispeech", "mini_librispeech",
        or a list of splits (e.g. "dev-clean") to download.
    :param force_download: Bool, if True, download the tars no matter if the tars exist.
    :param alignments: should we download the alignments. The original source is:
        https://github.com/CorentinJ/librispeech-alignments
    :param base_url: str, the url of the OpenSLR resources.
    :param alignments_url: str, the url of LibriSpeech word alignments
    :return: the path to downloaded and extracted directory with data.
    
zenodo_getzLTo download Grid Audio-Visual Speech Corpus please 'pip install zenodo_get'.zgrid-corpusT)parentsexist_okz.downloadedzzenodo_get )shellcheckcwdz*.zipzUnzipping files)descN)r   RuntimeErrorr   mkdirexists
subprocessrunGRID_ZENODO_IDtouchr   globzipfileZipFile
extractall)r   r   
corpus_dirdownload_markerpf r,   P/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/lhotse/recipes/grid.pydownload_grid   s&   r.   T   r(   
output_dirwith_supervisionsnum_jobsc                 C   sp  t | } | d }| sJ | d }| sJ g }g }dd | dD }g }	t|F}
|D ]}|j}|dD ]}|	|
t|||| q9q/tt	|	t
|	ddD ]}| \}}|| |d	urj|| qTW d	   n1 suw   Y  t|}|rt|}t||\}}t|| |d	urt |}||d
  |r||d  d|i}|r|j|d |S )a  
    Returns the manifests which consist of the Recordings and Supervisions.
    When all the manifests are available in the ``output_dir``, it will simply read and return them.

    :param corpus_dir: Pathlike, the path of the data dir.
    :param output_dir: Pathlike, the path where to write the manifests.
    :param with_supervisions: bool, when False, we'll only return recordings; when True, we'll also
        return supervisions created from alignments, but might remove some recordings for which
        they are missing.
    :param num_jobs: int, number of parallel jobs.
    :return: a Dict whose key is the dataset part, and the value is Dicts with the keys 'audio' and 'supervisions'.
    
alignments	audio_25kc                 S   s   g | ]	}|j d ks|qS )z.zip)suffix).0r*   r,   r,   r-   
<listcomp>a   s    z prepare_grid.<locals>.<listcomp>zs*z*.mpgzScanning videos)totalr   Nzgrid_recordings.jsonl.gzzgrid_supervisions.jsonl.gz
recordings)supervisions)r   is_dirr$   r   nameappendsubmitprocess_singler   r   lenresultr
   from_recordingsr   from_segmentsr   r   to_fileupdate)r(   r0   r1   r2   ali_dir	audio_dirr9   r:   
video_dirsfuturesexspeaker_dirspeaker
video_pathr+   	recordingmaybe_supervisionansr,   r,   r-   prepare_gridE   sV   







rQ   rM   rL   rF   c           	      C   s   | j }tj| | d| d}d }|| | d}|rK| rKdd dd |  D D }t|j|jd|j	|j
d	d
d |D d|d|id	}||fS )N_)recording_idz.alignc              	   S   s<   g | ]\}}}t |t|d  tt|t| d  dqS )i  )symbolstartduration)r   floatint)r6   bewr,   r,   r-   r7      s    
z"process_single.<locals>.<listcomp>c                 s   s    | ]}|  V  qd S )N)split)r6   liner,   r,   r-   	<genexpr>   s    z!process_single.<locals>.<genexpr>g         c                 s   s     | ]}|j d kr|j V  qdS )silN)rT   )r6   itemr,   r,   r-   r^      s    Englishword)	idrS   rU   rV   channeltextlanguagerL   	alignment)stemr	   	from_filewith_suffixis_file	read_text
splitlinesr   rd   rV   channel_idsjoin)	rM   rL   rF   r1   video_idrN   supervisionali_pathalir,   r,   r-   r?      s(   r?   )r   F)NTr/   )!__doc__r    r%   concurrent.futuresr   r   pathlibr   typingr   r   r   	tqdm.autor   lhotser	   r
   r   r   r   lhotse.supervisionr   r   lhotse.utilsr   r   r"   boolr.   rX   strrQ   r?   r,   r,   r,   r-   <module>   sV    
)
B