o
    SiL                     @   sx  d Z ddlZddlZddlZddlZddlZddlmZmZ ddl	m
Z
 ddlmZ ddlmZmZ ddlmZmZmZmZmZ ddlmZ dd	lmZmZmZmZmZmZ dd
lm Z  ddl!m"Z"m#Z# ddl$m%Z%m&Z& g dZ'g dZ(dZ)edg dZ*	d5de+dee+ de+de+de+de%dee, fddZ-		d6de%dee, defddZ.		d6de%dee, defd d!Z/				"d7d#ee% d$ee% d%ee% d&e0dee+ee+eeef f f f
d'd(Z1d)e%d&e0dee+ee+eeef f f fd*d+Z2d,ee+ee+eeef f f dee+eeef f fd-d.Z3d)e%d&e0dee+ee+eeef f f fd/d0Z4d1e%d2ee+e*f deeef fd3d4Z5dS )8a  
The following description is taken from the official website:
https://www.robots.ox.ac.uk/~vgg/data/voxceleb/

VoxCeleb is an audio-visual dataset consisting of short clips of human speech, extracted
from interview videos uploaded to YouTube. VoxCeleb contains speech from speakers spanning
a wide range of different ethnicities, accents, professions and ages. There are a total of
7000+ speakers and 1 million utterances.

All speaking face-tracks are captured "in the wild", with background chatter, laughter,
overlapping speech, pose variation and different lighting conditions. VoxCeleb consists
of both audio and video, comprising over 2000 hours of speech. Each segment is at least
3 seconds long.

The dataset consists of two versions, VoxCeleb1 and VoxCeleb2. Each version has it's own
train/test split. For each version, the YouTube URLs, face detections and tracks, audio files,
cropped face videos and speaker meta-data are provided. There is no overlap between the
two versions.

- VoxCeleb1: VoxCeleb1 contains over 100,000 utterances for 1,251 celebrities.
  http://www.robots.ox.ac.uk/~vgg/data/voxceleb/
- VoxCeleb2: VoxCeleb2 contains over a million utterances for 6,112 identities.
  http://www.robots.ox.ac.uk/~vgg/data/voxceleb2/

LICENSE: The VoxCeleb dataset is available to download for commercial/research purposes
under a Creative Commons Attribution 4.0 International License. The copyright remains with
the original owners of the video.

This Lhotse recipe prepares the VoxCeleb1 and VoxCeleb2 datasets.
    N)defaultdict
namedtuple)as_completed)ProcessPoolExecutor)PathPurePath)DictListOptionalTupleUnion)tqdm)CutSetMonoCut	RecordingRecordingSetSupervisionSegmentSupervisionSet)combine)fix_manifests$validate_recordings_and_supervisions)Pathlikeresumable_download)zIhttps://thor.robots.ox.ac.uk/~vgg/data/voxceleb/vox1a/vox1_dev_wav_partaazIhttps://thor.robots.ox.ac.uk/~vgg/data/voxceleb/vox1a/vox1_dev_wav_partabzIhttps://thor.robots.ox.ac.uk/~vgg/data/voxceleb/vox1a/vox1_dev_wav_partaczIhttps://thor.robots.ox.ac.uk/~vgg/data/voxceleb/vox1a/vox1_dev_wav_partadzGhttps://thor.robots.ox.ac.uk/~vgg/data/voxceleb/vox1a/vox1_test_wav.zipzAhttps://www.robots.ox.ac.uk/~vgg/data/voxceleb/meta/vox1_meta.csv)
zIhttps://thor.robots.ox.ac.uk/~vgg/data/voxceleb/vox1a/vox2_dev_aac_partaazIhttps://thor.robots.ox.ac.uk/~vgg/data/voxceleb/vox1a/vox2_dev_aac_partabzIhttps://thor.robots.ox.ac.uk/~vgg/data/voxceleb/vox1a/vox2_dev_aac_partaczIhttps://thor.robots.ox.ac.uk/~vgg/data/voxceleb/vox1a/vox2_dev_aac_partadzIhttps://thor.robots.ox.ac.uk/~vgg/data/voxceleb/vox1a/vox2_dev_aac_partaezIhttps://thor.robots.ox.ac.uk/~vgg/data/voxceleb/vox1a/vox2_dev_aac_partafzIhttps://thor.robots.ox.ac.uk/~vgg/data/voxceleb/vox1a/vox2_dev_aac_partagzIhttps://thor.robots.ox.ac.uk/~vgg/data/voxceleb/vox1a/vox2_dev_aac_partahzGhttps://thor.robots.ox.ac.uk/~vgg/data/voxceleb/vox1a/vox2_test_aac.zipzAhttps://www.robots.ox.ac.uk/~vgg/data/voxceleb/meta/vox2_meta.csvz9http://www.openslr.org/resources/49/voxceleb1_test_v2.txtSpeakerMetadataidnamegendernationalitysplitFvoxceleb_name	part_urlspart_suffixdev_zip_nametest_zip_name
target_dirforce_downloadc                 C   s  t |}|jddd || }| r |s td| d |S t }|D ]}	ttj	
tj	|	jj}
t |}||
 }t|	||d q't|| d0}t|| dD ]}t|d}t|| W d	   n1 sqw   Y  qYW d	   n1 sw   Y  |d
D ]}t||t |j  q|dD ]}t||t |j  qW d	   n1 sw   Y  td t|}|| W d	   n1 sw   Y  td t|| }|| W d	   |S 1 sw   Y  |S )aN  
    Download and unzip a VoxCeleb dataset

    ;param voxceleb_name :str, dataset name.
    :param part_urls: List[str], list of downloable links to zip partials.
    ;param part_suffix: str, dataset partial suffix.
    ;param dev_zip_name: str, name of concatenated dev zip file.
    ;param test_zip_name: str, name of concatenated test zip file.
    :param target_dir: Pathlike, the path of the dir to store the dataset.
    :param force_download: bool, if True, download the archive even if it already exists.

    :return: the path to downloaded and extracted directory with data.
    T)parentsexist_okz	Skipping z because file exists.)filenamer&   wb*rbNz*.zipz*.csvzUnzipping dev...zUnzipping test...)r   mkdirexistslogginginfotempfileTemporaryDirectoryr   urllibparseunquoteurlparsepathr   r   opensortedglobshutilcopyfileobjmovezipfileZipFile
extractall)r    r!   r"   r#   r$   r%   r&   zip_pathtemp_dirurlurl_filenametemp_target_fileoutFilefileinFilezf rJ   K/home/ubuntu/.local/lib/python3.10/site-packages/lhotse/recipes/voxceleb.py_download_voxcelebU   sV   



rL   .returnc              	   C      t dtddd| |dS )a~  
    Download and unzip the VoxCeleb1 data.

    .. note:: A "connection refused" error may occur if you are downloading without a password.

    :param target_dir: Pathlike, the path of the dir to store the dataset.
    :param force_download: bool, if True, download the archive even if it already exists.
    :return: the path to downloaded and extracted directory with data.
    	VoxCeleb1vox1_dev_wav_partzvox1_dev_wav.zipzvox1_test_wav.zipr    r!   r"   r#   r$   r%   r&   )rL   VOXCELEB1_PARTS_URLr%   r&   rJ   rJ   rK   download_voxceleb1      rU   c              	   C   rO   )a~  
    Download and unzip the VoxCeleb2 data.

    .. note:: A "connection refused" error may occur if you are downloading without a password.

    :param target_dir: Pathlike, the path of the dir to store the dataset.
    :param force_download: bool, if True, download the archive even if it already exists.
    :return: the path to downloaded and extracted directory with data.
    	VoxCeleb2vox2_dev_aac_partzvox2_aac.zipzvox2_test_aac.ziprR   )rL   VOXCELEB2_PARTS_URLrT   rJ   rJ   rK   download_voxceleb2   rV   rZ      voxceleb1_rootvoxceleb2_root
output_dirnum_jobsc                 C   s  | rt | nd} |rt |nd}| s|std|dur t |nd}tt}| r?td |t| | |t|d  ntd |rwtd t	||}d|v rst
|d d |d |d d< t
|d d	 |d	 |d d	< n||d< d
D ]E}||vrqy|| d }|| d	 }t||\}}t|| ||| d< ||| d	< |dur||d| d  ||d| d  qy|durd|v rt|d D ]\}	}
|
|d|	d  d  qd|v rt|d D ]\}	}
|
|d|	d  d  q|S )a  
    Prepare manifests for the VoxCeleb v1 and v2 corpora.

    The manifests are created in a dict with three splits: train, dev and test, for each
    of the two versions.
    Each split contains a RecordingSet and SupervisionSet in a dict under keys 'recordings' and 'supervisions'.

    :param voxceleb1_root: Path to the VoxCeleb v1 dataset.
    :param voxceleb2_root: Path to the VoxCeleb v2 dataset.
    :param output_dir: Path to the output directory.
    :param num_jobs: Number of parallel jobs to run.
    :return: A dict with standard corpus splits ("train" and "test") containing the manifests.

    NOTE: We prepare the data using the Kaldi style split, i.e., the whole VoxCeleb2
    ("dev" and "test") and the training portion ("dev") of VoxCeleb1 are put into the
    "train" split. The "test" split contains the "test" portion of VoxCeleb1. So if
    VoxCeleb1 is not provided, no "test" split is created in the output manifests.

    Example usage:

    .. code-block:: python

        >>> from lhotse.recipes.voxceleb import prepare_voxceleb
        >>> manifests = prepare_voxceleb(voxceleb_v1_root='/path/to/voxceleb1',
        ...                               voxceleb_v2_root='/path/to/voxceleb2',
        ...                               output_dir='/path/to/output',
        ...                               num_jobs=4)

    NOTE: If VoxCeleb1 is provided, we also prepare the trials file using the list provided
    in http://www.openslr.org/resources/49/voxceleb1_test_v2.txt. This file is used in the
    Kaldi recipes for VoxCeleb speaker verification. This is prepared as 2 tuples of the form
    (CutSet, CutSet) with identical id's, one for each of positive pairs and negative pairs.
    These are stored in the dict under keys 'pos_trials' and 'neg_trials', respectively.
    For evaluation purpose, the :class:`lhotse.dataset.sampling.CutPairsSampler`
    can be used to sample from this tuple.
    Nz4Either VoxCeleb1 or VoxCeleb2 path must be provided.zPreparing VoxCeleb1...testzGVoxCeleb1 not provided, no test split or trials file will be created...zPreparing VoxCeleb2...train
recordingssupervisions)ra   r`   voxceleb_recordings_z	.jsonl.gzvoxceleb_supervisions_
pos_trialszvoxceleb_pos-trials_uttr[   
neg_trialszvoxceleb_neg-trials_utt)r   
ValueErrorr   dictr/   r0   update_prepare_voxceleb_v1_prepare_voxceleb_trials_prepare_voxceleb_v2r   r   r   to_file	enumerate)r\   r]   r^   r_   	manifestsv2_manifestsr   rb   rc   icutsrJ   rJ   rK   prepare_voxceleb   sZ   *



rt   corpus_pathc              	      s  i }t | d d(}t| |D ]}| d\}}}} t|||| d||< qW d   n1 s4w   Y  t|K}	g }
g }g }| d dD ]}||	t	|| qKt
t|t|dd	d
D ]}| \}}|
| || qdt|
}t|}W d   n1 sw   Y  tt}dD ]' | fdd|  d< dd |  d D |fdd|  d< q|d|d< |S )z
    Prepare manifests for the VoxCeleb1 corpus. The manifests are created in a dict with
    2 splits: train ("dev") and test.
    zvox1_meta.csvr	r   Nwavz*.wavzProcessing VoxCeleb1Ftotaldescleave)devr`   c                    s   | j d  kS )Nr   )custom)s)r   rJ   rK   <lambda>H  s    z&_prepare_voxceleb_v1.<locals>.<lambda>rc   c                 S   s   g | ]}|j qS rJ   recording_id).0r   rJ   rJ   rK   
<listcomp>J  s    z(_prepare_voxceleb_v1.<locals>.<listcomp>c                    s
   | j  v S )N)r   )rv   )	split_idsrJ   rK   r   L  s   
 rb   r}   ra   )r8   nextstripr   r   r   rglobappendsubmit_process_filer   r   lenresultr   from_recordingsr   from_segmentsr   ri   filterpop)ru   r_   speaker_metadataflinespkidr   r   r   exrb   rc   futurespfuture	recordingsupervisionrecording_setsupervision_setrp   rJ   )r   r   rK   rk   #  sN   






rk   rp   c                 C   s  | d }| d }g g g g f\}}}}t tdd tdd}t|D ]\}}	|	 d\}
}}d|dd	 d
}d|dd	 d
}||vsR||vratd| d| d|  q"|
dkr|	t
d| || d	|| j|| d	d |	t
d| || d	|| j|| d	d q"|	t
d| || d	|| j|| d	d |	t
d| || d	|| j|| d	d q"W d   n1 sw   Y  t|t|ft|t|fdS )z;
    Prepare the trials file for the VoxCeleb1 corpus.
    rb   rc   zvoxceleb_trials.txt)r)   rv    -rM   r   /zTrial z contains unknown recording: z or 1ztrial-)r   r   startdurationrc   channelN)rf   rg   )r   VOXCELEB1_TRIALS_URLr8   ro   r   r   joinr/   warningr   r   r   r   	from_cuts)rp   rb   rc   cuts_utt1_poscuts_utt2_poscuts_utt1_negcuts_utt2_negr   idxr   targetutt1utt2rJ   rJ   rK   rl   R  s   

7rl   c              	   C   s0  i }t | d d)}t| |D ]}ttj|d\}}}}t|d|d|d||< qW d   n1 s5w   Y  t|A}	g }
g }g }| dD ]}|	|	
t|| qJt|t|d| d	d
dD ]}| \}}|
	| |	| qeW d   n1 sw   Y  t|
}t|}||d}|S )z
    Prepare manifests for the VoxCeleb2 corpus. The manifests are created the same dict
    without any splits since the whole data is used in the final "train" split.
    zvox2_meta.csvrv   , r   Nz*.m4azProcessing VoxCeleb2 z	 split...Fry   )rb   rc   )r8   r   mapstrr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   )ru   r_   r   r   r   r   _r   r   r   rb   rc   r   r   r   r   r   r   r   rp   rJ   rJ   rK   rm     sB   	






rm   	file_pathr   c                 C   sz   | j j j}| j j}| j}| d| d| }tj| |d}t||||| jd|j|| j|| j|| j	dd}||fS )zT
    Process a single wav file and return a Recording and a SupervisionSegment.
    r   r   g        )speaker_namer   r   )r   r   speakerr   r   r   r~   )
parentstemr   	from_filer   r   r   r   r   r   )r   r   
speaker_id
session_iduttidr   r   r   rJ   rJ   rK   r     s$   
r   )F)rM   F)NNNr[   )6__doc__r/   r;   r1   r3   r>   collectionsr   r   concurrent.futuresr   concurrent.futures.processr   pathlibr   r   typingr   r	   r
   r   r   	tqdm.autor   lhotser   r   r   r   r   r   lhotse.manipulationr   	lhotse.qar   r   lhotse.utilsr   r   rS   rY   r   r   r   boolrL   rU   rZ   intrt   rk   rl   rm   r   rJ   rJ   rJ   rK   <module>   s     	
:


c
/
K
*

