o
    NiV                     @   s   d Z ddlmZ ddlmZ ddlmZ ddlZddlZddlZddlZ	ddl
m  mZ ddlmZ dddd	d
dddZg dZdZdZdd Zdd ZG dd dejjZdS )zSAVEE dataset.    )absolute_import)division)print_functionNangerdisgustfear	happinessneutralsadnesssurprise)adfhnsasu)DCJEJKKLaG  
@inproceedings{Vlasenko_combiningframe,
author = {Vlasenko, Bogdan and Schuller, Bjorn and Wendemuth, Andreas and Rigoll, Gerhard},
year = {2007},
month = {01},
pages = {2249-2252},
title = {Combining frame and turn-level information for robust recognition of emotions within speech},
journal = {Proceedings of Interspeech}
}
a  
SAVEE (Surrey Audio-Visual Expressed Emotion) is an emotion recognition
dataset. It consists of recordings from 4 male actors in 7 different emotions,
480 British English utterances in total. The sentences were chosen from the
standard TIMIT corpus and phonetically-balanced for each emotion.
This release contains only the audio stream from the original audio-visual
recording.
The data is split so that the training set consists of 2 speakers, and both the
validation and test set consists of samples from 1 speaker, respectively.
c                 C   s   t | |krtdjt | |dtdd | D }td| dkr)td| g }d}| D ]\}}|}||7 }||t|| t|| f q/|d	 d
 |d	 d |f|d	< |S )a  Computes boundary indices for each of the splits in split_probs.

  Args:
    split_probs: List of (split_name, prob), e.g. [('train', 0.6), ('dev', 0.2),
      ('test', 0.2)]
    n_items: Number of items we want to split.

  Returns:
    The item indices of boundaries between different splits. For the above
    example and n_items=100, these will be
    [('train', 0, 60), ('dev', 60, 80), ('test', 80, 100)].
  z]Not enough items for the splits. There are {splits} splits while there are only {items} items)splitsitemsc                 s       | ]\}}|V  qd S N ).0namepr   r   S/home/ubuntu/.local/lib/python3.10/site-packages/tensorflow_datasets/audio/savee.py	<genexpr>R       z,_compute_split_boundaries.<locals>.<genexpr>   g:0yE>z"Probs should sum up to 1. probs={}g        r   )len
ValueErrorformatsumabsappendint)split_probsn_itemstotal_probssplit_boundariessum_pr   r   prevr   r   r   _compute_split_boundariesA   s$   "r1   c                 C   s   t tdd | D }tj|}|| t|t|}i }|D ]\}}}	t||	D ]}
||||
 < q+q!t	
t}| D ]\}}|| }|| | q<|S )a  Split items to train/dev/test, so all items in group go into same split.

  Each group contains all the samples from the same speaker ID. The samples are
  splitted so that all each speaker belongs to exactly one split.

  Args:
    items_and_groups: Sequence of (item_id, group_id) pairs.
    split_probs: List of (split_name, prob), e.g. [('train', 0.6), ('dev', 0.2),
      ('test', 0.2)]
    split_number: Generated splits should change with split_number.

  Returns:
    Dictionary that looks like {split name -> set(ids)}.
  c                 s   r   r   r   )r   item_idgroup_idr   r   r   r    r   r!   z-_get_inter_splits_by_group.<locals>.<genexpr>)sortedsetnprandomRandomStateshuffler1   r$   rangecollectionsdefaultdictadd)items_and_groupsr+   split_numbergroupsrngr.   group_id_to_split
split_namei_starti_endisplit_to_idsr2   r3   splitr   r   r   _get_inter_splits_by_groupc   s   

rI   c                   @   s8   e Zd ZdZejdZdZdd Z	dd Z
dd	 Zd
S )Saveez8The audio part of SAVEE dataset for emotion recognition.z1.0.0a2    manual_dir should contain the file AudioData.zip. This file should be under
  Data/Zip/AudioData.zip in the dataset folder provided upon registration.
  You need to register at
  http://personal.ee.surrey.ac.uk/Personal/P.Jackson/SAVEE/Register.html in
  order to get the link to download the dataset.
  c              
   C   sH   t jj| tt jt jjdddt jjtt	
 dtjdddtdS )	NwaviD  )file_formatsample_rate)namesaudiolabel
speaker_id)rP   rQ   z%http://kahlan.eps.surrey.ac.uk/savee/)builderdescriptionfeaturessupervised_keyshomepagecitation)tfdscoreDatasetInfo_DESCRIPTIONrU   FeaturesDictAudio
ClassLabellist	LABEL_MAPvaluestfstring	_CITATION)selfr   r   r   _info   s   zSavee._infoc                 C   s   t j|jd}tjj|std	||
|}g }tjjd	|D ]}t j|\}}t j|\}}|||f q'g d}	t||	d}
tjjtjjd|
d idtjjtjjd|
d	 idtjjtjjd|
d
 idgS )zReturns SplitGenerators.zAudioData.zipz`SAVEE requires manual download of the data. Please download the audio data and place it into: {}z{}/AudioData/*/*.wav))traing333333?)
validation皙?)testrj   r   
file_namesrh   )r   
gen_kwargsri   rk   )ospathjoin
manual_dirrc   iogfileexistsAssertionErrorr&   extractglobrH   r)   rI   rY   rZ   SplitGeneratorSplitTRAIN
VALIDATIONTEST)rf   
dl_managerzip_pathextract_pathr>   fnamefolder_rR   r+   r   r   r   r   _split_generators   s6   



zSavee._split_generatorsc           
      c   sr    |D ]3}t j|\}}t j|\}}td|d}t| }d||dd }	|	|||dfV  qdS )zYields examples.z^([a-zA-Z]+)r"   z{}_{}.r   rO   N)rn   ro   rH   rematchgroupra   r&   )
rf   rl   r   r   wavnamer   rR   label_abbrevrQ   keyr   r   r   _generate_examples   s   zSavee._generate_examplesN)__name__
__module____qualname____doc__rY   rZ   VersionVERSIONMANUAL_DOWNLOAD_INSTRUCTIONSrg   r   r   r   r   r   r   rJ      s    #rJ   )r   
__future__r   r   r   r;   rn   r   numpyr6   tensorflow.compat.v2compatv2rc   tensorflow_datasets.public_api
public_apirY   ra   SPEAKERSre   r\   r1   rI   rZ   GeneratorBasedBuilderrJ   r   r   r   r   <module>   s0   
"!