o
    Ni5                     @   s   d Z ddlmZ ddlmZ ddlmZ ddlZddlZddlZddl	m
  mZ ddlmZ dZdZdZd	Zd
ZdZdZdZdZdZdZdZg dZg dZg dZdZg dZ G dd dej!j"Z#G dd dej!j$Z%dS )zNSynth Dataset.    )absolute_import)division)print_functionNa  The NSynth Dataset is an audio dataset containing ~300k musical notes, each
with a unique pitch, timbre, and envelope. Each note is annotated with three
additional pieces of information based on a combination of human evaluation
and heuristic algorithms: Source, Family, and Qualities.
zFull NSynth Dataset is split into train, valid, and test sets, with no
instruments overlapping between the train set and the valid/test sets.
a7  NSynth Dataset limited to acoustic instruments in the MIDI pitch interval
[24, 84]. Uses alternate splits that have overlap in instruments (but not exact
notes) between the train set and valid/test sets. This variant was originally
introduced in the ICLR 2019 GANSynth paper (https://arxiv.org/abs/1902.08710).
zThis version additionally contains estimates for F0 using CREPE
(Kim et al., 2018) and A-weighted perceptual loudness in decibels. Both signals
are provided at a frame rate of 250Hz.
a  @InProceedings{pmlr-v70-engel17a,
  title = 	 {Neural Audio Synthesis of Musical Notes with {W}ave{N}et Autoencoders},
  author = 	 {Jesse Engel and Cinjon Resnick and Adam Roberts and Sander Dieleman and Mohammad Norouzi and Douglas Eck and Karen Simonyan},
  booktitle = 	 {Proceedings of the 34th International Conference on Machine Learning},
  pages = 	 {1068--1077},
  year = 	 {2017},
  editor = 	 {Doina Precup and Yee Whye Teh},
  volume = 	 {70},
  series = 	 {Proceedings of Machine Learning Research},
  address = 	 {International Convention Centre, Sydney, Australia},
  month = 	 {06--11 Aug},
  publisher = 	 {PMLR},
  pdf = 	 {http://proceedings.mlr.press/v70/engel17a/engel17a.pdf},
  url = 	 {http://proceedings.mlr.press/v70/engel17a.html},
}
   i>     i   i   g      ^@g333334@)bassbrassfluteguitarkeyboardmalletorganreedstring
synth_leadvocal)acoustic
electronic	synthetic)
brightdark
distortion
fast_decaylong_releasemultiphonicnonlinear_env
percussivereverbztempo-syncedz>http://download.magenta.tensorflow.org/datasets/nsynth/nsynth-)trainvalidtestc                       s&   e Zd ZdZ		d fdd	Z  ZS )NsynthConfigz!BuilderConfig for NSynth Dataset.Fc                    s   g }|r
| d n| d |r| d tjdd}tjdd}tjdd	}tt| jdd
||||gd| || _|| _	dS )a  Constructs a NsynthConfig.

    Args:
      gansynth_subset: bool, whether to use the subset of the dataset introduced
        in the ICLR 2019 GANSynth paper (Engel, et al. 2018). This subset uses
        acoustic-only instrument sources and limits the pitches to the interval
        [24, 84]. The train and test splits are also modified so that
        instruments (but not specific notes) overlap between them. See
        https://arxiv.org/abs/1902.08710 for more details.
      estimate_f0_and_loudness: bool, whether to estimate fundamental frequency
        (F0) and loudness for the audio (at 250 Hz) and add them to the set of
        features.
      **kwargs: keyword arguments forwarded to super.
    gansynth_subsetfullf0_and_loudnessz2.3.0z4New `loudness_db` feature in decibels (unormalized).z2.3.1z,F0 computed with normalization fix in CREPE.z2.3.2zUse Audio feature..)nameversionsupported_versionsN )
appendtfdscoreVersionsuperr!   __init__joinr"   estimate_f0_and_loudness)selfr"   r1   kwargs
name_partsv230v231v232	__class__r)   T/home/ubuntu/.local/lib/python3.10/site-packages/tensorflow_datasets/audio/nsynth.pyr/   d   s0   


zNsynthConfig.__init__)FF)__name__
__module____qualname____doc__r/   __classcell__r)   r)   r8   r:   r!   a   s
    r!   c                   @   sN   e Zd ZdZeededededdee dgZdd Z	dd	 Z
d
d ZdS )NsynthzBA large-scale and high-quality dataset of annotated musical notes.)descriptionT)r"   rA   )r"   r1   rA   c              	   C   s   t jtjjtt ft jtdtjjddtjjddtjjddtjjt	dtjjt
dddd tD d	}| jjrjtt f}tjj|t jd
tjj|t jd
tjj|t jd
d|d< dtjj|t jd
i|d< tjj| ttj|dttjjtddS )N)shapedtypesample_rate   )num_classesi  )nameslabelfamilysourcec                 S   s   i | ]}|t jqS r)   )tfbool).0qualityr)   r)   r:   
<dictcomp>       z Nsynth._info.<locals>.<dictcomp>idaudiopitchvelocity
instrument	qualities)rB   rC   hzmidi
confidencef0dbloudnessz#https://g.co/magenta/nsynth-dataset)rD   )builderrA   featureshomepagecitationmetadata)rL   r   r+   ra   Audio_AUDIO_RATE	_NUM_SECSfloat32
ClassLabel_INSTRUMENT_FAMILIES_INSTRUMENT_SOURCES
_QUALITIESbuilder_configr1   _F0_AND_LOUDNESS_RATETensorr,   DatasetInfo_DESCRIPTIONFeaturesDict	_CITATIONBeamMetadataDict)r2   ra   f0_and_ld_shaper)   r)   r:   _info   s<   

	
zNsynth._infoc                    s>  i }dd t D |d< td |d< | jjrtd |d< || tjj d }|	 
  }W d   n1 s<w   Y  || jjd	 d
 _dd t D  fddt D | jjr fddt D tjj d }t|}|D ]}|d  |d  qxW d   n1 sw   Y  fddt D S )zReturns splits.c                 S   s   i | ]	}|t d |  qS )z%s.tfrecord.tar)_BASE_DOWNLOAD_PATHrN   splitr)   r)   r:   rP      s    z,Nsynth._split_generators.<locals>.<dictcomp>exampleszinstrument_labels.txtinstrument_labelszgansynth_splits.csvgansynth_splitsNrW   rI   c                 S   s   i | ]}|t  qS r)   )setrN   sr)   r)   r:   rP      rQ   c                    s   i | ]
}| d  | gqS rz   r)   r~   dl_pathsr)   r:   rP      s    c                    s   i | ]	}| d    qS r   )valuesr~   r   r)   r:   rP          ry   rS   c                    s,   g | ]}t jj| | | |d dqS ))tfrecord_dirsidsry   )r&   
gen_kwargs)r+   r,   SplitGeneratorrx   )
split_dirs	split_idsr)   r:   
<listcomp>   s    z,Nsynth._split_generators.<locals>.<listcomp>)_SPLITSrw   rm   r"   download_and_extractrL   iogfileGFilereadstrip
splitlinesinfora   rG   csv
DictReaderadd)r2   
dl_managerdl_urlsfr{   readerrowr)   )r   r   r   r:   _split_generators   s4   


zNsynth._split_generatorsc           
         s   t jjj  fdd} fdd} fdd} fdd}| d	d
 |D B  jjj j	t
jjdB  |B  j||dB }	| jjr[|	  B  |B  |B }	|	S )z(Build PCollection of examples for split.c                    s   j jd  | jj  d jjd }||tj	 d j
jtjd d jjd  d jjd tj d jjd tj d	 jjd tj d
 jjd d fddttD dfS )z(Maps an input example to a TFDS example.zbase-examplesnote_strr   rT   )rC   rU   rV   instrument_strinstrument_family_strinstrument_source_strrH   c                    s"   i | ]\}}| d  j j| qS )rX   )
int64_listvalue)rN   iqra   r)   r:   rP     s    zINsynth._build_pcollection.<locals>._emit_base_example.<locals>.<dictcomp>rR   )metricsMetricscounterincra   feature
bytes_listr   nparray
float_listrh   r   rL   compatas_text	enumeraterl   )exid_beamry   r   r:   _emit_base_example   s,   
z5Nsynth._build_pcollection.<locals>._emit_base_examplec                    s<   | \}}|rt j|d |v r jjd  dS dS )NrS   zin-splitTF)rL   r   r   r   r   r   r   )id_exr   	unused_idr   r   r)   r:   	_in_split  s
   z,Nsynth._build_pcollection.<locals>._in_splitc                    s  | \}} j jd  |d }tt }t|}tt }|d | t }|| }|d dks2J t	j
|dt|fdd}dt }	tjjjj|td|	d	dd
\}
}}}
tjjjj|}d||t	j k< t	|}t|}|t	j|t	j|t	jd|d< ||fS )zBEstimate the fundamental frequency using CREPE and add to example.zestimate-f0rT      r   constant)modei  TF)srviterbi	step_sizecenterverboserY   r]   )r   r   r   r   rf   rn   lenrg   _CREPE_FRAME_SIZEr   padintr+   r,   lazy_importscrepepredictlibrosa
hz_to_midiinf
nan_to_numdictastyperh   )r   r   r   rT   hop_size	n_samplesn_framesn_samples_padded	n_paddingcrepe_step_size_f0_hzf0_confidencef0_midir   r)   r:   _estimate_f0  s8   





z/Nsynth._build_pcollection.<locals>._estimate_f0c                    s8  | \}} j jd  |d }ttt }t|jd }tt	|| }|d | t
 }|| }t|d|ffd}tjjj}	|	j|t
|ddj}
t|
}d	}tt||}|d
9 }|	jtt
d}|	|tjddf }|| }|t8 }t|t }tj|dd}t|}d|tji|d< ||fS )zCCompute loudness, add to example (ref is white noise, amplitude=1).zcompute-loudnessrT   r   r   r   F)n_fft
hop_lengthr   g#B;g      4@)r   r   N)axisr^   r_   )r   r   r   r   r   rf   rn   rB   r   ceil	_LD_N_FFTr   r+   r,   r   r   stftTabslog10maximumfft_frequenciesA_weightingnewaxis_REF_DB	_LD_RANGEmeanr   r   rh   )r   r   r   rT   r   n_samples_initialr   n_samples_finalr   r   spectra	amplitudeaminpower_dbfrequenciesa_weightingr_   mean_loudness_dbr   r)   r:   _calc_loudness:  s8   

z1Nsynth._build_pcollection.<locals>._calc_loudnessc                 S   s   g | ]	}t j|d qS )*)ospathr0   )rN   dir_r)   r)   r:   r   h  r   z-Nsynth._build_pcollection.<locals>.<listcomp>)coder)r   )r+   r,   r   apache_beamCreater   
tfrecordioReadAllFromTFRecordcoders
ProtoCoderrL   r   ExampleMapFilterrm   r1   	Reshuffle)
r2   pipeliner   r   ry   r   r   r   r   rz   r)   r   r:   _build_pcollection   s6   
&-zNsynth._build_pcollectionN)r;   r<   r=   r>   r!   _FULL_DESCRIPTION_GANSYNTH_DESCRIPTION_F0_AND_LOUDNESS_ADDENDUMBUILDER_CONFIGSrv   r   r  r)   r)   r)   r:   r@      s    
	,'r@   )&r>   
__future__r   r   r   r   r   numpyr   tensorflow.compat.v2r   v2rL   tensorflow_datasets.public_api
public_apir+   rq   r  r  r	  rs   rg   rf   rn   r   r   r   r   rj   rk   rl   rw   r   r,   BuilderConfigr!   BeamBasedBuilderr@   r)   r)   r)   r:   <module>   s8   +