o
    i&+                     @   s  d dl Z d dlmZ d dlZd dlm  mZ d dlZd dl	m
Z d dl	mZ d dlmZ d dlm
Z
mZ d dlmZ d dlmZ d d	lmZ G d
d de
ZG dd de
ZG dd deee  Zdddde fdededededejdB dedeeB fddZdd ZdS )    N)files)Dataset)load_from_disk)nn)r   Sampler)tqdm)MelSpec)defaultc                   @   s@   e Zd Z						ddefddZd	d
 Zdd Zdd ZdS )	HFDataset]  d         vocos
hf_datasetc                 C   s,   || _ || _|| _t||||||d| _d S N)n_fft
hop_length
win_lengthn_mel_channelstarget_sample_ratemel_spec_type)datar   r   r   mel_spectrogram)selfr   r   r   r   r   r   r    r   H/home/ubuntu/.local/lib/python3.10/site-packages/f5_tts/model/dataset.py__init__   s   
zHFDataset.__init__c                 C   s<   | j | }|d d }|d d }|jd | | j | j S )Naudioarraysampling_rate)r   shaper   r   )r   indexrowr   sample_rater   r   r   get_frame_len)   s   
zHFDataset.get_frame_lenc                 C   
   t | jS Nlenr   r   r   r   r   __len__/      
zHFDataset.__len__c           
      C   s   | j | }|d d }|d d }|jd | }|dks |dk r,| |d t| j  S t| }|| jkrDtj	
|| j}||}|d}| |}|d}|d	 }	t||	d
S )Nr   r   r    r!      333333?   r   textmel_specr1   )r   r"   __getitem__r*   torch
from_numpyfloatr   
torchaudio
transformsResample	unsqueezer   squeezedict)
r   r#   r$   r   r%   durationaudio_tensor	resamplerr3   r1   r   r   r   r4   2   s$   




zHFDataset.__getitem__N)r   r   r   r   r   r   )__name__
__module____qualname__r   r   r&   r,   r4   r   r   r   r   r
      s    
r
   c                   @   sP   e Zd Z									dded	ejdB fd
dZdd Zdd Zdd Z	dS )CustomDatasetNr   r   r   r   r   Fcustom_datasetmel_spec_modulec              
   C   sX   || _ || _|| _|| _|| _|| _|| _|	| _|	s*t|
t	||||||d| _
d S d S r   )r   	durationsr   r   r   r   r   preprocessed_melr	   r   r   )r   rE   rG   r   r   r   r   r   r   rH   rF   r   r   r   r   S   s*   
zCustomDataset.__init__c                 C   s:   | j d ur| j | | j | j S | j| d | j | j S )Nr>   )rG   r   r   r   )r   r#   r   r   r   r&   v   s   
zCustomDataset.get_frame_lenc                 C   r'   r(   r)   r+   r   r   r   r,   }   r-   zCustomDataset.__len__c           
      C   s   	 | j | }|d }|d }|d }d|  krdkrn nn
|d t| j  }q| jr4t|d }n1t|\}}|jd	 dkrJtj|d	dd
}|| j	kr[tj
|| j	}	|	|}| |}|d	}||dS )NT
audio_pathr1   r>   r/   r.   r0   r3   r   )dimkeepdimr2   )r   r*   rH   r5   tensorr8   loadr"   meanr   r9   r:   r   r<   )
r   r#   r$   rI   r1   r>   r3   r   source_sample_rater@   r   r   r   r4      s,   



zCustomDataset.__getitem__)	Nr   r   r   r   r   r   FN)
rA   rB   rC   r   r   Moduler   r&   r,   r4   r   r   r   r   rD   R   s$    
#rD   c                   @   sP   e Zd ZdZ	ddee dedefdd	Zd
eddfddZdd Z	dd Z
dS )DynamicBatchSamplerab  Extension of Sampler that will do the following:
    1.  Change the batch size (essentially number of sequences)
        in a batch to ensure that the total number of frames are less
        than a certain threshold.
    2.  Make sure the padding efficiency in the batch is high.
    3.  Shuffle batches each epoch while maintaining reproducibility.
    r   NFsamplerframes_thresholddrop_residualc                 C   s$  || _ || _|| _|| _d| _g g }}| j j}t| j ddD ]}	||	||	f q|j	dd d g }
d}t|d| ddD ]9\}	}|| | jkra|dksWt
|
|k ra|
|	 ||7 }qBt
|
dkrl||
 || jkrw|	g}
|}qBg }
d}qB|st
|
dkr||
 ~|| _d	| _d S )
Nr   zPSorting with sampler... if slow, check whether dataset is provided with duration)descc                 S   s   | d S )Nr0   r   )elemr   r   r   <lambda>   s    z.DynamicBatchSampler.__init__.<locals>.<lambda>)keyzCreating dynamic batches with z audio frames per gpuT)rR   rS   max_samplesrandom_seedepochdata_sourcer   appendr&   sortr*   batches	drop_last)r   rR   rS   rY   rZ   rT   indicesr_   r\   idxbatchbatch_frames	frame_lenr   r   r   r      s@   

"





zDynamicBatchSampler.__init__r[   returnc                 C   s
   || _ dS )z Sets the epoch for this sampler.N)r[   )r   r[   r   r   r   	set_epoch   s   
zDynamicBatchSampler.set_epochc                    sd    j d ur+t }| j  j  tjt j|d } fdd|D }t	|S  j}t	|S )N)	generatorc                    s   g | ]} j | qS r   )r_   ).0ir+   r   r   
<listcomp>       z0DynamicBatchSampler.__iter__.<locals>.<listcomp>)
rZ   r5   	Generatormanual_seedr[   randpermr*   r_   tolistiter)r   gra   r_   r   r+   r   __iter__   s   
zDynamicBatchSampler.__iter__c                 C   r'   r(   )r*   r_   r+   r   r   r   r,      r-   zDynamicBatchSampler.__len__)r   NF)rA   rB   rC   __doc__r   intboolr   rg   rs   r,   r   r   r   r   rQ      s    	
-rQ   pinyinrawdataset_name	tokenizerdataset_type
audio_typerF   mel_spec_kwargsrf   c                 C   s  t d |dkrsttdd|  d| }|dkr4z	t| d}W n   t| d}Y d	}n|d
krBt| d}d}t| dddd}	t	|	}
W d   n1 s\w   Y  |
d }t
|f|||d|}|S |dkrz	t|  d}W n   t|  d}Y t|  dddd}	t	|	}
W d   n1 sw   Y  |
d }t
|f||d|}|S |dkrt d | d\}}tt| d| d| ttddd}|S )a  
    dataset_type    - "CustomDataset" if you want to use tokenizer name and default data path to load for train_dataset
                    - "CustomDatasetPath" if you just want to pass the full path to a preprocessed dataset without relying on tokenizer
    zLoading dataset ...rD   f5_ttsz../../data/_rx   z/rawz
/raw.arrowFmelz
/mel.arrowTz/duration.jsonrzutf-8)encodingNr>   )rG   rH   rF   CustomDatasetPath)rG   rH   r
   zShould manually modify the path of huggingface dataset to your need.
May also the corresponding script cuz different dataset may have different format./ztrain.z
../../data)split	cache_dir)printstrr   joinpathr   Dataset_	from_fileopenjsonrM   rD   r   r
   load_dataset)ry   rz   r{   r|   rF   r}   rel_data_pathtrain_datasetrH   f	data_dictrG   prepostr   r   r   r      sh   *r   c           
      C   s   dd | D }t dd |D }| }g }|D ]}d||d f}tj||dd}|| qt |}dd | D }t dd |D }	t||||	d	S )
Nc                 S   s   g | ]	}|d   dqS )r3   r   )r<   ri   itemr   r   r   rk   6  s    zcollate_fn.<locals>.<listcomp>c                 S   s   g | ]}|j d  qS )r!   )r"   )ri   specr   r   r   rk   7  rl   r   r!   )valuec                 S   s   g | ]}|d  qS )r1   r   r   r   r   r   rk   B      c                 S   s   g | ]}t |qS r   )r*   r   r   r   r   rk   C  r   )r   mel_lengthsr1   text_lengths)	r5   
LongTensoramaxsizeFpadr]   stackr=   )
rc   	mel_specsr   max_mel_lengthpadded_mel_specsr   paddingpadded_specr1   r   r   r   r   
collate_fn5  s"   
r   )r   importlib.resourcesr   r5   torch.nn.functionalr   
functionalr   r8   datasetsr   r   r   torch.utils.datar   r   f5_tts.model.modulesr   f5_tts.model.utilsr	   r
   rD   listru   rQ   r=   r   rP   r   r   r   r   r   r   <module>   sH    ATO
B