o
    +i                     @   sV   d Z ddlZddlZddlmZmZ ddlmZ G dd deZdd Z	dddZ
dS )z/
Dataset and DataLoader for LeWM TTS training.
    N)Dataset
DataLoader)Pathc                   @   s2   e Zd ZdZdddZdd Zdd	 Zd
d ZdS )
TTSDatasetz>Loads preprocessed mel spectrograms and text for TTS training.     c                    sv   t |ddd}t|| _W d    n1 sw   Y   | _|| _ fdd| jD | _tdt| j d d S )Nrutf-8)encodingc                    s   g | ]
}|d   kr|qS )
mel_frames ).0emax_mel_framesr    /home/ubuntu/lewm-tts/dataset.py
<listcomp>   s
    z'TTSDataset.__init__.<locals>.<listcomp>z	Dataset: z samples loaded)openjsonloadmanifestr   max_text_lenprintlen)selfmanifest_pathr   r   fr   r   r   __init__   s   
zTTSDataset.__init__c                 C   s
   t | jS )N)r   r   )r   r   r   r   __len__   s   
zTTSDataset.__len__c                 C   s   t |d}|S )zMConvert text to byte-level tokens (handles any Unicode including Devanagari).r	   )listencode)r   texttokensr   r   r   text_to_tokens   s   zTTSDataset.text_to_tokensc                 C   sx   | j | }tj|d dd}| |d }|d | j }tj|tjd}|||jd t|d}d|v r:|d |d< |S )	Nmel_pathT)weights_onlyr!   dtype   )meltext_tokensr   text_len
speaker_id)	r   torchr   r#   r   tensorlongshaper   )r   idxentryr)   r"   r*   resultr   r   r   __getitem__%   s   
zTTSDataset.__getitem__N)r   r   )__name__
__module____qualname____doc__r   r   r#   r4   r   r   r   r   r      s    
r   c                 C   s.  t dd | D }t dd | D }| d d jd }t| }t|||}tj||tjd}tj||tjd}tj||tjd}t| D ]3\}	}
|
d }|
d }|
d ||	d	d	d	|f< |
d
 ||	d	|f< d||	d	|f< d||	d	|f< qE||||d}d| d v rtj	dd | D tjd|d< |S )zPad and batch samples.c                 s       | ]}|d  V  qdS )r   Nr   r   sr   r   r   	<genexpr>A       zcollate_fn.<locals>.<genexpr>c                 s   r9   )r+   Nr   r:   r   r   r   r<   B   r=   r   r)   r&   r   r+   Nr*   F)r)   r*   mel_mask	text_maskr,   c                 S   s   g | ]}|d  qS )r,   r   r:   r   r   r   r   ]   s    zcollate_fn.<locals>.<listcomp>)
maxr0   r   r-   zerosr/   onesbool	enumerater.   )batchmax_melmax_textn_melsB
mel_paddedtext_paddedr>   r?   ir;   t_melt_textr3   r   r   r   
collate_fn>   s.   rO         Tc              	   C   s&   t | }t||||tddd}||fS )NT)
batch_sizeshufflenum_workersrO   
pin_memory	drop_last)r   r   rO   )r   rR   rT   rS   datasetloaderr   r   r   build_dataloaderb   s   	rY   )rP   rQ   T)r8   r   r-   torch.utils.datar   r   pathlibr   r   rO   rY   r   r   r   r   <module>   s    3$