o
    i8	                     @   sJ   d Z ddlZddlZddlmZmZ G dd deZdd ZdddZdS )zS
Dataset for codec-based JEPA TTS v5.
Loads precomputed EnCodec embeddings + text.
    N)Dataset
DataLoaderc                   @   s&   e Zd Zd	ddZdd Zdd ZdS )
CodecDataset  c                    sr   t |d}t|| _W d    n1 sw   Y   | _ fdd| jD | _tdt| j d  d d S )Nrc                    s   g | ]
}|d   kr|qS )
emb_frames .0emax_codec_framesr   #/home/ubuntu/lewm-tts/dataset_v5.py
<listcomp>   s    z)CodecDataset.__init__.<locals>.<listcomp>zCodecDataset: z samples (max z frames))openjsonloadmanifestr   printlen)selfmanifest_pathr   fr   r   r   __init__   s    zCodecDataset.__init__c                 C   s
   t | jS )N)r   r   )r   r   r   r   __len__   s   
zCodecDataset.__len__c                 C   sD   | j | }tj|d dd}tjt|d dtjd}||dS )Nemb_pathT)weights_onlytextzutf-8dtype)	codec_embtext_tokens)r   torchr   tensorlistencodelong)r   idxentryembr!   r   r   r   __getitem__   s   

zCodecDataset.__getitem__N)r   )__name__
__module____qualname__r   r   r*   r   r   r   r   r      s    
r   c                 C   s0  dd | D }dd | D }|d j d }tdd |D }tt| ||}tjt| |tjd}t|D ]\}}|j d }	|||d d d |	f< d	||d |	f< q6td
d |D }
tjt| |
tjd}tjt| |
tjd}t|D ]\}}|||d |j d f< d	||d |j d f< qv||||dS )Nc                 S      g | ]}|d  qS )r    r   r
   br   r   r   r          zcollate_fn.<locals>.<listcomp>c                 S   r.   )r!   r   r/   r   r   r   r       r1   r   c                 s       | ]}|j d  V  qdS )   Nshaper	   r   r   r   	<genexpr>$       zcollate_fn.<locals>.<genexpr>r   r3   Fc                 s   r2   )r   Nr4   )r
   tr   r   r   r6   -   r7   )r    r!   
codec_mask	text_mask)	r5   maxr"   zerosr   onesbool	enumerater&   )batch
codec_embsr!   	codec_dimmax_codec_lenpadded_codecr9   ir   Tmax_text_lenpadded_textr:   r8   r   r   r   
collate_fn   s*   
rI   @      r   c              	   C   s*   t | |d}t||d|tddd}||fS )Nr   T)
batch_sizeshufflenum_workersrI   
pin_memory	drop_last)r   r   rI   )r   rL   rN   r   datasetloaderr   r   r   build_dataloader<   s   rS   )rJ   rK   r   )	__doc__r   r"   torch.utils.datar   r   r   rI   rS   r   r   r   r   <module>   s    