o
    }oi +                     @   sv   d dl mZmZ d dlmZ d dlZd dlmZm	Z	 d dl
mZmZ d dlmZ G dd dejZG dd	 d	eZdS )
    )ListOptionalN)EVAL_DATALOADERSTRAIN_DATALOADERS)
DataLoaderDataset)MegatronDataSamplerc                       s   e Zd ZdZ													d#d	ed
edededeee  dedededededef fddZd$de	ddfddZ
defddZdefddZdefdd Zdefd!d"Z  ZS )%MockDataModulea  
    A PyTorch Lightning DataModule for creating mock datasets for training, validation, and testing.

    Args:
        image_h (int): Height of the images in the dataset. Default is 1024.
        image_w (int): Width of the images in the dataset. Default is 1024.
        micro_batch_size (int): Micro batch size for the data sampler. Default is 4.
        global_batch_size (int): Global batch size for the data sampler. Default is 8.
        rampup_batch_size (Optional[List[int]]): Ramp-up batch size for the data sampler. Default is None.
        num_train_samples (int): Number of training samples. Default is 10,000.
        num_val_samples (int): Number of validation samples. Default is 10,000.
        num_test_samples (int): Number of testing samples. Default is 10,000.
        num_workers (int): Number of worker threads for data loading. Default is 8.
        pin_memory (bool): Whether to use pinned memory for data loading. Default is True.
        persistent_workers (bool): Whether to use persistent workers for data loading. Default is False.
        image_precached (bool): Whether the images are pre-cached. Default is False.
        text_precached (bool): Whether the text data is pre-cached. Default is False.
             N'  TFimage_himage_wmicro_batch_sizeglobal_batch_sizerampup_batch_sizenum_train_samplesnum_val_samplesnum_test_samplesnum_workers
pin_memorypersistent_workersc                    sv   t    || _|| _|| _|| _|| _|	| _|
| _|| _	|| _
|| _|| _|| _d | _d| _t| j|||d| _d S )N
   )seq_lenr   r   r   )super__init__r   r   r   r   r   r   r   r   image_precachedtext_precachedr   r   	tokenizer
seq_lengthr   data_sampler)selfr   r   r   r   r   r   r   r   r   r   r   r   r   	__class__ m/home/ubuntu/.local/lib/python3.10/site-packages/nemo/collections/diffusion/data/diffusion_mock_datamodule.pyr   -   s*   
zMockDataModule.__init__ stagereturnc                 C   sR   t dd| j| j| jd| _t dd| j| j| jd| _t dd| j| j| jd| _dS )z
        Sets up datasets for training, validation, and testing.

        Args:
            stage (str): The stage of the process (e.g., 'fit', 'test'). Default is an empty string.
        r
   )image_Himage_Wlengthr   r   N)	_MockT2IDatasetr   r   r   	_train_dsr   _validation_dsr   _test_ds)r"   r(   r%   r%   r&   setupU   s*   zMockDataModule.setupc                 C      t | ds	|   | | jS )z
        Returns the training DataLoader.

        Returns:
            TRAIN_DATALOADERS: DataLoader for the training dataset.
        r.   )hasattrr1   _create_dataloaderr.   r"   r%   r%   r&   train_dataloaderr      
zMockDataModule.train_dataloaderc                 C   r2   )z
        Returns the validation DataLoader.

        Returns:
            EVAL_DATALOADERS: DataLoader for the validation dataset.
        r/   )r3   r1   r4   r/   r5   r%   r%   r&   val_dataloader}   r7   zMockDataModule.val_dataloaderc                 C   r2   )z
        Returns the testing DataLoader.

        Returns:
            EVAL_DATALOADERS: DataLoader for the testing dataset.
        r0   )r3   r1   r4   r0   r5   r%   r%   r&   test_dataloader   r7   zMockDataModule.test_dataloaderc                 K   s   t |f| j| j| jd|S )a  
        Creates a DataLoader for the given dataset.

        Args:
            dataset: The dataset to load.
            **kwargs: Additional arguments for the DataLoader.

        Returns:
            DataLoader: Configured DataLoader for the dataset.
        )r   r   r   )r   r   r   r   )r"   datasetkwargsr%   r%   r&   r4      s   z!MockDataModule._create_dataloader)r
   r
   r   r   Nr   r   r   r   TFFF)r'   )__name__
__module____qualname____doc__intr   r   boolr   strr1   r   r6   r   r8   r9   r   r4   __classcell__r%   r%   r#   r&   r	      sV    
	
(r	   c                       sH   e Zd ZdZ											
	d fdd	Zdd Zdd Z  ZS )r-   aM  
    A mock dataset class for text-to-image tasks, simulating data samples for training and testing.

    This dataset generates synthetic data for both image and text inputs, with options to use
    pre-cached latent representations or raw data. The class is designed for use in testing and
    prototyping machine learning models.

    Attributes:
        image_H (int): Height of the generated images.
        image_W (int): Width of the generated images.
        length (int): Total number of samples in the dataset.
        image_key (str): Key for accessing image data in the output dictionary.
        txt_key (str): Key for accessing text data in the output dictionary.
        hint_key (str): Key for accessing hint data in the output dictionary.
        image_precached (bool): Whether to use pre-cached latent representations for images.
        text_precached (bool): Whether to use pre-cached embeddings for text.
        prompt_seq_len (int): Sequence length for text prompts.
        pooled_prompt_dim (int): Dimensionality of pooled text embeddings.
        context_dim (int): Dimensionality of the text embedding context.
        vae_scale_factor (int): Scaling factor for the VAE latent representation.
        vae_channels (int): Number of channels in the VAE latent representation.
        latent_shape (tuple): Shape of the latent representation for images (if pre-cached).
        prompt_embeds_shape (tuple): Shape of the text prompt embeddings (if pre-cached).
        pooped_prompt_embeds_shape (tuple): Shape of pooled text embeddings (if pre-cached).
        text_ids_shape (tuple): Shape of the text token IDs (if pre-cached).

    Methods:
        __getitem__(index):
            Retrieves a single sample from the dataset based on the specified index.
        __len__():
            Returns the total number of samples in the dataset.
    順 imagestxthintF         r      c                    s   t    || _|| _|| _|| _|| _|| _|| _|| _	| jr.|t
|| t
|| f| _| j	rA|	|f| _|
f| _|	df| _d S d S )N   )r   r   r,   HW	image_keytxt_keyhint_keyr   r   r@   latent_shapeprompt_embeds_shapepooped_prompt_embeds_shapetext_ids_shape)r"   r*   r+   r,   rO   rP   rQ   r   r   prompt_seq_lenpooled_prompt_dimcontext_dimvae_scale_factorvae_channelsr#   r%   r&   r      s    

z_MockT2IDataset.__init__c                 C   s   i }| j rt| j|d< t| j|d< ntd| j| j|| j< td| j| j|| j< | jrKt| j	|d< t| j
|d< t| j|d< |S d|| j< |S )a  
        Retrieves a single sample from the dataset.

        The sample can include raw image and text data or pre-cached latent representations,
        depending on the configuration.

        Args:
            index (int): Index of the sample to retrieve.

        Returns:
            dict: A dictionary containing the generated data sample. The keys and values
                  depend on whether `image_precached` and `text_precached` are set.
                  Possible keys include:
                    - 'latents': Pre-cached latent representation of the image.
                    - 'control_latents': Pre-cached control latent representation.
                    - 'images': Raw image tensor.
                    - 'hint': Hint tensor for the image.
                    - 'prompt_embeds': Pre-cached text prompt embeddings.
                    - 'pooled_prompt_embeds': Pooled text prompt embeddings.
                    - 'text_ids': Text token IDs.
                    - 'txt': Text input string (if text is not pre-cached).
        latentscontrol_latentsrL   prompt_embedspooled_prompt_embedstext_idszThis is a sample caption input)r   torchrandnrR   rM   rN   rO   rQ   r   rS   rT   rU   rP   )r"   indexitemr%   r%   r&   __getitem__   s   
z_MockT2IDataset.__getitem__c                 C   s   | j S )z
        Returns the total number of samples in the dataset.

        Returns:
            int: Total number of samples (`length` attribute).
        )r,   r5   r%   r%   r&   __len__  s   z_MockT2IDataset.__len__)rD   rE   rF   rG   FFrH   rI   rJ   r   rK   )r<   r=   r>   r?   r   rd   re   rC   r%   r%   r#   r&   r-      s     % (r-   )typingr   r   lightning.pytorchpytorchplr`   !lightning.pytorch.utilities.typesr   r   torch.utils.datar   r   nemo.lightning.pytorch.pluginsr   LightningDataModuler	   r-   r%   r%   r%   r&   <module>   s    