o
    wi                     @   sp   d Z ddlmZ ddlmZ ddlmZ G dd deZG dd deeZG d	d
 d
eZG dd deeZ	dS )zdTrain PyTorch models directly from POSIX tar archive.

Code works locally or over HTTP connections.
   )utils)IterableDataset)PipelineStagec                   @   s    e Zd ZdZdd Zdd ZdS )MockDatasetzCreate a mock dataset for performance testing and unit testing.

    Args:
        sample: The sample to be returned repeatedly.
        length (int): The length of the mock dataset.
    c                 C   s   || _ || _d S N)samplelength)selfr   r    r
   U/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/webdataset/extradatasets.py__init__   s   
zMockDataset.__init__c                 c   s    t | jD ]}| jV  qdS )zYield samples from the mock dataset.

        Returns:
            Iterator: An iterator that yields the same sample repeatedly.
        N)ranger   r   )r	   _r
   r
   r   __iter__   s   
zMockDataset.__iter__N)__name__
__module____qualname____doc__r   r   r
   r
   r
   r   r      s    r   c                   @   s"   e Zd ZdZdddZdd ZdS )
repeatedlya2  Repeatedly yield samples from a dataset.

    Args:
        source: The source dataset to repeat.
        nepochs (int, optional): Maximum number of epochs to repeat.
        nbatches (int, optional): Maximum number of batches to repeat.
        length (int, optional): Length of the repeated dataset.
    Nc                 C   s   || _ || _|| _d S r   )sourcer   nbatches)r	   r   nepochsr   r   r
   r
   r   r   3   s   
zrepeatedly.__init__c                 C   s   t j|| j| jdS )zReturn an iterator that iterates repeatedly over a source.

        Args:
            source: The source dataset to repeat.

        Returns:
            Iterator: An iterator that repeatedly yields samples from the source.
        )r   r   )r   r   r   r   )r	   r   r
   r
   r   invoke8   s
   	zrepeatedly.invoke)NNN)r   r   r   r   r   r   r
   r
   r
   r   r   )   s    
	r   c                       0   e Zd ZdZ fddZdd Zdd Z  ZS )
with_epocha  Change the actual and nominal length of an IterableDataset.

    This will continuously iterate through the original dataset, but
    impose new epoch boundaries at the given length/nominal.
    This exists mainly as a workaround for the odd logic in DataLoader.
    It is also useful for choosing smaller nominal epoch sizes with
    very large datasets.

    Args:
        dataset: The source IterableDataset.
        length (int): Declared length of the dataset.
    c                    s   t    || _d | _d S r   )superr   r   r   r	   datasetr   	__class__r
   r   r   V      

zwith_epoch.__init__c                 C   s   t | j}d|d< |S )zReturn the pickled state of the dataset.

        This resets the dataset iterator, since that can't be pickled.

        Returns:
            dict: A dictionary representing the pickled state of the dataset.
        Nr   )dict__dict__)r	   resultr
   r
   r   __getstate__[   s   
zwith_epoch.__getstate__c                 c   s    | j du rt|| _ t| jD ]/}zt| j }W n" ty;   t|| _ zt| j }W n ty8   Y Y  dS w Y nw |V  qd| _ dS )a  Return an iterator over the dataset.

        This iterator returns as many samples as given by the `length` parameter.

        Args:
            dataset: The source dataset to iterate over.

        Yields:
            Sample: The next sample from the dataset.
        N)r   iterr   r   nextStopIteration)r	   r   r   r   r
   r
   r   r   g   s"   




zwith_epoch.invoke)r   r   r   r   r   r$   r   __classcell__r
   r
   r   r   r   H   s
    r   c                       r   )with_lengthzRepeatedly yield samples from a dataset with a specified length.

    Args:
        dataset: The source dataset.
        length (int): The stated length of the dataset.
    c                    s   t    || _|| _d S r   )r   r   r   r   r   r   r
   r   r      r    zwith_length.__init__c                 C   s   t |S )zReturn an iterator that iterates over the source dataset.

        Args:
            dataset: The source dataset to iterate over.

        Returns:
            Iterator: An iterator over the source dataset.
        )r%   )r	   r   r
   r
   r   r      s   	zwith_length.invokec                 C   s   | j S )zrReturn the user specified length.

        Returns:
            int: The specified length of the dataset.
        )r   )r	   r
   r
   r   __len__   s   zwith_length.__len__)r   r   r   r   r   r   r*   r(   r
   r
   r   r   r)      s
    r)   N)
r    r   pytorchr   r   r   r   r   r)   r
   r
   r
   r   <module>   s   9