o
    wiw                     @   s^   d Z ddlZddlZddlmZ dd Zdd ZG d	d
 d
eZdddZ	G dd deZ
dS )z1Classes for mixing samples from multiple sources.    N   )IterableDatasetc                  g   sF    d}	 zt | |t|   }|V  W n
 ty   Y dS w |d7 }q)zYield samples from multiple sources in a round-robin fashion until the shortest source is exhausted.

    Args:
        *sources: Iterable sources to draw samples from.

    Yields:
        Sample from one of the sources.
    r   Tr   N)nextlenStopIterationsourcesisample r   K/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/webdataset/mix.pyround_robin_shortest   s   	
r   c                  g   sr    t | } d}t| dkr7|t| ; }zt| | }|d7 }|V  W n ty.   | |= Y nw t| dksdS dS )zYield samples from multiple sources in a round-robin fashion until all sources are exhausted.

    Args:
        *sources: Iterable sources to draw samples from.

    Yields:
        Sample from one of the sources.
    r   r   N)listr   r   r   r   r   r   r   round_robin_longest#   s   	

r   c                   @   s"   e Zd ZdZdddZdd ZdS )	
RoundRobinz8Iterate over multiple datasets in a round-robin fashion.Fc                 C   s   || _ || _dS )zInitialize the RoundRobin iterator.

        Args:
            datasets (list): List of datasets to iterate over.
            longest (bool): If True, continue until the longest dataset is exhausted.
        N)datasetslongest)selfr   r   r   r   r   __init__;   s   
zRoundRobin.__init__c                 C   s&   dd | j D }| jrt| S t| S )zReturn an iterator over the sources.

        Returns:
            iterator: An iterator that yields samples from the datasets in a round-robin fashion.
        c                 S      g | ]}t |qS r   iter.0dr   r   r   
<listcomp>K       z'RoundRobin.__iter__.<locals>.<listcomp>)r   r   r   r   r   r   r   r   r   __iter__E   s   zRoundRobin.__iter__N)F__name__
__module____qualname____doc__r   r   r   r   r   r   r   8   s    

r   Fc                 c   s    |du rdgt |  }nt|}t | dkrTt|t|  }t }t||}z	t| | V  W n t	yK   |rF| |= ||= nY dS Y nw t | dksdS dS )a  Yield samples randomly from multiple sources based on given probabilities.

    Args:
        sources (list): List of iterable sources to draw samples from.
        probs (list, optional): List of probabilities for each source. Defaults to None.
        longest (bool): If True, continue until all sources are exhausted. Defaults to False.

    Yields:
        Sample randomly selected from one of the sources.
    Nr   r   )
r   r   nparraysumcumsumrandomsearchsortedr   r   )r   probsr   cumrr	   r   r   r   random_samplesR   s$   r-   c                   @   s"   e Zd ZdZdddZdd ZdS )		RandomMixzZIterate over multiple datasets by randomly selecting samples based on given probabilities.NFc                 C   s   || _ || _|| _dS )a:  Initialize the RandomMix iterator.

        Args:
            datasets (list): List of datasets to iterate over.
            probs (list, optional): List of probabilities for each dataset. Defaults to None.
            longest (bool): If True, continue until all datasets are exhausted. Defaults to False.
        N)r   r*   r   )r   r   r*   r   r   r   r   r   r   s   
zRandomMix.__init__c                 C   s"   dd | j D }t|| j| jdS )zReturn an iterator over the sources.

        Returns:
            iterator: An iterator that yields samples randomly from the datasets.
        c                 S   r   r   r   r   r   r   r   r      r   z&RandomMix.__iter__.<locals>.<listcomp>)r   )r   r-   r*   r   r   r   r   r   r   ~   s   zRandomMix.__iter__NFr   r   r   r   r   r.   o   s    
r.   r/   )r#   r(   numpyr$   pytorchr   r   r   r   r-   r.   r   r   r   r   <module>   s   
