o
    Si                     @   sf   d dl Z d dlmZ d dlmZmZ d dlZd dlm	Z	 d dl
mZ G dd dZG dd	 d	eZdS )
    N)deque)ListOptional)CutSet)Cutc                   @   s   e Zd ZdZdefddZedefddZede	e
 fdd	Zd
edd fddZdddZdeddfddZd ddZdeddfddZd!ddZdefddZdefddZdS )"
DataSourcez
    An iterator wrapper over CutSet that helps with the sampling process:
    it allows for deterministic re-shuffling of elements and "returning"
    sampled elements to be yielded again.
    itemsc                 C   sf   || _ | j | _d | _t | _| js#tdd | j D | _t| j | _	nd | _d | _	| j| _
| j	| _d S )Nc                 s   s    | ]}|j V  qd S N)duration).0c r   W/home/ubuntu/.local/lib/python3.10/site-packages/lhotse/dataset/sampling/data_source.py	<genexpr>   s    z&DataSource.__init__.<locals>.<genexpr>)_orig_items_shuffled_items_iterr   	_reusableis_lazysum_total_durationlen_total_cuts_remaining_durationremaining_cuts)selfr   r   r   r   __init__   s   zDataSource.__init__returnc                 C   s   | j jS r	   )r   r   r   r   r   r   r   !   s   zDataSource.is_lazyc                 C   s   | j d u rd S td| j S )Nr   )r   maxr   r   r   r   remaining_duration%   s   
zDataSource.remaining_durationseedc                 C   s&   |    t|}| jj|d| _| S )z
        Shuffles the elements using the provided random seed value.
        When the input CutSet is lazy, we use a streaming variant of
        shuffle, that may be less random.
        )rng)resetrandomRandomr   shuffler   )r   r!   rr   r   r   r&   -   s   
zDataSource.shuffleotherc                 C   s   |    | j|j| _| S )z{
        Sorts the underlying CutSet to provide Cuts in the same order of cut_ids
        as the other DataSource.
        )r#   r   	sort_liker   )r   r(   r   r   r   r)   8   s   zDataSource.sort_likecutNc                 C   s8   | j | | js|  j|j7  _|  jd7  _dS dS )z8Push the cut in front of other cuts to be sampled again.   N)r   appendr   r   r
   r   )r   r*   r   r   r   	take_backA   s
   zDataSource.take_backc                 C   s$   d| _ | j  | j| _| j| _dS z'Reset the iterable state of DataSource.N)r   r   clearr   r   r   r   r   r   r   r   r#   H   s   
zDataSource.resetstepsc                 C   s.   |dksJ t |  t|D ]}t|  qdS z5Advance the data source by ``steps`` amount of steps.r   N)iterrangenextr   r0   ir   r   r   fast_forwardO   s
   
zDataSource.fast_forwardc                 C   s   |    t| j| _| S r	   )r#   r2   r   r   r   r   r   r   __iter__V   s   zDataSource.__iter__c                 C   sD   | j r	| j  }nt| j}| js |  j|j8  _|  jd8  _|S Nr+   )r   popleftr4   r   r   r   r
   r   r   next_cutr   r   r   __next__[   s   
zDataSource.__next__c                 C   s
   t | jS r	   )r   r   r   r   r   r   __len__e   s   
zDataSource.__len__)r(   r   r   r   r   N)r   r   )__name__
__module____qualname____doc__r   r   propertyboolr   r   floatr    intr&   r)   r   r-   r#   r7   r8   r=   r>   r   r   r   r   r      s    
	


r   c                       sb   e Zd ZdZdededef fddZdd	d
ZdeddfddZ	dddZ
defddZ  ZS )WeightedDataSourcea  
    An iterator wrapper over CutSet that helps with the sampling process:
    it allows for deterministic re-shuffling of elements and "returning"
    sampled elements to be yielded again.

    Every cut has a sampling weight. At the beginning of each epoch, we
    pre-compute the indexes by sampling from multi-nomial distribution without
    replacement. The data source will be exhausted if the number of drawn cuts
    exceed num_samples
    r   weightsnum_samplesc                    sf   t  j|d t|t|ksJ d|t|k sJ dt|}||  }|| _|| _d| _dS )aL  The constructor of the weighted data source

        Args:
            items (CutSet): The cutset itself
            weights (List): A list of values representing the weight of each cut. All values must be positive
            num_samples (int): The number of samples to be drawn. Must smaller than the total number of cuts
        )r   zThe length should matchzDThe number of samples to be drawn should not exceed the dataset sizeN)	superr   r   nparrayr   rI   rJ   sampled_indexes)r   r   rI   rJ   	__class__r   r   r   u   s   

zWeightedDataSource.__init__r   Nc                 C   s*   d| _ d| _| j  | j| _| j| _dS r.   )r   rN   r   r/   r   r   r   r   r   r   r   r   r#      s
   
zWeightedDataSource.resetr0   c                 C   s0   |dksJ t |  t|D ]}t| j qdS r1   )r2   r3   r4   rN   r5   r   r   r   r7      s
   zWeightedDataSource.fast_forwardc                 C   sD   |    t| j| _tjjt| j| j	| jdd| _
t| j
| _
| S )NF)preplace)r#   r2   r   r   rL   r$   choicer   rI   rJ   rN   r   r   r   r   r8      s   zWeightedDataSource.__iter__c                 C   sJ   | j r	| j  }n| jt| j }| js#|  j|j8  _|  jd8  _|S r9   )	r   r:   r   r4   rN   r   r   r
   r   r;   r   r   r   r=      s   zWeightedDataSource.__next__r?   )r   rH   )r@   rA   rB   rC   r   r   rG   r   r#   r7   r8   r   r=   __classcell__r   r   rO   r   rH   i   s    

rH   )r$   collectionsr   typingr   r   numpyrL   lhotser   
lhotse.cutr   r   rH   r   r   r   r   <module>   s    ^