import warnings
from typing import Any, Dict, List, Optional

from lhotse import CutSet, Seconds
from lhotse.dataset.sampling.base import TimeConstraint
from lhotse.dataset.sampling.data_source import WeightedDataSource
from lhotse.dataset.sampling.simple import SimpleCutSampler


class WeightedSimpleCutSampler(SimpleCutSampler):
    """
    Samples cuts from a CutSet, where the per-cut sampling probability is given by a list of weights.
    To enable global sampling, cuts must be in eager mode.

    When performing sampling, it avoids having duplicated cuts in the same batch.
    The sampler terminates once the number of sampled cuts reaches :attr:`num_samples`.

    When one of :attr:`max_duration` or :attr:`max_cuts` is specified,
    the batch size is dynamic.

    Example usage:

        >>> dataset = K2SpeechRecognitionDataset(cuts)
        >>> weights = get_weights(cuts)
        >>> sampler = WeightedSimpleCutSampler(cuts, weights, num_samples=100, max_duration=200.0)
        >>> loader = DataLoader(dataset, sampler=sampler, batch_size=None)
        >>> for epoch in range(start_epoch, n_epochs):
        ...     sampler.set_epoch(epoch)
        ...     train(loader)
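
    The ``get_weights`` helper above is a placeholder (it is not part of lhotse); any
    sequence with one weight per cut will do. As an illustrative sketch, weights could
    be made proportional to each cut's duration:

        >>> # hypothetical weighting: favor longer cuts
        >>> weights = [cut.duration for cut in cuts]
        >>> sampler = WeightedSimpleCutSampler(cuts, weights, num_samples=100, max_duration=200.0)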
    """

    def __init__(
        self,
        cuts: CutSet,
        cuts_weight: List,
        num_samples: int,
        max_duration: Seconds = None,
        max_cuts: Optional[int] = None,
        shuffle: bool = False,
        drop_last: bool = False,
        world_size: Optional[int] = None,
        rank: Optional[int] = None,
        seed: int = 0,
    ):
        """
        WeightedSimpleCutSampler's constructor

        :param cuts: the ``CutSet`` to sample data from.
        :param cuts_weight: the weight of each cut for sampling.
        :param num_samples: the number of samples to be drawn.
        :param max_duration: The maximum total recording duration from ``cuts``.
        :param max_cuts: The maximum number of cuts sampled to form a mini-batch.
            By default, this constraint is off.
        :param shuffle: When ``True``, the cuts will be shuffled at the start of iteration.
            Convenient when mini-batch loop is inside an outer epoch-level loop, e.g.:
            `for epoch in range(10): for batch in dataset: ...` as every epoch will see a
            different cuts order.
        :param drop_last: When ``True``, the last batch is dropped if it's incomplete.
        :param world_size: Total number of distributed nodes. We will try to infer it by default.
        :param rank: Index of distributed node. We will try to infer it by default.
        :param seed: Random seed used to consistently shuffle the dataset across different processes.
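
        For illustration only (this shows the expected shape of ``cuts_weight``, not a
        specific lhotse API), uniform weights, one per cut, make the sampling uniform:

            >>> cuts_weight = [1.0] * len(cuts)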
        """
        super().__init__(
            cuts=cuts,
            max_duration=max_duration,
            max_cuts=max_cuts,
            shuffle=shuffle,
            drop_last=drop_last,
            world_size=world_size,
            rank=rank,
            seed=seed,
        )
        # Weighted sampling requires random access to all cuts up front.
        assert not cuts.is_lazy, "This sampler does not support lazy mode!"

        # The data source draws cuts according to the provided per-cut weights
        # and stops after ``num_samples`` cuts have been yielded.
        self.data_source = WeightedDataSource(
            cuts, weights=cuts_weight, num_samples=num_samples
        )
        self.weights = cuts_weight
        self.num_samples = num_samples

    def state_dict(self) -> Dict[str, Any]:
        """
        Return the current state of the sampler in a state_dict.
        Together with ``load_state_dict()``, this can be used to restore the
        training loop's state to the one stored in the state_dict.
        """
        state_dict = super().state_dict()
        state_dict.update(
            {
                "time_constraint": self.time_constraint.state_dict(),
                "weights": self.weights,
                "num_samples": self.num_samples,
            }
        )
        return state_dict

    def load_state_dict(self, state_dict: Dict[str, Any]) -> None:
        """
        Restore the state of the sampler that is described in a state_dict.
        This will result in the sampler yielding batches from where the previous training left it off.

        .. caution::
            The samplers are expected to be initialized with the same CutSets,
            but this is not explicitly checked anywhere.

        .. caution::
            The input ``state_dict`` is being mutated: we remove each consumed key, and expect
            it to be empty at the end of loading. If you don't want this behavior, pass a copy
            inside of this function (e.g., using ``copy.deepcopy``).

        .. note::
            For implementers of sub-classes of CutSampler: the flag ``self._just_restored_state`` has to be
            handled in ``__iter__`` to make it avoid resetting the just-restored state (only once).
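
        .. note::
            A minimal, illustrative round-trip (assuming ``sampler`` and ``restored_sampler``
            were constructed with the same ``CutSet`` and weights):

            >>> ckpt = sampler.state_dict()
            >>> restored_sampler.load_state_dict(ckpt)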
        """
        time_constraint = TimeConstraint(**state_dict.pop("time_constraint"))
        if self.time_constraint != time_constraint:
            warnings.warn(
                "SimpleCutSampler.load_state_dict(): Inconsistent time_constraint:\n"
                f"expected {self.time_constraint}\n"
                f"received {time_constraint}\n"
                "We will overwrite the settings with the received state_dict."
            )
        self.time_constraint = time_constraint

        super().load_state_dict(state_dict)
        # Skip over the cuts that were already consumed in the restored epoch.
        self.data_source.fast_forward(self.diagnostics.current_epoch_stats.total_cuts)

        self.weights = state_dict.pop("weights")
        self.num_samples = state_dict.pop("num_samples")

    def __iter__(self) -> "WeightedSimpleCutSampler":
        """
        Prepare the dataset for iterating over a new epoch. Will shuffle the data if requested.
        """
        # If the state was just restored via load_state_dict(), do not reset it;
        # otherwise we would lose the progress of the restored epoch.
        if self._just_restored_state:
            return self
        # Reset per-epoch diagnostics and re-initialize the weighted data source,
        # which draws a fresh sample order for the new epoch.
        self.diagnostics.reset_current_epoch()
        iter(self.data_source)
        return self