o
    Si6"                     @   sx   d dl mZ d dlmZ d dlmZmZmZmZm	Z	m
Z
mZ d dlmZ d dlmZ d dlmZmZ G dd deZd	S )
    )reduce)add)AnyCallableDictListOptionalTupleUnion)CutSet)Cut)
CutSamplerSamplingDiagnosticsc                       sP  e Zd ZdZdddededdf fdd	Zedee	 fd
dZ
edee fddZedee fddZ fddZdeeef f fddZdeeef ddf fddZdd Zdeeee f fddZdeddf fddZdeegef ddfdd Zd!eeeed"f f ddfd#d$Zedefd%d&Zdefd'd(Z   Z!S ))
ZipSamplera-  
    :class:`.ZipSampler` takes several samplers as input and concatenates their
    sampled mini-batch cuts together into a single :class:`~lhotse.cut.CutSet`,
    or returns a tuple of the mini-batch CutSets.
    It is helpful for ensuring that each batch consists of some proportion of cuts
    coming from different sources.

    The input samplers do not have to provide the same number of batches -- when
    any of the samplers becomes depleted, the iteration will stop (like with
    Python's ``zip()`` function).

    Example::

        >>> sampler = ZipSampler(
        ...     SimpleCutSampler(cuts_corpusA, max_duration=250, shuffle=True),
        ...     SimpleCutSampler(cuts_corpusB, max_duration=100, shuffle=True),
        ... )
        >>> for cut in sampler:
        ...     pass  # profit
    T)merge_batchessamplersr   returnNc                   s    t  jddd || _|| _dS )ap  
        ZipSampler's constructor.

        :param samplers: The list of samplers from which we sample batches together.
        :param merge_batches: Should we merge the batches from each sampler into a single CutSet,
            or return a tuple of CutSets. Setting this to ``False`` makes ZipSampler behave
            more like Python's ``zip`` function.
        r      )rank
world_sizeN)super__init__r   r   )selfr   r   	__class__ O/home/ubuntu/.local/lib/python3.10/site-packages/lhotse/dataset/sampling/zip.pyr       s   	
zZipSampler.__init__c                 C   ,   zt dd | jD W S  ty   Y dS w )z
        Remaining duration of data left in the sampler (may be inexact due to float arithmetic).

        .. note: For ZipSampler, it's the minimum of remaining durations in its sub-samplers.
        c                 s       | ]}|j V  qd S N)remaining_duration.0sr   r   r   	<genexpr>5       z0ZipSampler.remaining_duration.<locals>.<genexpr>Nminr   	TypeErrorr   r   r   r   r    -   s
   zZipSampler.remaining_durationc                 C   r   )z
        Remaining number of cuts in the sampler.
        Not available when the CutSet is read in lazy mode (returns None).

        .. note: For ZipSampler, it's the minimum of remaining cuts in its sub-samplers.
        c                 s   r   r   )remaining_cutsr!   r   r   r   r$   B   r%   z,ZipSampler.remaining_cuts.<locals>.<genexpr>Nr&   r)   r   r   r   r*   9   
   zZipSampler.remaining_cutsc                 C   r   )z
        Total number of cuts in the sampler.
        Not available when the CutSet is read in lazy mode (returns None).

        .. note: For ZipSampler, it's the minimum of num cuts in its sub-samplers.
        c                 s   r   r   )num_cutsr!   r   r   r   r$   O   r%   z&ZipSampler.num_cuts.<locals>.<genexpr>Nr&   r)   r   r   r   r,   F   r+   zZipSampler.num_cutsc                    s"   t    | jD ]}|  qdS )a8  
        Enables re-setting to the start of an epoch when iter() is called.
        This is only needed in one specific scenario: when we restored previous
        sampler state via ``sampler.load_state_dict()`` but want to discard
        the progress in the current epoch and start from the beginning.
        N)r   allow_iter_to_reset_stater   )r   r#   r   r   r   r-   S   s   


z$ZipSampler.allow_iter_to_reset_statec                    s,   t   }|| jdd | jD d |S )z
        Return the current state of the sampler in a state_dict.
        Together with ``load_state_dict()``, this can be used to restore the
        training loop's state to the one stored in the state_dict.
        c                 S   s   g | ]}|  qS r   )
state_dictr!   r   r   r   
<listcomp>h   s    z)ZipSampler.state_dict.<locals>.<listcomp>)r   r   )r   r.   updater   r   )r   r.   r   r   r   r.   ^   s   
zZipSampler.state_dictr.   c                    s|   | d| _t| jt|d ks#J dt| j dt|d  dt| j| dD ]	\}}|| q,t | dS )aX  
        Restore the state of the sampler that is described in a state_dict.
        This will result in the sampler yielding batches from where the previous training left it off.

        .. caution::
            The samplers are expected to be initialized with the same CutSets,
            but this is not explicitly checked anywhere.

        .. caution::
            The input ``state_dict`` is being mutated: we remove each consumed key, and expect
            it to be empty at the end of loading. If you don't want this behavior, pass a copy
            inside of this function (e.g., using ``import deepcopy``).

        .. note::
            For implementers of sub-classes of CutSampler: the flag ``self._just_restored_state`` has to be
            handled in ``__iter__`` to make it avoid resetting the just-restored state (only once).
        r   r   z_Error in ZipSampler.load_state_dict(): Inconsistent number of samplers: current ZipSampler has z, the state_dict has .N)popr   lenr   zipload_state_dictr   )r   r.   sampler
sampler_sdr   r   r   r5   m   s   
zZipSampler.load_state_dictc                 C   s   | j D ]}t| q| S r   )r   iter)r   r6   r   r   r   __iter__   s   

zZipSampler.__iter__c                    s   |    | jrLg }| jD ]}t|}|| q|st S t|tr,tdd |D S t|}g }t	|D ] |t fdd|D  q6t
|S g }| jD ]	}|t| qQt
|S )Nc                 s   s    | ]
}|D ]}|V  qqd S r   r   r"   batchcr   r   r   r$      s    z)ZipSampler._next_batch.<locals>.<genexpr>c                 3   s"    | ]}|  D ]}|V  qqd S r   r   r:   ir   r   r$      s     )r-   r   r   nextappendr   
isinstance	from_cutsr3   rangetuple)r   cutsr6   r;   	tuple_lencut_setsr   r=   r   _next_batch   s*   


zZipSampler._next_batchepochc                    s&   | j D ]}|| qt | dS )a  
        Sets the epoch for this sampler. When :attr:`shuffle=True`, this ensures all replicas
        use a different random ordering for each epoch. Otherwise, the next iteration of this
        sampler will yield the same ordering.

        :param epoch: Epoch number.
        N)r   	set_epochr   )r   rI   r#   r   r   r   rJ      s   
zZipSampler.set_epoch	predicatec                 C   s   | j D ]}|| qdS )a
  
        Add a constraint on individual cuts that has to be satisfied to consider them.

        Can be useful when handling large, lazy manifests where it is not feasible to
        pre-filter them before instantiating the sampler.

        Example:
            >>> cuts = CutSet(...)
            ... sampler = SimpleCutSampler(cuts, max_duration=100.0)
            ... # Retain only the cuts that have at least 1s and at most 20s duration.
            ... sampler.filter(lambda cut: 1.0 <= cut.duration <= 20.0)
        N)r   filter)r   rK   r6   r   r   r   rL      s   
zZipSampler.filterr;   .c                 C   s   d S r   r   )r   r;   r   r   r   _log_diagnostics   s   zZipSampler._log_diagnosticsc                 C   s   t tdd | jD S )Nc                 s   r   r   )diagnosticsr!   r   r   r   r$      r%   z)ZipSampler.diagnostics.<locals>.<genexpr>)r   r   r   r)   r   r   r   rN      s   zZipSampler.diagnosticsc                 C   s
   | j  S )zJReturns a string describing the statistics of the sampling process so far.)rN   
get_reportr)   r   r   r   rO      s   
zZipSampler.get_report)"__name__
__module____qualname____doc__r   boolr   propertyr   floatr    intr*   r,   r-   r   strr   r.   r5   r9   r
   r   r	   rH   rJ   r   r   rL   rM   r   rN   rO   __classcell__r   r   r   r   r   
   s(     $"r   N)	functoolsr   operatorr   typingr   r   r   r   r   r	   r
   lhotser   
lhotse.cutr   lhotse.dataset.sampling.baser   r   r   r   r   r   r   <module>   s    $