o
    Siu                     @   s  d dl Z d dlZd dlZd dlZd dlmZ d dlmZ d dlm	Z	m
Z
mZmZmZmZmZmZ d dlmZmZmZmZ d dlmZmZmZmZ edZG dd	 d	eeZG d
d dZedfdefddZ deddfddZ!edefddZ"G dd dZ#G dd dZ$G dd deZ%G dd deZ&G dd deZ'G dd  d eZ(G d!d" d"eZ)G d#d$ d$eZ*G d%d& d&eZ+G d'd( d(eZ,G d)d* d*eZ-G d+d, d,eZ.d-e	d.e/de	fd/d0Z0d1efd2d3Z1dS )4    N)contextmanager)partial)AnyCallableIterableListLiteralOptionalTypeVarUnion)	LazyMixindecode_json_linedeserialize_item	open_best)Pathlikefastcopyis_module_availablestreaming_shuffleTc                   @   s  e Zd ZdZdeegef fddZdeegef fddZe	dd	d
ddede
eeeef   deeed f fddZe	d	d
d	dde
eeeef   deeed f de
e fddZ			d!de
ej defddZd"de
e defddZdd  Zd	S )#AlgorithmMixinz
    Helper base class with methods that are supposed to work identically
    on Lhotse manifest classes such as CutSet, RecordingSet, etc.
    	predicatec                    s6   t | }| jr|t|  dS | fdd| D S )a'  
        Return a new manifest containing only the items that satisfy ``predicate``.
        If the manifest is lazy, the filtering will also be applied lazily.

        :param predicate: a function that takes a cut as an argument and returns bool.
        :return: a filtered manifest.
        r   c                 3   s    | ]	} |r|V  qd S N ).0cutr   r   ?/home/ubuntu/.local/lib/python3.10/site-packages/lhotse/lazy.py	<genexpr>'   s    z(AlgorithmMixin.filter.<locals>.<genexpr>)typeis_lazy
LazyFilter
from_items)selfr   clsr   r   r   filter   s   zAlgorithmMixin.filtertransform_fnc                 C   s,   t | }|t| j|d}| jr|S | S )a  
        Apply `transform_fn` to each item in this manifest and return a new manifest.
        If the manifest is opened lazy, the transform is also applied lazily.

        :param transform_fn: A callable (function) that accepts a single item instance
            and returns a new (or the same) instance of the same type.
            E.g. with CutSet, callable accepts ``Cut`` and returns also ``Cut``.
        :return: a new ``CutSet`` with transformed cuts.
        )fn)r   
LazyMapperdatar   to_eager)r"   r%   r#   ansr   r   r   map)   s
   
zAlgorithmMixin.mapFNr   
stop_earlyweightsseedr-   r.   r/   trng
randomizedc                G      | t ||||dS )a  
        Merges multiple manifest iterables into a new iterable by lazily multiplexing them during iteration time.
        If one of the iterables is exhausted before the others, we will keep iterating until all iterables
        are exhausted. This behavior can be changed with ``stop_early`` parameter.

        :param manifests: iterables to be multiplexed.
            They can be either lazy or eager, but the resulting manifest will always be lazy.
        :param stop_early: should we stop the iteration as soon as we exhaust one of the manifests.
        :param weights: an optional weight for each iterable, affects the probability of it being sampled.
            The weights are uniform by default.
            If lengths are known, it makes sense to pass them here for uniform distribution of
            items in the expectation.
        :param seed: the random seed, ensures deterministic order across multiple iterations.
        r,   )LazyIteratorMultiplexer)r#   r-   r.   r/   	manifestsr   r   r   mux9   s
   zAlgorithmMixin.muxr.   r/   max_open_streamsr8   c                G   r3   )a  
        Merges multiple manifest iterables into a new iterable by lazily multiplexing them during iteration time.
        Unlike ``mux()``, this method allows to limit the number of max open sub-iterators at any given time.

        To enable this, it performs 2-stage sampling.
        First, it samples with replacement the set of iterators ``I`` to construct a subset ``I_sub``
        of size ``max_open_streams``.
        Then, for each iteration step, it samples an iterator ``i`` from ``I_sub``,
        fetches the next item from it, and yields it.
        Once ``i`` becomes exhausted, it is replaced with a new iterator ``j`` sampled from ``I_sub``.

        .. caution:: Do not use this method with inputs that are infinitely iterable as they will
            silently break the multiplexing property by only using a subset of the input iterables.

        .. caution:: This method is not recommended for multiplexing for a small amount of iterations,
            as it may be much less accurate than ``mux()`` depending on the number of open streams,
            iterable sizes, and the random seed.

        :param manifests: iterables to be multiplexed.
            They can be either lazy or eager, but the resulting manifest will always be lazy.
        :param weights: an optional weight for each iterable, affects the probability of it being sampled.
            The weights are uniform by default.
            If lengths are known, it makes sense to pass them here for uniform distribution of
            items in the expectation.
        :param seed: the random seed, ensures deterministic order across multiple iterations.
        :param max_open_streams: the number of iterables that can be open simultaneously at any given time.
        r7   )"LazyInfiniteApproximateMultiplexer)r#   r.   r/   r8   r5   r   r   r   infinite_muxU   s   #zAlgorithmMixin.infinite_mux'  rngbuffer_sizec                 C   sJ   t | }|du r
t}| jr|t| j||dS | j }|| ||S )ad  
        Shuffles the elements and returns a shuffled variant of self.
        If the manifest is opened lazily, performs shuffling on-the-fly with a fixed buffer size.

        :param rng: an optional instance of ``random.Random`` for precise control of randomness.
        :return: a shuffled copy of self, or a manifest that is shuffled lazily.
        N)r=   r<   )r   randomr   LazyShufflerr(   copyshuffle)r"   r<   r=   r#   newr   r   r   rA      s   

zAlgorithmMixin.shuffletimespreserve_idc                 C   s   t | }|t| ||dS )aP  
        Return a new, lazily evaluated manifest that iterates over the original elements ``times``
        number of times.

        :param times: how many times to repeat (infinite by default).
        :param preserve_id: when ``True``, we won't update the element ID with repeat number.
        :return: a repeated manifest.
        )rC   rD   )r   LazyRepeater)r"   rC   rD   r#   r   r   r   repeat   s   	zAlgorithmMixin.repeatc                 C   s   t | }|t| j|jS r   )r   LazyIteratorChainr(   )r"   otherr#   r   r   r   __add__   s   zAlgorithmMixin.__add__)Nr;   NF)__name__
__module____qualname____doc__r   r   boolr$   r+   classmethodr	   r   r   intfloatr   r6   r:   r>   RandomrA   rF   rI   r   r   r   r   r      sF    -
r   c                   @   s(   e Zd ZdZh dZdd Zdd ZdS )Dillablea<  
    Mix-in that will leverage ``dill`` instead of ``pickle``
    when pickling an object.

    It is useful when the user can't avoid ``pickle`` (e.g. in multiprocessing),
    but needs to use unpicklable objects such as lambdas.

    If ``dill`` is not installed, it defers to what ``pickle`` does by default.
    >   1yesTruetruec                 C   s    t  rdd l}|| jS | jS Nr   )is_dill_enableddilldumps__dict__)r"   r[   r   r   r   __getstate__   s   zDillable.__getstate__c                 C   s(   t  rdd l}||| _d S || _d S rY   )rZ   r[   loadsr]   )r"   stater[   r   r   r   __setstate__   s   
zDillable.__setstate__N)rK   rL   rM   rN   _ENABLED_VALUESr^   ra   r   r   r   r   rT      s
    
rT   )rU   rW   rX   rV   returnc                 C   s   t dotjdd| v S )zKReturns bool indicating if dill-based pickling in Lhotse is enabled or not.r[   LHOTSE_DILL_ENABLED0)r   osenvironget)rb   r   r   r   rZ      s   rZ   valuec                 C   s&   t dsJ d| rdndtjd< dS )z0Enable or disable dill-based pickling in Lhotse.r[   z^Cannot enable dill because dill is not installed. Please run 'pip install dill' and try again.rU   re   rd   N)r   rf   rg   )ri   r   r   r   set_dill_enabled   s   
rj   c                 c   s"    t  }t|  dV  t| dS )a:  
    Context manager that overrides the setting of Lhotse's dill-backed pickling
    and restores the previous value after exit.

    Example::

        >>> import pickle
        ... with dill_enabled(True):
        ...    pickle.dump(CutSet(...).filter(lambda c: c.duration < 5), open("cutset.pickle", "wb"))
    N)rZ   rj   )ri   previousr   r   r   dill_enabled   s
   rl   c                   @   s>   e Zd ZdZddededdfddZd	d
 ZdefddZ	dS )LazyTxtIteratorz
    LazyTxtIterator is a thin wrapper over builtin ``open`` function to
    iterate over lines in a (possibly compressed) text file.
    It can also provide the number of lines via __len__ via fast newlines counting.
    Tpathas_text_examplerc   Nc                 C   s   || _ || _d | _d S r   )rn   ro   _len)r"   rn   ro   r   r   r   __init__   s   
zLazyTxtIterator.__init__c                 c   s    ddl m} d}t| jd}|D ]}| }| jr||}|V  |d7 }qW d    n1 s1w   Y  | jd u r@|| _d S d S )Nr   )TextExampler   )lhotse.cut.textrr   r   rn   stripro   rp   )r"   rr   totfliner   r   r   __iter__   s   


zLazyTxtIterator.__iter__c                 C      | j d u rt| j| _ | j S r   rp   count_newlines_fastrn   r"   r   r   r   __len__     
zLazyTxtIterator.__len__)T)
rK   rL   rM   rN   r   rO   rq   rz   rQ   r   r   r   r   r   rm      s
    rm   c                   @   s8   e Zd ZdZdeddfddZdd Zdefd	d
ZdS )LazyJsonlIteratorz
    LazyJsonlIterator provides the ability to read JSON lines as Python dicts.
    It can also provide the number of lines via __len__ via fast newlines counting.
    rn   rc   Nc                 C   s   || _ d | _d S r   )rn   rp   r"   rn   r   r   r   rq     s   
zLazyJsonlIterator.__init__c                 c   sj    d}t | jd}|D ]}t|}|V  |d7 }qW d    n1 s$w   Y  | jd u r3|| _d S d S )Nr   rs   rt   )r   rn   r   rp   )r"   rw   rx   ry   r(   r   r   r   rz     s   


zLazyJsonlIterator.__iter__c                 C   r{   r   r|   r~   r   r   r   r      r   zLazyJsonlIterator.__len__)	rK   rL   rM   rN   r   rq   rz   rQ   r   r   r   r   r   r     s
    
r   c                   @   sT   e Zd ZdZdeddfddZedefddZd	d
 Zde	fddZ
dddZdS )LazyManifestIteratora  
    LazyManifestIterator provides the ability to read Lhotse objects from a
    JSONL file on-the-fly, without reading its full contents into memory.

    This class is designed to be a partial "drop-in" replacement for ordinary dicts
    to support lazy loading of RecordingSet, SupervisionSet and CutSet.
    Since it does not support random access reads, some methods of these classes
    might not work properly.
    rn   rc   Nc                 C   s   t || _d S r   )r   sourcer   r   r   r   rq   1     zLazyManifestIterator.__init__c                 C   s   | j jS r   )r   rn   r~   r   r   r   rn   4  s   zLazyManifestIterator.pathc                 c   s    t t| jE d H  d S r   )r+   r   r   r~   r   r   r   rz   8  s   zLazyManifestIterator.__iter__c                 C   
   t | jS r   )lenr   r~   r   r   r   r   ;     
zLazyManifestIterator.__len__rG   c                 C   
   t | |S r   rG   r"   rH   r   r   r   rI   >  r   zLazyManifestIterator.__add__rc   rG   )rK   rL   rM   rN   r   rq   propertyrn   rz   rQ   r   rI   r   r   r   r   r   &  s    
r   c                   @   sb   e Zd ZdZddddededeeee	d f  d	dfd
dZ
dd Zd	efddZdddZdS )rG   a@  
    A thin wrapper over multiple iterators that enables to combine lazy manifests
    in Lhotse. It iterates all underlying iterables sequentially.

    It also supports shuffling the sub-iterators when it's iterated over.
    This can be used to implement sharding (where each iterator is a shard)
    with randomized shard order. Every iteration of this object will increment
    an internal counter so that the next time it's iterated, the order of shards
    is again randomized.

    .. note:: if any of the input iterables is a dict, we'll iterate only its values.
    FN)shuffle_itersr/   	iteratorsr   r/   r0   rc   c                G   sV   g | _ || _|| _d| _|D ]}t|tr"|j D ]}| j | qq| j | qd S rY   )r   r   r/   	num_iters
isinstancerG   append)r"   r   r/   r   itsub_itr   r   r   rq   P  s   

zLazyIteratorChain.__init__c                 c   s    ddl m} | j}| jr,| jd u rt}nt|| j| j }|| |  jd7  _|D ]}t	|t
r9| }|E d H  q.d S )Nr   resolve_seedrt   )lhotse.dataset.dataloadingr   r   r   r/   r>   rS   r   rA   r   dictvalues)r"   r   r   r<   r   r   r   r   rz   b  s   


zLazyIteratorChain.__iter__c                 C      t dd | jD S )Nc                 s       | ]}t |V  qd S r   r   r   r   r   r   r   r   s      z,LazyIteratorChain.__len__.<locals>.<genexpr>sumr   r~   r   r   r   r   r     zLazyIteratorChain.__len__c                 C   r   r   r   r   r   r   r   rI   u  r   zLazyIteratorChain.__add__r   )rK   rL   rM   rN   r   rO   r	   r   rQ   r   rq   rz   r   rI   r   r   r   r   rG   B  s     
rG   c                   @   st   e Zd ZdZdddddededeeee	e
f   d	ee	ed
 f ddf
ddZdd Zde	fddZdddZdS )r4   aO  
    A wrapper over multiple iterators that enables to combine lazy manifests in Lhotse.
    During iteration, unlike :class:`.LazyIteratorChain`, :class:`.LazyIteratorMultiplexer`
    at each step randomly selects the iterable used to yield an item.

    Since the iterables might be of different length, we provide a ``weights`` parameter
    to let the user decide which iterables should be sampled more frequently than others.
    When an iterable is exhausted, we will keep sampling from the other iterables, until
    we exhaust them all, unless ``stop_early`` is set to ``True``.
    FNr   r,   r   r-   r.   r/   r0   rc   c                G   sj   t || _|| _|| _t| jdksJ d|d u r$dgt| j | _n|| _t| jt| jks3J d S )Nrt   z5There have to be at least two iterables to multiplex.)listr   r-   r/   r   r.   )r"   r-   r.   r/   r   r   r   r   rq     s   
z LazyIteratorMultiplexer.__init__c           
      #   s    ddl m} t|j}dd jD }dd tt|D   fdd}| rgtdd t	t j
D  \}}|j||d	d
d }|| }z	t|}	|	V  W n tya   d |< Y q)w | s,d S d S )Nr   r   c                 S   s   g | ]}t |qS r   )iterr   r   r   r   
<listcomp>  s    z4LazyIteratorMultiplexer.__iter__.<locals>.<listcomp>c                 S   s   g | ]}d qS )Fr   )r   _r   r   r   r     s    c                      s   j rt  S t  S r   )r-   anyallr   	exhaustedr"   r   r   should_continue  s   

z9LazyIteratorMultiplexer.__iter__.<locals>.should_continuec                 S   s    g | ]\}\}}|s||fqS r   r   )r   iis_exhaustedwr   r   r   r     s    
rt   r.   kT)r   r   r>   rS   r/   r   ranger   zip	enumerater.   choicesnextStopIteration)
r"   r   r<   itersr   active_indexesactive_weightsidxselecteditemr   r   r   rz     s,   
z LazyIteratorMultiplexer.__iter__c                 C   r   )Nc                 s   r   r   r   r   r   r   r   r     r   z2LazyIteratorMultiplexer.__len__.<locals>.<genexpr>r   r~   r   r   r   r     r   zLazyIteratorMultiplexer.__len__rG   c                 C   r   r   r   r   r   r   r   rI     r   zLazyIteratorMultiplexer.__add__r   )rK   rL   rM   rN   r   rO   r	   r   r   rQ   rR   r   rq   rz   r   rI   r   r   r   r   r4   y  s&    
r4   c                   @   sf   e Zd ZdZddddddededeeee	e
f   d	ee	ed
 f dee	 ddfddZdd ZdS )r9   a  
    A variant of :class:`.LazyIteratorMultiplexer` that allows to control the number of
    iterables that are simultaneously open.

    It is useful for large-scale data sets where opening multiple file handles in
    many processes leads to exhaustion of the operating system resources.

    If the data sets are sharded, it is recommended to pass each shard as a separate iterator
    when creating objects of this class. It is OK to assign a dataset-level weight to each shard
    (e.g., if a dataset has a weight of 0.5, assign weight 0.5 to each of its shards).

    There are several differences between this class and :class:`.LazyIteratorMultiplexer`:
    * Objects of this class are infinite iterators.
    * We hold a list of ``max_open_streams`` open iterators at any given time.
        This list is filled by sampling input iterators with replacement.

    These differences are necessary to guarantee the weighted sampling property.
    If we did not sample with replacement or make it infinite, we would simply
    exhaust highly-weighted iterators towards the beginning of each "epoch"
    and keep sampling only lowly-weighted iterators towards the end of each "epoch".
    FNr   )r-   r.   r/   r8   r   r-   r.   r/   r0   r8   rc   c                G   s   t || _|| _|| _|| _|d u s|t| jkrt| j| _t| jdks(J || _|d u r8dgt| j | _t| jt| jksDJ | jd u sV| jdksXJ d| jd S d S )Nr   rt   zself.max_open_streams=)r   r   r-   r/   r8   r   r.   )r"   r-   r.   r/   r8   r   r   r   r   rq     s   

z+LazyInfiniteApproximateMultiplexer.__init__c                 #   s    ddl m} t|jfdd}| dgj  dgj ttj}dtddf fdd	}tjD ]}|| q>	 j	|t
dkrQndddd } | }z	t|}|V  W n ty{   || t | }|V  Y nw qF)aZ  
        Assumptions
        - we have N streams but can only open M at the time (M < N)
        - the streams are finite
        - each stream needs to be "short" to ensure the mux property
        - each stream may be interpreted as a shard belonging to some larger group of streams
          (e.g. multiple shards of a given dataset).
        r   r   c                  3   sD    t ttj} 	  j| jddd }j| j| fV  q)NTrt   )r   r   )r   r   r   r   r   r.   )indexesr   )r<   r"   r   r   shuffled_streams  s   zELazyInfiniteApproximateMultiplexer.__iter__.<locals>.shuffled_streamsNposrc   c                    s$   t \}}t| | < || < d S r   )r   r   )r   sampled_streamsampled_weight)active_streamsr   stream_sourcer   r   sample_new_stream_at  s   zILazyInfiniteApproximateMultiplexer.__iter__.<locals>.sample_new_stream_atTrt   r   )r   r   r>   rS   r/   r8   r   r   rQ   r   r   r   r   )r"   r   r   stream_indexesr   
stream_posr   r   r   )r   r   r<   r"   r   r   rz     s:   	


z+LazyInfiniteApproximateMultiplexer.__iter__)rK   rL   rM   rN   r   rO   r	   r   r   rQ   rR   r   rq   rz   r   r   r   r   r9     s(    
r9   c                	   @   sV   e Zd ZdZ		ddededeej ddfdd	Z	d
d Z
defddZdddZdS )r?   z
    A wrapper over an iterable that enables lazy shuffling.
    The shuffling algorithm is reservoir-sampling based.
    See :func:`lhotse.utils.streaming_shuffle` for details.
    r;   Niteratorr=   r<   rc   c                 C   s   || _ || _|| _d S r   )r   r=   r<   )r"   r   r=   r<   r   r   r   rq   B  s   
zLazyShuffler.__init__c                 C   s   t tt | j| j| jdS )N)bufsizer<   )r   r   r   r=   r<   r~   r   r   r   rz   L  s   zLazyShuffler.__iter__c                 C   r   r   r   r   r~   r   r   r   r   U  r   zLazyShuffler.__len__rG   c                 C   r   r   r   r   r   r   r   rI   X  r   zLazyShuffler.__add__)r;   Nr   )rK   rL   rM   rN   r   rQ   r	   r>   rS   rq   rz   r   rI   r   r   r   r   r?   ;  s     	

	r?   c                   @   sP   e Zd ZdZdedeegef ddfddZdd	 Z	dddZ
defddZdS )r    z
    A wrapper over an iterable that enables lazy filtering.
    It works like Python's `filter` built-in by applying the filter predicate
    to each element and yielding it further if predicate returned ``True``.
    r   r   rc   Nc                 C   sb   || _ || _t| jsJ d| dt| jtjr+| jjdkr-tds/t	d d S d S d S d S )Nz2LazyFilter: 'predicate' arg must be callable (got ).<lambda>r[   zA lambda was passed to LazyFilter: it may prevent you from forking this process. If you experience issues with num_workers > 0 in torch.utils.data.DataLoader, try passing a regular function instead.)
r   r   callabler   types
LambdaTyperK   r   warningswarn)r"   r   r   r   r   r   rq   c  s"   
zLazyFilter.__init__c                 C   s   t | j| jS r   )r$   r   r   r~   r   r   r   rz   t  r   zLazyFilter.__iter__rG   c                 C   r   r   r   r   r   r   r   rI   w  r   zLazyFilter.__add__c                 C      t d)Na$  LazyFilter does not support __len__ because it would require iterating over the whole iterator, which is not possible in a lazy fashion. If you really need to know the length, convert to eager mode first using `.to_eager()`. Note that this will require loading the whole iterator into memory.	TypeErrorr~   r   r   r   r   z     zLazyFilter.__len__r   )rK   rL   rM   rN   r   r   r   rO   rq   rz   rI   rQ   r   r   r   r   r   r    \  s     
r    c                
   @   sf   e Zd ZdZ	ddedeegef deeegef  ddfddZ	d	d
 Z
defddZdddZdS )r'   a  
    A wrapper over an iterable that enables lazy function evaluation on each item.
    It works like Python's `map` built-in by applying a callable ``fn``
    to each element ``x`` and yielding the result of ``fn(x)`` further.

    New in Lhotse v1.22.0: ``apply_fn`` can be provided to decide whether ``fn`` should be applied
        to a given example or not (in which case it will return it as-is, i.e., it does not filter).
    Nr   r&   apply_fnrc   c                 C   s   || _ || _|| _t| jsJ d| d| jd ur(t| js(J d| dt| jtjr5| jjdksEt| jtjrL| jjdkrNt sPt	
d d S d S d S d S )Nz+LazyMapper: 'fn' arg must be callable (got r   z1LazyMapper: 'apply_fn' arg must be callable (got r   zA lambda was passed to LazyMapper: it may prevent you from forking this process. If you experience issues with num_workers > 0 in torch.utils.data.DataLoader, try passing a regular function instead.)r   r&   r   r   r   r   r   rK   rZ   r   r   )r"   r   r&   r   r   r   r   rq     s.   

zLazyMapper.__init__c                 c   sT    | j d u rt| j| jE d H  d S | jD ]}|  |r"| |}n|}|V  qd S r   )r   r+   r&   r   )r"   r   r*   r   r   r   rz     s   


zLazyMapper.__iter__c                 C   r   r   r   r~   r   r   r   r     r   zLazyMapper.__len__rG   c                 C   r   r   r   r   r   r   r   rI     r   zLazyMapper.__add__r   r   )rK   rL   rM   rN   r   r   r   r	   rO   rq   rz   rQ   r   rI   r   r   r   r   r'     s    
r'   c                   @   sB   e Zd ZdZdeddfddZdd Zdd
dZdefddZ	dS )LazyFlattenera  
    A wrapper over an iterable of collections that flattens it to an iterable of items.

    Example::

        >>> list_of_cut_sets: List[CutSet] = [CutSet(...), CutSet(...)]
        >>> list_of_cuts: List[Cut] = list(LazyFlattener(list_of_cut_sets))
    r   rc   Nc                 C   s
   || _ d S r   r   )r"   r   r   r   r   rq     r   zLazyFlattener.__init__c                 c   s    | j D ]}|E d H  qd S r   r   )r"   cutsr   r   r   rz     s   
zLazyFlattener.__iter__rG   c                 C   r   r   r   r   r   r   r   rI     r   zLazyFlattener.__add__c                 C   r   )Na'  LazyFlattener does not support __len__ because it would require iterating over the whole iterator, which is not possible in a lazy fashion. If you really need to know the length, convert to eager mode first using `.to_eager()`. Note that this will require loading the whole iterator into memory.r   r~   r   r   r   r     r   zLazyFlattener.__len__r   )
rK   rL   rM   rN   r   rq   rz   rI   rQ   r   r   r   r   r   r     s    	
r   c                	   @   sR   e Zd ZdZ	ddedee deddfdd	Zd
d Z	defddZ
dddZdS )rE   z_
    A wrapper over an iterable that enables to repeat it N times or infinitely (default).
    NFr   rC   rD   rc   c                 C   s2   || _ || _|| _| jd u s| jdksJ d S d S rY   )r   rC   rD   )r"   r   rC   rD   r   r   r   rq     s    zLazyRepeater.__init__c                 c   s    d}| j d u s|| j k r>| jr| j}n
t| jtt|d}d}|D ]}d}|V  q"|s.d S |d7 }| j d u s|| j k sd S d S )Nr   )r   FTrt   )rC   rD   r   r'   r   attach_repeat_idx_to_id)r"   epochr   at_least_oncer   r   r   r   rz     s    zLazyRepeater.__iter__c                 C   s0   | j d u rtdt| j dt| j| j  S )Nzobject of type 'z' is an infinite iterator)rC   r   r   rK   r   r   r~   r   r   r   r     s
   
zLazyRepeater.__len__rG   c                 C   r   r   r   r   r   r   r   rI     r   zLazyRepeater.__add__rJ   r   )rK   rL   rM   rN   r   r	   rQ   rO   rq   rz   r   rI   r   r   r   r   rE     s    
rE   c                   @   sJ   e Zd ZdZdedededdfddZd	d
 ZdddZdefddZ	dS )
LazySlicerzZ
    A wrapper over an iterable that enables selecting k-th element every n elements.
    r   r   nrc   Nc                 C   s4   || _ ||k sJ d| d| d|| _|| _d S )NzKWhen selecting k-th element every n elements, k must be less than n (got k=z n=r   )r   r   r   )r"   r   r   r   r   r   r   rq     s   

zLazySlicer.__init__c                 c   s0    t | jD ]\}}|| j | jkr|V  qd S r   )r   r   r   r   )r"   r   r   r   r   r   rz     s   zLazySlicer.__iter__rG   c                 C   r   r   r   r   r   r   r   rI     r   zLazySlicer.__add__c                 C   r   )Na$  LazySlicer does not support __len__ because it would require iterating over the whole iterator, which is not possible in a lazy fashion. If you really need to know the length, convert to eager mode first using `.to_eager()`. Note that this will require loading the whole iterator into memory.r   r~   r   r   r   r     r   zLazySlicer.__len__r   )
rK   rL   rM   rN   r   rQ   rq   rz   rI   r   r   r   r   r   r     s    
r   r   r   c                 C   s&   t | ds| S t| | j d| dS )Nid_repeat)r   )hasattrr   r   )r   r   r   r   r   r     s   
r   rn   c                 C   sd   dd }t | dksdnd}t| |}tdd ||jD }W d   |S 1 s+w   Y  |S )	z
    Counts newlines in a file using buffered chunk reads.
    The fastest possible option in Python according to:
    https://stackoverflow.com/a/68385697/5285891
    (This is a slightly modified variant of that answer.)
    c                 s   s(    | d}|r|V  | d}|sd S d S )Ni   r   )readerbr   r   r   	_make_gen-  s   z&count_newlines_fast.<locals>._make_gen-rbrs   c                 s   s    | ]}| d V  qdS )   
N)count)r   bufr   r   r   r   5  s    z&count_newlines_fast.<locals>.<genexpr>N)strr   r   read)rn   r   	read_moderx   r   r   r   r   r}   %  s   
r}   )2rf   r>   r   r   
contextlibr   	functoolsr   typingr   r   r   r   r   r	   r
   r   lhotse.serializationr   r   r   r   lhotse.utilsr   r   r   r   r   r   rT   	frozensetrO   rZ   rj   rl   rm   r   r   rG   r4   r9   r?   r    r'   r   rE   r   rQ   r   r}   r   r   r   r   <module>   s>    ( 
 7G{!'8)