o
    æS™iÚ± ã                   @   s¶  d dl Z d dlZd dlZd dlZd dlZd dlZd dlmZ d dlm	Z	m
Z
mZ d dlmZmZ d dl mZmZ d dlmZ d dlmZmZmZmZmZmZmZmZmZmZmZmZm Z m!Z! d dl"Z#d dl$Z$d dl%m&Z& d d	l'm(Z( d d
l)m*Z*m+Z+ d dl,m-Z- d dl.m/Z/ d dl0m1Z1 d dl2m3Z3m4Z4 d dl5m6Z6 d dl7m8Z8 d dl9m:Z: d dl;m<Z<m=Z=m>Z> d dl?m@Z@mAZA d dlBmCZCmDZD d dlEmFZFmGZGmHZHmIZImJZJmKZKmLZLmMZM d dlNmOZO d dlPmQZQmRZR d dlSmTZTmUZUmVZVmWZWmXZXmYZYmZZZm[Z[m\Z\m]Z]m^Z^m_Z_m`Z` e deCdZadebfdd„ZcG dd„ deOeFƒZdG d d!„ d!e$jejfjgƒZh	 	"		dŸd#e/d$e/d%eXd&ebd'eeV d(eei de3fd)d*„ZjdddeUd+d"dfd,e/d-eXd.ekd/ekd0eld1eid(ebd2eeeie!ekelf f  de/fd3d4„Zm		d d5e/d6e/d'eeV d(eei de3f
d7d8„Znd9ee/ de3fd:d;„Zod9ee/ de/fd<d=„Zp		d d,e/d>eeX d?eei fd@dA„Zq					"	Bd¡dCee* dDeeR dEee> dFeeW dGebdHeXdedfdIdJ„Zr				"	Bd¢dFeWdCee* dDeeR dEee> dGebdHeXdedfdKdL„Zse dMƒZMdNeeM dOeeMgebf deeeM eeM f fdPdQ„ZtdReude/fdSdT„Zvd9eddedfdUdV„Zwd9eddedfdWdX„Zxd9eddedfdYdZ„Zyd9edd[eXdedfd\d]„Zzd^d_„ Z{d`da„ Z|dbdc„ Z}ddde„ Z~dfdg„ Zdhdi„ Z€djdk„ Zdldm„ Z‚dndo„ Zƒdpdq„ Z„drds„ Z…dtdu„ Z†dvdw„ Z‡dxdy„ Zˆdzd{„ Z‰d|d}„ ZŠd~d„ Z‹d€d„ ZŒd‚dƒ„ Zd„d…„ ZŽd†d‡„ Zdˆd‰„ Z	Š	"	d£d,e/d‹eXdŒeidebd(ebdŽeej‘ de/fdd„Z’	"d¤d9edd‘eWd’eek d“ekd”eeieif d•ebd–ebd—eei d˜ebd™ebdšebdeeieei f fd›dœ„Z“G ddž„ džeGƒZ”dS )¥é    N)Údefaultdict)ÚExecutorÚProcessPoolExecutorÚas_completed)ÚpartialÚreduce)ÚchainÚislice)ÚPath)ÚAnyÚCallableÚDictÚ	FrozenSetÚIterableÚListÚLiteralÚOptionalÚSequenceÚSetÚTupleÚTypeÚTypeVarÚUnion)ÚIntervalTree)Útqdm)ÚRecordingSetÚ"null_result_on_audio_loading_error)Ú	AugmentFn)ÚCut)ÚDataCut)ÚMixedCutÚMixTrack)ÚMonoCut)ÚMultiCut)Ú
PaddingCut)ÚFeatureExtractorÚFeaturesÚ
FeatureSet)ÚStatsAccumulatorÚcompute_global_stats)ÚFeaturesWriterÚLilcomChunkyWriter)ÚAlgorithmMixinÚDillableÚLazyFlattenerÚLazyIteratorChainÚLazyManifestIteratorÚ
LazyMapperÚ
LazySlicerÚT)ÚSerializable)ÚSupervisionSegmentÚSupervisionSet)ÚDEFAULT_PADDING_VALUEÚLOG_EPSILONÚDecibelsÚPathlikeÚSecondsÚcompute_num_framesÚcompute_num_samplesÚexactly_one_not_nullÚfastcopyÚifnoneÚsplit_manifest_lazyÚsplit_sequenceÚuuid4ÚFW)ÚboundÚreturnc                 C   s
   t | tƒS ©N)Ú
isinstancer   )Úexample© rJ   úB/home/ubuntu/.local/lib/python3.10/site-packages/lhotse/cut/set.pyÚis_cutL   ó   
rL   c                   @   s¬
  e Zd ZdZd&deee  ddfdd„Zdd defdd	„Z	e
dee fd
d„ƒZe
d'dd„ƒZe
d'dd„ƒZe
d'dd„ƒZe
dee fdd„ƒZe
dee fdd„ƒZe	d(dee dedee dd fdd„ƒZedee dd fdd„ƒZeZe							d)d ee d!ee d"ee d#ee d$ed%ed&edd fd'd(„ƒZed)ee dd fd*d+„ƒZ ed,e!ee"e f dd fd-d.„ƒZ#e						/		d*d0ee$ee"e f  d1ee d2ed3ed4ede!ee%d5 f d6ee"e&egef   d7ee dd fd8d9„ƒZ'	:	;			<		d+d=ed0e$eef d>ee d?ed@edAedBedCedDede$eee f fdEdF„Z(dee fdGdH„Z)	d,d=ee dDede*ee ee ee f fdIdJ„Z+d-dKeddfdLdM„Z,		d.dNedOedPeded  fdQdR„Z-	S	T	;d/d=edUedVedWedXeded  fdYdZ„Z.ddddd[œd\eee  d]eee  d^ee d_ee dd f
d`da„Z/e0fdbe&e1ge1f dcee&e1gef  dd fddde„Z2dfe&e3gef dd fdgdh„Z4	i	d0djedkee&eee5 ge5f  dd fdldm„Z6			n		<d1doedpee dqe%dr dsedBedd fdtdu„Z7	v		w		<d2dxedyedzee diedsedBedd fd{d|„Z8d'd}d~„Z9		<d3dyee dBedd fdd€„Z:d'dd‚„Z;d4dƒedd fd„d…„Z<d-dƒedd fd†d‡„Z=de!d e"e f dd fdˆd‰„Z>	d5dŠed‹ee?e  de$ee@f fdŒd„ZAdddeBdŽddfdeded‘ed’eCd“ed”ed•ee$ee!eeCf f  dd fd–d—„ZD			d6d˜ed™edšed”ed›eeEjF dd fdœd„ZG	ž		d7ded“ed”edŸedd f
d d¡„ZH			<d8ded¢ee dšedBedd f
d£d¤„ZI		¥d9d¦ed§ede!eeJjK e*eJjKeJjKf f fd¨d©„ZLd:dªede!ed f fd«d¬„ZM		d5d­ed®ed¯ee dd fd°d±„ZNd4d²eCd®edd fd³d´„ZOd4d²eCd®edd fdµd¶„ZPd4d²eCd®edd fd·d¸„ZQ	d;d¹edºed®edd fd»d¼„ZR	d;d½eCd¾ed®edd fd¿dÀ„ZSd4d®edd fdÁdÂ„ZTddddd;gfdÃedÄ dÅedÆed®edÇee dd fdÈdÉ„ZU			Ê		Ë	/	d<dd dee dÌedÍee!eVe"eV f  d”ee dÎeCde!ee%dÏ eEjFf dÐedd fdÑdÒ„ZWd'dÓdÔ„ZXd'dÕdÖ„ZYd'd×dØ„ZZd'dÙdÚ„Z[d'dÛdÜ„Z\dde]dddfdÝe^dÞedBee dßee_ dàe`ea dáeeb dâedãedd fdädå„Zcddædçdde]dfdÝe^dÞedèee déedêed¦edßee_ dàe`ea dëedd fdìdí„Zd	î						d=dÞedïedðee dBee dáeeb dßee_ dãedñedd fdòdó„Ze			d>dÞee dôee dÝee^ de$eeJjKf fdõdö„Zfd,edd fd÷dø„Zgd,edd fdùdú„Zhd4d=edDedd fdûdü„Zi	d&dýejd#ee dd fdþdÿ„Zkdbe&egef dd fd d„Zl	d?dededd fdd„Zmdbe&e3ge3f dd fdd„Zndbe&egef dd fdd	„Zod@dedd fdd„Zpdd„ Zqedddddœdedededefdd„ƒZrdefdd„Zsde!eef defdd„Ztde!eef defd d!„Zudefd"d#„Zvdee fd$d%„ZwdS (A  ÚCutSetav  
    :class:`~lhotse.cut.CutSet` represents a collection of cuts.
    CutSet ties together all types of data -- audio, features and supervisions, and is suitable to represent
    training/dev/test sets.

    CutSet can be either "lazy" (acts as an iterable) which is best for representing full datasets,
    or "eager" (acts as a list), which is best for representing individual mini-batches (and sometimes test/dev datasets).
    Almost all operations are available for both modes, but some of them are more efficient depending on the mode
    (e.g. indexing an "eager" manifest is O(1)).

    .. note::
        :class:`~lhotse.cut.CutSet` is the basic building block of PyTorch-style Datasets for speech/audio processing tasks.

    When coming from Kaldi, there is really no good equivalent -- the closest concept may be Kaldi's "egs" for training
    neural networks, which are chunks of feature matrices and corresponding alignments used respectively as inputs and
    supervisions. :class:`~lhotse.cut.CutSet` is different because it provides you with all kinds of metadata,
    and you can select just the interesting bits to feed them to your models.

    :class:`~lhotse.cut.CutSet` can be created from any combination of :class:`~lhotse.audio.RecordingSet`,
    :class:`~lhotse.supervision.SupervisionSet`, and :class:`~lhotse.features.base.FeatureSet`
    with :meth:`lhotse.cut.CutSet.from_manifests`::

        >>> from lhotse import CutSet
        >>> cuts = CutSet.from_manifests(recordings=my_recording_set)
        >>> cuts2 = CutSet.from_manifests(features=my_feature_set)
        >>> cuts3 = CutSet.from_manifests(
        ...     recordings=my_recording_set,
        ...     features=my_feature_set,
        ...     supervisions=my_supervision_set,
        ... )

    When creating a :class:`.CutSet` with :meth:`.CutSet.from_manifests`, the resulting cuts will have the same duration
    as the input recordings or features. For long recordings, it is not viable for training.
    We provide several methods to transform the cuts into shorter ones.

    Consider the following scenario::

                          Recording
        |-------------------------------------------|
        "Hey, Matt!"     "Yes?"        "Oh, nothing"
        |----------|     |----|        |-----------|

        .......... CutSet.from_manifests() ..........
                            Cut1
        |-------------------------------------------|

        ............. Example CutSet A ..............
            Cut1          Cut2              Cut3
        |----------|     |----|        |-----------|

        ............. Example CutSet B ..............
                  Cut1                  Cut2
        |---------------------||--------------------|

        ............. Example CutSet C ..............
                     Cut1        Cut2
                    |---|      |------|

    The CutSet's A, B and C can be created like::

        >>> cuts_A = cuts.trim_to_supervisions()
        >>> cuts_B = cuts.cut_into_windows(duration=5.0)
        >>> cuts_C = cuts.trim_to_unsupervised_segments()

    .. note::
        Some operations support parallel execution via an optional ``num_jobs`` parameter.
        By default, all processing is single-threaded.

    .. caution::
        Operations on cut sets are not mutating -- they return modified copies of :class:`.CutSet` objects,
        leaving the original object unmodified (and all of its cuts are also unmodified).

    :class:`~lhotse.cut.CutSet` can be stored and read from JSON, JSONL, etc. and supports optional gzip compression::

        >>> cuts.to_file('cuts.jsonl.gz')
        >>> cuts4 = CutSet.from_file('cuts.jsonl.gz')

    It behaves similarly to a ``dict``::

            >>> 'rec1-1-0' in cuts
            True
            >>> cut = cuts['rec1-1-0']
            >>> for cut in cuts:
            >>>    pass
            >>> len(cuts)
            127

    :class:`~lhotse.cut.CutSet` has some convenience properties and methods to gather information about the dataset::

        >>> ids = list(cuts.ids)
        >>> speaker_id_set = cuts.speakers
        >>> # The following prints a message:
        >>> cuts.describe()
        Cuts count: 547
        Total duration (hours): 326.4
        Speech duration (hours): 79.6 (24.4%)
        ***
        Duration statistics (seconds):
        mean    2148.0
        std      870.9
        min      477.0
        25%     1523.0
        50%     2157.0
        75%     2423.0
        max     5415.0
        dtype: float64


    Manipulation examples::

        >>> longer_than_5s = cuts.filter(lambda c: c.duration > 5)
        >>> first_100 = cuts.subset(first=100)
        >>> split_into_4 = cuts.split(num_splits=4)
        >>> shuffled = cuts.shuffle()
        >>> random_sample = cuts.sample(n_cuts=10)
        >>> new_ids = cuts.modify_ids(lambda c: c.id + '-newid')

    These operations can be composed to implement more complex operations, e.g.
    bucketing by duration:

        >>> buckets = cuts.sort_by_duration().split(num_splits=30)

    Cuts in a :class:`.CutSet` can be detached from parts of their metadata::

        >>> cuts_no_feat = cuts.drop_features()
        >>> cuts_no_rec = cuts.drop_recordings()
        >>> cuts_no_sup = cuts.drop_supervisions()

    Sometimes specific sorting patterns are useful when a small CutSet represents a mini-batch::

        >>> cuts = cuts.sort_by_duration(ascending=False)
        >>> cuts = cuts.sort_like(other_cuts)

    :class:`~lhotse.cut.CutSet` offers some batch processing operations::

        >>> cuts = cuts.pad(num_frames=300)  # or duration=30.0
        >>> cuts = cuts.truncate(max_duration=30.0, offset_type='start')  # truncate from start to 30.0s
        >>> cuts = cuts.mix(other_cuts, snr=[10, 30], mix_prob=0.5)

    :class:`~lhotse.cut.CutSet` supports lazy data augmentation/transformation methods which require adjusting some information
    in the manifest (e.g., ``num_samples`` or ``duration``).
    Note that in the following examples, the audio is untouched -- the operations are stored in the manifest,
    and executed upon reading the audio::

        >>> cuts_sp = cuts.perturb_speed(factor=1.1)
        >>> cuts_vp = cuts.perturb_volume(factor=2.)
        >>> cuts_24k = cuts.resample(24000)
        >>> cuts_rvb = cuts.reverb_rir(rir_recordings)

    .. caution::
        If the :class:`.CutSet` contained :class:`~lhotse.features.base.Features` manifests, they will be
        detached after performing audio augmentations such as :meth:`.CutSet.perturb_speed`,
        :meth:`.CutSet.resample`, :meth:`.CutSet.perturb_volume`, or :meth:`.CutSet.reverb_rir`.

    :class:`~lhotse.cut.CutSet` offers parallel feature extraction capabilities
    (see `meth`:.CutSet.compute_and_store_features: for details),
    and can be used to estimate global mean and variance::

        >>> from lhotse import Fbank
        >>> cuts = CutSet()
        >>> cuts = cuts.compute_and_store_features(
        ...     extractor=Fbank(),
        ...     storage_path='/data/feats',
        ...     num_jobs=4
        ... )
        >>> mvn_stats = cuts.compute_global_feature_stats('/data/features/mvn_stats.pkl', max_cuts=10000)

    See also:

        - :class:`~lhotse.cut.Cut`
    NÚcutsrF   c                 C   s   t |g ƒ| _d S rG   )r@   rO   ©ÚselfrO   rJ   rJ   rK   Ú__init__ý   ó   zCutSet.__init__Úotherc                 C   s   | j |j kS rG   ©rO   ©rQ   rT   rJ   rJ   rK   Ú__eq__   s   zCutSet.__eq__c                 C   ó   | j S )z Alias property for ``self.cuts``rU   ©rQ   rJ   rJ   rK   Údata  s   zCutSet.datac                 C   ó   t  dd„ | jD ƒ¡S )Nc                 s   ó    | ]
}t |tƒr|V  qd S rG   )rH   r    ©Ú.0ÚcutrJ   rJ   rK   Ú	<genexpr>
  ó   € z$CutSet.mixed_cuts.<locals>.<genexpr>©rN   Ú	from_cutsrO   rY   rJ   rJ   rK   Ú
mixed_cuts  ó   zCutSet.mixed_cutsc                 C   r[   )Nc                 s   r\   rG   )rH   r"   r]   rJ   rJ   rK   r`     ra   z%CutSet.simple_cuts.<locals>.<genexpr>rb   rY   rJ   rJ   rK   Úsimple_cuts  re   zCutSet.simple_cutsc                 C   r[   )Nc                 s   r\   rG   )rH   r#   r]   rJ   rJ   rK   r`     ra   z$CutSet.multi_cuts.<locals>.<genexpr>rb   rY   rJ   rJ   rK   Ú
multi_cuts  re   zCutSet.multi_cutsc                 C   s   dd„ | j D ƒS )Nc                 s   ó    | ]}|j V  qd S rG   ©Úid©r^   ÚcrJ   rJ   rK   r`     ó   € zCutSet.ids.<locals>.<genexpr>rU   rY   rJ   rJ   rK   Úids  s   z
CutSet.idsc                 C   s   t dd„ | D ƒƒS )Nc                 s   s"    | ]}|j D ]}|jV  qqd S rG   )ÚsupervisionsÚspeaker)r^   r_   ÚsupervisionrJ   rJ   rK   r`     s   € ÿÿz"CutSet.speakers.<locals>.<genexpr>)Ú	frozensetrY   rJ   rJ   rK   Úspeakers  s   ÿzCutSet.speakersTÚpathsÚshuffle_itersÚseedc                 C   s   t tdd„ | D ƒ||dœŽƒS )aZ  
        Constructor that creates a single CutSet out of many manifest files.
        We will iterate sequentially over each of the files, and by default we
        will randomize the file order every time CutSet is iterated.

        This is intended primarily for large datasets which are split into many small manifests,
        to ensure that the order in which data is seen during training can be properly randomized.

        :param paths: a list of paths to cut manifests.
        :param shuffle_iters: bool, should we shuffle `paths` each time we iterate the returned
            CutSet (enabled by default).
        :param seed: int, random seed controlling the shuffling RNG.
            By default, we'll use Python's global RNG so the order
            will be different on each script execution.
        :return: a lazy CutSet instance.
        c                 s   ó    | ]}t |ƒV  qd S rG   )r0   )r^   ÚprJ   rJ   rK   r`   4  ó   € z$CutSet.from_files.<locals>.<genexpr>)ru   rv   )rN   r/   )rt   ru   rv   rJ   rJ   rK   Ú
from_files  s   ýÿzCutSet.from_filesc                 C   s   t t| ƒƒS )zOLeft for backward compatibility, where it implicitly created an "eager" CutSet.)rN   ÚlistrU   rJ   rJ   rK   rc   :  s   zCutSet.from_cutsFçü©ñÒMbP?Ú
recordingsro   ÚfeaturesÚoutput_pathÚ
random_idsÚ	toleranceÚlazyc                 C   s,   |rt | |||||dS t| |||||dS )a4  
        Create a CutSet from any combination of supervision, feature and recording manifests.
        At least one of ``recordings`` or ``features`` is required.

        The created cuts will be of type :class:`.MonoCut`, even when the recordings have multiple channels.
        The :class:`.MonoCut` boundaries correspond to those found in the ``features``, when available,
        otherwise to those found in the ``recordings``.

        When ``supervisions`` are provided, we'll be searching them for matching recording IDs
        and attaching to created cuts, assuming they are fully within the cut's time span.

        :param recordings: an optional :class:`~lhotse.audio.RecordingSet` manifest.
        :param supervisions: an optional :class:`~lhotse.supervision.SupervisionSet` manifest.
        :param features: an optional :class:`~lhotse.features.base.FeatureSet` manifest.
        :param output_path: an optional path where the :class:`.CutSet` is stored.
        :param random_ids: boolean, should the cut IDs be randomized. By default, use the recording ID
            with a loop index and a channel idx, i.e. "{recording_id}-{idx}-{channel}")
        :param tolerance: float, tolerance for supervision and feature segment boundary comparison.
            By default, it's 1ms. Increasing this value can be helpful when importing Kaldi data
            directories with precomputed features (typically 0.02 - 0.1 should be sufficient).
        :param lazy: boolean, when ``True``, output_path must be provided
        :return: a new :class:`.CutSet` instance.
        )r}   ro   r~   r   r€   r   )Úcreate_cut_set_lazyÚcreate_cut_set_eager)r}   ro   r~   r   r€   r   r‚   rJ   rJ   rK   Úfrom_manifestsA  s"   !ú	úzCutSet.from_manifestsrZ   c                 C   s   t  dd„ | D ƒ¡S )Nc                 s   rw   rG   )Údeserialize_cutr]   rJ   rJ   rK   r`   w  ry   z$CutSet.from_dicts.<locals>.<genexpr>)rN   rc   )rZ   rJ   rJ   rK   Ú
from_dictsu  s   zCutSet.from_dictsÚpathc                 K   s"   ddl m} t|| fi |¤ŽdS )aN  
        Provides the ability to read Lhotse objects from a WebDataset tarball (or a
        collection of them, i.e., shards) sequentially, without reading the full contents
        into memory. It also supports passing a list of paths, or WebDataset-style pipes.

        CutSets stored in this format are potentially much faster to read from due to
        sequential I/O (we observed speedups of 50-100x vs random-read mechanisms).

        Since this mode does not support random access reads, some methods of CutSet
        might not work properly (e.g. ``len()``).

        The behaviour of the underlying ``WebDataset`` instance can be customized by
        providing its kwargs directly to the constructor of this class. For details,
        see :func:`lhotse.dataset.webdataset.mini_webdataset` documentation.

        **Examples**

        Read manifests and data from a single tarball::

            >>> cuts = CutSet.from_webdataset("data/cuts-train.tar")

        Read manifests and data from a multiple tarball shards::

            >>> cuts = CutSet.from_webdataset("data/shard-{000000..004126}.tar")
            >>> # alternatively
            >>> cuts = CutSet.from_webdataset(["data/shard-000000.tar", "data/shard-000001.tar", ...])

        Read manifests and data from shards in cloud storage (here AWS S3 via AWS CLI)::

            >>> cuts = CutSet.from_webdataset("pipe:aws s3 cp data/shard-{000000..004126}.tar -")

        Read manifests and data from shards which are split between PyTorch DistributeDataParallel
        nodes and dataloading workers, with shard-level shuffling enabled::

            >>> cuts = CutSet.from_webdataset(
            ...     "data/shard-{000000..004126}.tar",
            ...     split_by_worker=True,
            ...     split_by_node=True,
            ...     shuffle_shards=True,
            ... )

        r   )ÚLazyWebdatasetIteratorrU   )Úlhotse.dataset.webdatasetr‰   rN   )rˆ   Ú
wds_kwargsr‰   rJ   rJ   rK   Úfrom_webdatasety  s   .zCutSet.from_webdataseté*   ÚfieldsÚin_dirÚsplit_for_dataloadingÚshuffle_shardsÚstateful_shuffleÚ
randomizedÚcut_map_fnsÚslice_lengthc           	      C   s*   ddl m} t|| |||||||ddS )a—  
        Reads cuts and their corresponding data from multiple shards,
        also recognized as the Lhotse Shar format.
        Each shard is numbered and represented as a collection of one text manifest and
        one or more binary tarfiles.
        Each tarfile contains a single type of data, e.g., recordings, features, or custom fields.

        Given an example directory named ``some_dir``, its expected layout is
        ``some_dir/cuts.000000.jsonl.gz``, ``some_dir/recording.000000.tar``,
        ``some_dir/features.000000.tar``, and then the same names but numbered with ``000001``, etc.
        There may also be other files if the cuts have custom data attached to them.

        The main idea behind Lhotse Shar format is to optimize dataloading with sequential reads,
        while keeping the data composition more flexible than e.g. WebDataset tar archives do.
        To achieve this, Lhotse Shar keeps each data type in a separate archive, along a single
        CutSet JSONL manifest.
        This way, the metadata can be investigated without iterating through the binary data.
        The format also allows iteration over a subset of fields, or extension of existing data
        with new fields.

        As you iterate over cuts from ``LazySharIterator``, it keeps a file handle open for the
        JSONL manifest and all of the tar files that correspond to the current shard.
        The tar files are read item by item together, and their binary data is attached to
        the cuts.
        It can be normally accessed using methods such as ``cut.load_audio()``.

        We can simply load a directory created by :class:`~lhotse.shar.writers.shar.SharWriter`.
        Example::

            >>> cuts = LazySharIterator(in_dir="some_dir")
            ... for cut in cuts:
            ...     print("Cut", cut.id, "has duration of", cut.duration)
            ...     audio = cut.load_audio()
            ...     fbank = cut.load_features()

        :class:`.LazySharIterator` can also be initialized from a dict, where the keys
        indicate fields to be read, and the values point to actual shard locations.
        This is useful when only a subset of data is needed, or it is stored in different
        locations. Example::

            >>> cuts = LazySharIterator({
            ...     "cuts": ["some_dir/cuts.000000.jsonl.gz"],
            ...     "recording": ["another_dir/recording.000000.tar"],
            ...     "features": ["yet_another_dir/features.000000.tar"],
            ... })
            ... for cut in cuts:
            ...     print("Cut", cut.id, "has duration of", cut.duration)
            ...     audio = cut.load_audio()
            ...     fbank = cut.load_features()

        We also support providing shell commands as shard sources, inspired by WebDataset.
        The "cuts" field expects a .jsonl stream, while the other fields expect a .tar stream.
        Example::

            >>> cuts = LazySharIterator({
            ...     "cuts": ["pipe:curl https://my.page/cuts.000000.jsonl"]
            ...     "recording": ["pipe:curl https://my.page/recording.000000.tar"],
            ... })
            ... for cut in cuts:
            ...     print("Cut", cut.id, "has duration of", cut.duration)
            ...     audio = cut.load_audio()

        The shell command can also contain pipes, which can be used to e.g. decompressing.
        Example::

            >>> cuts = LazySharIterator({
            ...     "cuts": ["pipe:curl https://my.page/cuts.000000.jsonl.gz | gunzip -c -"],
                    (...)
            ... })

        Finally, we allow specifying URLs or cloud storage URIs for the shard sources.
        We defer to ``smart_open`` library to handle those.
        Example::

            >>> cuts = LazySharIterator({
            ...     "cuts": ["s3://my-bucket/cuts.000000.jsonl.gz"],
            ...     "recording": ["s3://my-bucket/recording.000000.tar"],
            ... })
            ... for cut in cuts:
            ...     print("Cut", cut.id, "has duration of", cut.duration)
            ...     audio = cut.load_audio()

        :param fields: a dict whose keys specify which fields to load,
            and values are lists of shards (either paths or shell commands).
            The field "cuts" pointing to CutSet shards always has to be present.
        :param in_dir: path to a directory created with ``SharWriter`` with
            all the shards in a single place. Can be used instead of ``fields``.
        :param split_for_dataloading: bool, by default ``False`` which does nothing.
            Setting it to ``True`` is intended for PyTorch training with multiple
            dataloader workers and possibly multiple DDP nodes.
            It results in each node+worker combination receiving a unique subset
            of shards from which to read data to avoid data duplication.
            This is mutually exclusive with ``seed='randomized'``.
        :param shuffle_shards: bool, by default ``False``. When ``True``, the shards
            are shuffled (in case of multi-node training, the shuffling is the same
            on each node given the same seed).
        :param seed: When ``shuffle_shards`` is ``True``, we use this number to
            seed the RNG.
            Seed can be set to ``'randomized'`` in which case we expect that the user provided
            :func:`lhotse.dataset.dataloading.worker_init_fn` as DataLoader's ``worker_init_fn``
            argument. It will cause the iterator to shuffle shards differently on each node
            and dataloading worker in PyTorch training. This is mutually exclusive with
            ``split_for_dataloading=True``.
            Seed can be set to ``'trng'`` which, like ``'randomized'``, shuffles the shards
            differently on each iteration, but is not possible to control (and is not reproducible).
            ``trng`` mode is mostly useful when the user has limited control over the training loop
            and may not be able to guarantee internal Shar epoch is being incremented, but needs
            randomness on each iteration (e.g. useful with PyTorch Lightning).
        :param stateful_shuffle: bool, by default ``False``. When ``True``, every
            time this object is fully iterated, it increments an internal epoch counter
            and triggers shard reshuffling with RNG seeded by ``seed`` + ``epoch``.
            Doesn't have any effect when ``shuffle_shards`` is ``False``.
        :param cut_map_fns: optional sequence of callables that accept cuts and return cuts.
            It's expected to have the same length as the number of shards, so each function
            corresponds to a specific shard.
            It can be used to attach shard-specific custom attributes to cuts.
        :param slice_length: optional int, when set enables random slicing of shards that
            may improve sampling randomness for many-dataset-with-many-large-shards setups
            at the cost of efficiency. In this mode, we randomly select K to skip first K examples
            and read only ``slice_length`` examples from each shard, then move to the next one.

        See also: :class:`~lhotse.shar.readers.lazy.LazySharIterator`,
            :meth:`~lhotse.cut.set.CutSet.to_shar`.
        r   )ÚLazySharIterator)rŽ   r   r   r‘   r’   rv   r”   r•   rU   )Úlhotse.sharr–   rN   )	rŽ   r   r   r‘   r’   rv   r”   r•   r–   rJ   rJ   rK   Ú	from_shar«  s    øÿzCutSet.from_sharéè  r   é   Ú
output_dirÚ
shard_sizeÚshard_offsetÚwarn_unused_fieldsÚinclude_cutsÚnum_jobsÚfault_tolerantÚverbosec
                 C   sD  |dkr	t |tƒsJ d|› dƒ‚|dkr#t| ||||||d||	d
S |	r+ttddnd	d
„ }
| j||dd|d}t|ƒL}g }ttƒ}t	|ƒD ]\}}| 
|jt||d|||dd|d›|ddd¡ qH|
t|ƒƒD ]}| ¡ }| ¡ D ]\}}||  |¡ quqkW d  ƒ n1 sŒw   Y  |D ]
}t|| ƒ||< q“t|ƒS )a»  
        Writes cuts and their corresponding data into multiple shards,
        also recognized as the Lhotse Shar format.
        Each shard is numbered and represented as a collection of one text manifest and
        one or more binary tarfiles.
        Each tarfile contains a single type of data, e.g., recordings, features, or custom fields.

        The main idea behind Lhotse Shar format is to optimize dataloading with sequential reads,
        while keeping the data composition more flexible than e.g. WebDataset tar archives do.
        To achieve this, Lhotse Shar keeps each data type in a separate archive, along a single
        CutSet JSONL manifest.
        This way, the metadata can be investigated without iterating through the binary data.
        The format also allows iteration over a subset of fields, or extension of existing data
        with new fields.

        The user has to specify which fields should be saved, and what compression to use for each of them.
        Currently we support ``wav``, ``flac``, and ``mp3`` compression for ``recording`` and custom audio fields,
        and ``lilcom`` or ``numpy`` for ``features`` and custom array fields.

        Example::

            >>> cuts = CutSet(...)  # cuts have 'recording' and 'features'
            >>> output_paths = cuts.to_shar(
            ...     "some_dir", shard_size=100, fields={"recording": "mp3", "features": "lilcom"}
            ... )

        It would create a directory ``some_dir`` with files such as ``some_dir/cuts.000000.jsonl.gz``,
        ``some_dir/recording.000000.tar``, ``some_dir/features.000000.tar``,
        and then the same names but numbered with ``000001``, etc.
        The starting shard offset can be set using ``shard_offset`` parameter. The writer starts from 0 by default.
        The function returns a dict that maps field names to lists of saved shard paths.

        When ``shard_size`` is set to ``None``, we will disable automatic sharding and the
        shard number suffix will be omitted from the file names.

        The option ``warn_unused_fields`` will emit a warning when cuts have some data attached to them
        (e.g., recording, features, or custom arrays) but saving it was not specified via ``fields``.

        The option ``include_cuts`` controls whether we store the cuts alongside ``fields`` (true by default).
        Turning it off is useful when extending existing dataset with new fields/feature types,
        but the original cuts do not require any modification.

        When ``num_jobs`` is greater than 1, we will first split the CutSet into shard CutSets,
        and then export the ``fields`` in parallel using multiple subprocesses. Enabling ``verbose``
        will display a progress bar.

        .. note:: It is recommended not to set ``num_jobs`` too high on systems with slow disks,
            as the export will likely be bottlenecked by I/O speed in these cases.
            Try experimenting with 4-8 jobs first.

        The option ``fault_tolerant`` will skip over audio files that failed to load with a warning.
        By default it is disabled.

        See also: :class:`~lhotse.shar.writers.shar.SharWriter`,
            :meth:`~lhotse.cut.set.CutSet.to_shar`.
        r   z:The number of jobs must be an integer greater than 0 (got ú).rš   N)
rO   r›   rœ   r   rŽ   rž   rŸ   Úshard_suffixr¡   r¢   zShard progress©Údescc                 S   ó   | S rG   rJ   ©ÚxrJ   rJ   rK   Ú<lambda>—  ó    z CutSet.to_shar.<locals>.<lambda>rO   é   ©r›   Ú
chunk_sizeÚprefixÚ
num_digitsÚ	start_idxTÚ.Ú06dF)rO   r›   rœ   r   rŽ   rž   rŸ   r¤   r¡   r¢   Úpreload)rH   ÚintÚ_export_to_shar_singler   r   Ú
split_lazyr   r   r{   Ú	enumerateÚappendÚsubmitr   ÚresultÚitemsÚextendÚsortedÚdict)rQ   r›   rŽ   rœ   r   rž   rŸ   r    r¡   r¢   ÚprogbarÚshardsÚexÚfuturesÚoutput_pathsÚidxÚshardÚfÚpartial_pathsÚkÚvrJ   rJ   rK   Úto_sharA  sp   
Dÿ
þöû

ôÿÿþìzCutSet.to_sharc                 C   s   dd„ | D ƒS )Nc                 s   ó    | ]}|  ¡ V  qd S rG   )Úto_dictr]   rJ   rJ   rK   r`   ¼  ry   z"CutSet.to_dicts.<locals>.<genexpr>rJ   rY   rJ   rJ   rK   Úto_dicts»  ó   zCutSet.to_dictsc              
      sb  |durt |ƒ}|jddd tƒ ‰tƒ ‰t |dur|d nd¡|‰t |dur-|d nd¡_‰t |dur;|d nd¡B‰ dtf‡ ‡‡‡‡fdd	„}|rTt| d
dn| D ]"}t	|tƒrb||ƒ qVt	|t
ƒrx|jD ]}t	|jtƒrw||jƒ qjqVW d  ƒ n1 sƒw   Y  W d  ƒ n1 s’w   Y  W d  ƒ n1 s¡w   Y  ˆ ¡ ˆ ¡ ˆ  ¡ fS )a  
        Return a 3-tuple of unique (recordings, supervisions, features) found in
        this :class:`CutSet`. Some manifest sets may also be ``None``, e.g.,
        if features were not extracted.

        .. note:: :class:`.MixedCut` is iterated over its track cuts.

        :param output_dir: directory where the manifests will be saved.
            The following files will be created: 'recordings.jsonl.gz',
            'supervisions.jsonl.gz', 'features.jsonl.gz'.
        :param verbose: when ``True``, shows a progress bar.
        NT©ÚparentsÚexist_okzrecordings.jsonl.gzzsupervisions.jsonl.gzzfeatures.jsonl.gzr_   c                    sr   | j r| jˆvrˆ | j¡ ˆ | j¡ | jrˆ  | j¡ | jD ]}|jˆvr6ˆ | 	| j
¡¡ ˆ |j¡ q d S rG   )Úhas_recordingÚrecording_idÚwriteÚ	recordingÚaddÚhas_featuresr~   ro   rj   Úwith_offsetÚstart)r_   Úsup©ÚfwÚrwÚstored_ridsÚstored_sidsÚswrJ   rK   ÚsaveÜ  s   

€ûzCutSet.decompose.<locals>.savezDecomposing cutsr¥   )r
   ÚmkdirÚsetr   Úopen_writerr6   r'   r   r   rH   r    Útracksr_   Úopen_manifest)rQ   r›   r¢   râ   r_   ÚtrackrJ   rÜ   rK   Ú	decompose¾  sB   ÿÿþÿü




€úé€ € zCutSet.decomposeÚfullc                 C   s(   ddl m} ||d}| | ¡ ¡  dS )u#"  
        Print a message describing details about the ``CutSet`` - the number of cuts and the
        duration statistics, including the total duration and the percentage of speech segments.

        :param full: when ``True``, prints the full duration statistics, including % of speech
            by speaker count.

        Example output (for AMI train set):

        >>> cs.describe(full=True)

            Cut statistics:
            â•’â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•¤â•â•â•â•â•â•â•â•â•â•â••
            â”‚ Cuts count:               â”‚ 133      â”‚
            â”œâ”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”¼â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”¤
            â”‚ Total duration (hh:mm:ss) â”‚ 79:23:03 â”‚
            â”œâ”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”¼â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”¤
            â”‚ mean                      â”‚ 2148.7   â”‚
            â”œâ”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”¼â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”¤
            â”‚ std                       â”‚ 867.4    â”‚
            â”œâ”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”¼â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”¤
            â”‚ min                       â”‚ 477.9    â”‚
            â”œâ”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”¼â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”¤
            â”‚ 25%                       â”‚ 1509.8   â”‚
            â”œâ”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”¼â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”¤
            â”‚ 50%                       â”‚ 2181.7   â”‚
            â”œâ”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”¼â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”¤
            â”‚ 75%                       â”‚ 2439.9   â”‚
            â”œâ”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”¼â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”¤
            â”‚ 99%                       â”‚ 5300.7   â”‚
            â”œâ”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”¼â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”¤
            â”‚ 99.5%                     â”‚ 5355.3   â”‚
            â”œâ”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”¼â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”¤
            â”‚ 99.9%                     â”‚ 5403.2   â”‚
            â”œâ”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”¼â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”¤
            â”‚ max                       â”‚ 5415.2   â”‚
            â”œâ”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”¼â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”¤
            â”‚ Recordings available:     â”‚ 133      â”‚
            â”œâ”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”¼â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”¤
            â”‚ Features available:       â”‚ 0        â”‚
            â”œâ”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”¼â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”¤
            â”‚ Supervisions available:   â”‚ 102222   â”‚
            â•˜â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•§â•â•â•â•â•â•â•â•â•â•â•›
            Speech duration statistics:
            â•’â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•¤â•â•â•â•â•â•â•â•â•â•â•¤â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â••
            â”‚ Total speech duration        â”‚ 64:59:51 â”‚ 81.88% of recording       â”‚
            â”œâ”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”¼â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”¼â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”¤
            â”‚ Total speaking time duration â”‚ 74:33:09 â”‚ 93.91% of recording       â”‚
            â”œâ”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”¼â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”¼â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”¤
            â”‚ Total silence duration       â”‚ 14:23:12 â”‚ 18.12% of recording       â”‚
            â”œâ”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”¼â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”¼â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”¤
            â”‚ Single-speaker duration      â”‚ 56:18:24 â”‚ 70.93% (86.63% of speech) â”‚
            â”œâ”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”¼â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”¼â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”¤
            â”‚ Overlapped speech duration   â”‚ 08:41:28 â”‚ 10.95% (13.37% of speech) â”‚
            â•˜â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•§â•â•â•â•â•â•â•â•â•â•â•§â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•›
            Speech duration statistics by number of speakers:
            â•’â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•¤â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•¤â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•¤â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•¤â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â••
            â”‚ Number of speakers   â”‚ Duration (hh:mm:ss)   â”‚ Speaking time (hh:mm:ss)   â”‚ % of speech   â”‚ % of speaking time   â”‚
            â•žâ•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•ªâ•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•ªâ•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•ªâ•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•ªâ•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•¡
            â”‚ 1                    â”‚ 56:18:24              â”‚ 56:18:24                   â”‚ 86.63%        â”‚ 75.53%               â”‚
            â”œâ”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”¼â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”¼â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”¼â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”¼â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”¤
            â”‚ 2                    â”‚ 07:51:44              â”‚ 15:43:28                   â”‚ 12.10%        â”‚ 21.09%               â”‚
            â”œâ”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”¼â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”¼â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”¼â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”¼â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”¤
            â”‚ 3                    â”‚ 00:47:36              â”‚ 02:22:47                   â”‚ 1.22%         â”‚ 3.19%                â”‚
            â”œâ”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”¼â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”¼â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”¼â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”¼â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”¤
            â”‚ 4                    â”‚ 00:02:08              â”‚ 00:08:31                   â”‚ 0.05%         â”‚ 0.19%                â”‚
            â”œâ”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”¼â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”¼â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”¼â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”¼â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”¤
            â”‚ Total                â”‚ 64:59:51              â”‚ 74:33:09                   â”‚ 100.00%       â”‚ 100.00%              â”‚
            â•˜â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•§â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•§â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•§â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•§â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•›
        r   )ÚCutSetStatistics)rê   N)Úlhotse.cut.describerë   Ú
accumulateÚdescribe)rQ   rê   rë   ÚstatsrJ   rJ   rK   rî   õ  s   G
zCutSet.describeÚ
num_splitsÚshuffleÚ	drop_lastc                 C   s   dd„ t | |||dD ƒS )aN  
        Split the :class:`~lhotse.CutSet` into ``num_splits`` pieces of equal size.

        :param num_splits: Requested number of splits.
        :param shuffle: Optionally shuffle the recordings order first.
        :param drop_last: determines how to handle splitting when ``len(seq)`` is not divisible
            by ``num_splits``. When ``False`` (default), the splits might have unequal lengths.
            When ``True``, it may discard the last element in some splits to ensure they are
            equally long.
        :return: A list of :class:`~lhotse.CutSet` pieces.
        c                 S   s   g | ]}t |ƒ‘qS rJ   )rN   )r^   ÚsubsetrJ   rJ   rK   Ú
<listcomp>R  s    ÿÿz CutSet.split.<locals>.<listcomp>)rð   rñ   rò   )rB   )rQ   rð   rñ   rò   rJ   rJ   rK   ÚsplitA  s   üþzCutSet.splitÚ é   r®   r¯   r°   r±   c                 C   s   t | |||||dS )aû  
        Splits a manifest (either lazily or eagerly opened) into chunks, each
        with ``chunk_size`` items (except for the last one, typically).

        In order to be memory efficient, this implementation saves each chunk
        to disk in a ``.jsonl.gz`` format as the input manifest is sampled.

        .. note:: For lowest memory usage, use ``load_manifest_lazy`` to open the
            input manifest for this method.

        :param it: any iterable of Lhotse manifests.
        :param output_dir: directory where the split manifests are saved.
            Each manifest is saved at: ``{output_dir}/{prefix}.{split_idx}.jsonl.gz``
        :param chunk_size: the number of items in each chunk.
        :param prefix: the prefix of each manifest.
        :param num_digits: the width of ``split_idx``, which will be left padded with zeros to achieve it.
        :param start_idx: The split index to start counting from (default is ``0``).
        :return: a list of lazily opened chunk manifests.
        r­   )rA   )rQ   r›   r®   r¯   r°   r±   rJ   rJ   rK   r·   \  s   úzCutSet.split_lazy)Úsupervision_idsÚcut_idsÚfirstÚlastrø   rù   rú   rû   c             
      s"  t ˆ|||ƒsJ dƒ‚|dur|dksJ ‚t t| |ƒ¡}|S |dur>|dks)J ‚t| ƒ}||kr3| S t t| || |ƒ¡S ˆdurRtˆƒ‰t ‡fdd„| D ƒ¡S |durt|ƒ}t|ƒ‰ t‡ fdd„| D ƒƒ}t|ƒt|ƒk rŠt 	dt|ƒ› d	t|ƒ› d
t|ƒt|ƒ › d¡ | 
|¡S dS )aÝ  
        Return a new ``CutSet`` according to the selected subset criterion.
        Only a single argument to ``subset`` is supported at this time.

        Example:
            >>> cuts = CutSet.from_yaml('path/to/cuts')
            >>> train_set = cuts.subset(supervision_ids=train_ids)
            >>> test_set = cuts.subset(supervision_ids=test_ids)

        :param supervision_ids: List of supervision IDs to keep.
        :param cut_ids: List of cut IDs to keep.
            The returned :class:`.CutSet` preserves the order of `cut_ids`.
        :param first: int, the number of first cuts to keep.
        :param last: int, the number of last cuts to keep.
        :return: a new ``CutSet`` with the subset results.
        z*subset() can handle only one non-None arg.Nr   c                 3   s:    | ]}t ‡ fd d„|jD ƒƒr| ‡ fdd„¡V  qdS )c                 3   s    | ]}|j ˆ v V  qd S rG   ri   ©r^   Ús©rø   rJ   rK   r`   ®  ó   € z*CutSet.subset.<locals>.<genexpr>.<genexpr>c                    s
   | j ˆ v S rG   ri   ©rý   rþ   rJ   rK   rª   ¬  s   
 z)CutSet.subset.<locals>.<genexpr>.<lambda>N)Úanyro   Úfilter_supervisionsr]   rþ   rJ   rK   r`   «  s   € ý
ÿz CutSet.subset.<locals>.<genexpr>c                    s   g | ]	}|j ˆ v r|‘qS rJ   ri   r]   )Úid_setrJ   rK   rô   µ  s    z!CutSet.subset.<locals>.<listcomp>z(In CutSet.subset(cut_ids=...): expected z cuts but got z
 instead (z! cut IDs were not in the CutSet).)r>   rN   rc   r	   Úlenrä   r{   rr   ÚloggingÚwarningÚ	sort_like)rQ   rø   rù   rú   rû   ÚoutÚNrO   rJ   )r  rø   rK   ró   €  s@   ÿþþÿÿ
õzCutSet.subsetÚtransform_fnÚapply_fnc                 C   s&   t t| j||dƒ}| jr|S | ¡ S )N)Úfnr  )rN   r1   rZ   Úis_lazyÚto_eager)rQ   r
  r  ÚansrJ   rJ   rK   Úmap¾  s   z
CutSet.mapÚ	predicatec                 C   ó   |   tt|d¡S )aÝ  
        Return a new CutSet with Cuts containing only `SupervisionSegments` satisfying `predicate`

        Cuts without supervisions are preserved

        Example:
            >>> cuts = CutSet.from_yaml('path/to/cuts')
            >>> at_least_five_second_supervisions = cuts.filter_supervisions(lambda s: s.duration >= 5)

        :param predicate: A callable that accepts `SupervisionSegment` and returns bool
        :return: a CutSet with filtered supervisions
        )r  )r  r   Ú_filter_supervisions)rQ   r  rJ   rJ   rK   r  È  s   zCutSet.filter_supervisionsÚ	delimiterÚmerge_policyÚcustom_merge_fnc                 C   ó   |   tt||d¡S )a0  
        Return a copy of the cut that has all of its supervisions merged into
        a single segment.

        The new start is the start of the earliest superivion, and the new duration
        is a minimum spanning duration for all the supervisions. The text fields of
        all segments are concatenated with a whitespace.

        :param merge_policy: one of "keep_first" or "delimiter". If "keep_first", we
            keep only the first segment's field value, otherwise all string fields
            (including IDs) are prefixed with "cat#" and concatenated with a hash symbol "#".
            This is also applied to ``custom`` fields. Fields with a ``None`` value are omitted.
        :param custom_merge_fn: a function that will be called to merge custom fields values.
            We expect ``custom_merge_fn`` to handle all possible custom keys.
            When not provided, we will treat all custom values as strings.
            It will be called roughly like:
            ``custom_merge_fn(custom_key, [s.custom[custom_key] for s in sups])``
        ©r  r  )r  r   Ú_merge_supervisions)rQ   r  r  rJ   rJ   rK   Úmerge_supervisionsÙ  s   ýÿzCutSet.merge_supervisionsÚcenterÚkeep_overlappingÚmin_durationÚcontext_direction)r  ÚleftÚrightÚrandomÚkeep_all_channelsc                 C   sN   |dkrt tt| tt||||dƒƒƒS ddlm} ||| t||||d}|S )a	  
        Return a new CutSet with Cuts that have identical spans as their supervisions.

        For example, the following cut::

                    Cut
            |-----------------|
             Sup1
            |----|  Sup2
               |-----------|

        is transformed into two cuts::

             Cut1
            |----|
             Sup1
            |----|
               Sup2
               |-|
                    Cut2
               |-----------|
               Sup1
               |-|
                    Sup2
               |-----------|

        For the case of a multi-channel cut with multiple supervisions, we can either trim
        while respecting the supervision channels (in which case output cut has the same channels
        as the supervision) or ignore the channels (in which case output cut has the same channels
        as the input cut).

        :param keep_overlapping: when ``False``, it will discard parts of other supervisions that overlap with the
            main supervision. In the illustration above, it would discard ``Sup2`` in ``Cut1`` and ``Sup1`` in ``Cut2``.
            In this mode, we guarantee that there will always be exactly one supervision per cut.
        :param min_duration: An optional duration in seconds; specifying this argument will extend the cuts
            that would have been shorter than ``min_duration`` with actual acoustic context in the recording/features.
            If there are supervisions present in the context, they are kept when ``keep_overlapping`` is true.
            If there is not enough context, the returned cut will be shorter than ``min_duration``.
            If the supervision segment is longer than ``min_duration``, the return cut will be longer.
        :param context_direction: Which direction should the cut be expanded towards to include context.
            The value of "center" implies equal expansion to left and right;
            random uniformly samples a value between "left" and "right".
        :param keep_all_channels: If ``True``, the output cut will have the same channels as the input cut. By default,
            the trimmed cut will have the same channels as the supervision.
        :param num_jobs: Number of parallel workers to process the cuts.
        :return: a ``CutSet``.
        rš   ©r  r  r  r"  r   ©Úsplit_parallelize_combine)rN   r.   r1   r   Ú_trim_to_supervisions_singleÚlhotse.manipulationr%  )rQ   r  r  r  r"  r    r%  r»   rJ   rJ   rK   Útrim_to_supervisionsø  s4   8ûþÿÿù	zCutSet.trim_to_supervisionsç        ú ÚtypeÚ	max_pauseÚmax_segment_durationc           	      C   sR   |dkrt tt| tt|||||dƒƒƒS ddlm} ||| t|||||d}|S )a=  
        Return a new CutSet with Cuts that have identical spans as the alignments of
        type `type`. An additional `max_pause` is allowed between the alignments to
        merge contiguous alignment items.

        For the case of a multi-channel cut with multiple alignments, we can either trim
        while respecting the supervision channels (in which case output cut has the same channels
        as the supervision) or ignore the channels (in which case output cut has the same channels
        as the input cut).

        :param type: The type of the alignment to trim to (e.g. "word").
        :param max_pause: The maximum pause allowed between the alignments to merge them.
        :param delimiter: The delimiter to use when concatenating the alignment items.
        :param keep_all_channels: If ``True``, the output cut will have the same channels as the input cut. By default,
            the trimmed cut will have the same channels as the supervision.
        :param num_jobs: Number of parallel workers to process the cuts.
        :return: a ``CutSet``.
        rš   ©r+  r,  r-  r  r"  r   r$  )rN   r.   r1   r   Ú_trim_to_alignments_singler'  r%  )	rQ   r+  r,  r-  r  r"  r    r%  r»   rJ   rJ   rK   Útrim_to_alignmentsM  s8   úþÿÿø
zCutSet.trim_to_alignmentsc                 C   sR   ddl m} g }| D ]}||ddd}|D ]}| |j|j|jd¡ qq
t|ƒS )z¢
        Return a new CutSet with Cuts created from segments that have no supervisions (likely
        silence or noise).

        :return: a ``CutSet``.
        r   )Ú find_segments_with_speaker_count)Úmin_speakersÚmax_speakers©ÚoffsetÚduration)rì   r1  r¹   ÚtruncaterÚ   r6  rN   )rQ   r1  rO   r_   ÚsegmentsÚspanrJ   rJ   rK   Útrim_to_unsupervised_segmentsˆ  s   ÿÿz$CutSet.trim_to_unsupervised_segmentsc                 C   sB   |dkrt tt| tt|dƒƒƒS ddlm} ||| t|d}|S )u1  
        Return a new CutSet with Cuts based on supervision groups. A supervision group is
        a set of supervisions with no gaps between them (or gaps shorter than ``max_pause``).
        This is similar to the concept of an `utterance group` as described in this paper:
        https://arxiv.org/abs/2211.00482

        For example, the following cut::

                                                Cut
        â•”â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•—
        â•‘â”Œâ”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”                              â”Œâ”€â”€â”€â”€â”€â”€â”€â”€â”                 â•‘
        â•‘â”‚ Hello this is John.  â”‚                              â”‚   Hi   â”‚                 â•‘
        â•‘â””â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”˜                              â””â”€â”€â”€â”€â”€â”€â”€â”€â”˜                 â•‘
        â•‘            â”Œâ”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”            â”Œâ”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”â•‘
        â•‘            â”‚     Hey, John. How are you?      â”‚            â”‚  What do you do?  â”‚â•‘
        â•‘            â””â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”˜            â””â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”˜â•‘
        â•šâ•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•

        is transformed into two cuts::

                            Cut 1                                       Cut 2
        â•”â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•—    â•”â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•—
        â•‘â”Œâ”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”                        â•‘    â•‘â”Œâ”€â”€â”€â”€â”€â”€â”€â”€â”                 â•‘
        â•‘â”‚ Hello this is John.  â”‚                        â•‘    â•‘â”‚   Hi   â”‚                 â•‘
        â•‘â””â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”˜                        â•‘    â•‘â””â”€â”€â”€â”€â”€â”€â”€â”€â”˜                 â•‘
        â•‘            â”Œâ”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”â•‘    â•‘      â”Œâ”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”â•‘
        â•‘            â”‚     Hey, John. How are you?      â”‚â•‘    â•‘      â”‚  What do you do?  â”‚â•‘
        â•‘            â””â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”˜â•‘    â•‘      â””â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”˜â•‘
        â•šâ•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•    â•šâ•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•

        For the case of a multi-channel cut with multiple supervisions, we keep all the channels
        in the recording.

        :param max_pause: An optional duration in seconds; if the gap between two supervisions
            is longer than this, they will be treated as separate groups.
        :param num_jobs: Number of parallel workers to process the cuts.
        :return: a ``CutSet``.
        rš   ©r,  r   r$  )rN   r.   r1   r   Ú"_trim_to_supervision_groups_singler'  r%  )rQ   r,  r    r%  r»   rJ   rJ   rK   Útrim_to_supervision_groupsš  s(   ,þþÿÿüz!CutSet.trim_to_supervision_groupsc                 C   sF   | j s| jr
tdƒ‚ddlm} |dd„ | ƒ}t dd„ | ¡ D ƒ¡S )af  
        Find cuts that come from the same recording and have matching start and end times, but
        represent different channels. Then, combine them together to form MultiCut's and return
        a new ``CutSet`` containing these MultiCut's. This is useful for processing microphone array
        recordings.

        It is intended to be used as the first operation after creating a new ``CutSet`` (but
        might also work in other circumstances, e.g. if it was cut to windows first).

        Example:
            >>> ami = prepare_ami('path/to/ami')
            >>> cut_set = CutSet.from_manifests(recordings=ami['train']['recordings'])
            >>> multi_channel_cut_set = cut_set.combine_same_recording_channels()

        In the AMI example, the ``multi_channel_cut_set`` will yield MultiCuts that hold all single-channel
        Cuts together.
        zQThis operation is not applicable to CutSet's containing MixedCut's or MultiCut's.r   )Úgroupbyc                 S   s   | j j| j| jfS rG   )rÖ   rj   rÚ   Úend©r_   rJ   rJ   rK   rª   õ  ó    z8CutSet.combine_same_recording_channels.<locals>.<lambda>c                 s   s    | ]}t j|Ž V  qd S rG   )r#   Ú	from_mono)r^   rO   rJ   rJ   rK   r`   ö  rÿ   z9CutSet.combine_same_recording_channels.<locals>.<genexpr>)rd   rg   Ú
ValueErrorÚcytoolz.itertoolzr>  rN   rc   Úvalues)rQ   r>  ÚgroupsrJ   rJ   rK   Úcombine_same_recording_channelsÝ  s   ÿz&CutSet.combine_same_recording_channelsÚ	ascendingc                 C   ó   t t| dd„ | dƒS )a  
        Sort the CutSet alphabetically according to 'recording_id'. Ascending by default.

        This is advantageous before caling `save_audios()` on a `trim_to_supervision()`
        processed `CutSet`, also make sure that `set_caching_enabled(True)` was called.
        c                 S   s   | j jS rG   )rÖ   rj   r@  rJ   rJ   rK   rª      s    z-CutSet.sort_by_recording_id.<locals>.<lambda>©ÚkeyÚreverse©rN   r¾   ©rQ   rH  rJ   rJ   rK   Úsort_by_recording_idø  s   ÿzCutSet.sort_by_recording_idc                 C   rI  )zj
        Sort the CutSet according to cuts duration and return the result. Descending by default.
        c                 S   rX   rG   ©r6  r@  rJ   rJ   rK   rª     s    z)CutSet.sort_by_duration.<locals>.<lambda>rJ  rM  rN  rJ   rJ   rK   Úsort_by_duration  s   ÿzCutSet.sort_by_durationc                 C   sr   t t|tƒr	|jn|ƒ}t| jƒt|ƒksJ dƒ‚dd„ t|ƒD ƒ}dgt|ƒ }| D ]	}||||j < q+t|ƒS )zg
        Sort the CutSet according to the order of cut IDs in ``other`` and return the result.
        z<sort_like() expects both CutSet's to have identical cut IDs.c                 S   s   i | ]\}}||“qS rJ   rJ   )r^   ÚindexrÊ   rJ   rJ   rK   Ú
<dictcomp>  s    z$CutSet.sort_like.<locals>.<dictcomp>N)r{   rH   rN   rn   rä   r¸   r  rj   )rQ   rT   Ú	other_idsÚ	index_mapr  r_   rJ   rJ   rK   r    s   
ÿþzCutSet.sort_likeÚindex_mixed_tracksÚkeep_idsc                 C   s&   i }| D ]}|  |j||d¡ q|S )a«  
        Create a two-level index of supervision segments. It is a mapping from a Cut's ID to an
        interval tree that contains the supervisions of that Cut.

        The interval tree can be efficiently queried for overlapping and/or enveloping segments.
        It helps speed up some operations on Cuts of very long recordings (1h+) that contain many
        supervisions.

        :param index_mixed_tracks: Should the tracks of MixedCut's be indexed as additional, separate entries.
        :param keep_ids: If specified, we will only index the supervisions with the specified IDs.
        :return: a mapping from Cut ID to an interval tree of SupervisionSegments.
        )rV  rW  )ÚupdateÚindex_supervisions)rQ   rV  rW  Úindexedr_   rJ   rJ   rK   rY    s   ÿÿzCutSet.index_supervisionsr   r6  Ú
num_framesÚnum_samplesÚpad_feat_valueÚ	directionÚpreserve_idÚpad_value_dictc                 C   s”   t dd„ |||fD ƒƒr;t dd„ | D ƒƒrtdd„ | D ƒƒ}nt dd„ | D ƒƒr2tdd„ | D ƒƒ}n	tdd„ | D ƒƒ}|  tt|||||||d¡S )	a€  
        Return a new CutSet with Cuts padded to ``duration``, ``num_frames`` or ``num_samples``.
        Cuts longer than the specified argument will not be affected.
        By default, cuts will be padded to the right (i.e. after the signal).

        When none of ``duration``, ``num_frames``, or ``num_samples`` is specified,
        we'll try to determine the best way to pad to the longest cut based on
        whether features or recordings are available.

        :param duration: The cuts minimal duration after padding.
            When not specified, we'll choose the duration of the longest cut in the CutSet.
        :param num_frames: The cut's total number of frames after padding.
        :param num_samples: The cut's total number of samples after padding.
        :param pad_feat_value: A float value that's used for padding the features.
            By default we assume a log-energy floor of approx. -23 (1e-10 after exp).
        :param direction: string, 'left', 'right' or 'both'. Determines whether the padding is added
            before or after the cut.
        :param preserve_id: When ``True``, preserves the cut ID from before padding.
            Otherwise, generates a new random ID (default).
        :param pad_value_dict: Optional dict that specifies what value should be used
            for padding arrays in custom attributes.
        :return: A padded CutSet.
        c                 s   s    | ]}|d u V  qd S rG   rJ   )r^   ÚargrJ   rJ   rK   r`   V  ry   zCutSet.pad.<locals>.<genexpr>c                 s   rh   rG   ©rØ   rk   rJ   rJ   rK   r`   W  rm   c                 s   rh   rG   ©r[  rk   rJ   rJ   rK   r`   X  rm   c                 s   rh   rG   )rÓ   rk   rJ   rJ   rK   r`   Y  rm   c                 s   rh   rG   )r\  rk   rJ   rJ   rK   r`   Z  rm   c                 s   rh   rG   rP  r]   rJ   rJ   rK   r`   \  rm   )r6  r[  r\  r]  r^  r_  r`  )ÚallÚmaxr  r   Ú_pad)rQ   r6  r[  r\  r]  r^  r_  r`  rJ   rJ   rK   Úpad1  s$   %øÿz
CutSet.padÚmax_durationÚoffset_typeÚkeep_excessive_supervisionsÚrngc              
   C   s2   |dv sJ d|› dƒ‚|   tt|||||d¡S )aÇ  
        Return a new CutSet with the Cuts truncated so that their durations are at most `max_duration`.
        Cuts shorter than `max_duration` will not be changed.
        :param max_duration: float, the maximum duration in seconds of a cut in the resulting manifest.
        :param offset_type: str, can be:
        - 'start' => cuts are truncated from their start;
        - 'end' => cuts are truncated from their end minus max_duration;
        - 'random' => cuts are truncated randomly between their start and their end minus max_duration
        :param keep_excessive_supervisions: bool. When a cut is truncated in the middle of a supervision segment,
        should the supervision be kept.
        :param preserve_id: bool. Should the truncated cut keep the same ID or get a new, random one.
        :param rng: optional random number generator to be used with a 'random' ``offset_type``.
        :return: a new CutSet instance with truncated cuts.
        )rÚ   r?  r!  zUnknown offset type: 'ú')rh  ri  rj  r_  rk  )r  r   Ú_truncate_single)rQ   rh  ri  rj  r_  rk  rJ   rJ   rK   r7  k  s   

üúÿzCutSet.truncateÚbothÚpad_silencec              	   C   s   |   tt||||d¡S )a±  
        Returns a new CutSet with cuts extended by `duration` amount.

        :param duration: float (seconds), specifies the duration by which the CutSet is extended.
        :param direction: string, 'left', 'right' or 'both'. Determines whether to extend on the left,
            right, or both sides. If 'both', extend on both sides by the same duration (equal to `duration`).
        :param preserve_id: bool. Should the extended cut keep the same ID or get a new, random one.
        :param pad_silence: bool. If True, the extended part of the cut will be padded with silence if required
            to match the specified duration.
        :return: a new CutSet instance.
        )r6  r^  r_  ro  )r  r   Ú
_extend_by)rQ   r6  r^  r_  ro  rJ   rJ   rK   Ú	extend_by‘  s   ûÿzCutSet.extend_byÚhopc              
   C   sR   |s|}|dkrt tt| tt|||dƒƒƒS ddlm} ||| t|||d}|S )a8  
        Return a new ``CutSet``, made by traversing each ``DataCut`` in windows of ``duration`` seconds by ``hop`` seconds and
        creating new ``DataCut`` out of them.

        The last window might have a shorter duration if there was not enough audio, so you might want to
        use either ``.filter()`` or ``.pad()`` afterwards to obtain a uniform duration ``CutSet``.

        :param duration: Desired duration of the new cuts in seconds.
        :param hop: Shift between the windows in the new cuts in seconds.
        :param keep_excessive_supervisions: bool. When a cut is truncated in the middle of a supervision segment,
            should the supervision be kept.
        :param num_jobs: The number of parallel workers.
        :return: a new CutSet with cuts made from shorter duration windows.
        rš   ©r6  rr  rj  r   r$  )rN   r.   r1   r   Ú_cut_into_windows_singler'  r%  )rQ   r6  rr  rj  r    r%  r»   rJ   rJ   rK   Úcut_into_windows­  s4   üþÿÿúzCutSet.cut_into_windowsé   ÚcollateÚlimitc                 C   sn   | j rJ dƒ‚t| ƒ|k sJ dt| ƒ› d|› dƒ‚|r0ddlm} || ƒ\}}| ¡ | ¡ fS dd„ | D ƒS )	aY  
        Reads the audio of all cuts in this :class:`.CutSet` into memory.
        Useful when this object represents a mini-batch.

        :param collate: Should we collate the read audio into a single array.
            Shorter cuts will be padded. False by default.
        :param limit: Maximum number of read audio examples.
            By default it's 1024 which covers most frequently encountered mini-batch sizes.
            If you are working with larger batch sizes, increase this limit.
        :return: A list of numpy arrays, or a single array with batch size as the first dim.
        z+Cannot load audio of cuts in a lazy CutSet.z'Cannot load audio of a CutSet with len=z because limit was set to zs. This is a safe-guard against accidental CPU memory blow-ups. If you know what you're doing, set the limit higher.r   )Úcollate_audioc                 S   s   g | ]}|  ¡ ‘qS rJ   )Ú
load_audior]   rJ   rJ   rK   rô   û  ó    z%CutSet.load_audio.<locals>.<listcomp>)r  r  Úlhotse.dataset.collationry  Únumpy)rQ   rw  rx  ry  ÚaudiosÚ
audio_lensrJ   rJ   rK   rz  ß  s   ÿzCutSet.load_audioÚn_cutsc                    sT   |dksJ ‚t  ttˆ ƒƒt|tˆ ƒƒ¡}‡ fdd„|D ƒ}|dkr&|d S t|ƒS )z²
        Randomly sample this ``CutSet`` and return ``n_cuts`` cuts.
        When ``n_cuts`` is 1, will return a single cut instance; otherwise will return a ``CutSet``.
        r   c                    s   g | ]}ˆ | ‘qS rJ   rJ   )r^   rÅ   rY   rJ   rK   rô     r{  z!CutSet.sample.<locals>.<listcomp>rš   )r!  ÚsampleÚranger  ÚminrN   )rQ   r€  Úcut_indicesrO   rJ   rY   rK   r  ý  s   zCutSet.sampleÚsampling_rateÚaffix_idÚrecording_fieldc                 C   ó   |   tt|||d¡S )a=  
        Return a new :class:`~lhotse.cut.CutSet` that contains cuts resampled to the new
        ``sampling_rate``. All cuts in the manifest must contain recording information.
        If the feature manifests are attached, they are dropped.

        :param sampling_rate: The new sampling rate.
        :param affix_id: Should we modify the ID (useful if both versions of the same
            cut are going to be present in a single manifest).
        :param recording_field: which recording field to resample.
        :return: a modified copy of the ``CutSet``.
        )r…  r†  r‡  )r  r   Ú	_resample)rQ   r…  r†  r‡  rJ   rJ   rK   Úresample	  s   üÿzCutSet.resampleÚfactorc                 C   r  )a‹  
        Return a new :class:`~lhotse.cut.CutSet` that contains speed perturbed cuts
        with a factor of ``factor``. It requires the recording manifests to be present.
        If the feature manifests are attached, they are dropped.
        The supervision manifests are modified to reflect the speed perturbed
        start times and durations.

        :param factor: The resulting playback speed is ``factor`` times the original one.
        :param affix_id: Should we modify the ID (useful if both versions of the same
            cut are going to be present in a single manifest).
        :return: a modified copy of the ``CutSet``.
        ©r‹  r†  )r  r   Ú_perturb_speed©rQ   r‹  r†  rJ   rJ   rK   Úperturb_speed#  s   zCutSet.perturb_speedc                 C   r  )aÓ  
        Return a new :class:`~lhotse.cut.CutSet` that contains tempo perturbed cuts
        with a factor of ``factor``.

        Compared to speed perturbation, tempo preserves pitch.
        It requires the recording manifests to be present.
        If the feature manifests are attached, they are dropped.
        The supervision manifests are modified to reflect the tempo perturbed
        start times and durations.

        :param factor: The resulting playback tempo is ``factor`` times the original one.
        :param affix_id: Should we modify the ID (useful if both versions of the same
            cut are going to be present in a single manifest).
        :return: a modified copy of the ``CutSet``.
        rŒ  )r  r   Ú_perturb_temporŽ  rJ   rJ   rK   Úperturb_tempo2  s   zCutSet.perturb_tempoc                 C   r  )aV  
        Return a new :class:`~lhotse.cut.CutSet` that contains volume perturbed cuts
        with a factor of ``factor``. It requires the recording manifests to be present.
        If the feature manifests are attached, they are dropped.
        The supervision manifests are remaining the same.

        :param factor: The resulting playback volume is ``factor`` times the original one.
        :param affix_id: Should we modify the ID (useful if both versions of the same
            cut are going to be present in a single manifest).
        :return: a modified copy of the ``CutSet``.
        rŒ  )r  r   Ú_perturb_volumerŽ  rJ   rJ   rK   Úperturb_volumeD  s   zCutSet.perturb_volumeÚcodecÚrestore_orig_src                    s   |   ‡ ‡‡fdd„¡S )aB  
        Return a new :class:`~lhotse.cut.CutSet` that contains narrowband effect cuts.
        It requires the recording manifests to be present.
        If the feature manifests are attached, they are dropped.
        The supervision manifests are remaining the same.

        :param codec: Codec name.
        :param restore_orig_sr: Restore original sampling rate.
        :param affix_id: Should we modify the ID (useful if both versions of the same
            cut are going to be present in a single manifest).
        :return: a modified copy of the ``CutSet``.
        c                    s   | j ˆˆˆ dS )N)r”  r•  r†  )Ú
narrowbandr@  ©r†  r”  r•  rJ   rK   rª   b  s    ÿz#CutSet.narrowband.<locals>.<lambda>)r  )rQ   r”  r•  r†  rJ   r—  rK   r–  R  s   ÿzCutSet.narrowbandÚtargetÚ	mix_firstc                 C   rˆ  )a†  
        Return a new :class:`~lhotse.cut.CutSet` that will lazily apply loudness normalization
        to the desired ``target`` loudness (in dBFS).

        :param target: The target loudness in dBFS.
        :param affix_id: When true, we will modify the ``Cut.id`` field
            by affixing it with "_ln{target}".
        :return: a modified copy of the current ``CutSet``.
        )r˜  r™  r†  )r  r   Ú_normalize_loudness)rQ   r˜  r™  r†  rJ   rJ   rK   Únormalize_loudnessg  s   üÿzCutSet.normalize_loudnessc                 C   r  )a  
        Return a new :class:`~lhotse.cut.CutSet` that will lazily apply WPE dereverberation.

        :param affix_id: When true, we will modify the ``Cut.id`` field
            by affixing it with "_wpe".
        :return: a modified copy of the current ``CutSet``.
        )r†  )r  r   Ú_dereverb_wpe)rQ   r†  rJ   rJ   rK   Údereverb_wpe|  s   zCutSet.dereverb_wpeÚrir_recordingsr   Únormalize_outputÚ
early_onlyÚrir_channelsc              
   C   s8   |rt |ƒnd}|  tt|rt |¡nd||||d¡S )aö  
        Return a new :class:`~lhotse.cut.CutSet` that contains original cuts convolved with
        randomly chosen impulse responses from `rir_recordings`. It requires the recording manifests to be present.
        If the feature manifests are attached, they are dropped.
        The supervision manifests remain the same.

        If no ``rir_recordings`` are provided, we will generate a set of impulse responses using a fast random
        generator (https://arxiv.org/abs/2208.04101).

        :param rir_recordings: RecordingSet containing the room impulse responses.
        :param normalize_output: When true, output will be normalized to have energy as input.
        :param early_only: When true, only the early reflections (first 50 ms) will be used.
        :param affix_id: Should we modify the ID (useful if both versions of the same
            cut are going to be present in a single manifest).
        :param rir_channels: The channels of the impulse response to use. By default, first channel will be used.
            If it is a multi-channel RIR, applying RIR will produce MixedCut. If no RIR is
            provided, we will generate one with as many channels as this argument specifies.
        :return: a modified copy of the ``CutSet``.
        N)Úrir_recordingrŸ  r   r†  r¡  )r{   r  r   Ú_reverb_rirr!  Úchoice)rQ   rž  rŸ  r   r†  r¡  rJ   rJ   rK   Ú
reverb_rir†  s   úÿzCutSet.reverb_riré   ç      ð?Úallow_paddingÚsnrÚmix_prob©Útrngr“   Úrandom_mix_offsetc	           	      C   s   t t| ||||||||d	ƒS )a	  
        Mix cuts in this ``CutSet`` with randomly sampled cuts from another ``CutSet``.
        A typical application would be data augmentation with noise, music, babble, etc.

        :param cuts: a ``CutSet`` containing cuts to be mixed into this ``CutSet``.
        :param duration: an optional float in seconds.
            When ``None``, we will preserve the duration of the cuts in ``self``
            (i.e. we'll truncate the mix if it exceeded the original duration).
            Otherwise, we will keep sampling cuts to mix in until we reach the specified
            ``duration`` (and truncate to that value, should it be exceeded).
        :param allow_padding: an optional bool.
            When it is ``True``, we will allow the offset to be larger than the reference
            cut by padding the reference cut.
        :param snr: an optional float, or pair (range) of floats, in decibels.
            When it's a single float, we will mix all cuts with this SNR level
            (where cuts in ``self`` are treated as signals, and cuts in ``cuts`` are treated as noise).
            When it's a pair of floats, we will uniformly sample SNR values from that range.
            When ``None``, we will mix the cuts without any level adjustment
            (could be too noisy for data augmentation).
        :param preserve_id: optional string ("left", "right"). when specified, append will preserve the cut id
            of the left- or right-hand side argument. otherwise, a new random id is generated.
        :param mix_prob: an optional float in range [0, 1].
            Specifies the probability of performing a mix.
            Values lower than 1.0 mean that some cuts in the output will be unchanged.
        :param seed: an optional int or "trng". Random seed for choosing the cuts to mix and the SNR.
            If "trng" is provided, we'll use the ``secrets`` module for non-deterministic results
            on each iteration. You can also directly pass a ``random.Random`` instance here.
        :param random_mix_offset: an optional bool.
            When ``True`` and the duration of the to be mixed in cut in longer than the original cut,
             select a random sub-region from the to be mixed in cut.
        :return: a new ``CutSet`` with mixed cuts.
        )	rO   Úmix_in_cutsr6  r¨  r©  r_  rª  rv   r­  )rN   ÚLazyCutMixer)	rQ   rO   r6  r¨  r©  r_  rª  rv   r­  rJ   rJ   rK   Úmix­  s   +÷ÿz
CutSet.mixc                 C   ó
   |   t¡S )z}
        Return a new :class:`.CutSet`, where each :class:`.Cut` is copied and detached from its extracted features.
        )r  Ú_drop_featuresrY   rJ   rJ   rK   Údrop_featuresæ  ó   
zCutSet.drop_featuresc                 C   r±  )zu
        Return a new :class:`.CutSet`, where each :class:`.Cut` is copied and detached from its recordings.
        )r  Ú_drop_recordingsrY   rJ   rJ   rK   Údrop_recordingsì  r´  zCutSet.drop_recordingsc                 C   r±  )zw
        Return a new :class:`.CutSet`, where each :class:`.Cut` is copied and detached from its supervisions.
        )r  Ú_drop_supervisionsrY   rJ   rJ   rK   Údrop_supervisionsò  r´  zCutSet.drop_supervisionsc                 C   r±  )z‘
        Return a new :class:`.CutSet`, where each :class:`.Cut` is copied and detached from the alignments present in its supervisions.
        )r  Ú_drop_alignmentsrY   rJ   rJ   rK   Údrop_alignmentsø  r´  zCutSet.drop_alignmentsc                 C   r±  )a(  
        Return a new :class:`.CutSet`, where each :class:`.Cut` is copied and detached from any in-memory data it held.
        The manifests for in-memory data are converted into placeholders that can still be looked up for
        metadata, but will fail on attempts to load the data.
        )r  Ú_drop_in_memory_datarY   rJ   rJ   rK   Údrop_in_memory_dataþ  s   
zCutSet.drop_in_memory_dataÚ	extractorÚstorage_pathÚ
augment_fnÚstorage_typeÚexecutorÚmix_eagerlyÚprogress_barc	              	      s°  ddl m}	 dd„ }
ˆdu rd‰ˆdkrˆdurt d¡ d‰ˆdkr.t ¡ dkr.t d¡ ˆdu rkˆdkrk|rAttd	tˆƒd
}
ˆˆƒ‰t	 
dd„ |
‡ ‡‡‡fdd„ˆD ƒƒD ƒ¡W  d  ƒ S 1 sfw   Y  dtˆƒv r}dtdtf‡fdd„‰	ntˆƒ‰ˆjddd dtdtf‡fdd„‰	‡‡fdd„tˆƒD ƒ}ˆdu r°ddl}tˆ| d¡d‰‡ ‡‡‡‡‡	fdd„t|ƒD ƒ}|rËttdt|ƒd
}
|	|
dd„ |D ƒƒƒ}|S )aŸ  
        Extract features for all cuts, possibly in parallel,
        and store them using the specified storage object.

        Examples:

            Extract fbank features on one machine using 8 processes,
            store arrays partitioned in 8 archive files with lilcom compression:

            >>> cuts = CutSet(...)
            ... cuts.compute_and_store_features(
            ...     extractor=Fbank(),
            ...     storage_path='feats',
            ...     num_jobs=8,
            ... )

            Extract fbank features on one machine using 8 processes,
            store each array in a separate file with lilcom compression:

            >>> cuts = CutSet(...)
            ... cuts.compute_and_store_features(
            ...     extractor=Fbank(),
            ...     storage_path='feats',
            ...     num_jobs=8,
            ...     storage_type=LilcomFilesWriter
            ... )

            Extract fbank features on multiple machines using a Dask cluster
            with 80 jobs,
            store arrays partitioned in 80 archive files with lilcom compression:

            >>> from distributed import Client
            ... cuts = CutSet(...)
            ... cuts.compute_and_store_features(
            ...     extractor=Fbank(),
            ...     storage_path='feats',
            ...     num_jobs=80,
            ...     executor=Client(...)
            ... )

            Extract fbank features on one machine using 8 processes,
            store each array in an S3 bucket (requires ``smart_open``):

            >>> cuts = CutSet(...)
            ... cuts.compute_and_store_features(
            ...     extractor=Fbank(),
            ...     storage_path='s3://my-feature-bucket/my-corpus-features',
            ...     num_jobs=8,
            ...     storage_type=LilcomURLWriter
            ... )

        :param extractor: A ``FeatureExtractor`` instance
            (either Lhotse's built-in or a custom implementation).
        :param storage_path: The path to location where we will store the features.
            The exact type and layout of stored files will be dictated by the
            ``storage_type`` argument.
        :param num_jobs: The number of parallel processes used to extract the features.
            We will internally split the CutSet into this many chunks
            and process each chunk in parallel.
        :param augment_fn: an optional callable used for audio augmentation.
            Be careful with the types of augmentations used: if they modify
            the start/end/duration times of the cut and its supervisions,
            you will end up with incorrect supervision information when using this API.
            E.g. for speed perturbation, use ``CutSet.perturb_speed()`` instead.
        :param storage_type: a ``FeaturesWriter`` subclass type.
            It determines how the features are stored to disk,
            e.g. separate file per array, HDF5 files with multiple arrays, etc.
        :param executor: when provided, will be used to parallelize the feature extraction process.
            By default, we will instantiate a ProcessPoolExecutor.
            Learn more about the ``Executor`` API at
            https://lhotse.readthedocs.io/en/latest/parallelism.html
        :param mix_eagerly: Related to how the features are extracted for ``MixedCut``
            instances, if any are present.
            When False, extract and store the features for each track separately,
            and mix them dynamically when loading the features.
            When True, mix the audio first and store the mixed features,
            returning a new ``DataCut`` instance with the same ID.
            The returned ``DataCut`` will not have a ``Recording`` attached.
        :param progress_bar: Should a progress bar be displayed (automatically turned off
            for parallel computation).
        :return: Returns a new ``CutSet`` with ``Features`` manifests attached to the cuts.
        r   ©Úcombinec                 S   r§   rG   rJ   r¨   rJ   rJ   rK   rª   g  r«   z3CutSet.compute_and_store_features.<locals>.<lambda>Nrš   úoExecutor argument was passed but num_jobs set to 1: we will ignore the executor and use non-parallel execution.zÂnum_jobs is > 1 and torch's number of threads is > 1 as well: For certain configs this can result in a never ending computation. If this happens, use torch.set_num_threads(1) to circumvent this.zExtracting and storing features©r¦   Útotalc                 s   s    | ]	}|d ur|V  qd S rG   rJ   )r^   Ú	maybe_cutrJ   rJ   rK   r`     s   € ôóz4CutSet.compute_and_store_features.<locals>.<genexpr>c                 3   s&    | ]}t |jƒˆˆˆ ˆd V  qdS ))r½  Ústorager¿  rÂ  N)r   Úcompute_and_store_featuresr]   )r¿  r½  rÂ  rÊ  rJ   rK   r`   ƒ  s   € 	øÿú
ÿz://rÅ   rF   c                    s   ˆ › d| › S )Nz/feats-rJ   ©rÅ   ©r¾  rJ   rK   Úsub_storage_path”  rÏ   z;CutSet.compute_and_store_features.<locals>.sub_storage_pathTrÐ   c                    s   ˆ d| ›  S )Nzfeats-rJ   rÌ  rÍ  rJ   rK   rÎ    rÏ   c                    s   g | ]}t tˆ|ˆ d ƒ‘qS ))rÉ   Ún)rN   r2   )r^   Úi)r    rQ   rJ   rK   rô   £  ó    z5CutSet.compute_and_store_features.<locals>.<listcomp>Úspawn)Ú
mp_contextc                    s0   g | ]\}}ˆj tj|ˆˆ|ƒˆ ˆˆd d‘qS )F)r½  r¾  r¿  rÀ  rÂ  rÃ  )rº   rN   rË  ©r^   rÐ  Úcs)r¿  rÁ  r½  rÂ  rÀ  rÎ  rJ   rK   rô   ¯  s    õ÷ÿz1Extracting and storing features (chunks progress)c                 s   rÌ   rG   ©r»   ©r^   rÇ   rJ   rJ   rK   r`   Å  ry   )r'  rÅ  r  r  ÚtorchÚget_num_threadsr   r   r  rN   rc   Ústrrµ   r
   rã   r‚  Úmultiprocessingr   Úget_contextr¸   )rQ   r½  r¾  r    r¿  rÀ  rÁ  rÂ  rÃ  rÅ  ÚprogressÚcut_setsrÛ  rÃ   Úcuts_with_featsrJ   )
r¿  rÁ  r½  rÂ  r    rQ   rÊ  r¾  rÀ  rÎ  rK   rË    s`   ]ÿÿÿ
ÿ

	÷þ ÿ
ÿôýz!CutSet.compute_and_store_featuresg     À‚@é   Úmanifest_pathÚbatch_durationÚnum_workersÚ	overwritec
                    sh  ddl m}
 ddlm} ddlm}m} ddlm‰ ˆj	‰t
j||	d‰|| |d}| ‡fdd	„¡ ||d
}||d||d}dtt dttj ddf‡‡‡‡‡fdd„}g }ˆÍ |||	radndd¥‰td|jdŽ}|
ddy}| tˆjƒ¡ |D ]f}|d ‰|d }|rŽ|d nd}tˆƒdkr—q~t‡fdd„ˆD ƒƒs¤J ‚ˆ dur´‡ fdd„tˆ|ƒD ƒ}t ¡  ˆj|ˆd j|d}W d  ƒ n1 sÎw   Y  | | |ˆ|¡¡ | tˆƒ¡ q~W d  ƒ n1 sïw   Y  W d  ƒ n1 sþw   Y  W d  ƒ n1 sw   Y  W d  ƒ ˆ ¡ S W d  ƒ ˆ ¡ S 1 s+w   Y  ˆ ¡ S ) a   
        Extract features for all cuts in batches.
        This method is intended for use with compatible feature extractors that
        implement an accelerated :meth:`~lhotse.FeatureExtractor.extract_batch` method.
        For example, ``kaldifeat`` extractors can be used this way (see, e.g.,
        :class:`~lhotse.KaldifeatFbank` or :class:`~lhotse.KaldifeatMfcc`).

        When a CUDA GPU is available and enabled for the feature extractor, this can
        be much faster than :meth:`.CutSet.compute_and_store_features`.
        Otherwise, the speed will be comparable to single-threaded extraction.

        Example: extract fbank features on one GPU, using 4 dataloading workers
        for reading audio, and store the arrays in an archive file with
        lilcom compression::

            >>> from lhotse import KaldifeatFbank, KaldifeatFbankConfig
            >>> extractor = KaldifeatFbank(KaldifeatFbankConfig(device='cuda'))
            >>> cuts = CutSet(...)
            ... cuts = cuts.compute_and_store_features_batch(
            ...     extractor=extractor,
            ...     storage_path='feats',
            ...     batch_duration=500,
            ...     num_workers=4,
            ... )

        :param extractor: A :class:`~lhotse.features.base.FeatureExtractor` instance,
            which should implement an accelerated ``extract_batch`` method.
        :param storage_path: The path to location where we will store the features.
            The exact type and layout of stored files will be dictated by the
            ``storage_type`` argument.
        :param manifest_path: Optional path where to write the CutSet manifest
            with attached feature manifests. If not specified, we will be keeping
            all manifests in memory.
        :param batch_duration: The maximum number of audio seconds in a batch.
            Determines batch size dynamically.
        :param num_workers: How many background dataloading workers should be used
            for reading the audio.
        :param collate: If ``True``, the waveforms will be collated into a single
            padded tensor before being passed to the feature extractor. Some extractors
            can be faster this way (for e.g., see ``lhotse.features.kaldi.extractors``).
            If you are using ``kaldifeat`` extractors, you should set this to ``False``.
        :param augment_fn: an optional callable used for audio augmentation.
            Be careful with the types of augmentations used: if they modify
            the start/end/duration times of the cut and its supervisions,
            you will end up with incorrect supervision information when using this API.
            E.g. for speed perturbation, use ``CutSet.perturb_speed()`` instead.
        :param storage_type: a ``FeaturesWriter`` subclass type.
            It determines how the features are stored to disk,
            e.g. separate file per array, HDF5 files with multiple arrays, etc.
        :param overwrite: should we overwrite the manifest, HDF5 files, etc.
            By default, this method will append to these files if they exist.
        :return: Returns a new ``CutSet`` with ``Features`` manifests attached to the cuts.
        r   )ÚThreadPoolExecutor©Ú
DataLoader)ÚSimpleCutSamplerÚUnsupervisedWaveformDataset)Úvalidate_features)rä  )rh  c                    s   | j ˆ jvS rG   )rj   Ú
ignore_idsr@  )Úcuts_writerrJ   rK   rª     ó    z9CutSet.compute_and_store_features_batch.<locals>.<lambda>)rw  N)Ú
batch_sizeÚsamplerrã  rO   r~   rF   c                    s  t | |ƒD ]†\‰ }tˆ tƒr ˆ tˆ |jd |jd ˆd¡ qt|tjƒr,| ¡  	¡ }ˆ ˆ j
|¡}tˆ jˆ jˆj|jd |jd ˆˆ jˆ jˆjtˆjƒ|d}ˆ||d tˆ tƒrfˆ j|_tˆ |d‰ tˆ tƒr„ˆ j
|_tˆ j
dˆ jd‡ fdd„ˆ jD ƒ|d d	‰ ˆjˆ d
d qd S )Nr   rš   )r[  Únum_featuresÚframe_shift)rÚ   r6  r+  r[  rð  rñ  r…  ÚchannelsrÀ  r¾  Ústorage_key)Ú
feats_data)r~   c                    s   g | ]
}t |ˆ jd d‘qS )r   )rÔ   Úchannel)r?   rj   rü   r@  rJ   rK   rô   S  s    ÿÿzQCutSet.compute_and_store_features_batch.<locals>._save_worker.<locals>.<listcomp>)rj   rÚ   r6  rõ  ro   r~   rÖ   T)Úflush)ÚziprH   r$   rÕ   r?   ÚshaperØ  ÚTensorÚcpur}  rj   r&   rÚ   r6  Únamer…  rõ  rÚ  r¾  r   rÔ   r    r"   ro   )rO   r~   Úfeat_matró  Úfeat_manifest)rì  r½  Úfeats_writerrñ  rê  r@  rK   Ú_save_worker"  s\   
üÿõ


þõÉz=CutSet.compute_and_store_features_batch.<locals>._save_workerÚwÚa)ÚmodezComputing features in batchesrÇ  rš   )Úmax_workersÚaudior  c                 3   s     | ]}|j ˆ d  j kV  qdS )r   N©r…  rk   rU   rJ   rK   r`   o  ó   € z:CutSet.compute_and_store_features_batch.<locals>.<genexpr>c                    s   g | ]
\}}ˆ ||j ƒ‘qS rJ   r  )r^   rl   r   )r¿  rJ   rK   rô   s  s    ÿz;CutSet.compute_and_store_features_batch.<locals>.<listcomp>)r…  Úlengths)Úconcurrent.futuresrå  Útorch.utils.datarç  Úlhotse.datasetrè  ré  Ú	lhotse.qarê  rñ  rN   rå   Úfilterr   r   ÚnpÚndarrayr   Únum_cutsrX  r  rë  rd  r÷  rØ  Úno_gradÚextract_batchr…  r¹   rº   rç   )rQ   r½  r¾  rá  râ  rã  rw  r¿  rÀ  rä  rå  rç  rè  ré  rï  ÚdatasetÚdloaderrÿ  rÃ   rÝ  rÁ  ÚbatchÚwavesÚ	wave_lensr~   rJ   )r¿  rO   rì  r½  rþ  rñ  rê  rK   Ú compute_and_store_features_batchÈ  sp   A
ÿ,:ÿÿþÿü
ÿ
ÿýæ÷€ €* 'Ù'Ù'z'CutSet.compute_and_store_features_batchÚwavÚformatÚencodingÚshuffle_on_splitc	                    s$  ddl m}
 ddlm} |
}|du rd}|dkr#ˆdur#t d¡ d‰dtdtd	tf‡fd
d„‰ˆdu rS|dkrS|r@t	t
dd}t|‡ ‡‡‡‡fdd„| D ƒƒƒ ¡ S | j||d}ˆdu rkddl}t|| d¡d‰‡ ‡‡‡‡fdd„t|ƒD ƒ}|r…t	t
dt|ƒd}||dd„ |D ƒƒƒ}|S )a  
        Store waveforms of all cuts as audio recordings to disk.

        :param storage_path: The path to location where we will store the audio recordings.
            For each cut, a sub-directory will be created that starts with the first 3
            characters of the cut's ID. The audio recording is then stored in the sub-directory
            using filename ``{cut.id}.{format}``
        :param format: Audio format argument supported by ``torchaudio.save`` or ``soundfile.write``.
            Tested values are: ``wav``, ``flac``, and ``opus``.
        :param encoding: Audio encoding argument supported by ``torchaudio.save`` or ``soundfile.write``.
            Please refer to the documentation of the relevant library used in your audio backend.
        :param num_jobs: The number of parallel processes used to store the audio recordings.
            We will internally split the CutSet into this many chunks
            and process each chunk in parallel.
        :param augment_fn: an optional callable used for audio augmentation.
            Be careful with the types of augmentations used: if they modify
            the start/end/duration times of the cut and its supervisions,
            you will end up with incorrect supervision information when using this API.
            E.g. for speed perturbation, use ``CutSet.perturb_speed()`` instead.
        :param executor: when provided, will be used to parallelize the process.
            By default, we will instantiate a ProcessPoolExecutor.
            Learn more about the ``Executor`` API at
            https://lhotse.readthedocs.io/en/latest/parallelism.html
        :param progress_bar: Should a progress bar be displayed (automatically turned off
            for parallel computation).
        :param shuffle_on_split: Shuffle the ``CutSet`` before splitting it for the parallel workers.
            It is active only when `num_jobs > 1`. The default is True.
        :param kwargs: Deprecated arguments go here and are ignored.
        :return: Returns a new ``CutSet``.
        r   )ÚidentityrÄ  Nrš   rÆ  r_   r¾  rF   c                    s6   t |ƒ| jd d…  }|jddd || jd ˆ   S )Né   T©rÒ   rÑ   r²   )r
   rj   rã   )r_   r¾  Úsubdir)r  rJ   rK   Úfile_storage_pathÁ  s   z-CutSet.save_audios.<locals>.file_storage_pathzStoring audio recordingsr¥   c                 3   s(    | ]}|j ˆ|ˆƒˆˆˆ d V  qdS ))r¾  r  r  r¿  N)Ú
save_audior]   )r¿  r  r   r  r¾  rJ   rK   r`   Î  s   € úü
ÿz%CutSet.save_audios.<locals>.<genexpr>)rñ   rÒ  )r  rÓ  c                    s*   g | ]\}}ˆj tj|ˆˆˆˆ d d‘qS )F)r¾  r  r  r¿  rÃ  )rº   rN   Úsave_audiosrÔ  )r¿  r  rÁ  r  r¾  rJ   rK   rô   é  s    öøÿz&CutSet.save_audios.<locals>.<listcomp>z*Storing audio recordings (chunks progress)rÇ  c                 s   rÌ   rG   rÖ  r×  rJ   rJ   rK   r`   þ  ry   )Úcytoolzr  r'  rÅ  r  r  r   r:   r
   r   r   rN   r  rõ   rÛ  r   rÜ  r¸   r  )rQ   r¾  r  r  r    rÁ  r¿  rÃ  r  Úkwargsr  rÅ  rÝ  rÞ  rÛ  rÃ   rO   rJ   )r¿  r  rÁ  r   r  r¾  rK   r"  †  sP   *ÿÿ	ùÿ
öþõýzCutSet.save_audiosÚmax_cutsc                 C   s$  |durY| }|durt ||ƒ}t|ƒ}t|ƒ}t| |j¡d}t|g|ƒD ]}| |¡}| |¡ q&| 	¡ }	|durWt
|dƒ}
t |	|
¡ W d  ƒ |	S 1 sRw   Y  |	S dd„ | D ƒ}t|ƒshtdƒ‚t|ƒs|t dt|ƒ› dt|ƒ› d	¡ tt d
d„ | D ƒ|durŠ|nt| ƒƒ|dS )aÊ  
        Compute the global means and standard deviations for each feature bin in the manifest.
        It follows the implementation in scikit-learn:
        https://github.com/scikit-learn/scikit-learn/blob/0fb307bf39bbdacd6ed713c00724f8f871d60370/sklearn/utils/extmath.py#L715
        which follows the paper:
        "Algorithms for computing the sample variance: analysis and recommendations", by Chan, Golub, and LeVeque.

        :param storage_path: an optional path to a file where the stats will be stored with pickle.
        :param max_cuts: optionally, limit the number of cuts used for stats estimation. The cuts will be
            selected randomly in that case.
        :param extractor: optional FeatureExtractor, when provided, we ignore any pre-computed features.

        :return a dict of ``{'norm_means': np.ndarray, 'norm_stds': np.ndarray}`` with the
            shape of the arrays equal to the number of feature bins in this manifest.
        N)Úfeature_dimÚwbc                 S   s   g | ]}|j ‘qS rJ   rb  r]   rJ   rJ   rK   rô   (	  rA  z7CutSet.compute_global_feature_stats.<locals>.<listcomp>zKCould not find any features in this CutSet; did you forget to extract them?zComputing global stats: only ú/z cuts have features.c                 s   s    | ]	}|j r|jV  qd S rG   )rØ   r~   r]   rJ   rJ   rK   r`   4	  ó   € z6CutSet.compute_global_feature_stats.<locals>.<genexpr>)Úfeature_manifestsr¾  )r	   ÚiterÚnextr(   r&  r…  r   Úcompute_featuresrX  ÚgetÚopenÚpickleÚdumpr  rC  rd  r  r  Úsumr  r)   )rQ   r¾  r%  r½  rO   rú   rï   r_   ÚarrÚmvnrÇ   Úhave_featuresrJ   rJ   rK   Úcompute_global_feature_stats	  sH   

ÿ

ÿþÿÿþúz#CutSet.compute_global_feature_statsc                 C   r  ©N)rˆ   )r  r   Ú _add_features_path_prefix_single©rQ   rˆ   rJ   rJ   rK   Úwith_features_path_prefix:	  ó   z CutSet.with_features_path_prefixc                 C   r  r7  )r  r   Ú!_add_recording_path_prefix_singler9  rJ   rJ   rK   Úwith_recording_path_prefix=	  r;  z!CutSet.with_recording_path_prefixc              	      s’  ddl m‰ m‰ ddlm‰ t|ƒ}|d ‰ˆjddd |d }|d ‰ˆjddd i ‰|r6ttd	d
ndd„ }t	 
|d ¡q}t|ƒ]‰‡ ‡‡‡‡‡‡fdd„}|| ƒD ]D}t|tƒrd| |¡ qWt|tƒr„t|ƒ}|jD ]}	t|	jtƒr}||	jƒ qp| |¡ qWt|tƒr“||ƒ}| |¡ qWtdt|ƒ› ƒ‚W d  ƒ n1 s¦w   Y  W d  ƒ n1 sµw   Y  ˆ ¡ D ]}
|
 ¡  q¾| ¡ S )uÉ  
        Copies every data item referenced by this CutSet into a new directory.
        The structure is as follows:

        - output_dir
        â”œâ”€â”€ audio
        |   â”œâ”€â”€ rec1.flac
        |   â””â”€â”€ ...
        â”œâ”€â”€ custom
        |   â”œâ”€â”€ field1
        |   |   â”œâ”€â”€ arr1-1.npy
        |   |   â””â”€â”€ ...
        |   â””â”€â”€ field2
        |       â”œâ”€â”€ arr2-1.npy
        |       â””â”€â”€ ...
        â”œâ”€â”€ features.lca
        â””â”€â”€ cuts.jsonl.gz

        :param output_dir: The root directory where we'll store the copied data.
        :param verbose: Show progress bar, enabled by default.
        :return: CutSet manifest pointing to the new data.
        r   )ÚArrayÚTemporalArray)ÚNumpyHdf5Writerr  Tr  zfeatures.lcaÚcustomzCopying CutSet datar¥   c                 S   r§   rG   rJ   r¨   rJ   rJ   rK   rª   c	  r«   z"CutSet.copy_data.<locals>.<lambda>zcuts.jsonl.gzc                    s®   t | ƒ} | jr| jjˆd| _| jr|  ˆ| j  d¡¡} | jd urU| j 	¡ D ]-\}}t
|ˆ ˆfƒrT|ˆvrGˆ| }|jddd ˆ|ƒˆ|< ˆ| }| | j| ¡ ¡ q'| S )N©Úwriterz.flacTr  )r?   rØ   r~   Ú
copy_featsrÓ   r!  rÔ   Úwith_suffixrA  r¼   rH   rã   rÕ   rj   Úload)r_   rÉ   rÊ   rx   Úcust_writer©r>  r@  r?  Ú	audio_dirÚ
custom_dirÚcustom_writersÚfeature_writerrJ   rK   Ú_copy_singlei	  s$   ÿ
€z&CutSet.copy_data.<locals>._copy_singlezUnexpected manifest type: N)Úlhotse.arrayr>  r?  Úlhotse.features.ior@  r
   rã   r   r   rN   rå   r+   rH   r$   rÕ   r    r?   ræ   r_   r   ÚRuntimeErrorr+  rE  Úcloserç   )rQ   r›   r¢   Úfeature_filerÀ   Úmanifest_writerrM  ÚitemÚcpyÚtr   rJ   rH  rK   Ú	copy_data@	  sN   ÿþ




€
ïé€ *
zCutSet.copy_datarC  c                 C   sà   t  |¡_}| D ]Q}|jrt|tƒr| |¡ qt|tƒr=t|ƒ}|jD ]}t|j	t
ƒr6|j	jj|d|j	_q$| |¡ qt|t
ƒrTt|ƒ}|jj|d|_| |¡ q| |¡ qW d  ƒ | ¡ S 1 sgw   Y  | ¡ S )a  
        Save a copy of every feature matrix found in this CutSet using ``writer``
        and return a new manifest with cuts referring to the new feature locations.

        :param writer: a :class:`lhotse.features.io.FeaturesWriter` instance.
        :param output_path: optional path where the new manifest should be stored.
            It's used to write the manifest incrementally and return a lazy manifest,
            otherwise the copy is stored in memory.
        :return: a copy of the manifest.
        rB  N)rN   rå   rØ   rH   r$   rÕ   r    r?   ræ   r_   r   r~   rD  rç   )rQ   rC  r   rS  rT  rU  rV  rJ   rJ   rK   rD  ”	  s,   


€
î
ÿëzCutSet.copy_featsc                 C   r  )a£  
        Modify the IDs of cuts in this ``CutSet``.
        Useful when combining multiple ``CutSet``s that were created from a single source,
        but contain features with different data augmentations techniques.

        :param transform_fn: A callable (function) that accepts a string (cut ID) and returns
        a new string (new cut ID).
        :return: a new ``CutSet`` with cuts with modified IDs.
        ©r
  )r  r   Ú_with_id©rQ   r
  rJ   rJ   rK   Ú
modify_ids¸	  s   
zCutSet.modify_idsÚ	add_emptyÚ	shrink_okc                 C   r  )a  
        Fills the whole duration of each cut in a :class:`.CutSet` with a supervision segment.

        If the cut has one supervision, its start is set to 0 and duration is set to ``cut.duration``.
        Note: this may either expand a supervision that was shorter than a cut, or shrink a supervision
        that exceeds the cut.

        If there are no supervisions, we will add an empty one when ``add_empty==True``, otherwise
        we won't change anything.

        If there are two or more supervisions, we will raise an exception.

        :param add_empty: should we add an empty supervision with identical time bounds as the cut.
        :param shrink_ok: should we raise an error if a supervision would be shrank as a result
            of calling this method.
        ©r\  r]  )r  r   Ú_fill_supervision)rQ   r\  r]  rJ   rJ   rK   Úfill_supervisionsÄ	  s   ÿzCutSet.fill_supervisionsc                 C   r  )zÐ
        Modify the SupervisionSegments by `transform_fn` in this CutSet.

        :param transform_fn: a function that modifies a supervision as an argument.
        :return: a new, modified CutSet.
        rX  )r  r   Ú_map_supervisionsrZ  rJ   rJ   rK   Úmap_supervisionsÛ	  s   	zCutSet.map_supervisionsc                 C   r  )a=  
        Return a copy of this ``CutSet`` with all ``SupervisionSegments`` text transformed with ``transform_fn``.
        Useful for text normalization, phonetic transcription, etc.

        :param transform_fn: a function that accepts a string and returns a string.
        :return: a new, modified CutSet.
        rX  )rb  r   Ú_transform_textrZ  rJ   rJ   rK   Útransform_textæ	  s   
ÿzCutSet.transform_texté
   Úbuffer_sizec              
   C   sD   ddl m} ddlm}m} t||tƒ || ddddƒdd|dƒS )aˆ  
        Pre-fetches the CutSet elements in a background process.
        Useful for enabling concurrent reading/processing/writing in ETL-style tasks.

        .. caution:: This method internally uses a PyTorch DataLoader with a single worker.
            It is not suitable for use in typical PyTorch training scripts.

        .. caution:: If you run into pickling issues when using this method, you're also likely
            using .filter/.map methods with a lambda function.
            Please set ``lhotse.set_dill_enabled(True)`` to resolve these issues, or convert lambdas
            to regular functions + ``functools.partial``

        r   ræ  )ÚDynamicCutSamplerÚIterableDatasetWrapperrš   )r%  ÚrankÚ
world_sizeN)r  rî  rã  Úprefetch_factor)r	  rç  r
  rg  rh  rN   Ú_BackgroundCutFetcher)rQ   rf  rç  rg  rh  rJ   rJ   rK   Úprefetchò	  s   þùÿzCutSet.prefetchc                 C   s   ddl m} || ƒS )u"  
        Converts a CutSet to a HuggingFace Dataset. Currently, only MonoCut with one recording source is supported.
        Other cut types will be supported in the future.

        Currently, two formats are supported:
            1. If each cut has one supervision (e.g. LibriSpeech), each cut is represented as a single row (entry)
               in the HuggingFace dataset with all the supervision information stored along the cut information.
               The final HuggingFace dataset format is:
                   â•”â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•¦â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•—
                   â•‘      Feature      â•‘            Type               â•‘
                   â• â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•¬â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•£
                   â•‘        id         â•‘ Value(dtype='string')         â•‘
                   â• â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•¬â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•£
                   â•‘      audio        â•‘ Audio()                       â•‘
                   â• â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•¬â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•£
                   â•‘     duration      â•‘ Value(dtype='float32')        â•‘
                   â• â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•¬â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•£
                   â•‘   num_channels    â•‘ Value(dtype='uint16')         â•‘
                   â• â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•¬â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•£
                   â•‘       text        â•‘ Value(dtype='string')         â•‘
                   â• â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•¬â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•£
                   â•‘     speaker       â•‘ Value(dtype='string')         â•‘
                   â• â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•¬â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•£
                   â•‘     language      â•‘ Value(dtype='string')         â•‘
                   â• â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•¬â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•£
                   â•‘   {x}_alignment   â•‘ Sequence(Alignment)           â•‘
                   â•šâ•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•©â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•
               where x stands for the alignment type (commonly used: "word", "phoneme").

               Alignment is represented as:
                   â•”â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•¦â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•—
                   â•‘      Feature      â•‘            Type               â•‘
                   â• â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•¬â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•£
                   â•‘      symbol       â•‘ Value(dtype='string')         â•‘
                   â• â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•¬â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•£
                   â•‘       start       â•‘ Value(dtype='float32')        â•‘
                   â• â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•¬â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•£
                   â•‘        end        â•‘ Value(dtype='float32')        â•‘
                   â•šâ•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•©â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•


            2. If each cut has multiple supervisions (e.g. AMI), each cut is represented as a single row (entry)
               while all the supervisions are stored in a separate list of dictionaries under the 'segments' key.
               The final HuggingFace dataset format is:
                   â•”â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•¦â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•—
                   â•‘   Feature    â•‘                 Type               â•‘
                   â• â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•¬â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•£
                   â•‘      id      â•‘ Value(dtype='string')              â•‘
                   â• â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•¬â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•£
                   â•‘    audio     â•‘ Audio()                            â•‘
                   â• â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•¬â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•£
                   â•‘   duration   â•‘ Value(dtype='float32')             â•‘
                   â• â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•¬â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•£
                   â•‘ num_channels â•‘ Value(dtype='uint16')              â•‘
                   â• â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•¬â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•£
                   â•‘   segments   â•‘ Sequence(Segment)                  â•‘
                   â•šâ•â•â•â•â•â•â•â•â•â•â•â•â•â•â•©â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•
               where one Segment is represented as:
                   â•”â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•¦â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•—
                   â•‘      Feature      â•‘            Type               â•‘
                   â• â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•¬â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•£
                   â•‘        text       â•‘ Value(dtype='string')         â•‘
                   â• â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•¬â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•£
                   â•‘       start       â•‘ Value(dtype='float32')        â•‘
                   â• â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•¬â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•£
                   â•‘        end        â•‘ Value(dtype='float32')        â•‘
                   â• â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•¬â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•£
                   â•‘      channel      â•‘ Value(dtype='string')         â•‘
                   â• â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•¬â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•£
                   â•‘      speaker      â•‘ Value(dtype='string')         â•‘
                   â• â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•¬â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•£
                   â•‘      language     â•‘ Value(dtype='string')         â•‘
                   â• â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•¬â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•£
                   â•‘   {x}_alignment   â•‘ Sequence(Alignment)           â•‘
                   â•šâ•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•©â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•
        :return: A HuggingFace Dataset.
        r   )Úexport_cuts_to_hf)Ú	lhotse.hfrn  )rQ   rn  rJ   rJ   rK   Úto_huggingface_dataset
  s   NzCutSet.to_huggingface_datasetr  ÚsentenceÚlanguageÚgender©Ú	audio_keyÚtext_keyÚlang_keyÚ
gender_keyru  rv  rw  rx  c                 O   s(   ddl m} t||| |||dœ|¤ŽƒS )aH  
        Initializes a Lhotse CutSet from an existing HF dataset,
        or args/kwargs passed on to ``datasets.load_dataset()``.

        Use ``audio_key``, ``text_key``, ``lang_key`` and ``gender_key`` options to indicate which keys in dict examples
        returned from HF Dataset should be looked up for audio, transcript, language, and gender respectively.
        The remaining keys in HF dataset examples will be stored inside ``cut.custom`` dictionary.

        Example with existing HF dataset::

            >>> import datasets
            ... dataset = datasets.load_dataset("mozilla-foundation/common_voice_11_0", "hi", split="test")
            ... dataset = dataset.map(some_transform)
            ... cuts = CutSet.from_huggingface_dataset(dataset)
            ... for cut in cuts:
            ...     pass

        Example providing HF dataset init args/kwargs::

            >>> import datasets
            ... cuts = CutSet.from_huggingface_dataset("mozilla-foundation/common_voice_11_0", "hi", split="test")
            ... for cut in cuts:
            ...     pass

        r   )ÚLazyHFDatasetIteratorrt  )ro  ry  rN   )ru  rv  rw  rx  Údataset_argsÚdataset_kwargsry  rJ   rJ   rK   Úfrom_huggingface_datasetb
  s   "ûúÿzCutSet.from_huggingface_datasetc                 C   s2   zt | ƒ}W n   d}Y d|› dt| jƒ› dS )Nz	<unknown>zCutSet(len=z) [underlying data type: ú])r  r+  rZ   )rQ   Úlen_valrJ   rJ   rK   Ú__repr__‘
  s
   zCutSet.__repr__c                    s6   t ˆ tƒrt‡ fdd„| D ƒƒS t‡ fdd„| D ƒƒS )Nc                 3   s    | ]}ˆ |j kV  qd S rG   ri   ©r^   rT  ©rT   rJ   rK   r`   š
  rÿ   z&CutSet.__contains__.<locals>.<genexpr>c                 3   s    | ]	}ˆ j |j kV  qd S rG   ri   r€  r  rJ   rK   r`   œ
  r)  )rH   rÚ  r  rV   rJ   r  rK   Ú__contains__˜
  s   
zCutSet.__contains__Úindex_or_idc                    sZ   z| j ˆ  W S  ty,   | jrt‡ fdd„t| ƒD ƒƒ Y S t‡ fdd„| D ƒƒ Y S w )Nc                 3   s     | ]\}}|ˆ kr|V  qd S rG   rJ   )r^   rÅ   rT  ©rƒ  rJ   rK   r`   ¤
  r  z%CutSet.__getitem__.<locals>.<genexpr>c                 3   s    | ]
}|j ˆ kr|V  qd S rG   ri   r€  r„  rJ   rK   r`   §
  ra   )rO   Ú	TypeErrorr  r,  r¸   )rQ   rƒ  rJ   r„  rK   Ú__getitem__ž
  s   úzCutSet.__getitem__c                 C   ó
   t | jƒS rG   )r  rO   rY   rJ   rJ   rK   Ú__len__©
  rM   zCutSet.__len__c                 c   s    | j E d H  d S rG   rU   rY   rJ   rJ   rK   Ú__iter__¬
  s   €zCutSet.__iter__rG   )rF   rN   )TN)NNNNFr|   F)NNFFTr   NN)r™   r   TTrš   FF)NF©F)FF)rö   r÷   r   )r  N)TNr  Frš   )r)  Nr*  Frš   )Nrš   )T)FN©TFN)rn  FT)NTrš   )Frv  )rš   )TT)NFr¦  Nr§  r   F)r  NNNNTT)NNN)TF)re  )xÚ__name__Ú
__module__Ú__qualname__Ú__doc__r   r   r   rR   ÚboolrW   ÚpropertyrZ   rd   rf   rg   rÚ  rn   r   rs   Ústaticmethodr   r:   rµ   rz   rc   Ú
from_itemsr   r6   r'   r;   r…   r¿   r‡   r   r   rŒ   r   r   r   r˜   rË   rÎ   r   ré   rî   rõ   r·   ró   rL   r3   r  r5   r  r   r  r(  r0  r:  r=  rG  rO  rQ  r  r   r   rY  r8   Úfloatrg  r!  ÚRandomr7  rq  ru  r  r  rz  r  rŠ  r  r‘  r“  r–  r›  r  r¥  r9   r°  r³  r¶  r¸  rº  r¼  r+   r%   r   r   rD   r   rË  r  r"  r6  r:  r=  rW  r*   rD  r[  r`  rb  rd  rm  rp  r|  r  r‚  r†  rˆ  r‰  rJ   rJ   rJ   rK   rN   P   sZ    -ÿÿÿÿþùÿþýüûúùø3ÿþ1øÿþýüûúùø	÷ öþ
ýüûúùø	÷
ö
õzÿÿÿ
þ7Oüþýü
ûúþýüûú
ù'ú
ý
üûú
ùAýþý
ü
ÿ
þýþý
ü!úþýüûú
ùXùþýüûúù
ø;ýþý
üCÿÿ
ÿ

þøþýüûúùø	
÷>úþýüûú
ù)ûþýüû
úûþýüû
ú4ýþý
üüþýü
ûÿÿÿÿ
þÿÿÿÿ
þúþýüûú
ù*÷þýüûúùø	÷

ö9÷þýüûúùø	÷

ö Göþýüûúùø	÷
ö
õ B÷þýüûúùø	÷
õ}üþýü
û9Uÿÿÿ
þ $ÿÿÿþÿþ R
ûþýüû. rN   c                   @   s   e Zd Zdefdd„ZdS )rl  rO   c                 C   s   t |ƒdksJ ‚|d S )Nrš   r   )r  rP   rJ   rJ   rK   r†  ±
  s   z!_BackgroundCutFetcher.__getitem__N)rŒ  r  rŽ  rN   r†  rJ   rJ   rJ   rK   rl  °
  s    rl  FÚreference_cutÚmixed_in_cutr5  r¨  r©  r_  c           
   
      s|  t dd„ | |fD ƒƒrˆdurt d¡ d‰| jdur*|jdur*| j|jks*J dƒ‚ˆ| jksE|sEJ d|j› dˆ› d| j› d	| j› d
	ƒ‚| j|jksXJ d| j› d|j› dƒ‚t| tƒsbt|tƒržt| tƒrvt|tƒrv| j	|j	ksvJ dƒ‚t| t
ƒs€t|t
ƒržt| t
ƒrŠ| }|‰ n|}| ‰ t‡ fdd„|jD ƒƒsžJ dƒ‚|du r¨ttƒ ƒ}n|dkr°| j}n|dkr¸|j}ntd|› dƒ‚ˆ| jkrË| jˆd} t| t
ƒrÞtt| jg ƒƒdkrÞ| j}nt| ttt
fƒrít| dg}n	tdt| ƒ› ƒ‚t|t
ƒrtt|jg ƒƒdkrt|ˆˆdg}	n&‡‡fdd„|jD ƒ}	nt|ttfƒr-t|ˆˆdg}	n	tdt|ƒ› ƒ‚t
|||	 dS )a9  
    Overlay, or mix, two cuts. Optionally the ``mixed_in_cut`` may be shifted by ``offset`` seconds
    and scaled down (positive SNR) or scaled up (negative SNR).
    Returns a MixedCut, which contains both cuts and the mix information.
    The actual feature mixing is performed during the call to :meth:`~MixedCut.load_features`.

    :param reference_cut: The reference cut for the mix - offset and snr are specified w.r.t this cut.
    :param mixed_in_cut: The mixed-in cut - it will be offset and rescaled to match the offset and snr parameters.
    :param offset: How many seconds to shift the ``mixed_in_cut`` w.r.t. the ``reference_cut``.
    :param allow_padding: If the offset is larger than the cut duration, allow the cut to be padded.
    :param snr: Desired SNR of the ``right_cut`` w.r.t. the ``left_cut`` in the mix.
    :param preserve_id: optional string ("left", "right"). when specified, append will preserve the cut id
        of the left- or right-hand side argument. otherwise, a new random id is generated.
    :return: A :class:`~MixedCut` instance.
    c                 s   s    | ]}t |tƒV  qd S rG   )rH   r$   r]   rJ   rJ   rK   r`   Ð
  rÿ   zmix.<locals>.<genexpr>NzÍYou are mixing cuts to a padding cut with a specified SNR - the resulting energies would be extremely low or high. We are setting snr to None, so that the original signal energies will be retained instead.z2Cannot mix cuts with different feature dimensions.zCannot mix cut 'z' with offset z, which is greater than cuts z duration of z,. Set `allow_padding=True` to allow padding.z/Cannot mix cuts with different sampling rates (z vs. z(). Please resample the recordings first.z0Cannot mix MultiCuts with different channel ids.c                 3   s(    | ]}|j d kp|jjˆ jkV  qdS )r#   N)r+  r_   rõ  ©r^   rè   )Ú	multi_cutrJ   rK   r`   ú
  s
   € ÿ
ÿzYCannot mix a MultiCut with a MixedCut that contains MultiCuts with different channel ids.r  r   z2Unexpected value for 'preserve_id' argument: got 'z+', expected one of (None, 'left', 'right').rP  r   r@  z"Unsupported type of cut in mix(): ©r_   r5  r©  c                    s`   g | ],}t |jt|jˆ  d dˆdu r|jn|jdu rˆnˆdur*|dur*|jˆ ndd‘qS )r÷   ©ÚndigitsNrš  )r!   r_   Úroundr5  r©  r˜  )r5  r©  rJ   rK   rô   $  s    îÿ
ÿ
ÿñÿzmix.<locals>.<listcomp>)rj   ræ   )r  ÚwarningsÚwarnrð  r6  rj   r…  rH   r#   rõ  r    rd  ræ   rÚ  rC   rC  rg  r  r@   Ú
transformsr   r$   r!   r+  )
r–  r—  r5  r¨  r©  r_  Ú	mixed_cutÚmixed_cut_idÚ
old_tracksÚ
new_tracksrJ   )r™  r5  r©  rK   r°  ¶
  sŽ   ÿÿÿÿþÿÿþÿÿ

þýÿÿ
ÿír°  r   r_   r6  r[  r\  r]  r^  r`  c                    sÚ  t |||ƒsJ d|› d|› d|› dƒ‚t| dƒrUt| jtƒrUddlm‰  ‡ fdd„| j ¡ D ƒ}t|ƒdkrUˆd	uoEt	‡fd
d„|D ƒƒf}	|	sUt
 d|› dt› d¡ |d	urz|| jkr`| S | jrlt|| j| jdnd	}
| jrxt|| jdnd	}|d	ur­| js…J dƒ‚|}
|
| j }| jr–t|| jdnd	}|
| jkr­|| jkr­|d	u s«|| jkr­| S |d	urÔ| js¸J dƒ‚|| jkr¿| S |}|| j }| jrÒt|| j| jdnd	}
t|| j dd}d	}| jrï| j}|jt||jƒd}tttƒ ƒ||| j| jr|
| j nd	| jr|| j nd	| j| j|ˆd
}|dkr(| j||r"dnd	d}|S |dkr;|j| |r5dnd	d}|S |dkrf|j |jd dj| |rOdnd	dj|j |jd d|r`dnd	d}|S t!d|› ƒ‚)aÔ  
    Return a new MixedCut, padded with zeros in the recording, and ``pad_feat_value`` in each feature bin.

    The user can choose to pad either to a specific `duration`; a specific number of frames `num_frames`;
    or a specific number of samples `num_samples`. The three arguments are mutually exclusive.

    :param cut: DataCut to be padded.
    :param duration: The cut's minimal duration after padding.
    :param num_frames: The cut's total number of frames after padding.
    :param num_samples: The cut's total number of samples after padding.
    :param pad_feat_value: A float value that's used for padding the features.
        By default we assume a log-energy floor of approx. -23 (1e-10 after exp).
    :param direction: string, 'left', 'right' or 'both'. Determines whether the padding is added before or after
        the cut.
    :param preserve_id: When ``True``, preserves the cut ID before padding.
        Otherwise, a new random ID is generated for the padded cut (default).
    :param pad_value_dict: Optional dict that specifies what value should be used
        for padding arrays in custom attributes.
    :return: a padded MixedCut if duration is greater than this cut's duration, otherwise ``self``.
    zIExpected only one of (duration, num_frames, num_samples) to be set: got (z, ú)rA  r   ©r?  c                    s   g | ]\}}t |ˆ ƒr|‘qS rJ   )rH   )r^   rÉ   rÊ   r¦  rJ   rK   rô   f  rÑ  zpad.<locals>.<listcomp>Nc                 3   s    | ]}|ˆ v V  qd S rG   rJ   )r^   rÉ   )r`  rJ   rK   r`   j  ry   zpad.<locals>.<genexpr>z6Cut being padded has custom TemporalArray attributes: zˆ. We expected a 'pad_value_dict' argument with padding values for these attributes. We will proceed and use the default padding value (=r£   ©r6  rñ  r…  )r6  r…  z~Cannot pad a cut using num_frames when it is missing pre-computed features (did you run cut.compute_and_store_features(...)?).zCannot pad a cut using num_samples when it is missing a Recording object (did you attach recording/recording set when creating the cut/cut set?)r÷   r›  rc  )
rj   r6  Ú
feat_valuerð  r[  r\  rñ  r…  ÚvideorA  r   r  )r_  rn  é   rP  zUnknown type of padding: )"r>   ÚhasattrrH   rA  r¿   rN  r?  r¼   r  rd  rž  rŸ  r7   r6  rØ   r<   rñ  r…  rÓ   r=   r[  r\  r  Ú	has_videor©  Ú	copy_withÚfpsr$   rÚ  rC   rð  r¹   r7  rC  )r_   r6  r[  r\  r]  r^  r_  r`  Úarr_keysÚpadding_values_specifiedÚtotal_num_framesÚtotal_num_samplesÚpadding_durationr©  Úpadding_cutÚpaddedrJ   )r?  r`  rK   rg  A  sÎ   ÿÿÿÿþþÿ
ûýùÿýÿ
ÿý
	
ÿ

ûýù

ÿñ

ò
ôÿüÿþrg  Úleft_cutÚ	right_cutc                 C   s   | j |||dS )z5Helper method for functional-style appending of Cuts.)r©  r_  )r¹   )r¶  r·  r©  r_  rJ   rJ   rK   r¹   Ü  s   r¹   rO   c                 C   ó
   t t| ƒS )zNReturn a MixedCut that consists of the input Cuts mixed with each other as-is.)r   r°  rU   rJ   rJ   rK   Úmix_cutsæ  r´  r¹  c                 C   r¸  )zOReturn a MixedCut that consists of the input Cuts appended to each other as-is.)r   r¹   rU   rJ   rJ   rK   Úappend_cutsí  r´  rº  rñ  Úuse_alignment_if_existsc           	      C   s  | j s|dusJ dƒ‚| j r| j}| j}n	t| j|| jd}tj|tjd}| j	D ][}|ra|j
ra||j
v ra|j
| D ]%}|jdkrHt|j| ƒnd}|j| jk rWt|j| ƒn|}d|||…< q:q)|jdkrmt|j| ƒnd}|j| jk r|t|j| ƒn|}d|||…< q)|S )a-  
    Compute a mask that indicates which frames in a cut are covered by supervisions.

    :param cut: a cut object.
    :param frame_shift: optional frame shift in seconds; required when the cut does not have
        pre-computed features, otherwise ignored.
    :param use_alignment_if_exists: optional str (key from alignment dict); use the specified
        alignment type for generating the mask
    :returns a 1D numpy array with value 1 for **frames** covered by at least one supervision,
    and 0 for **frames** not covered by any supervision.
    NzJNo features available. Either pre-compute features or provide frame_shift.r§  )Údtyper   r§  )rØ   rñ  r[  r<   r6  r…  r  ÚzerosÚfloat32ro   Ú	alignmentrÚ   r  r?  )	r_   rñ  r»  r[  Úmaskrq   ÚaliÚstÚetrJ   rJ   rK   Úcompute_supervisions_frame_maskô  sB   ÿý
ÿþ
ÿýù	ÿýrÄ  r|   r}   ro   r~   r   r€   r   c                 C   sÈ  |dus| dusJ dƒ‚|du|du| du}}}|r |  ¡ }|r’|r(|   ¡ } g }	t|ƒD ]b\}
}|jdu pCt|jtƒpCt|jƒdk}|rSt}|jdurP|jnd}nt}t|jƒ}|	 	||rdt
tƒ ƒn|j› d|
› |j|j|||rx| |j nd|r‹t|j|j||j|jd|dƒng d¡ q.nCg }	t| ƒD ]<\}}|jdkr©t}|jd }nt}|j}|	 	||r¸t
tƒ ƒn|j› d|› d|j|||rÏt|j|jd	ƒng d
¡ q˜t|	ƒ}	|durâ|	 |¡ |	S )a‹  
    Create a :class:`.CutSet` from any combination of supervision, feature and recording manifests.
    At least one of ``recordings`` or ``features`` is required.

    The created cuts will be of type :class:`.DataCut` (MonoCut for single-channel and MultiCut for multi-channel).
    The :class:`.DataCut` boundaries correspond to those found in the ``features``, when available,
    otherwise to those found in the ``recordings``.

    When ``supervisions`` are provided, we'll be searching them for matching recording IDs
    and attaching to created cuts, assuming they are fully within the cut's time span.

    :param recordings: an optional :class:`~lhotse.audio.RecordingSet` manifest.
    :param supervisions: an optional :class:`~lhotse.supervision.SupervisionSet` manifest.
    :param features: an optional :class:`~lhotse.features.base.FeatureSet` manifest.
    :param output_path: an optional path where the :class:`.CutSet` is stored.
    :param random_ids: boolean, should the cut IDs be randomized. By default, use the recording ID
        with a loop index and a channel idx, i.e. "{recording_id}-{idx}-{channel}")
    :param tolerance: float, tolerance for supervision and feature segment boundary comparison.
        By default, it's 1ms. Increasing this value can be helpful when importing Kaldi data
        directories with precomputed features.
    :return: a new :class:`.CutSet` instance.
    Nú>At least one of 'features' or 'recordings' has to be provided.rš   r   ú-T©rÔ   rõ  Ústart_afterÚ
end_beforeÚadjust_offsetr   ©rj   rÚ   r6  rõ  r~   rÖ   ro   ©rÔ   ©rj   rÚ   r6  rõ  rÖ   ro   )r  r¸   rò  rH   rµ   r  r"   r#   r{   r¹   rÚ  rC   rÔ   rÚ   r6  Úfindr?  Únum_channelsÚchannel_idsrj   rN   Úto_file)r}   ro   r~   r   r€   r   Úsup_okÚfeat_okÚrec_okrO   rÅ   ÚfeatsÚis_monoÚclsrõ  ÚridxrÖ   rJ   rJ   rK   r„   +  sŒ   ÿ
ý

ÿý
öúÿíÿô&
ÿøÿ
r„   c                    s  | dusJ dƒ‚|dus|dusJ dƒ‚|du|du|du}}}d|fd|fd|ffD ]\}	}
|
durG|
j sGt d|	› dt|
ƒj› d	¡ q-|r|rQt|ƒnt d¡}|r\t|ƒnt d¡}t 	| ¡•}t
|ƒD ]ˆ\}‰ t|ƒ}|du sŠ|jˆ jksŠJ d
ˆ j› d|j› dƒ‚t|‡ fdd„ƒ\}}t |¡}ˆ jdu p«tˆ jtƒp«tˆ jƒdk}|r»t}ˆ jdur¸ˆ jnd}nt}tˆ jƒ}||rÊttƒ ƒnˆ j› d|› ˆ jˆ j|ˆ ||rêt|jˆ j|ˆ jˆ jd|dƒng d}| |¡ qkW d  ƒ n1 sþw   Y  t | ¡S |rt|ƒnt d¡}t 	| ¡_}t
|ƒD ]R\}‰t|‡fdd„ƒ\}}t |¡}ˆj dkr@t}ˆj!d }nt}ˆj!}||rNttƒ ƒnˆj› d|› dˆj|ˆ|rft|jˆjdƒng d}| |¡ qW d  ƒ n	1 s|w   Y  t | ¡S )a  
    Create a :class:`.CutSet` from any combination of supervision, feature and recording manifests.
    At least one of ``recordings`` or ``features`` is required.

    This method is the "lazy" variant, which allows to create a :class:`.CutSet` with a minimal memory usage.
    It has some extra requirements:

        - The user must provide an ``output_path``, where we will write the cuts as
            we create them. We'll return a lazily-opened :class:`CutSet` from that file.

        - ``recordings`` and ``features`` (if both provided) have to be of equal length
            and sorted by ``recording_id`` attribute of their elements.

        - ``supervisions`` (if provided) have to be sorted by ``recording_id``;
            note that there may be multiple supervisions with the same ``recording_id``,
            which is allowed.

    In addition, to prepare cuts in a fully memory-efficient way, make sure that:

        - All input manifests are stored in JSONL format and opened lazily
            with ``<manifest_class>.from_jsonl_lazy(path)`` method.

    For more details, see :func:`.create_cut_set_eager`.

    :param output_path: path to which we will write the cuts.
    :param recordings: an optional :class:`~lhotse.audio.RecordingSet` manifest.
    :param supervisions: an optional :class:`~lhotse.supervision.SupervisionSet` manifest.
    :param features: an optional :class:`~lhotse.features.base.FeatureSet` manifest.
    :param random_ids: boolean, should the cut IDs be randomized. By default, use the recording ID
        with a loop index and a channel idx, i.e. "{recording_id}-{idx}-{channel}")
    :param tolerance: float, tolerance for supervision and feature segment boundary comparison.
        By default, it's 1ms. Increasing this value can be helpful when importing Kaldi data
        directories with precomputed features.
    :return: a new :class:`.CutSet` instance.
    NzFYou must provide the 'output_path' argument to create a CutSet lazily.rÅ  r}   ro   r~   zManifest passed in argument 'z%' is not opened lazily; open it with z=.from_jsonl_lazy() to reduce the memory usage of this method.z2Mismatched recording_id: Features.recording_id == z, but Recording.id == 'rl  c                    s   | j ˆ j kS rG   rÌ  r   )rÕ  rJ   rK   rª   ì  rí  z%create_cut_set_lazy.<locals>.<lambda>rš   r   rÆ  TrÇ  rË  c                    s   | j ˆ jkS rG   )rÔ   rj   r   )rÖ   rJ   rK   rª      rí  rÌ  rÍ  )"r  r  Úinfor+  rŒ  r+  Ú	itertoolsÚrepeatrN   rå   r¸   r,  rj   rÔ   Ú
_takewhiler6   Úfrom_segmentsrò  rH   rµ   r  r"   r#   r{   rÚ  rC   rÚ   r6  rÎ  r?  rÕ   Úfrom_jsonl_lazyrÏ  rÐ  )r   r}   ro   r~   r€   r   rÒ  rÓ  rÔ  ÚmtypeÚmrC  rÅ   ÚrecÚsupsrÖ  r×  rõ  r_   rØ  rJ   )rÕ  rÖ   rK   rƒ   ™  sÀ   
,ÿÿ
ýýÿÿ€
ÿÿÿ


ÿý
öúÿíÔÿ
.ÿ
ÿø
æÿ
rƒ   r3   Úiterabler  c                 C   sV   g }z	 t | ƒ}||ƒr| |¡ nt|g| ƒ} nqW || fS  ty*   Y || fS w )a%  
    Collects items from ``iterable`` as long as they satisfy the ``predicate``.
    Returns a tuple of ``(collected_items, iterable)``, where ``iterable`` may
    continue yielding items starting from the first one that did not satisfy
    ``predicate`` (unlike ``itertools.takewhile``).
    )r,  r¹   r   ÚStopIteration)rã  r  Ú	collectedrT  rJ   rJ   rK   rÜ  <  s   	úþþrÜ  Úraw_cutc                 C   s~   |   d¡}|dkrt | ¡S |dkrt | ¡S |dkr t | ¡S |dkr.t d¡ t | ¡S |dkr7t | ¡S td|› d	ƒ‚)
Nr+  r"   r#   r$   r   a   Your manifest was created with Lhotse version earlier than v0.8, when MonoCut was called Cut. Please re-generate it with Lhotse v0.8 as it might stop working in a future version (using manifest.from_file() and then manifest.to_file() should be sufficient).r    z-Unexpected cut type during deserialization: 'rl  )	Úpopr"   Ú	from_dictr#   r$   rž  rŸ  r    rC  )ræ  Úcut_typerJ   rJ   rK   r†   T  s   



ÿ

r†   c                 C   s   | j |||d ¡ S )Nrs  )ru  r  )rO   r6  rr  rj  rJ   rJ   rK   rt  h  s   ýürt  c                 C   s   | j ||||d ¡ S )Nr#  )r(  r  )rO   r  r  r  r"  rJ   rJ   rK   r&  u  s   üûr&  c                 C   s   | j |||||d ¡ S )Nr.  )r0  r  )rO   r+  r,  r-  r  r"  rJ   rJ   rK   r/  „  s   ûúr/  r,  c                 C   s   | j |d ¡ S )Nr;  )r=  r  )rO   r,  rJ   rJ   rK   r<  •  s
   ÿþr<  c                 C   ó
   |   |¡S rG   )r=  ©r_   rˆ   rJ   rJ   rK   r<  ž  rM   r<  c                 C   rê  rG   )r:  rë  rJ   rJ   rK   r8  ¢  rM   r8  c                 C   s   |   || jƒ¡S rG   )Úwith_idrj   ©r_   r
  rJ   rJ   rK   rY  ¦  rS   rY  c                 C   ó   | j ||dS )Nr^  )Úfill_supervision)r_   r\  r]  rJ   rJ   rK   r_  ª  rÏ   r_  c                 C   rê  rG   )rb  rí  rJ   rJ   rK   ra  ®  rM   ra  c                 C   rê  rG   )rd  )rÛ   r
  rJ   rJ   rK   rc  ²  rM   rc  c                 C   rê  rG   )r  )r_   r  rJ   rJ   rK   r  ¶  rM   r  c                 C   rî  )Nr  )r  )r_   r  r  rJ   rJ   rK   r  º  s   ÿr  c                 O   ó   | j |i |¤ŽS rG   )rg  ©r_   Úargsr$  rJ   rJ   rK   rf  À  rS   rf  c                 O   rð  rG   )rq  rñ  rJ   rJ   rK   rp  Ä  rS   rp  c                 O   rð  rG   )rŠ  rñ  rJ   rJ   rK   r‰  È  rS   r‰  c                 O   rð  rG   )r  rñ  rJ   rJ   rK   r  Ì  rS   r  c                 O   rð  rG   )r‘  rñ  rJ   rJ   rK   r  Ð  rS   r  c                 O   rð  rG   )r“  rñ  rJ   rJ   rK   r’  Ô  rS   r’  c                 O   rð  rG   )r¥  rñ  rJ   rJ   rK   r£  Ø  rS   r£  c                 O   rð  rG   )r›  rñ  rJ   rJ   rK   rš  Ü  rS   rš  c                 O   rð  rG   )r  rñ  rJ   rJ   rK   rœ  à  rS   rœ  c                 O   rð  rG   )r³  rñ  rJ   rJ   rK   r²  ä  rS   r²  c                 O   rð  rG   )Údrop_recordingrñ  rJ   rJ   rK   rµ  è  rS   rµ  c                 O   rð  rG   )rº  rñ  rJ   rJ   rK   r¹  ì  rS   r¹  c                 O   rð  rG   )r¸  rñ  rJ   rJ   rK   r·  ð  rS   r·  c                 O   rð  rG   )r¼  rñ  rJ   rJ   rK   r»  ô  rS   r»  Trh  ri  rj  rk  c                    s4   ˆ j ˆkrˆ S ‡ ‡‡‡fdd„}ˆ j|ƒ ˆ||dS )Nc                     sX   ˆdkrdS ˆ j ˆ } ˆdkr| S ˆdkr%ˆd u rt d| ¡S ˆ d| ¡S tdˆ› ƒ‚)NrÚ   r)  r?  r!  zUnknown 'offset_type' option: )r6  r!  ÚuniformrC  )Úlast_offset©r_   rh  ri  rk  rJ   rK   Úcompute_offset  s   
z(_truncate_single.<locals>.compute_offset)r5  r6  rj  r_  )r6  r7  )r_   rh  ri  rj  r_  rk  r÷  rJ   rö  rK   rm  ø  s   
ürm  r›   rœ   r   rŽ   rž   rŸ   r¤   r¢   r¡   r´   c                 C   sÎ   ddl m} td| d}|
r|  ¡ } ||||||||d>}| D ]1}z| |¡ W n# tyM } z|	rBt d|j› d|› d¡ n‚ W Y d }~nd }~ww | 	¡  q!W d   ƒ |j
S 1 s_w   Y  |j
S )	Nr   )Ú
SharWriterzExporting to SHAR)r¦   Údisable)r›   rŽ   rœ   r   rž   rŸ   r¤   zSkipping: failed to load cut 'z'. Error message: r²   )r—   rø  r   r  rÕ   Ú	Exceptionr  r  rj   rX  rÄ   )rO   r›   rœ   r   rŽ   rž   rŸ   r¤   r¢   r¡   r´   rø  ÚpbarrC  r_   ÚerJ   rJ   rK   r¶     s@   ùÿü€þ
ö
÷êr¶   c                   @   s¼   e Zd ZdZ								d#dd	d
d	dee dedeeee	e f  dee
 dedeeed ejf dededdfdd„Zdd„ Zdededejdefdd„Zdefdd„Zd$d!d"„ZdS )%r¯  aµ	  
    Iterate over cuts from ``cuts`` CutSet while mixing randomly sampled ``mix_in_cuts`` into them.
    A typical application would be data augmentation with noise, music, babble, etc.

    :param cuts: a ``CutSet`` we are iterating over.
    :param mix_in_cuts: a ``CutSet`` containing other cuts to be mixed into ``cuts``.
    :param duration: an optional float in seconds.
        When ``None``, we will preserve the duration of the cuts in ``self``
        (i.e. we'll truncate the mix if it exceeded the original duration).
        Otherwise, we will keep sampling cuts to mix in until we reach the specified
        ``duration`` (and truncate to that value, should it be exceeded).
    :param allow_padding: an optional bool.
        When it is ``True``, we will allow the offset to be larger than the reference
        cut by padding the reference cut.
    :param snr: an optional float, or pair (range) of floats, in decibels.
        When it's a single float, we will mix all cuts with this SNR level
        (where cuts in ``self`` are treated as signals, and cuts in ``cuts`` are treated as noise).
        When it's a pair of floats, we will uniformly sample SNR values from that range.
        When ``None``, we will mix the cuts without any level adjustment
        (could be too noisy for data augmentation).
    :param preserve_id: optional string ("left", "right"). when specified, append will preserve the cut id
        of the left- or right-hand side argument. otherwise, a new random id is generated.
    :param mix_prob: an optional float in range [0, 1].
        Specifies the probability of performing a mix.
        Values lower than 1.0 mean that some cuts in the output will be unchanged.
    :param seed: an optional int or "trng". Random seed for choosing the cuts to mix and the SNR.
        If "trng" is provided, we'll use the ``secrets`` module for non-deterministic results
        on each iteration. You can also directly pass a ``random.Random`` instance here.
    :param random_mix_offset: an optional bool.
        When ``True`` and the duration of the to be mixed in cut in longer than the original cut,
         select a random sub-region from the to be mixed in cut.
    :param stateful: when True, each time this object is iterated we will shuffle the noise cuts
        using a different random seed. This is useful when you often re-start the iteration and
        don't want to keep seeing the same noise examples. Enabled by default.
    NFr¦  r§  r   TrO   rN   r®  r6  r¨  r©  r_  rª  rv   r«  r­  ÚstatefulrF   c                 C   sÈ   || _ || _|| _|| _|| _|| _|| _|| _|	| _|
| _	d| _
d| j  kr-dks0J ‚ J ‚| jd u s<| jdks<J ‚t| jttfƒrUt| jƒdksSJ d|› dƒ‚d S t| jtd ƒttfƒsbJ ‚d S )Nr   r)  r§  rª  z@SNR range must be a list or tuple with exactly two values (got: r¥  )Úsourcer®  r6  r¨  r©  r_  rª  rv   r­  rý  Únum_times_iteratedrH   Útupler{   r  r+  rµ   r”  )rQ   rO   r®  r6  r¨  r©  r_  rª  rv   r­  rý  rJ   rJ   rK   rR   l  s$   
ÿzLazyCutMixer.__init__c           
      #   sª   ddl m} tˆjtjƒrˆj‰ nt |ˆjƒˆj ¡‰ ˆjr'ˆ jd7  _ˆjj	r3‡ ‡fdd„}n‡ ‡fdd„}t
|ƒ ƒ}ˆjD ]}t|ƒrQˆ  dd¡ˆjkrU|V  qBtˆjttfƒrcˆ jˆjŽ nˆj}tˆjd uroˆjn|jd	 d
d}t|ƒ}ˆ ||ˆ ¡}|j||ˆjd}|j}	|	|d	 k r¼t|ƒ}ˆ |||	 ˆ ¡}|j|||	ˆjˆjd}t|	|j d
d}	|	|d	 k s”|jˆjd urÆˆjn|jˆjd ud}|V  qBd S )Nr   )Úresolve_seedrš   c                   3   s     ˆj  ¡ jˆ ddE d H  d S )NiÐ  )rk  rf  )r®  rÛ  rñ   rJ   ©rk  rQ   rJ   rK   Ú	noise_genŸ  s   €z(LazyCutMixer.__iter__.<locals>.noise_genc                   3   s    	 ˆj jˆ dE d H  q)NT)rk  )r®  rñ   rJ   r  rJ   rK   r  ¤  s   €ÿr)  r§  gš™™™™™©?r÷   r›  )rT   r©  r_  )rT   r©  Úoffset_other_byr¨  r_  )r6  r_  )Úlhotse.dataset.dataloadingr  rH   rv   r!  r•  rÿ  rý  r®  r  r+  rþ  rL   rô  rª  r©  r{   r   r  r6  r,  Ú_maybe_truncate_cutr°  r_  r¨  r7  )
rQ   r  r  r®  r_   Úcut_snrÚtarget_mixed_durationÚto_mixÚmixedÚmixed_in_durationrJ   r  rK   r‰  Ž  sb   €

ÿýþ
ÿû	
ÿñþÈzLazyCutMixer.__iter__r_   Útarget_durationrk  c                 C   s0   | j r|j|kr|j| d|j| ¡|d}|S )Nr   r4  )r­  r6  r7  rô  )rQ   r_   r  rk  rJ   rJ   rK   r  ä  s   þz LazyCutMixer._maybe_truncate_cutc                 C   r‡  rG   )r  rþ  rY   rJ   rJ   rK   rˆ  î  rM   zLazyCutMixer.__len__r/   c                 C   s
   t | |ƒS rG   )r/   rV   rJ   rJ   rK   Ú__add__ñ  rM   zLazyCutMixer.__add__)NFr¦  Nr§  r   FT)rF   r/   )rŒ  r  rŽ  r  r   r;   r  r   r9   r   rÚ  r”  rµ   r   r!  r•  rR   r‰  r   r  rˆ  r  rJ   rJ   rJ   rK   r¯  G  sZ    (õþýüûúùø	÷
öõ
ô"Vÿÿÿ
þ
r¯  )r   FNN)NN)NNNNFr|   )NNNFr|   r‹  rŠ  )•rÚ  r  r0  r!  Úsecretsrž  Úcollectionsr   r  r   r   r   Ú	functoolsr   r   r   r	   Úpathlibr
   Útypingr   r   r   r   r   r   r   r   r   r   r   r   r   r   r}  r  rØ  Úintervaltreer   Ú	tqdm.autor   Úlhotse.audior   r   Úlhotse.augmentationr   Úlhotse.cut.baser   Úlhotse.cut.datar   Úlhotse.cut.mixedr    r!   Úlhotse.cut.monor"   Úlhotse.cut.multir#   Úlhotse.cut.paddingr$   Úlhotse.featuresr%   r&   r'   Úlhotse.features.baser(   r)   rO  r*   r+   Úlhotse.lazyr,   r-   r.   r/   r0   r1   r2   r3   Úlhotse.serializationr4   Úlhotse.supervisionr5   r6   Úlhotse.utilsr7   r8   r9   r:   r;   r<   r=   r>   r?   r@   rA   rB   rC   rD   r  rL   rN   ÚutilsrZ   ÚDatasetrl  rÚ  r°  rµ   r”  rg  r¹   r¹  rº  rÄ  r„   rƒ   rÜ  r¿   r†   rt  r&  r/  r<  r<  r8  rY  r_  ra  rc  r  r  rf  rp  r‰  r  r  r’  r£  rš  rœ  r²  rµ  r¹  r·  r»  r•  rm  r¶   r¯  rJ   rJ   rJ   rK   Ú<module>   s   @(
<                    t	úÿþýüûú
ù øÿþýüûúùø	
÷ üÿþýü
û
	ýÿþ
ý8úÿþýüûú
ùpúÿþýüûú
ù !ÿÿ
þÿ
ûÿ
úÿ
ùÿþ
ý	úÿþýüûú
ù+õÿþýü
ûúùø	÷
öõ
ô/