o
    Si                    @   s  d dl Z d dlZd dlmZ d dlmZmZ d dlmZ d dl	m
Z
 d dlmZmZmZmZmZmZmZmZmZmZ d dlZd dlZd dlmZ d dlmZmZ d d	lmZm Z m!Z! d d
l"m#Z# d dl$m%Z%m&Z&m'Z' d dl(m)Z)m*Z*m+Z+m,Z, d dl-m.Z. d dl/m0Z0 d dl1m2Z2 d dl3m4Z4 d dl5m6Z6m7Z7m8Z8 d dl9m:Z: d dl;m<Z< d dl=m>Z> d dl?m@Z@ d dlAmBZBmCZCmDZDmEZEmFZFmGZGmHZHmIZImJZJmKZKmLZLmMZMmNZNmOZOmPZP eG dd dZQeG dd de0ZRdS )    N)	dataclass)partialreduce)BytesIO)add)
AnyCallableDict	GeneratorIterableListLiteralOptionalTupleUnion)IntervalTree)ArrayTemporalArray)	Recording	VideoInfo%get_audio_duration_mismatch_tolerance)
save_audio)
AudioMixer
VideoMixeraudio_energy)AudioTransform	AugmentFnLoudnessNormalizationReverbWithImpulseResponse)Codec)Cut)DataCut)
PaddingCut)FeatureExtractorFeatureMixer create_default_feature_extractor)Features)FeaturesWriter)Image)SupervisionSegment)DEFAULT_PADDING_VALUELOG_EPSILONDecibelsPathlikeSecondsadd_durationscompute_num_framescompute_num_samplesfastcopyhash_str_to_intmerge_items_with_delimiteroverlapsperturb_num_samplesrich_exception_infouuid4c                   @   sr   e Zd ZU dZeeef ed< dZe	ed< dZ
eed< dZee ed< dd	 Zed
efddZdefddZdS )MixTrackz
    Represents a single track in a mix of Cuts. Points to a specific DataCut or PaddingCut and holds information on
    how to mix it with other Cuts, relative to the first track in a mix.
    cutNtype        offsetsnrc                 C   s   t | jj| _ d S N)r;   r:   __name__self rC   D/home/ubuntu/.local/lib/python3.10/site-packages/lhotse/cut/mixed.py__post_init__N   s   zMixTrack.__post_init__datac                 C   s8   ddl m} | d}| d|d< t||fi | S )N   )deserialize_cutr:   r;   )setrH   popr9   )rF   rH   cut_dictrC   rC   rD   	from_dictQ   s   
zMixTrack.from_dictreturnc                 C   s.   | j  | j| jd}| jd ur| j|d< |S )N)r:   r;   r=   r>   )r:   to_dictr;   r=   r>   rB   ansrC   rC   rD   rN   Z   s   

zMixTrack.to_dict)r@   
__module____qualname____doc__r   r!   r"   __annotations__r;   strr=   r.   r>   r   r,   rE   staticmethoddictrL   r	   rN   rC   rC   rC   rD   r9   B   s   
 r9   c                       sR  e Zd ZU dZeed< ee ed< dZe	ee
  ed< edee fddZedefd	d
ZedefddZedeeee f fddZedefddZedefddZedefddZedefddZdedefddZede	e fddZede	e fddZede	e fddZede	e fd d!Zede	e fd"d#Zede	e fd$d%Zede	e fd&d'Z de!fd(d)Z"de#e$eee%e&e'e(e)f f ddf fd*d+Z*d,ed-e+ddf fd.d/Z,d0ede+fd1d2Z-d0edefd3d4Z.d0ede/j0fd5d6Z1d7ede$ee2f fd8d9Z3	:	;	;	;dd<ed=ed>ed?edd f
d@dAZ4	:ddBeddCfdDdEZ5dFdd;dGddHdIedJe	e dKedLedMe	e6ee7f  de8fdNdOZ9dPdGd;dQdJedRedLedSedd f
dTdUZ:ddde;dVdGdfdJedWedXedYe<dRedLedZe	e6eeee<f f  de8fd[d\Z=	G	dd]ed^ed_e	e dd fd`daZ>	b	c	Gddde?dee<dfefdgdhZ@ddie<d^edd fdjdkZAddie<d^edd fdldmZBddie<d^edd fdndoZC	G	F	;	p	;ddqedre<dsedte	e d^edd fdudvZD	Gddwe<dxed^eddyfdzd{ZEdd;dGd;d|gddd;fd}e	d~ deded^edee de	e de	e dxedd fddZFeGddede	e/j0 fddZHeG	Gddedede	e/j0 fddZIede	eJ fddZKeG	;	;	Gddededede	e$eLjMe	eLjM f  fddZNdd ZOdd ZPdddZQdddZRdddZSdddZTdddZU		;ddeVdeWde	eX dede2f
ddZY	Gddededd fddZZde[egef de8fddZ\		ddede	e[ee]e+ ge+f  dd fddZ^de[egef de8fddZ_e`de!dd fddZadebdd fddZcdebdd fddZdede2fddZeedefddZfede2fddZgedefddÄZh  ZiS )MixedCutaw  
    :class:`~lhotse.cut.MixedCut` is a :class:`~lhotse.cut.Cut` that actually consists of multiple other cuts.
    Its primary purpose is to allow time-domain and feature-domain augmentation via mixing the training cuts
    with noise, music, and babble cuts. The actual mixing operations are performed on-the-fly.

    Internally, :class:`~lhotse.cut.MixedCut` holds other cuts in multiple tracks (:class:`~lhotse.cut.MixTrack`),
    each with its own offset and SNR that is relative to the first track.

    Please refer to the documentation of :class:`~lhotse.cut.Cut` to learn more about using cuts.

    In addition to methods available in :class:`~lhotse.cut.Cut`, :class:`~lhotse.cut.MixedCut` provides the methods to
    read all of its tracks audio and features as separate channels:

        >>> cut = MixedCut(...)
        >>> mono_features = cut.load_features()
        >>> assert len(mono_features.shape) == 2
        >>> multi_features = cut.load_features(mixed=False)
        >>> # Now, the first dimension is the channel.
        >>> assert len(multi_features.shape) == 3

    .. note:: MixedCut is different from MultiCut, which is intended to represent multi-channel recordings
        that share the same supervisions.

    .. note:: Each track in a MixedCut can be either a MonoCut, MultiCut, or PaddingCut.

    .. note:: The ``transforms`` field is a list of dictionaries that describe the transformations
        that should be applied to the track after mixing.

    See also:

        - :class:`lhotse.cut.Cut`
        - :class:`lhotse.cut.MonoCut`
        - :class:`lhotse.cut.MultiCut`
        - :class:`lhotse.cut.CutSet`
    idtracksN
transformsrM   c                 C   s   dd | j D S )z
        Lists the supervisions of the underlying source cuts.
        Each segment start time will be adjusted by the track offset.
        c                 S   s&   g | ]}|j jD ]}||jqqS rC   )r:   supervisionswith_offsetr=   ).0tracksegmentrC   rC   rD   
<listcomp>   s    
z)MixedCut.supervisions.<locals>.<listcomp>rZ   rA   rC   rC   rD   r\      s   zMixedCut.supervisionsc                 C   s   dS Nr   rC   rA   rC   rC   rD   start   s   zMixedCut.startc                 C   s    dd | j D }tt|ddS )Nc                 s   s    | ]
}|j |jj V  qd S r?   )r=   r:   durationr^   r_   rC   rC   rD   	<genexpr>       z$MixedCut.duration.<locals>.<genexpr>   ndigits)rZ   roundmax)rB   track_durationsrC   rC   rD   re      s   zMixedCut.durationc                 C   s   | j }|dkrtt|S dS )NrG   r   )num_channelslistrange)rB   ro   rC   rC   rD   channel   s   zMixedCut.channelc                 C      | j jS r?   )_first_non_padding_cuthas_featuresrA   rC   rC   rD   ru         zMixedCut.has_featuresc                 C   rs   r?   )rt   has_recordingrA   rC   rC   rD   rw      rv   zMixedCut.has_recordingc                 C   rs   r?   )rt   	has_videorA   rC   rC   rD   rx      rv   zMixedCut.has_videoc                 C      t dd | jD S )Nc                 s       | ]}|j jV  qd S r?   )r:   is_in_memoryrf   rC   rC   rD   rg          z(MixedCut.is_in_memory.<locals>.<genexpr>)anyrZ   rA   rC   rC   rD   r{         zMixedCut.is_in_memoryfieldc                 C   s   | j |S r?   )rt   has)rB   r   rC   rC   rD   r      s   zMixedCut.hasc                 C   s   | j rt| j| j| jdS d S )N)re   frame_shiftsampling_rate)ru   r0   re   r   r   rA   rC   rC   rD   
num_frames   s   zMixedCut.num_framesc                 C      | j d jjS rc   )rZ   r:   r   rA   rC   rC   rD   r         zMixedCut.frame_shiftc                 C   r   rc   )rZ   r:   r   rA   rC   rC   rD   r      r   zMixedCut.sampling_ratec                 C   s   t | j| jS r?   )r1   re   r   rA   rC   rC   rD   num_samples   r   zMixedCut.num_samplesc                 C   r   rc   )rZ   r:   num_featuresrA   rC   rC   rD   r      r   zMixedCut.num_featuresc                 C   ry   )Nc                 s   rz   r?   )r:   ro   rf   rC   rC   rD   rg      r|   z(MixedCut.num_channels.<locals>.<genexpr>)rm   rZ   rA   rC   rC   rD   ro      s   zMixedCut.num_channelsc                 C   s   | j r| jjjS d S r?   )ru   rt   featuresr;   rA   rC   rC   rD   features_type   r~   zMixedCut.features_typec                 C   s>   | j dd | jD t| jd}| jrdd | jD |d< |S )Nc                 S      g | ]}|  qS rC   rN   r^   trC   rC   rD   ra          z$MixedCut.to_dict.<locals>.<listcomp>)rY   rZ   r;   c                 S   r   rC   r   r   rC   rC   rD   ra      r   r[   )rY   rZ   r;   r@   r[   rO   rC   rC   rD   rN      s   zMixedCut.to_dictc                 C   s
   | j  S )a  
        Iterate over each data piece attached to this cut.
        Returns a generator yielding tuples of ``(key, manifest)``, where
        ``key`` is the name of the attribute under which ``manifest`` is found.
        ``manifest`` is of type :class:`~lhotse.Recording`, :class:`~lhotse.Features`,
        :class:`~lhotse.TemporalArray`, :class:`~lhotse.Array`, or :class:`~lhotse.Image`.

        For example, if ``key`` is ``recording``, then ``manifest`` is ``self.recording``.
        )rt   	iter_datarA   rC   rC   rD   r      s   
zMixedCut.iter_datakeyvaluec                    s.   || j v rt || dS t| j|| dS )a  
        This magic function is called when the user tries to set an attribute.
        We use it as syntactic sugar to store custom attributes in ``self._first_non_padding_cut.custom``
        field, so that they can be (de)serialized later.
        Setting a ``None`` value will remove the attribute from ``custom``.

        .. note:: MixedCut doesn't have its own ``custom`` field, and by convention
            always refers to the ``custom`` field on its first non padding cut.
        N)__dataclass_fields__super__setattr__setattrrt   )rB   r   r   	__class__rC   rD   r      s   

zMixedCut.__setattr__namec                 C   s   | drt | dr|dd }t| j|S |dkr2i }| jD ]}|jj }r/|| q"|S z| |\}}t	||W S  t
yN   td| dw )a  
        This magic function is called when the user tries to access an attribute
        of :class:`.MixedCut` that doesn't exist. It is used for accessing the custom
        attributes of cuts. We support exactly one scenario for mixed cuts:

        If :attr:`tracks` contains exactly one :class:`.MonoCut` object (and an arbitrary
        number of :class:`.PaddingCut` objects), we will look up the custom attributes
        of that cut.

        If one of the custom attributes is of type :class:`~lhotse.array.Array` or
        :class:`~lhotse.array.TemporalArray` we'll also support loading those arrays
        (see example below). Additionally, we will incorporate extra padding as
        dictated by padding cuts.

        Example:

            >>> cut = MonoCut('cut1', start=0, duration=4, channel=0)
            >>> cut.alignment = TemporalArray(...)
            >>> mixed_cut = cut.pad(10, pad_value_dict={'alignment': -1})
            >>> ali = mixed_cut.load_alignment()

        __load_   NcustomzNo such attribute: 'zt' (note: custom attributes are not supported when a MixedCut consists of more than one MonoCut with that attribute).)
startswithAttributeErrorr   load_customrZ   r:   r   update=_assert_one_data_cut_with_attr_and_return_it_with_track_indexgetattrAssertionError)rB   r   	attr_namerP   r   cstmnon_padding_idxmono_cutrC   rC   rD   __getattr__  s.   




zMixedCut.__getattr__c                 C   s   |  |\}}t||S r?   )r   hasattr)rB   r   r   r   rC   rC   rD   
has_custom?  s
   
zMixedCut.has_customc                 C   s   ddl m}m} | |\}}t||}t||r||S | j| j}| j	}t|t
r?| j|j	| ddj|dd S ||}	zdd | jD d j}
|
| }W n   t}Y ||	|j|j|||dS )	a  
        Load custom data as numpy array. The custom data is expected to have
        been stored in cuts ``custom`` field as an :class:`~lhotse.array.Array` or
        :class:`~lhotse.array.TemporalArray` manifest.

        .. note:: It works with Array manifests stored via attribute assignments,
            e.g.: ``cut.my_custom_data = Array(...)``.

        .. warning:: For :class:`.MixedCut`, this will only work if the mixed cut
            consists of a single :class:`.MonoCut` and an arbitrary number of
            :class:`.PaddingCuts`. This is because it is generally undefined how to
            mix arbitrary arrays.

        :param name: name of the custom attribute.
        :return: a numpy array with the data (after padding).
        r   )r   	pad_arrayleft)re   	directionrightc                 S   s   g | ]}t |jtr|jqS rC   
isinstancer:   r"   r   rC   rC   rD   ra   |  s
    z(MixedCut.load_custom.<locals>.<listcomp>)temporal_dimr   r=   padded_duration	pad_value)lhotse.arrayr   r   r   r   r   r   rZ   r=   re   r   to_cutpad
load_audior   r*   r   r   )rB   r   r   r   r   r   manifestleft_paddingr   arraypad_value_dictr   rC   rC   rD   r   G  sL   




	zMixedCut.load_customr   c                    sd   dd t | jD } fdd|D }t|dks(J dt| d  d|  |d \}}||fS )	Nc                 S   s&   g | ]\}}t |jtr||jfqS rC   r   r:   r!   r^   idxr   rC   rC   rD   ra     s    
zZMixedCut._assert_one_data_cut_with_attr_and_return_it_with_track_index.<locals>.<listcomp>c                    s,   g | ]\}}|j d ur |j v r||fqS r?   r   )r^   r   r:   r   rC   rD   ra     s
    rG   zThis MixedCut has z+ non-padding cuts with a custom attribute 'z'. We currently don't support mixing custom attributes. Consider dropping the attribute on all but one of DataCuts. Problematic cut:
r   )	enumeraterZ   len)rB   r   non_padding_cuts!non_padding_cuts_with_custom_attrr   r   rC   r   rD   r     s   
zFMixedCut._assert_one_data_cut_with_attr_and_return_it_with_track_indexflacTaudio_formatr   load_featuresr   c                    s"   t |  fdd| jD dS )a  
        Load data (audio, features, or custom arrays) into memory and attach them
        to a copy of the manifest. This is useful when you want to store cuts together
        with the actual data in some binary format that enables sequential data reads.

        Audio is encoded with ``audio_format`` (compatible with ``torchaudio.save``),
        floating point features are encoded with lilcom, and other arrays are pickled.
        c              
      s(   g | ]}t ||jj d dqS ))r   r   r   r   r:   )r2   r:   move_to_memoryr   r   r   r   r   rC   rD   ra     s    
z+MixedCut.move_to_memory.<locals>.<listcomp>rb   r2   rZ   )rB   r   r   r   r   rC   r   rD   r     s   
zMixedCut.move_to_memoryencodingr    c                 K   s`   | j dd}t }t||| j|d tj| | jd}t|	 dd | j
D | jd jjdS )	a  
        Convert this MixedCut to a MonoCut by mixing all tracks and channels into a single one.
        The result audio array is stored in memory, and can be saved to disk by calling
        ``cut.save_audio(path, ...)`` on the result.

        .. hint:: the resulting MonoCut will have ``custom`` field populated with the
            ``custom`` value from the first track of the MixedCut.

        :param encoding: any of "wav", "flac", or "opus".
        :return: a new MonoCut instance.
        T)mono_downmix)formatrecording_idc                 S   s   g | ]}t |d dqS )r   )rr   )r2   r^   srC   rC   rD   ra     s    z$MixedCut.to_mono.<locals>.<listcomp>r   )r\   r   )r   r   r   r   r   
from_bytesgetvaluerY   r2   r   r\   rZ   r:   r   )rB   r   kwargssamplesstream	recordingrC   rC   rD   to_mono  s   zMixedCut.to_monor<   Fr=   re   keep_excessive_supervisionspreserve_id_supervisions_indexr=   re   r   r   r   c                C   s  |dksJ d| dg }| j }|du rt|| | jdnt||| jd}t| jdd dD ]o}	tt||	j | jdd}
tt|	j| | jdd}t|	j|	jj | jd}||k r[q/d}||krx|durot|| | jd}n	t|| | jd}t|	jj |
 | | jd}|dkrq/|t	|	jj
|
||||d	||	jd
 q/tdd |D dkrt|r| jntt || jdt|| jdS t|dkr|d jS t|r| jntt |d}tdd |jD rdd t|jD d }t|j| dd|j|< |S )a%  
        Returns a new MixedCut that is a sub-region of the current MixedCut. This method truncates the underlying Cuts
        and modifies their offsets in the mix, as needed. Tracks that do not fit in the truncated cut are removed.

        Note that no operation is done on the actual features - it's only during the call to load_features()
        when the actual changes happen (a subset of features is loaded).

        :param offset: float (seconds), controls the start of the new cut relative to the current MixedCut's start.
        :param duration: optional float (seconds), controls the duration of the resulting MixedCut.
            By default, the duration is (end of the cut before truncation) - (offset).
        :param keep_excessive_supervisions: bool. Since trimming may happen inside a SupervisionSegment, the caller has
            an option to either keep or discard such supervisions.
        :param preserve_id: bool. Should the truncated cut keep the same ID or get a new, random one.
        :return: a new MixedCut instance.
        r   z3Offset for truncate must be non-negative (provided ).Nr   c                 S      | j S r?   )r=   )r   rC   rC   rD   <lambda>      z#MixedCut.truncate.<locals>.<lambda>r   r   r:   r=   r>   c                 S      g | ]
}t |jts|qS rC   r   r   rC   rC   rD   ra   E      z%MixedCut.truncate.<locals>.<listcomp>r<   )rY   re   r   
feat_valuer   rG   rY   rZ   c                 s   s&    | ]}|j d upt|jtV  qd S r?   )r>   r   r:   r"   r   rC   rC   rD   rg   \  s    
z$MixedCut.truncate.<locals>.<genexpr>c                 S   s    g | ]\}}t |jts|qS rC   r   r   rC   rC   rD   ra   _  s    
)r>   )re   r/   r   sortedrZ   rm   r=   r:   appendr9   truncater>   r   r"   rY   rU   r8   r1   rX   allr   r2   )rB   r=   re   r   r   r   
new_tracksold_durationnew_mix_endr_   
cut_offsettrack_offset	track_endcut_duration_decreasenew_durationnew_cutfirst_non_padding_track_idxrC   rC   rD   r     s   







zMixedCut.truncateboth)r   r   pad_silencer   r   c                C   s   t d)a^  
        This raises a ValueError since extending a MixedCut is not defined.

        :param duration: float (seconds), duration (in seconds) to extend the MixedCut.
        :param direction: string, 'left', 'right' or 'both'. Determines whether to extend on the left,
            right, or both sides. If 'both', extend on both sides by the duration specified in `duration`.
        :param preserve_id: bool. Should the extended cut keep the same ID or get a new, random one.
        :param pad_silence: bool. See usage in `lhotse.cut.MonoCut.extend_by`.
        :return: a new MixedCut instance.
        z5The extend_by() method is not defined for a MixedCut.)
ValueError)rB   re   r   r   r   rC   rC   rD   	extend_byj  s   zMixedCut.extend_byr   r   r   pad_feat_valuer   c           	   
   C   s$   ddl m} || |||||||dS )a  
        Return a new MixedCut, padded with zeros in the recording, and ``pad_feat_value`` in each feature bin.

        The user can choose to pad either to a specific `duration`; a specific number of frames `num_frames`;
        or a specific number of samples `num_samples`. The three arguments are mutually exclusive.

        :param duration: The cut's minimal duration after padding.
        :param num_frames: The cut's total number of frames after padding.
        :param num_samples: The cut's total number of samples after padding.
        :param pad_feat_value: A float value that's used for padding the features.
            By default we assume a log-energy floor of approx. -23 (1e-10 after exp).
        :param direction: string, 'left', 'right' or 'both'. Determines whether the padding is added before or after
            the cut.
        :param preserve_id: When ``True``, preserves the cut ID from before padding.
            Otherwise, generates a new random ID (default).
        :param pad_value_dict: Optional dict that specifies what value should be used
            for padding arrays in custom attributes.
        :return: a padded MixedCut if duration is greater than this cut's duration, otherwise ``self``.
        rG   )r   )re   r   r   r   r   r   r   )rI   r   )	rB   re   r   r   r   r   r   r   r   rC   rC   rD   r   ~  s   zMixedCut.padr   affix_idrecording_fieldc                    sB   | j sJ dt|r| j d n| j fdd| jD dS )a  
        Return a new ``MixedCut`` that will lazily resample the audio while reading it.
        This operation will drop the feature manifest, if attached.
        It does not affect the supervision.

        :param sampling_rate: The new sampling rate.
        :param affix_id: Should we modify the ID (useful if both versions of the same
            cut are going to be present in a single manifest).
        :param recording_field: which recording field to resample.
        :return: a modified copy of the current ``MixedCut``.
        z-Cannot resample a MixedCut without Recording._rsc                    $   g | ]}t ||jj d dqS ))r   r   )r2   r:   resampler   r   r   rC   rD   ra         z%MixedCut.resample.<locals>.<listcomp>r   rw   rX   rY   rZ   )rB   r   r   r   rC   r  rD   r     s   zMixedCut.resampleopusGz?codeccompression_levelcompress_custom_fieldsc                    s0   | j sJ dt| j fdd| jD dS )a   
        Return a copy of this Cut that has Recordings in its sub-Cuts processed by a lossy encoding.

        :param codec: The codec to use for compression. Supported codecs are "opus", "mp3", "vorbis", "gsm".
        :param compression_level: The level of compression (from 0.0 to 1.0, higher values correspond to higher compression).
        :param compress_custom_fields: Whether to also compress any custom recording fields in sub-Cuts.

        :return: A modified :class:`~lhotse.MixedCut` containing audio processed by a codec
        z/Cannot compress a MixedCut without a Recording.c              	      s$   g | ]}t ||j d qS r   )r2   r:   compressr   r  r  r  rC   rD   ra     s    z%MixedCut.compress.<locals>.<listcomp>r   r  )rB   r  r  r  rC   r  rD   r
    s   zMixedCut.compressfactorc                    T   j sJ djrtd t rj d nj fddjD dS )a  
        Return a new ``MixedCut`` that will lazily perturb the speed while loading audio.
        The ``num_samples``, ``start`` and ``duration`` fields of the underlying Cuts
        (and their Recordings and SupervisionSegments) are updated to reflect
        the shrinking/extending effect of speed.
        We are also updating the offsets of all underlying tracks.

        :param factor: The speed will be adjusted this many times (e.g. factor=1.1 means 1.1x faster).
        :param affix_id: When true, we will modify the ``MixedCut.id`` field
            by affixing it with "_sp{factor}".
        :return: a modified copy of the current ``MixedCut``.
        z5Cannot perturb speed on a MixedCut without Recording.zAttempting to perturb speed on a MixedCut that references pre-computed features. The feature manifest(s) will be detached, as we do not support feature-domain speed perturbation._spc              
      F   g | ]}t ||jj d ttt|jjdj dddqS r  r   )r   r  ri   rj   )r:   r=   )r2   r:   perturb_speedrl   r6   r1   r=   r   rf   r   r  rB   rC   rD   ra     $    z*MixedCut.perturb_speed.<locals>.<listcomp>r   rw   ru   loggingwarningrX   rY   rZ   rB   r  r   rC   r  rD   r    s   zMixedCut.perturb_speedc                    r  )a  
        Return a new ``MixedCut`` that will lazily perturb the tempo while loading audio.

        Compared to speed perturbation, tempo preserves pitch.
        The ``num_samples``, ``start`` and ``duration`` fields of the underlying Cuts
        (and their Recordings and SupervisionSegments) are updated to reflect
        the shrinking/extending effect of tempo.
        We are also updating the offsets of all underlying tracks.

        :param factor: The tempo will be adjusted this many times (e.g. factor=1.1 means 1.1x faster).
        :param affix_id: When true, we will modify the ``MixedCut.id`` field
            by affixing it with "_tp{factor}".
        :return: a modified copy of the current ``MixedCut``.
        z5Cannot perturb tempo on a MixedCut without Recording.zAttempting to perturb tempo on a MixedCut that references pre-computed features. The feature manifest(s) will be detached, as we do not support feature-domain tempo perturbation._tpc              
      r  r  )r2   r:   perturb_temporl   r6   r1   r=   r   rf   r  rC   rD   ra   -  r  z*MixedCut.perturb_tempo.<locals>.<listcomp>r   r  r  rC   r  rD   r    s   zMixedCut.perturb_tempoc                    sR   | j sJ d| jrtd t r| j d n| j fdd| jD dS )a  
        Return a new ``MixedCut`` that will lazily perturb the volume while loading audio.
        Recordings of the underlying Cuts are updated to reflect volume change.

        :param factor: The volume will be adjusted this many times (e.g. factor=1.1 means 1.1x louder).
        :param affix_id: When true, we will modify the ``MixedCut.id`` field
            by affixing it with "_vp{factor}".
        :return: a modified copy of the current ``MixedCut``.
        z6Cannot perturb volume on a MixedCut without Recording.zAttempting to perturb volume on a MixedCut that references pre-computed features. The feature manifest(s) will be detached, as we do not support feature-domain volume perturbation._vpc                    r   )r  r   )r2   r:   perturb_volumerf   r   r  rC   rD   ra   V  r  z+MixedCut.perturb_volume.<locals>.<listcomp>r   r  r  rC   r  rD   r  @  s   zMixedCut.perturb_volume   hardgain_db	normalizeoversamplingc                    sX   | j sJ d| jrtd t r| j d n| j fdd| jD dS )aM  
        Return a new ``MixedCut`` that will lazily apply clipping while loading audio.
        Recordings of the underlying Cuts are updated to reflect clipping change.

        :param hard: If True, apply hard clipping (sharp cutoff); otherwise, apply soft clipping (saturation).
        :param gain_db: The amount of gain in decibels to apply before clipping.
        :param normalize: If True, normalize the input signal to 0 dBFS before applying clipping.
        :param oversampling: If provided, we will oversample the input signal by the given integer factor before applying saturation and then downsample back to the original sampling rate.
        :param affix_id: When true, we will modify the ``MixedCut.id`` field
            by affixing it with "_cl{gain_db}".
        :return: a modified copy of the current ``MixedCut``.
        z8Cannot apply saturation on a MixedCut without Recording.zAttempting to apply saturation on a MixedCut that references pre-computed features. The feature manifest(s) will be detached, as we do not support feature-domain saturation._clc                    s*   g | ]}t ||jj d dqS ))r  r   r!  r"  r   r   )r2   r:   clip_amplituderf   r   r   r  r!  r"  rC   rD   ra     s    z+MixedCut.clip_amplitude.<locals>.<listcomp>r   r  )rB   r  r   r!  r"  r   rC   r%  rD   r$  _  s   zMixedCut.clip_amplitudetarget	mix_firstr!   c                    s   | j sJ d| jrtd d| _|r=| jdur| j ng }|td t	|  r7| j
 d |dS | j
|dS t rH| j
 d n| j
 fdd| jD d	S )
a  
        Return a new ``MixedCut`` that will lazily apply loudness normalization.

        :param target: The target loudness in dBFS.
        :param mix_first: If true, we will mix the underlying cuts before applying
            loudness normalization. If false, we cannot guarantee that the resulting
            cut will have the target loudness.
        :param affix_id: When true, we will modify the ``DataCut.id`` field
            by affixing it with "_ln{target}".
        :return: a modified copy of the current ``DataCut``.
        zDCannot apply loudness normalization on a MixedCut without Recording.zAttempting to normalize loudness on a MixedCut that references pre-computed features. The feature manifest will be detached, as we do not support feature-domain loudness normalization.N)r&  _lnrY   r[   c                    r   ))r&  r   r   )r2   r:   normalize_loudnessrf   r   r&  rC   rD   ra     s    z/MixedCut.normalize_loudness.<locals>.<listcomp>r   )rw   ru   r  r  r   r[   copyr   r   r2   rY   rX   rZ   )rB   r&  r'  r   r[   rC   r+  rD   r*    s4   zMixedCut.normalize_loudnessr   rir_recordingr   normalize_output
early_onlyrir_channelsroom_rng_seedsource_rng_seedc	              	      s  | j sJ d| jrtd du s"tfdd|D s"J dt|dks5t|t| jks5J dtt du rEt	| j
 |gt| j }	|du r_fd	d
| jD }	|	d }|rdu rtddlm}
 |
| j|d}nd}| jdur| j ng }|t|dur|ndg|d t|  r| j
 d|dS | j
|dS t|dkr|t| j }t r| j
 dn| j
 fdd
t| j||	D dS )a  
        Return a new ``MixedCut`` that will convolve the audio with the provided impulse response.
        If no ``rir_recording`` is provided, we will generate an impulse response using a fast random
        generator (https://arxiv.org/abs/2208.04101).

        :param rir_recording: The impulse response to use for convolving.
        :param normalize_output: When true, output will be normalized to have energy as input.
        :param early_only: When true, only the early reflections (first 50 ms) will be used.
        :param affix_id: When true, we will modify the ``MixedCut.id`` field
            by affixing it with "_rvb".
        :param rir_channels: The channels of the impulse response to use. By default, first channel is used.
            If only one channel is specified, all tracks will be convolved with this channel. If a list
            is provided, it must contain as many channels as there are tracks such that each track will
            be convolved with one of the specified channels.
        :param room_rng_seed: Seed for the room configuration.
        :param source_rng_seed: Seed for the source position.
        :param mix_first: When true, the mixing will be done first before convolving with the RIR.
            This effectively means that all tracks will be convolved with the same RIR. If you
            are simulating multi-speaker mixtures, you should set this to False.
        :return: a modified copy of the current ``MixedCut``.
        z;Cannot apply reverberation on a MixedCut without Recording.zAttempting to reverberate a MixedCut that references pre-computed features. The feature manifest(s) will be detached, as we do not support feature-domain reverberation.Nc                 3   s    | ]}| j k V  qd S r?   )ro   )r^   c)r-  rC   rD   rg         

z&MixedCut.reverb_rir.<locals>.<genexpr>z(Invalid channel index in `rir_channels`.rG   z`Invalid number of channels in `rir_channels`, must be either 1 or equal to the number of tracks.c                    s   g | ]
}t  |jj qS rC   )r3   r:   rY   rf   )	uuid4_strrC   rD   ra     s    z'MixedCut.reverb_rir.<locals>.<listcomp>r   )FastRandomRIRGenerator)sr	room_seedsource_seed)rirr.  r/  r0  rir_generator_rvbr)  c                    s6   g | ]\}}}t ||jj |g|d dqS ))r-  r.  r/  r   r0  r1  r2  r   )r2   r:   
reverb_rir)r^   r_   rr   seed)r   r/  r.  r-  r1  rC   rD   ra   -  s    r   )rw   ru   r  r  r   r   rZ   rU   r8   r3   rY   lhotse.augmentation.utilsr6  r   r[   r,  r   r   r2   rX   zip)rB   r-  r.  r/  r   r0  r1  r2  r'  source_rng_seedsr6  r;  r[   rC   )r   r/  r.  r-  r1  r5  rD   r=    s   "


		zMixedCut.reverb_rirmixedc                 C   s  | j sdS | jd j}|rQtdd | jdd D rQ| jd jj}| }|jdkr7t| j	| j
f| }nt| j	| j
|jd f| }||d|j	df< |S d}d}d	d
 t| jD d \}}	t|	jj}
|j|	jkrx|	 }|
|}tt| jjj| |j|d}t| jdd ddD ]!\}}||kr|dur|}n|j }|j||j|j|jjd q|r|j}|jd | j	 dkr|d| j	ddf }|jd | j	 dkrtj||ddddf fdd}|jd | j	ksJ d|S |jS )a  
        Loads the features of the source cuts and mixes them on-the-fly.

        :param mixed: when True (default), the features are mixed together (as defined in
            the mixing function for the extractor). This could result in either a 2D or 3D
            array. For example, if all underlying tracks are single-channel, the output
            will be a 2D array of shape (num_frames, num_features). If any of the tracks
            are multi-channel, the output may be a 3D array of shape (num_frames, num_features,
            num_channels).
        :return: A numpy ndarray with features and with shape ``(num_frames, num_features)``,
            or ``(num_tracks, num_frames, num_features)``
        Nr   c                 s   s    | ]	}t |jtV  qd S r?   r   r   rC   rC   rD   rg   W      z)MixedCut.load_features.<locals>.<genexpr>rG   r  .c                 S   0   g | ]\}}t |jts|jd u r||jfqS r?   r   r:   r"   r>   r   rC   rC   rD   ra   n      

z*MixedCut.load_features.<locals>.<listcomp>)feature_extractor
base_featsr   reference_energyrd   )featsr>   r=   r   )axiszInconsistent number of frames in a MixedCut: please report this issue at https://github.com/lhotse-speech/lhotse/issues showing the output of print(cut) or str(cut) on whichload_features() was called.)ru   rZ   r:   r   r   r   ndimnponesr   r   shaper   r%   r   r;   rY   compute_energyr$   rt   r   
add_to_mixr>   r=   r   mixed_featsconcatenateunmixed_feats)rB   rB  	first_cutpadding_valfirst_cut_featsrL  reference_featsrJ  reference_posreference_cutrH  mixerposr_   rC   rC   rD   r   @  st    


"zMixedCut.load_featuresr   c                 C   s  | j sdS | jd j}d}d}dd t| jD d \}}|j|jkr+| }t|}t| jd j | jd jj|| jd j	d}t| jdd ddD ]\}	}
|	|kr\|dur\|}n|
j }|j
||
j|
j	d qM|owtd	d
 | jD }|o{|}|r|r|jn|j}tt | jd}|jd | j }d|  k r|k rn n|ddd| jf }| |  k rdk rn ntj|dd| fgdd}|jd | jksJ d| j d|jd  d|  dd | jpg D }|D ]}||| j}q|S |j}|S )a$  
        Loads the audios of the source cuts and mix them on-the-fly.

        :param mixed: When True (default), returns a mix of the underlying tracks. This will
            return a numpy array with shape ``(num_channels, num_samples)``, where ``num_channels``
            is determined by the ``num_channels`` property of the MixedCut. Otherwise returns a
            numpy array with the number of channels equal to the total number of channels
            across all tracks in the MixedCut. For example, if it contains a MultiCut with 2
            channels and a MonoCut with 1 channel, the returned array will have shape
            ``(3, num_samples)``.
        :param mono_downmix: If the MixedCut contains > 1 channels (for e.g. when one of its tracks
            is a MultiCut), this parameter controls whether the returned array will be down-mixed
            to a single channel. This down-mixing is done by summing the channels together.
        :return: A numpy ndarray with audio samples and with shape ``(num_channels, num_samples)``
        Nr   c                 S   rE  r?   rF  r   rC   rC   rD   ra     rG  z'MixedCut.load_audio.<locals>.<listcomp>)r   rJ  base_offsetrG   rK  )audior>   r=   c                 s   s    | ]}|j d kV  qdS )MultiCutN)r;   rf   rC   rC   rD   rg     r4  z&MixedCut.load_audio.<locals>.<genexpr>r   )r   r   reflect)modez7Inconsistent number of samples in a MixedCut. Expected z but the output of mix has zn. Please report this issue at https://github.com/lhotse-speech/lhotse/issues showing the cut below. MixedCut:
c                 S   s$   g | ]}t |tr|nt|qS rC   )r   r   rL   )r^   tnfmrC   rC   rD   ra     s    )rw   rZ   r:   r   rY   r   r   r   r   r=   rS  r>   r}   mixed_mono_audiomixed_audior1   r   rQ  r   rO  r   r[   unmixed_audio)rB   rB  r   rW  reference_audiorJ  r[  r\  r]  r^  r_   r`  tol_samplesnum_samples_diffr[   tfnrC   rC   rD   r     st   


zMixedCut.load_audioc                 C   s(   | j r| jj}|jt| j|jdS d S )N)r   )rx   rt   video	copy_withr1   re   fps)rB   vrC   rC   rD   rl  
  s   zMixedCut.video
with_audioc           	      C   s   | j sd S t| jd jjddd | jj| jd jd}t| jdd  ddD ]\}}|j	|jjddd |jd q'|j
}|rG| j||d}|t|fS )	Nr   F)rp  )rn  r_  rG   rK  )rl  r=   )rB  r   )rx   r   rZ   r:   
load_videorl  rn  r=   r   rS  mixed_videor   torch
from_numpy)	rB   rp  rB  r   r]  r^  r_   rl  r`  rC   rC   rD   rq    s    
zMixedCut.load_videoc           	      C   sx   ddl m} |t| j\}}| jdd}| | }}t|D ]\}}|j	t
|| ddd||d q#|S )z^
        Display the feature matrix as an image. Requires matplotlib to be installed.
        r   NFrB  rG   )vminvmax)matplotlib.pyplotpyplotsubplotsr   rZ   r   minrm   r   imshowrO  flip	transpose)	rB   pltfigaxesr   fminfmaxr   axrC   rC   rD   plot_tracks_features.  s   &zMixedCut.plot_tracks_featuresc           
   	   C   s   ddl m} | jdd}|jt| jddd\}}tt| j|D ]:\}\}}|| d}|	t
d| jt|| |jjD ]}	|	|jj}	|j|j|	j |j|	j ddd	 q@q!|S )
zi
        Display plots of the individual tracks' waveforms. Requires matplotlib to be installed.
        r   NFru  T)sharexshareygreeng?)coloralpha)rx  ry  r   rz  r   rZ   r   r@  squeezeplotrO  linspacere   r:   r\   trimaxvspanr=   rd   end)
rB   r  r`  r  r  r   r_   r  r   supervisionrC   rC   rD   plot_tracks_audio;  s    

zMixedCut.plot_tracks_audioc                 C   0   | j sJ d| j dt| dd | jD dS )zKReturn a copy of the current :class:`MixedCut`, detached from ``features``.zCCannot detach features from a MixedCut with no Recording (cut ID = r   c                 S      g | ]}t ||j d qS r	  )r2   r:   drop_featuresr   rC   rC   rD   ra   V      z*MixedCut.drop_features.<locals>.<listcomp>rb   )rw   rY   r2   rZ   rA   rC   rC   rD   r  P     zMixedCut.drop_featuresc                 C   r  )zMReturn a copy of the current :class:`.MixedCut`, detached from ``recording``.zCCannot detach recording from a MixedCut with no Features (cut ID = r   c                 S   r  r	  )r2   r:   drop_recordingr   rC   rC   rD   ra   _  r  z+MixedCut.drop_recording.<locals>.<listcomp>rb   )ru   rY   r2   rZ   rA   rC   rC   rD   r  Y  r  zMixedCut.drop_recordingc                 C      t | dd | jD dS )PReturn a copy of the current :class:`.MixedCut`, detached from ``supervisions``.c                 S   r  r	  )r2   r:   drop_supervisionsr   rC   rC   rD   ra   f  r  z.MixedCut.drop_supervisions.<locals>.<listcomp>rb   r   rA   rC   rC   rD   r  b     zMixedCut.drop_supervisionsc                 C   r  )r  c                 S   r  r	  )r2   r:   drop_alignmentsr   rC   rC   rD   ra   m  r  z,MixedCut.drop_alignments.<locals>.<listcomp>rb   r   rA   rC   rC   rD   r  i  r  zMixedCut.drop_alignmentsc                 C   r  )zYReturn a copy of the current :class:`MixedCut`, which doesn't contain any in-memory data.c                 S   r  r	  )r2   r:   drop_in_memory_datar   rC   rC   rD   ra   t  r  z0MixedCut.drop_in_memory_data.<locals>.<listcomp>rb   r   rA   rC   rC   rD   r  p  r  zMixedCut.drop_in_memory_data	extractorstorage
augment_fnmix_eagerlyc                    s   |r:ddl m} j jdd d}j|_|jdjdfddjD |dt	dr6j
d	S dd	S  fd
djD }tj|dS )a  
        Compute the features from this cut, store them on disk, and create a new `MonoCut` object with the
        feature manifest attached. This cut has to be able to load audio.

        :param extractor: a ``FeatureExtractor`` instance used to compute the features.
        :param storage: a ``FeaturesWriter`` instance used to store the features.
        :param augment_fn: an optional callable used for audio augmentation.
        :param mix_eagerly: when False, extract and store the features for each track separately,
            and mix them dynamically when loading the features.
            When True, mix the audio first and store the mixed features, returning a new ``MonoCut`` instance
            with the same ID. The returned ``MonoCut`` will not have a ``Recording`` attached.
        :return: a new ``MonoCut`` instance if ``mix_eagerly`` is True, or returns ``self``
            with each of the tracks containing the ``Features`` manifests.
        rG   )MonoCutr   )r   r  r   r=   rr   r  c                    s   g | ]	}t | jd qS )r   )r2   rY   r   rA   rC   rD   ra     s    z7MixedCut.compute_and_store_features.<locals>.<listcomp>Nr   )rY   rd   re   rr   r\   r   r   r   c                    s,   g | ]}t |jj d |j|jdqS ))r  r  r  r   )r9   r:   compute_and_store_featuresr=   r>   rf   )r  r  r  rC   rD   ra     s    
r   )monor  extract_from_samples_and_storer   r   rY   r   re   r\   r   r   rZ   rX   )rB   r  r  r  r  r  features_infor   rC   )r  r  rB   r  rD   r  w  s:   


z#MixedCut.compute_and_store_features	add_empty	shrink_okc                    s   t j}|dkr'|sS dd tjD d   fddtjD }nc|dks6J dt j dg }jD ]N}t |jjdkrK|| q;|jjd }|sr|j|j k s`|jj	krrt
d|j d	|j d
|jj	 d|t|t|jt||j j	dgdd q;t|dS )a  
        Fills the whole duration of a cut with a supervision segment.

        If the cut has one supervision, its start is set to 0 and duration is set to ``cut.duration``.
        Note: this may either expand a supervision that was shorter than a cut, or shrink a supervision
        that exceeds the cut.

        If there are no supervisions, we will add an empty one when ``add_empty==True``, otherwise
        we won't change anything.

        If there are two or more supervisions, we will raise an exception.

        .. note:: For :class:`.MixedCut`, we expect that only one track contains a supervision.
            That supervision will be expanded to cover the full MixedCut's duration.

        :param add_empty: should we add an empty supervision with identical time bounds as the cut.
        :param shrink_ok: should we raise an error if a supervision would be shrank as a result
            of calling this method.
        r   c                 S   s    g | ]\}}t |jtr|qS rC   r   r   rC   rC   rD   ra     s
    z-MixedCut.fill_supervision.<locals>.<listcomp>c                    sN   g | ]#\}}| kr#t |t |jtj|jj|j jd dgddn|qS )rD  )rY   r   rd   re   rr   r\   r   )r2   r:   r)   rY   r   r=   re   r   first_non_padding_idxrB   rC   rD   ra     s&    rG   z/Cannot expand more than one supervision (found .z!Cannot shrink supervision (start=z, end=z) to cut (start=0, duration=aU  ) because the argument `shrink_ok` is `False`. Note: this check prevents accidental data loss for speech recognition, as supervision exceeding a cut indicates there might be some spoken content beyond cuts start or end (an ASR model would be trained to predict more text than spoken in the audio). If this is okay, set `shrink_ok` to `True`.)rd   re   r  r   rb   )r   r\   r   rZ   r:   r   rd   r=   r  re   r   r2   )rB   r  r  n_supsr   r   suprC   r  rD   fill_supervision  sP   


zMixedCut.fill_supervisiontransform_fnc                    s@   t | }|jD ]}t|jtrq fdd|jjD |j_q|S )z
        Modify the SupervisionSegments by `transform_fn` of this MixedCut.

        :param transform_fn: a function that modifies a supervision as an argument.
        :return: a modified MixedCut.
        c                    s   g | ]}|  qS rC   )map)r^   r`   r  rC   rD   ra     s    
z-MixedCut.map_supervisions.<locals>.<listcomp>)r2   rZ   r   r:   r"   r\   )rB   r  new_mixed_cutr_   rC   r  rD   map_supervisions  s   	

zMixedCut.map_supervisions	delimitermerge_policycustom_merge_fnc           
         s  t td|dkd|dur| nfdd t| jdd dtd	kr'| S d
 j}d j}t|| | jd}t	dd D }t	dd D }t
dd td	d D rot
dd D rotd| j d tdd D dd D ||dddd D dd D dd D dd D  fdd|D fdd|D d}|  }	|g|	j_|	S )a$  
        Return a copy of the cut that has all of its supervisions merged into
        a single segment.

        The new start is the start of the earliest superivion, and the new duration
        is a minimum spanning duration for all the supervisions. The text fields are
        concatenated with a whitespace.

        .. note:: If you're using individual tracks of a mixed cut, note that this transform
             drops all the supervisions in individual tracks and assigns the merged supervision
             in the first :class:`.DataCut` found in ``self.tracks``.

        :param merge_policy: one of "keep_first" or "delimiter". If "keep_first", we
            keep only the first segment's field value, otherwise all string fields
            (including IDs) are prefixed with "cat#" and concatenated with a hash symbol "#".
            This is also applied to ``custom`` fields. Fields with a ``None`` value are omitted.
        :param custom_merge_fn: a function that will be called to merge custom fields values.
            We expect ``custom_merge_fn`` to handle all possible custom keys.
            When not provided, we will treat all custom values as strings.
            It will be called roughly like:
            ``custom_merge_fn(custom_key, [s.custom[custom_key] for s in sups])``
        #
keep_first)r  return_firstNc                    s    t t|S r?   )r  rU   )kvs)merge_func_rC   rD   r   B  s    z-MixedCut.merge_supervisions.<locals>.<lambda>c                 S   r   r?   rK  )r   rC   rC   rD   r   D  r   r   rG   r   rD  r   c                 s   .    | ]}|j d ur|j  D ]}|V  qqd S r?   )r   keysr^   r   r  rC   rC   rD   rg   N      $z.MixedCut.merge_supervisions.<locals>.<genexpr>c                 s   r  r?   )	alignmentr  r  rC   rC   rD   rg   Q  r  c                 s   s    | ]
\}}t ||V  qd S r?   )r5   )r^   s1s2rC   rC   rD   rg   U  rh   c                 s   s    | ]}|j d uV  qd S r?   textr   rC   rC   rD   rg   U  r4  zYou are merging overlapping supervisions that have text transcripts. The result is likely to be unusable if you are going to train speech recognition models (cut id: r   c                 s       | ]}|j V  qd S r?   )rY   r   rC   rC   rD   rg   _      c                 s   r  r?   r   r   rC   rC   rD   rg   a  r   c                 s       | ]	}|j r|j V  qd S r?   r  r   rC   rC   rD   rg   g  rC  c                 s   r  r?   )speakerr   rC   rC   rD   rg   h  rC  c                 s   r  r?   )languager   rC   rC   rD   rg   i  rC  c                 s   r  r?   )genderr   rC   rC   rD   rg   j  rC  c                    s&   i | ]    fd dD qS )c                 3   .    | ]}|j d ur |j v r|j   V  qd S r?   r   r   r  rC   rD   rg   n      9MixedCut.merge_supervisions.<locals>.<dictcomp>.<genexpr>rC   r^   )merge_customsupsr  rD   
<dictcomp>k  s    	
z/MixedCut.merge_supervisions.<locals>.<dictcomp>c                    s&   i | ]  t t fd dD qS )c                 3   r  r?   )r  r   r  rC   rD   rg   z  r  r  )r   r   r  )r  r  rD   r  v  s    

)rY   r   rd   re   rr   r  r  r  r  r   r  )r   r4   r   r\   r   rd   r  r/   r   rI   r}   r@  warningswarnrY   r)   joinr  rt   )
rB   r  r  mstartmend	mdurationcustom_keysalignment_keysmsupr   rC   )r  r  r  rD   merge_supervisions  s`   

(	

&
zMixedCut.merge_supervisions	predicatec                    s    t |  fdd| jD d}|S )a  
        Modify cut to store only supervisions accepted by `predicate`

        Example:
            >>> cut = cut.filter_supervisions(lambda s: s.id in supervision_ids)
            >>> cut = cut.filter_supervisions(lambda s: s.duration < 5.0)
            >>> cut = cut.filter_supervisions(lambda s: s.text is not None)

        :param predicate: A callable that accepts `SupervisionSegment` and returns bool
        :return: a modified MixedCut
        c                        g | ]}t ||j d qS r	  )r2   r:   filter_supervisionsrf   r  rC   rD   ra         z0MixedCut.filter_supervisions.<locals>.<listcomp>rb   r   )rB   r  r  rC   r  rD   r    s   
zMixedCut.filter_supervisionsrF   c                 C   sP   d| v r	|  d d }d| v rdd | d D }t| d dd | d D |dS )	Nr;   r[   c                 S      g | ]}t |qS rC   )r   rL   r   rC   rC   rD   ra         z&MixedCut.from_dict.<locals>.<listcomp>rY   c                 S   r  rC   )r9   rL   rf   rC   rC   rD   ra     r  rZ   )rY   rZ   r[   )rJ   rX   )rF   r[   rC   rC   rD   rL     s   
zMixedCut.from_dictpathc                    (   | j s| S t| j fdd| jD dS )Nc                    r  r	  )r2   r:   with_features_path_prefixr   r  rC   rD   ra     r  z6MixedCut.with_features_path_prefix.<locals>.<listcomp>r   )ru   rX   rY   rZ   rB   r  rC   r  rD   r       
z"MixedCut.with_features_path_prefixc                    r  )Nc                    r  r	  )r2   r:   with_recording_path_prefixr   r  rC   rD   ra     r  z7MixedCut.with_recording_path_prefix.<locals>.<listcomp>r   r  r  rC   r  rD   r    r  z#MixedCut.with_recording_path_prefixc                 C   rs   r?   )_first_non_padding_trackr:   rA   rC   rC   rD   first_non_padding_cut  rv   zMixedCut.first_non_padding_cutc                 C   s   dd | j D d S )Nc                 S   r   rC   r   r   rC   rC   rD   ra     r   z4MixedCut.first_non_padding_track.<locals>.<listcomp>r   rb   rA   rC   rC   rD   first_non_padding_track  r~   z MixedCut.first_non_padding_trackc                 C   r   r?   )r  rA   rC   rC   rD   rt        zMixedCut._first_non_padding_cutc                 C   r   r?   )r  rA   rC   rC   rD   r    r  z!MixedCut._first_non_padding_track)r   TTT)r   )FN)r  r  F)T)Fr<   Tr  T)TF)TTF)rM   rX   )NT)r  N)jr@   rQ   rR   rS   rU   rT   r   r9   r[   r   r   propertyr)   r\   r.   rd   re   r   intrr   boolru   rw   rx   r{   r   r   r   r   r   r   ro   r   rW   rN   r
   r   r   r&   r   r   r(   r   r   r   r   r   rO  ndarrayr   r!   r   r   r   r	   r   r    r   r   r+   floatr   r   r   r
  r  r  r  r$  r*  r=  r7   r   r   r   rl  rs  Tensorrq  r  r  r  r  r  r  r  r#   r'   r   r  r  r   r  r   r  r  rV   rL   r-   r  r  r  r  rt   r  __classcell__rC   rC   r   rD   rX   e   sL  
 $	

7E


!
"
 
	
-
 
-/!
0
2	

 h`

	
	


=
U

n
rX   )Sr  r  dataclassesr   	functoolsr   r   ior   operatorr   typingr   r   r	   r
   r   r   r   r   r   r   numpyrO  rs  intervaltreer   r   r   r   lhotse.audior   r   r   lhotse.audio.backendr   lhotse.audio.mixerr   r   r   lhotse.augmentationr   r   r   r   lhotse.augmentation.compressr   lhotse.cut.baser    lhotse.cut.datar!   lhotse.cut.paddingr"   lhotse.featuresr#   r$   r%   lhotse.features.baser&   lhotse.features.ior'   lhotse.imager(   lhotse.supervisionr)   lhotse.utilsr*   r+   r,   r-   r.   r/   r0   r1   r2   r3   r4   r5   r6   r7   r8   r9   rX   rC   rC   rC   rD   <module>   s:    0D"