o
    Si                     @   s  d dl Z d dlmZmZ d dlmZmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZmZmZmZmZmZmZmZmZmZ d dlZd dlZd dlmZ d d	lmZmZ d d
l m!Z!m"Z" d dl#m$Z$ d dl%m&Z& d dl'm(Z( d dl)m*Z* d dl+m,Z,m-Z- d dl.m/Z/ d dl0m1Z1 d dl2m3Z3 d dl4m5Z5m6Z6m7Z7m8Z8m9Z9m:Z:m;Z;m<Z<m=Z=m>Z>m?Z?m@Z@mAZAmBZBmCZCmDZD eG dd de*e(edZEdS )    N)ABCMetaabstractmethod)	dataclassfield)
ROUND_DOWN)isclose)Path)
AnyCallableDict	GeneratorIterableListLiteralOptionalTupleUnion)IntervalTreeArrayTemporalArray)	Recording	VideoInfo)	AugmentFn)Codec)CustomFieldMixin)Cut)FeatureExtractorFeatures)FeaturesWriterImage)SupervisionSegment)LOG_EPSILONPathlikeSecondsTimeSpanadd_durationsasdict_nonullcompute_num_framescompute_num_samplesfastcopyis_module_availablemeasure_overlapoverlaps	overspansperturb_num_samplesrich_exception_infouuid4c                   @   s  e Zd ZU dZeed< eed< eed< eee	e f ed< e
edZe	e ed< dZee ed	< dZee ed
< dZeeeef  ed< defddZdeeeeeeeeef f ddf fddZedefddZ edefddZ!edefddZ"edefddZ#edefddZ$edee% fddZ&dedefddZ'edee fd d!Z(edee fd"d#Z)edee fd$d%Z*edee fd&d'Z+ee,dee fd(d)Z-edee fd*d+Z.edefd,d-Z/e0e,dee1j2 fd.d/Z3e0e,dee1j2 fd0d1Z4e0e,deee5j6ee5j6 f  fd2d3Z7	4	5	5	5dd6ed7ed8ed9edd:f
d;d<Z8			=dd>ed?ee1j2e5j6f d@ee dAee dBedd:fdCdDZ9ddEdFZ:ddGdHZ;ddIdJZ<ddKdLZ=ddMdNZ>	=ddOedPedd fdQdRZ?	ddSe@dTeAdUeeB dd fdVdWZCdXdd5d=ddYdZedee d[ed\ed]eeeeDf  dd fd^d_ZEd`d=d5dadedbed\edcedeFf
dddeZGdddeHdfd=dfdedgedhedieIdbed\edjeeeeeeIf f  deFfdkdlZJ	=	ddmednedoee dd fdpdqZKddreIdnedd fdsdtZLddreIdnedd fdudvZMddreIdnedd fdwdxZN	5ddyedzednedd fd{d|ZO	=dd}eIdnedd fd~dZPddnedd fddZQe,dd5d=d5dgddfded dedednede	e dee dee dd fddZR	=	X	5		5ddedeIdedee dnedd fddZS			=ddyeTdeIdedd fddZUdeVegef dd fddZWdeVegef dd fddZXe,		ddedeeVeeYe gef  dd fddZZe[e,d?edd fddZ\de]dd fddZ^de]dd fddZ_dedeee1j2e`f dd fddZadS )DataCuta  
    :class:`~lhotse.cut.DataCut` is a base class for cuts that point to actual audio data.
    It can be either a :class:`~lhotse.cut.MonoCut` or a :class:`~lhotse.cut.MultiCut`.
    This is as opposed to :class:`~lhotse.cut.MixedCut`, which is simply an operation on
    a collection of cuts.

    See also:

        - :class:`lhotse.cut.MonoCut`
        - :class:`lhotse.cut.MultiCut`
    idstartdurationchannel)default_factorysupervisionsNfeatures	recordingcustomreturnc                 C   sj   t | }| jr| j |d< | jd ur*| j D ]\}}t|tr)| |d |< qi |dt| j	iS )Nr;   r<   type)
r(   has_recordingr;   to_dictr<   items
isinstancer   r>   __name__)selfdkv rH   C/home/ubuntu/.local/lib/python3.10/site-packages/lhotse/cut/data.pyr@   Z   s   

zDataCut.to_dictc                 c   s`    | j r
d| jfV  | jrd| jfV  | jpi  D ]\}}t|ttt	t
tfr-||fV  qdS )a  
        Iterate over each data piece attached to this cut.
        Returns a generator yielding tuples of ``(key, manifest)``, where
        ``key`` is the name of the attribute under which ``manifest`` is found.
        ``manifest`` is of type :class:`~lhotse.Recording`, :class:`~lhotse.Features`,
        :class:`~lhotse.TemporalArray`, :class:`~lhotse.Array`, or :class:`~lhotse.Image`.

        For example, if ``key`` is ``recording``, then ``manifest`` is ``self.recording``.
        r;   r:   N)r?   r;   has_featuresr:   r<   rA   rB   r   r   r   r   r!   )rD   rF   rG   rH   rH   rI   	iter_datad   s   
zDataCut.iter_datac                 C   s   t dd |  D S )Nc                 s   s    | ]\}}|j V  qd S N)is_in_memory.0rF   rG   rH   rH   rI   	<genexpr>|   s    z'DataCut.is_in_memory.<locals>.<genexpr>)anyrK   rD   rH   rH   rI   rM   z      zDataCut.is_in_memoryc                 C   s   | j r| jjS | jjS rL   )r?   r;   r4   r:   recording_idrR   rH   rH   rI   rT   ~   rS   zDataCut.recording_idc                 C   
   | j d uS rL   r:   rR   rH   rH   rI   rJ         
zDataCut.has_featuresc                 C   rU   rL   r;   rR   rH   rH   rI   r?      rW   zDataCut.has_recordingc                 C   s   | j o| jjS rL   )r?   r;   	has_videorR   rH   rH   rI   rY      s   zDataCut.has_videoc                 C   s,   | j r| jj}|jt| j|jtddS d S )N)rounding)
num_frames)r?   r;   video	copy_withr*   r6   fpsr   )rD   rG   rH   rH   rI   r\      s   
zDataCut.videor   c                 C   s>   |dkr| j S |dkr| jS |dkr| jS | jd uo|| jv S )Nr;   r:   r\   )r?   rJ   rY   r<   )rD   r   rH   rH   rI   has   s   zDataCut.hasc                 C      | j r| jjS d S rL   )rJ   r:   frame_shiftrR   rH   rH   rI   ra         zDataCut.frame_shiftc                 C   s   | j rt| j| j| jdS d S )N)r6   ra   sampling_rate)rJ   r)   r6   ra   rc   rR   rH   rH   rI   r[      s   zDataCut.num_framesc                 C   s   | j r
t| j| jS d S rL   )r?   r*   r6   rc   rR   rH   rH   rI   num_samples   s
   zDataCut.num_samplesc                 C   r`   rL   )rJ   r:   num_featuresrR   rH   rH   rI   re      rb   zDataCut.num_featuresc                 C      d S rL   rH   rR   rH   rH   rI   num_channels      zDataCut.num_channelsc                 C   r`   rL   )rJ   r:   r>   rR   rH   rH   rI   features_type   rb   zDataCut.features_typec                 C   s   | j r| jjS | jjS rL   )rJ   r:   rc   r;   rR   rH   rH   rI   rc      s
   zDataCut.sampling_ratec                 K   rf   rL   rH   rD   kwargsrH   rH   rI   load_features   rh   zDataCut.load_featuresc                 K   rf   rL   rH   rj   rH   rH   rI   
load_audio   rh   zDataCut.load_audioc                 K   rf   rL   rH   rj   rH   rH   rI   
load_video   s   zDataCut.load_videoflacTaudio_formatrm   rl   load_customr   c           	         s   |rj s	j}njjjjj|d}|rjsj}n
jjjjd}|r0jdu r4j}nddl	m
 m  fddj D }td|||d	}|S )
a  
        Load data (audio, features, or custom arrays) into memory and attach them
        to a copy of the manifest. This is useful when you want to store cuts together
        with the actual data in some binary format that enables sequential data reads.

        Audio is encoded with ``audio_format`` (compatible with ``torchaudio.save``),
        floating point features are encoded with lilcom, and other arrays are pickled.
        )channelsoffsetr6   formatr5   r6   Nr   r   c                    sD   i | ]\}}|t | r| nt |r|jjjd n|qS )ru   )rB   move_to_memoryr5   r6   rN   r   r   rD   rH   rI   
<dictcomp>
  s    z*DataCut.move_to_memory.<locals>.<dictcomp>        )r5   r;   r:   r<   )r?   r;   rv   r7   r5   r6   rJ   r:   r<   lhotse.arrayr   r   rA   r+   )	rD   rp   rm   rl   rq   r;   r:   r<   cutrH   rw   rI   rv      s6   


zDataCut.move_to_memoryFnamedatara   temporal_dim
compressedc           
      C   s   ddl m}m} t| | jdur| j ni d}|r| n| }	t|tjr*|	 }|	 t
|||	j|j||||jd W d   |S 1 sHw   Y  |S )a  
        Attach a tensor to this MonoCut, described with an :class:`~lhotse.array.Array` manifest.
        The attached data is stored in-memory for later use, and can be accessed by
        calling ``cut.load_<name>()`` or :meth:`cut.load_custom`.

        This is useful if you want actions such as truncate/pad to propagate to the tensor, e.g.::

            >>> cut = MonoCut(id="c1", start=2, duration=8, ...)
            >>> cut = cut.attach_tensor(
            ...     "alignment",
            ...     torch.tensor([0, 0, 0, ...]),
            ...     frame_shift=0.1,
            ...     temporal_dim=0,
            ... )
            >>> half_alignment = cut.truncate(duration=4.0).load_alignment()

        .. note:: This object can't be stored in JSON/JSONL manifests anymore.

        :param name: attribute under which the data can be found.
        :param data: PyTorch tensor or numpy array.
        :param frame_shift: Optional float, when the array has a temporal dimension
            it indicates how much time has passed between the starts of consecutive frames
            (expressed in seconds).
        :param temporal_dim: Optional int, when the array has a temporal dimension,
            it indicates which dim to interpret as temporal.
        :param compressed: When True, we will apply lilcom compression to the array.
            Only applicable to arrays of floats.
        :return:
        r   )MemoryLilcomWriterMemoryRawWriterNr<   )keyvaluera   r~   r5   )lhotse.features.ior   r   r+   r<   copyrB   torchTensornumpysetattrstore_arrayr4   r5   )
rD   r|   r}   ra   r~   r   r   r   cpywriterrH   rH   rI   attach_tensor!  s.   %
zDataCut.attach_tensorc                 C   $   | j sJ d| j dt| ddS )zKReturn a copy of the current :class:`.DataCut`, detached from ``features``.zBCannot detach features from a DataCut with no Recording (cut ID = ).NrV   )r?   r4   r+   rR   rH   rH   rI   drop_features\  
   zDataCut.drop_featuresc                 C   r   )zLReturn a copy of the current :class:`.DataCut`, detached from ``recording``.zBCannot detach recording from a DataCut with no Features (cut ID = r   NrX   )rJ   r4   r+   rR   rH   rH   rI   drop_recordingc  r   zDataCut.drop_recordingc                 C   s   t | g dS )zOReturn a copy of the current :class:`.DataCut`, detached from ``supervisions``.r9   r+   rR   rH   rH   rI   drop_supervisionsj  s   zDataCut.drop_supervisionsc                 C   s   t | dd | jD dS )zMReturn a copy of the current :class:`.DataCut`, detached from ``alignments``.c                 S   s   g | ]}t |i d qS ))	alignmentr   rO   srH   rH   rI   
<listcomp>q      z+DataCut.drop_alignments.<locals>.<listcomp>r   r+   r9   rR   rH   rH   rI   drop_alignmentsn  s   zDataCut.drop_alignmentsc                 C   s   ddl m} d}| jdur-| j }|D ]}|| }t|ttttfr,|j	r,||||< qt
| | jr;| jj	r;|| jn| j| jrL| jj	rL|| j|dS | j|dS )a
  
        Return a copy of the current :class:`.DataCut`, detached from any in-memory data.
        The manifests for in-memory data are converted into placeholders that can still be looked up for
        metadata, but will fail on attempts to load the data.
        r   )to_shar_placeholderN)r;   r:   r<   )lhotse.shar.utilsr   r<   r   rB   r   r   r   r   rM   r+   r?   r;   rJ   r:   )rD   r   r<   rF   rG   rH   rH   rI   drop_in_memory_datat  s8   


zDataCut.drop_in_memory_data	add_empty	shrink_okc                 C   s   t | jdkr|s| S t| j| jd| j| jdg}nMt | jdks,J dt | j d| jd }t|jdr@t|j| jr@| S |jdk sK|j	| j	kr^|s^t
d|j d|j	 d| j d	t|d| jd
g}t| |dS )a  
        Fills the whole duration of a cut with a supervision segment.

        If the cut has one supervision, its start is set to 0 and duration is set to ``cut.duration``.
        Note: this may either expand a supervision that was shorter than a cut, or shrink a supervision
        that exceeds the cut.

        If there are no supervisions, we will add an empty one when ``add_empty==True``, otherwise
        we won't change anything.

        If there are two or more supervisions, we will raise an exception.

        :param add_empty: should we add an empty supervision with identical time bounds as the cut.
        :param shrink_ok: should we raise an error if a supervision would be shrank as a result
            of calling this method.
        r   )r4   rT   r5   r6   r7      z/Cannot expand more than one supervision (found .z!Cannot shrink supervision (start=z, end=z) to cut (start=0, duration=aU  ) because the argument `shrink_ok` is `False`. Note: this check prevents accidental data loss for speech recognition, as supervision exceeding a cut indicates there might be some spoken content beyond cuts start or end (an ASR model would be trained to predict more text than spoken in the audio). If this is okay, set `shrink_ok` to `True`.ru   r   )lenr9   r"   r4   rT   r6   r7   r   r5   end
ValueErrorr+   )rD   r   r   supsold_suprH   rH   rI   fill_supervision  s2   
zDataCut.fill_supervision	extractorstorage
augment_fnc                 O   s,   |j |  || j| j| j|d}t| |dS )a  
        Compute the features from this cut, store them on disk, and attach a feature manifest to this cut.
        This cut has to be able to load audio.

        :param extractor: a ``FeatureExtractor`` instance used to compute the features.
        :param storage: a ``FeaturesWriter`` instance used to write the features to a storage.
        :param augment_fn: an optional callable used for audio augmentation.
        :return: a new ``MonoCut`` instance with a ``Features`` manifest attached to it.
        )samplesr   rc   rs   r7   r   rV   )extract_from_samples_and_storerm   rc   r5   r7   r+   )rD   r   r   r   argsrk   features_inforH   rH   rI   compute_and_store_features  s   	z"DataCut.compute_and_store_featuresry   )rs   r6   keep_excessive_supervisionspreserve_id_supervisions_indexrs   r   r   r   c             	      s  dksJ d dt t| j| jdd}t|dur |n| j| jd}t| | jd}|dks;J d| t||| j | j | jd}	|	dkrVt||	 | jd}|du r{|r^tnt td|dfd	d
| jD }
 fdd|
D }n>|| j	 }|r|j
| d}n|jd | d d}g }|D ]}t|jt| }|dkr||j  qt| |r| j	ntt ||t|dd ddS )a  
        Returns a new MonoCut that is a sub-region of the current DataCut.

        Note that no operation is done on the actual features or recording -
        it's only during the call to :meth:`DataCut.load_features` / :meth:`DataCut.load_audio`
        when the actual changes happen (a subset of features/audio is loaded).

        .. hint::

            To extend a cut by a fixed duration, use the :meth:`DataCut.extend_by` method.

        :param offset: float (seconds), controls the start of the new cut relative to the current DataCut's start.
            E.g., if the current DataCut starts at 10.0, and offset is 2.0, the new start is 12.0.
        :param duration: optional float (seconds), controls the duration of the resulting DataCut.
            By default, the duration is (end of the cut before truncation) - (offset).
        :param keep_excessive_supervisions: bool. Since trimming may happen inside a SupervisionSegment,
            the caller has an option to either keep or discard such supervisions.
        :param preserve_id: bool. Should the truncated cut keep the same ID or get a new, random one.
        :param _supervisions_index: an IntervalTree; when passed, allows to speed up processing of Cuts with a very
            large number of supervisions. Intended as an internal parameter.
        :return: a new MonoCut instance. If the current DataCut is shorter than the duration, return None.
        r   z3Offset for truncate must be non-negative (provided r   rc   Nry   znew_duration=)r5   r   c                 3   s    | ]	}|   V  qd S rL   )with_offsetrO   segment)rs   rH   rI   rP     s    
z#DataCut.truncate.<locals>.<genexpr>c                    s   g | ]	} |r|qS rH   rH   r   )	criterionnew_time_spanrH   rI   r      s    z$DataCut.truncate.<locals>.<listcomp>)beginr   gMbP?g{Gz?c                 S      | j S rL   r5   r   rH   rH   rI   <lambda>C      z"DataCut.truncate.<locals>.<lambda>r   r4   r5   r6   r9   )maxr'   r5   rc   r6   r.   r/   r&   r9   r4   overlapenvelopr-   r}   appendr   r+   strr2   sorted)rD   rs   r6   r   r   r   	new_startuntilnew_durationduration_past_endnew_supervisionsr9   tree	intervalsinterval
olap_ratiorH   )r   r   rs   rI   truncate  sl   
 



zDataCut.truncateboth)	directionr   pad_silencer   r   c             	      s  ddl m} |dksJ d| djj d\}}|dks%|dkr;j| dk r3|r3|j }tj| d|dksC|dkraj| jjkrW|rW|jjj  }tj| jj t  j	d	}fd
dj
D }	dtt|f dtf fdd}
i }jr|
jrtd d|d< i }jdurĈj D ]\}}|||< t||r|
|rtd| d d||< qtf|r̈jntt |t|	dd dd|d|i}|dkr|j|j| d|d}|dkr|j|j| d|d}|S )aD	  
        Returns a new Cut (DataCut or MixedCut) that is an extended region of the current DataCut by extending
        the cut by a fixed duration in the specified direction.

        Note that no operation is done on the actual features or recording -
        it's only during the call to :meth:`DataCut.load_features` / :meth:`DataCut.load_audio`
        when the actual changes happen (an extended version of features/audio is loaded).

        .. hint::

            This method extends a cut by a given duration, either to the left or to the right (or both), using
            the "real" content of the recording that the cut is part of. For example, a DataCut spanning
            the region from 2s to 5s in a recording, when extended by 2s to the right, will now span
            the region from 2s to 7s in the same recording (provided the recording length exceeds 7s).
            If the recording is shorter, additional silence will be padded to achieve the desired duration
            by default. This behavior can be changed by setting ``pad_silence=False``.
            Also see :meth:`DataCut.pad` which pads a cut "to" a specified length.
            To "truncate" a cut, use :meth:`DataCut.truncate`.

        .. hint::

            If `pad_silence` is set to False, then the cut will be extended only as much as allowed
            within the recording's boundary.

        .. hint::

            If `direction` is "both", the resulting cut will be extended by the specified duration in
            both directions. This is different from the usage in :meth:`MonoCut.pad` where a padding
            equal to 0.5*duration is added to both sides.

        :param duration: float (seconds), specifies the duration by which the cut should be extended.
        :param direction: string, 'left', 'right' or 'both'. Determines whether to extend on the left,
            right, or both sides. If 'both', extend on both sides by the duration specified in `duration`.
        :param preserve_id: bool. Should the extended cut keep the same ID or get a new, random one.
        :param pad_silence: bool. Should the cut be padded with silence if the recording is shorter than
            the desired duration. If False, the cut will be extended only as much as allowed within the
            recording's boundary.
        :return: a new MonoCut instance.
        r   )r   z(Duration must be non-negative (provided r   )r   r   leftr   rightr   c                 3   s*    | ]}| tj  jd V  qdS )r   N)r   r'   r5   rc   r   )r   rD   rH   rI   rP     s    
z$DataCut.extend_by.<locals>.<genexpr>	attributer=   c                    sT   t | jj}t  | jj}t | j| jj}|| j }||d k p)||d kS )Nr   )r)   ra   rc   r5   r[   )r   new_start_framesnew_end_framesattribute_startattribute_endnew_endr   rD   rH   rI   _this_exceeds_duration  s   



z1DataCut.extend_by.<locals>._this_exceeds_durationzvAttempting to extend a MonoCut that exceeds the range of pre-computed features. The feature manifest will be detached.Nr:   zSAttempting to extend a MonoCut that exceeds the range of pre-computed custom data 'z$'. The custom data will be detached.c                 S   r   rL   r   r   rH   rH   rI   r     r   z#DataCut.extend_by.<locals>.<lambda>r   r   r<   )r6   r   r   )rz   r   r5   r   r   r;   r6   minr'   rc   r9   r   r   boolrJ   r:   loggingwarningr<   rA   rB   r+   r4   r   r2   r   pad)rD   r6   r   r   r   r   pad_left	pad_rightr   r   r   feature_kwargscustom_kwargsr|   arrayr{   rH   r   rI   	extend_byF  s|   /

"




zDataCut.extend_byr   r[   rd   pad_feat_valuepad_value_dictc           	   
   C   s$   ddl m} || |||||||dS )a  
        Return a new MixedCut, padded with zeros in the recording, and ``pad_feat_value`` in each feature bin.

        The user can choose to pad either to a specific `duration`; a specific number of frames `num_frames`;
        or a specific number of samples `num_samples`. The three arguments are mutually exclusive.

        :param duration: The cut's minimal duration after padding.
        :param num_frames: The cut's total number of frames after padding.
        :param num_samples: The cut's total number of samples after padding.
        :param pad_feat_value: A float value that's used for padding the features.
            By default we assume a log-energy floor of approx. -23 (1e-10 after exp).
        :param direction: string, 'left', 'right' or 'both'. Determines whether the padding is added before or after
            the cut.
        :param preserve_id: When ``True``, preserves the cut ID before padding.
            Otherwise, a new random ID is generated for the padded cut (default).
        :param pad_value_dict: Optional dict that specifies what value should be used
            for padding arrays in custom attributes.
        :return: a padded MixedCut if duration is greater than this cut's duration, otherwise ``self``.
        r   )r   )r6   r[   rd   r   r   r   r   )setr   )	rD   r6   r[   rd   r   r   r   r   r   rH   rH   rI   r     s   zDataCut.padrc   affix_idrecording_fieldc                 C   sp   | j sJ d| j}| j}|du r||}ni |||| |i}t| |r0| j d| n| j|d|dS )a  
        Return a new ``DataCut`` that will lazily resample the audio while reading it.
        This operation will drop the feature manifest, if attached.
        It does not affect the supervision.

        :param sampling_rate: The new sampling rate.
        :param affix_id: Should we modify the ID (useful if both versions of the same
            cut are going to be present in a single manifest).
        :param recording_field: which recording field to resample.
        :return: a modified copy of the current ``DataCut``.
        z,Cannot resample a DataCut without Recording.N_rs)r4   r;   r:   r<   )r?   r<   r;   resampler+   r4   )rD   rc   r   r   r<   r;   rH   rH   rI   r     s"   zDataCut.resamplefactorc           	            j sJ djrtd d_jj d} fddjD }tt	j
j}|j }tj}|j }t rLj d nj||||dS )	am  
        Return a new ``DataCut`` that will lazily perturb the speed while loading audio.
        The ``num_samples``, ``start`` and ``duration`` fields are updated to reflect the
        shrinking/extending effect of speed.
        We are also updating the time markers of the underlying ``Recording`` and the supervisions.

        :param factor: The speed will be adjusted this many times (e.g. factor=1.1 means 1.1x faster).
        :param affix_id: When true, we will modify the ``MonoCut.id`` field
            by affixing it with "_sp{factor}".
        :return: a modified copy of the current ``DataCut``.
        4Cannot perturb speed on a DataCut without Recording.zAttempting to perturb speed on a DataCut that references pre-computed features. The feature manifest will be detached, as we do not support feature-domain speed perturbation.Nr   r   c                       g | ]}|j j d qS )r   rc   r   )perturb_speedrc   r   r   r   rD   rH   rI   r   9      z)DataCut.perturb_speed.<locals>.<listcomp>_spr4   r;   r9   r6   r5   )r?   rJ   r   r   r:   r;   r   r9   r0   r*   r5   rc   rd   r+   r4   	rD   r   r   recording_spsupervisions_spstart_samplesr   new_num_samplesr   rH   r   rI   r     s6   

zDataCut.perturb_speedc           	         r   )	a  
        Return a new ``DataCut`` that will lazily perturb the tempo while loading audio.

        Compared to speed perturbation, tempo preserves pitch.
        The ``num_samples``, ``start`` and ``duration`` fields are updated to reflect the
        shrinking/extending effect of speed.
        We are also updating the time markers of the underlying ``Recording`` and the supervisions.

        :param factor: The tempo will be adjusted this many times (e.g. factor=1.1 means 1.1x faster).
        :param affix_id: When true, we will modify the ``MonoCut.id`` field
            by affixing it with "_tp{factor}".
        :return: a modified copy of the current ``DataCut``.
        r   zAttempting to perturb tempo on a DataCut that references pre-computed features. The feature manifest will be detached, as we do not support feature-domain speed perturbation.Nr   c                    r   r   )perturb_temporc   r   r   rH   rI   r   m  r   z)DataCut.perturb_tempo.<locals>.<listcomp>_tpr   )r?   rJ   r   r   r:   r;   r  r9   r0   r*   r5   rc   rd   r+   r4   r   rH   r   rI   r  O  s6   

zDataCut.perturb_tempoc                    sp   | j sJ d| jrtd d| _| jj d} fdd| jD }t|  r1| j	 d n| j	||dS )	a  
        Return a new ``DataCut`` that will lazily perturb the volume while loading audio.

        :param factor: The volume will be adjusted this many times (e.g. factor=1.1 means 1.1x louder).
        :param affix_id: When true, we will modify the ``DataCut.id`` field
            by affixing it with "_vp{factor}".
        :return: a modified copy of the current ``DataCut``.
        z5Cannot perturb volume on a DataCut without Recording.zAttempting to perturb volume on a DataCut that references pre-computed features. The feature manifest will be detached, as we do not support feature-domain volume perturbation.Nr   c                       g | ]	}|j  d qS )r   )perturb_volumer   r   r   rH   rI   r     s    z*DataCut.perturb_volume.<locals>.<listcomp>_vpr4   r;   r9   )
r?   rJ   r   r   r:   r;   r  r9   r+   r4   )rD   r   r   recording_vpsupervisions_vprH   r  rI   r    s&   zDataCut.perturb_volumecodecrestore_orig_src                    sr   | j sJ d| jrtd d| _| jj| d} fdd| jD }t|  r2| j	 d n| j	||dS )	ao  
        Return a new ``DataCut`` that will lazily apply narrowband effect.

        :param codec: Codec name.
        :param restore_orig_sr: Restore original sampling rate.
        :param affix_id: When true, we will modify the ``DataCut.id`` field
            by affixing it with "_nb_{codec}".
        :return: a modified copy of the current ``DataCut``.
        z>Cannot apply narrowband effect on a DataCut without Recording.zAttempting to apply narrowband effect on a DataCut that references pre-computed features. The feature manifest will be detached, as we do not support feature-domain volume perturbation.N)r  r  r   c                    r  ))r  r   )
narrowbandr   r   r  rH   rI   r     s    z&DataCut.narrowband.<locals>.<listcomp>_nb_r	  )
r?   rJ   r   r   r:   r;   r  r9   r+   r4   )rD   r  r  r   recording_nbsupervisions_nbrH   r  rI   r    s*   zDataCut.narrowbandtargetc                    sp   | j sJ d| jrtd d| _| jj d} fdd| jD }t|  r1| j	 d n| j	||dS )	aF  
        Return a new ``DataCut`` that will lazily apply loudness normalization.

        :param target: The target loudness in dBFS.
        :param affix_id: When true, we will modify the ``DataCut.id`` field
            by affixing it with "_ln{target}".
        :return: a modified copy of the current ``DataCut``.
        zCCannot apply loudness normalization on a DataCut without Recording.zAttempting to normalize loudness on a DataCut that references pre-computed features. The feature manifest will be detached, as we do not support feature-domain loudness normalization.N)r  r   c                    sF   g | ]}t | r|j d  n|j r|j d  n|jdqS )_lnr4   rT   r+   r4   rT   r   r   r  rH   rI   r     s    z.DataCut.normalize_loudness.<locals>.<listcomp>r  r	  )
r?   rJ   r   r   r:   r;   normalize_loudnessr9   r+   r4   )rD   r  r   rk   recording_lnsupervisions_lnrH   r  rI   r    s*   
zDataCut.normalize_loudnessc                    sh   | j sJ d| jrtd d| _| jj d} fdd| jD }t|  r-| j	 dn| j	||dS )	a  
        Return a new ``DataCut`` that will lazily apply WPE dereverberation.

        :param affix_id: When true, we will modify the ``DataCut.id`` field
            by affixing it with "_wpe".
        :return: a modified copy of the current ``DataCut``.
        z0Cannot apply WPE on a DataCut without Recording.zAttempting to de-reverberate a DataCut that references pre-computed features. The feature manifest will be detached, as we do not support feature-domain de-reverberation.Nr   c                    s>   g | ]}t | r|j d n|j r|j d n|jdqS )_wper  r  r   r  rH   rI   r     s    z(DataCut.dereverb_wpe.<locals>.<listcomp>r  r	  )
r?   rJ   r   r   r:   r;   dereverb_wper9   r+   r4   )rD   r   recording_wpesupervisions_wperH   r  rI   r    s    	
zDataCut.dereverb_wper   rir_recordingr   normalize_output
early_onlyrir_channelsroom_rng_seedsource_rng_seedc                 C   rf   rL   rH   )rD   r   r!  r"  r   r#  r$  r%  rH   rH   rI   
reverb_rir  s   zDataCut.reverb_rir   hardgain_db	normalizeoversamplingc                 C   s^   | j sJ d| jrtd | jj|||||d}t| |r)| j d| |dS | j|dS )a  
        Return a new ``DataCut`` that will lazily apply clipping while loading audio.

        :param hard: If True, apply hard clipping (sharp cutoff); otherwise, apply soft clipping (saturation).
        :param gain_db: The amount of gain in decibels to apply before clipping.
        :param normalize: If True, normalize the input signal to 0 dBFS before applying clipping.
        :param oversampling: If provided, we will oversample the input signal by the given integer factor before applying saturation and then downsample back to the original sampling rate.
        :param affix_id: When true, we will modify the ``DataCut.id`` field
            by affixing it with "_cl{gain_db}".
        :return: a modified copy of the current ``DataCut``.
        z7Cannot apply saturation on a DataCut without Recording.zAttempting to apply saturation on a DataCut that references pre-computed features. The feature manifest will be detached, as we do not support feature-domain saturation.)r(  r)  r*  r+  r   _cl)r4   r;   )r?   rJ   r   r   r;   clip_amplituder+   r4   )rD   r(  r)  r*  r+  r   recording_saturatedrH   rH   rI   r-  +  s.   zDataCut.clip_amplitudeopusGz?compression_levelcompress_custom_fieldsc                    sh   | j sJ d| j}|r(t|tr(tdd | D r( fdd| D }t| | j	 |dS )a  
        Return a copy of this Cut that has its Recordings processed by a lossy audio encoder.

        :param codec: The codec to use for compression. Supported codecs are "opus", "mp3", "vorbis", "gsm".
        :param compression_level: The level of compression (from 0.0 to 1.0, higher values correspond to higher compression).
        :param compress_custom_fields: Whether to also compress any custom recording fields in the Cut.

        :return: A modified :class:`~lhotse.DataCut` containing audio processed by a codec
        z.Cannot compress a DataCut without a Recording.c                 s   s    | ]}t |tV  qd S rL   )rB   r   )rO   rG   rH   rH   rI   rP   i  s    

z#DataCut.compress.<locals>.<genexpr>c                    s,   i | ]\}}|t |tr| n|qS rH   )rB   r   compressrN   r  r1  rH   rI   rx   l  s    z$DataCut.compress.<locals>.<dictcomp>)r;   r<   )
r?   r<   rB   dictrQ   valuesrA   r+   r;   r3  )rD   r  r1  r2  r<   rH   r4  rI   r3  V  s   zDataCut.compresstransform_fnc                        t |  fdd| jD d}|S )z
        Return a copy of the cut that has its supervisions transformed by ``transform_fn``.

        :param transform_fn: a function that modifies a supervision as an argument.
        :return: a modified MonoCut.
        c                    s   g | ]}|  qS rH   )mapr   r7  rH   rI   r     s    z,DataCut.map_supervisions.<locals>.<listcomp>r   r   )rD   r7  new_cutrH   r:  rI   map_supervisionsy  s   	zDataCut.map_supervisions	predicatec                    r8  )a  
        Return a copy of the cut that only has supervisions accepted by ``predicate``.

        Example::

            >>> cut = cut.filter_supervisions(lambda s: s.id in supervision_ids)
            >>> cut = cut.filter_supervisions(lambda s: s.duration < 5.0)
            >>> cut = cut.filter_supervisions(lambda s: s.text is not None)

        :param predicate: A callable that accepts `SupervisionSegment` and returns bool
        :return: a modified MonoCut
        c                    s   g | ]} |r|qS rH   rH   r   r=  rH   rI   r     r   z/DataCut.filter_supervisions.<locals>.<listcomp>r   r   )rD   r=  r;  rH   r>  rI   filter_supervisions  s   zDataCut.filter_supervisions	delimitermerge_policycustom_merge_fnc                 K   rf   rL   rH   )rD   rA  rB  rk   rH   rH   rI   merge_supervisions  s   zDataCut.merge_supervisionsc                 C   rf   rL   rH   )r}   rH   rH   rI   	from_dict  rh   zDataCut.from_dictpathc                 C      | j s| S t| | j|dS )NrV   )rJ   r+   r:   with_path_prefixrD   rE  rH   rH   rI   with_features_path_prefix     z!DataCut.with_features_path_prefixc                 C   rF  )NrX   )r?   r+   r;   rG  rH  rH   rH   rI   with_recording_path_prefix  rJ  z"DataCut.with_recording_path_prefixr   path_or_objectc                 C   s
  t dsJ dddlm} ddlm} t| | jdur | j ni d}t|t	t
frcddlm} ||}|j\}}	W d   n1 sFw   Y  t
|}
t	|
j}t	|
j}|d||||	d	}n| }| |||}W d   n1 syw   Y  ||j|< |S )
a  
        Attach an image to this cut, wrapped in an Image class and stored
        under `key` in the `custom` dict.

        The image can be specified as:
        - A path to an image file
        - A numpy array with shape (height, width, channels)
        - Raw bytes of an image file

        Example::

            >>> cut = cut.attach_image('thumbnail', 'path/to/image.jpg')
            >>> # Access the image later
            >>> img_array = cut.load_thumbnail()  # Returns numpy array

        :param key: The key to store the image under in the custom dict.
        :param path_or_object: The image as a path, numpy array, or bytes.
        :return: A new DataCut with the image attached.
        PILz7In order to use images, please run 'pip install pillow'r   r    )PillowInMemoryWriterNr   pillow_files)storage_typestorage_pathstorage_keywidthheight)r,   lhotse.image.imager!   lhotse.image.iorN  r+   r<   r   rB   r   r   	PIL.Imageopensizer|   parentstore_image)rD   r   rL  r!   rN  r   PILImageimgrS  rT  rE  rR  rQ  image_manifestr   rH   rH   rI   attach_image  s>   

	
zDataCut.attach_image)ro   TTT)NNF)r=   r3   )TFrL   )FN)T)TT)F)Fry   Tr'  T)r/  r0  F)r@  N)brC   
__module____qualname____doc__r   __annotations__r%   r   intr   r   listr9   r"   r:   r   r   r;   r   r<   r   r	   r5  r@   r   r   r   r   r!   rK   propertyr   rM   rT   rJ   r?   rY   r   r\   r_   ra   r[   rd   re   r   rg   ri   rc   r1   npndarrayrl   rm   r   r   rn   rv   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r#   floatr   r   r   r  r  r  r  r  r&  r-  r   r3  r
   r<  r?  r   rC  staticmethodrD  r$   rI  rK  bytesr_  rH   rH   rH   rI   r3   6   s<  
 




D

;




7

j
 	
-
%24$
(
-$	
-
#

r3   )	metaclass)Fr   abcr   r   dataclassesr   r   decimalr   mathr   pathlibr   typingr	   r
   r   r   r   r   r   r   r   r   r   rg  r   intervaltreer   rz   r   r   lhotse.audior   r   lhotse.augmentationr   lhotse.augmentation.compressr   lhotse.customr   lhotse.cut.baser   lhotse.featuresr   r   r   r   lhotse.imager!   lhotse.supervisionr"   lhotse.utilsr#   r$   r%   r&   r'   r(   r)   r*   r+   r,   r-   r.   r/   r0   r1   r2   r3   rH   rH   rH   rI   <module>   s.    0H