o
    SiH                     @   s   d dl Z d dlmZ d dlmZmZmZmZmZm	Z	m
Z
mZ d dlZd dlZd dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZmZmZmZmZmZm Z m!Z! eG d
d deZ"dS )    N)	dataclass)AnyCallableDictIterableListOptionalTupleUnion)	Recording)	VideoInfo)Cut)FeatureExtractor)SupervisionSegment)LOG_EPSILONPathlikeSecondscompute_num_framescompute_num_samplesfastcopyperturb_num_samplesuuid4c                   @   s  e Zd ZU dZeed< eed< eed< eed< dZ	e
e ed< dZe
e ed< dZe
e ed	< dZe
e ed
< dZe
e ed< dZe
e ed< edefddZedd ZedefddZedefddZedefddZedefddZedefddZedefddZdedefdd Zedefd!d"Zdefd#d$Zde
e j! fd%d&Z"de
e j! fd'd(Z#	)dyd*ede
e$e%j&e
e%j& f  fd+d,Z'd-dd)d.d/d0ede
e d1ed2edd f
d3d4Z(d5d.d)d6ded7ed2ed8edd f
d9d:Z)ddde*d;d.dfdeded
ed<ed7ed2ed=e
e+ee,eef f  de-fd>d?Z.	.	dzded@edAe
e dd fdBdCZ/dydDed@edd fdEdFZ0dydDed@edd fdGdHZ1dydDed@edd fdIdJZ2dd)d.d)dKgddfdLe
dM dNedOed@edPe3e dQe
e dRe
e dd fdSdTZ4	.d{dUed@edd fdVdWZ5d|dXdYZ6d|dZd[Z7d|d\d]Z8d|d^d_Z9d|d`daZ:dbe;de-fdcddZ<d|dedfZ=d|dgdhZ>die?e@ge@f dd fdjdkZAd|dldmZBdne?eCgef dd fdodpZDeEdqedd fdrdsZFdteGdd fdudvZHdteGdd fdwdxZIdS )}
PaddingCutaG  
    :class:`~lhotse.cut.PaddingCut` is a dummy :class:`~lhotse.cut.Cut` that doesn't refer to
    actual recordings or features --it simply returns zero samples in the time domain
    and a specified features value in the feature domain.
    Its main role is to be appended to other cuts to make them evenly sized.

    Please refer to the documentation of :class:`~lhotse.cut.Cut` to learn more about using cuts.

    See also:

        - :class:`lhotse.cut.Cut`
        - :class:`lhotse.cut.MonoCut`
        - :class:`lhotse.cut.MixedCut`
        - :class:`lhotse.cut.CutSet`
    iddurationsampling_rate
feat_valueN
num_framesnum_featuresframe_shiftnum_samplesvideocustomreturnc                 C      dS Nr    selfr&   r&   F/home/ubuntu/.local/lib/python3.10/site-packages/lhotse/cut/padding.pystart<      zPaddingCut.startc                 C   s   g S Nr&   r'   r&   r&   r)   supervisions@   r+   zPaddingCut.supervisionsc                 C   r$   r%   r&   r'   r&   r&   r)   channelD   r+   zPaddingCut.channelc                 C   
   | j d uS r,   )r   r'   r&   r&   r)   has_featuresH      
zPaddingCut.has_featuresc                 C   r/   r,   r    r'   r&   r&   r)   has_recordingL   r1   zPaddingCut.has_recordingc                 C   s   | j o| jd uS r,   )r3   r!   r'   r&   r&   r)   	has_videoP   s   zPaddingCut.has_videoc                 C   r$   N   r&   r'   r&   r&   r)   num_channelsT   r+   zPaddingCut.num_channelsc                 C   r$   )NFr&   r'   r&   r&   r)   is_in_memoryX   r+   zPaddingCut.is_in_memoryfieldc                 C   s>   |dkr| j S |dkr| jS |dkr| jS | jd uo|| jv S )N	recordingfeaturesr!   )r3   r0   r4   r"   )r(   r9   r&   r&   r)   has\   s   zPaddingCut.hasc                 C   r$   )NPADr&   r'   r&   r&   r)   recording_idf   r+   zPaddingCut.recording_idc                 C   r$   )zEmpty iterable.r&   r&   r'   r&   r&   r)   	iter_dataj   r+   zPaddingCut.iter_datac                 O   s&   | j rt| j| jftj| j S d S r,   )r0   nponesr   r   float32r   r(   argskwargsr&   r&   r)   load_featureso   s   zPaddingCut.load_featuresc                 O   s&   | j rtdt| j| jftjS d S r5   )r3   r@   zerosr   r   r   rB   rC   r&   r&   r)   
load_audiox   s
   zPaddingCut.load_audioT
with_audioc                 C   sT   | j r(d }|rtjdt| j| jtjd}tj| jjd| jj	| jj
tjd|fS d S )Nr6   )dtype   )r4   torchrG   r   r   r   rB   r!   r   heightwidthuint8)r(   rI   audior&   r&   r)   
load_video   s$   
zPaddingCut.load_video        F)offsetr   keep_excessive_supervisionspreserve_idrS   rT   rU   c             
   K   s   |d u r	| j | n|}|dksJ t| |r| jntt || j| jd ur-t|| j| j	dnd | j
d ur<t|| j	ddS d dS )NrR   r   r   r   r   r   r   r   r   r   r    r   r   r   strr   r   r   r   r   r   r    r   )r(   rS   r   rT   rU   rE   new_durationr&   r&   r)   truncate   s*   	

zPaddingCut.truncateboth)	directionrU   pad_silencer^   r_   c             
   C   s   | j | }|dkr||7 }|dksJ t| |r| jntt || j| jdur/t|| j| j	dnd| j
dur>t|| j	ddS ddS )a  
        Return a new PaddingCut with region extended by the specified duration.

        :param duration: The duration by which to extend the cut.
        :param direction: string, 'left', 'right' or 'both'. Determines whether the cut should
            be extended to the left, right or both sides. By default, the cut is extended by
            the specified duration on both sides.
        :param preserve_id: When ``True``, preserves the cut ID from before padding.
            Otherwise, generates a new random ID (default).
        :param pad_silence: See usage in :func:`lhotse.cut.MonoCut.extend_by`. It is ignored here.
        :return: an extended PaddingCut.
        r]   rR   NrV   rW   rX   rY   )r(   r   r^   rU   r_   r[   r&   r&   r)   	extend_by   s.   


zPaddingCut.extend_byrightpad_feat_valuepad_value_dictc           	   
   C   s$   ddl m} || |||||||dS )a  
        Return a new MixedCut, padded with zeros in the recording, and ``pad_feat_value`` in each feature bin.

        The user can choose to pad either to a specific `duration`; a specific number of frames `num_frames`;
        or a specific number of samples `num_samples`. The three arguments are mutually exclusive.

        :param duration: The cut's minimal duration after padding.
        :param num_frames: The cut's total number of frames after padding.
        :param num_samples: The cut's total number of samples after padding.
        :param pad_feat_value: A float value that's used for padding the features.
            By default we assume a log-energy floor of approx. -23 (1e-10 after exp).
        :param direction: string, 'left', 'right' or 'both'. Determines whether the padding is added before or after
            the cut.
        :param preserve_id: When ``True``, preserves the cut ID from before padding.
            Otherwise, generates a new random ID (default).
        :param pad_value_dict: Optional dict that specifies what value should be used
            for padding arrays in custom attributes.
        :return: a padded MixedCut if duration is greater than this cut's duration, otherwise ``self``.
        r6   )pad)r   r   r    rb   r^   rU   rc   )setrd   )	r(   r   r   r    rb   r^   rU   rc   rd   r&   r&   r)   rd      s   zPaddingCut.padaffix_idrecording_fieldc              	   C   sB   | j sJ dt| |r| j d| n| j|t| j|ddddS )a  
        Return a new ``PaddingCut`` that will "mimic" the effect of resampling on ``sampling_rate``, ``duration``, and ``num_samples``.

        :param sampling_rate: The new sampling rate.
        :param affix_id: Should we modify the ID (useful if both versions of the same
            cut are going to be present in a single manifest).
        :param recording_field: which recording field to resample. Ignored, present for interface compatibility.
        :return: a modified copy of the current ``PaddingCut``.
        z/Cannot resample a PaddingCut without Recording._rsN)r   r   r    r   r   r   )r3   r   r   r   r   )r(   r   rf   rg   r&   r&   r)   resample  s   
zPaddingCut.resamplefactorc              	   C   r   | j rtd d}d}d}n	| j}| j}| j}t| j|}|| j }t	| |r/| j
 d| n| j
|||||dS )a  
        Return a new ``PaddingCut`` that will "mimic" the effect of speed perturbation
        on ``duration`` and ``num_samples``.

        :param factor: The speed will be adjusted this many times (e.g. factor=1.1 means 1.1x faster).
        :param affix_id: When true, we will modify the ``PaddingCut.id`` field
            by affixing it with "_sp{factor}".
        :return: a modified copy of the current ``PaddingCut``.
        zAttempting to perturb speed on a MonoCut that references pre-computed features. The feature manifest will be detached, as we do not support feature-domain speed perturbation.N_spr   r    r   r   r   r   r0   loggingwarningr   r   r   r   r    r   r   r   r(   rj   rf   new_num_framesnew_num_featuresnew_frame_shiftnew_num_samplesr[   r&   r&   r)   perturb_speed&  s*   
zPaddingCut.perturb_speedc              	   C   rk   )a  
        Return a new ``PaddingCut`` that will "mimic" the effect of tempo perturbation
        on ``duration`` and ``num_samples``.

        Compared to speed perturbation, tempo preserves pitch.
        :param factor: The tempo will be adjusted this many times (e.g. factor=1.1 means 1.1x faster).
        :param affix_id: When true, we will modify the ``PaddingCut.id`` field
            by affixing it with "_tp{factor}".
        :return: a modified copy of the current ``PaddingCut``.
        zAttempting to perturb tempo on a MonoCut that references pre-computed features. The feature manifest will be detached, as we do not support feature-domain tempo perturbation.N_tprm   rn   rq   r&   r&   r)   perturb_tempoJ  s*   
zPaddingCut.perturb_tempoc                 C   &   t | |r| j d| dS | jdS )a  
        Return a new ``PaddingCut`` that will "mimic" the effect of volume perturbation
        on amplitude of samples.

        :param factor: The volume will be adjusted this many times (e.g. factor=1.1 means 1.1x louder).
        :param affix_id: When true, we will modify the ``PaddingCut.id`` field
            by affixing it with "_vp{factor}".
        :return: a modified copy of the current ``PaddingCut``.
        _vpr   r   r   )r(   rj   rf   r&   r&   r)   perturb_volumeo  s   &zPaddingCut.perturb_volumer   rir_recordingr   normalize_output
early_onlyrir_channelsroom_rng_seedsource_rng_seedc                 C   s"   t | |r| j ddS | jdS )a  
        Return a new ``PaddingCut`` that will "mimic" the effect of reverberation with impulse response
        on original samples.

        :param rir_recording: The impulse response to use for convolving.
        :param normalize_output: When true, output will be normalized to have energy as input.
        :param early_only: When true, only the early reflections (first 50 ms) will be used.
        :param affix_id: When true, we will modify the ``PaddingCut.id`` field
            by affixing it with "_rvb".
        :param rir_channels: The channels of the impulse response to use.
        :return: a modified copy of the current ``PaddingCut``.
        _rvbr{   r|   )r(   r~   r   r   rf   r   r   r   r&   r&   r)   
reverb_rir|  s   "zPaddingCut.reverb_rirtargetc                 K   ry   )aQ  
        Return a new ``PaddingCut`` that will "mimic" the effect of loudness normalization

        :param target: The target loudness in dBFS.
        :param affix_id: When true, we will modify the ``DataCut.id`` field
            by affixing it with "_ln{target}".
        :return: a modified copy of the current ``DataCut``.
        _lnr{   r|   )r(   r   rf   rE   r&   r&   r)   normalize_loudness  s   zPaddingCut.normalize_loudnessc                 C   s(   | j sJ d| j dt| ddddS )zNReturn a copy of the current :class:`.PaddingCut`, detached from ``features``.zBCannot detach features from a MonoCut with no Recording (cut ID = ).N)r   r   r   )r3   r   r   r'   r&   r&   r)   drop_features  s
   zPaddingCut.drop_featuresc                 C   s$   | j sJ d| j dt| ddS )zOReturn a copy of the current :class:`.PaddingCut`, detached from ``recording``.zECannot detach recording from a PaddingCut with no Features (cut ID = r   Nr2   )r0   r   r   r'   r&   r&   r)   drop_recording  s
   zPaddingCut.drop_recordingc                 C      | S zNo-opr&   r'   r&   r&   r)   drop_supervisions  r+   zPaddingCut.drop_supervisionsc                 C   r   r   r&   r'   r&   r&   r)   drop_alignments  r+   zPaddingCut.drop_alignmentsc                 C   r   )zNo-op.r&   r'   r&   r&   r)   drop_in_memory_data  r+   zPaddingCut.drop_in_memory_data	extractorc                 O   s*   t | || jt| j|j| jd|jdS )z
        Returns a new PaddingCut with updates information about the feature dimension and number of
        feature frames, depending on the ``extractor`` properties.
        rV   )r   r   r   )r   feature_dimr   r   r   r   )r(   r   rD   rE   r&   r&   r)   compute_and_store_features  s   
z%PaddingCut.compute_and_store_featuresc                 O   r   zT
        Just for consistency with :class`.MonoCut` and :class:`.MixedCut`.
        r&   rC   r&   r&   r)   fill_supervision     zPaddingCut.fill_supervisionc                 O   r   r   r&   rC   r&   r&   r)   move_to_memory  r   zPaddingCut.move_to_memorytransform_fnc                 C   r   )z
        Just for consistency with :class:`.MonoCut` and :class:`.MixedCut`.

        :param transform_fn: a dummy function that would be never called actually.
        :return: the PaddingCut itself.
        r&   )r(   r   r&   r&   r)   map_supervisions  s   zPaddingCut.map_supervisionsc                 O   r   )z~
        Just for consistency with :class:`.MonoCut` and :class:`.MixedCut`.

        :return: the PaddingCut itself.
        r&   rC   r&   r&   r)   merge_supervisions  s   zPaddingCut.merge_supervisions	predicatec                 C   r   )z
        Just for consistency with :class:`.MonoCut` and :class:`.MixedCut`.

        :param predicate: A callable that accepts `SupervisionSegment` and returns bool
        :return: a modified MonoCut
        r&   )r(   r   r&   r&   r)   filter_supervisions  s   	zPaddingCut.filter_supervisionsdatac                 C   s   |  dd  tdi | S )Ntyper&   )popr   )r   r&   r&   r)   	from_dict  s   zPaddingCut.from_dictpathc                 C   r   r,   r&   r(   r   r&   r&   r)   with_features_path_prefix     z$PaddingCut.with_features_path_prefixc                 C   r   r,   r&   r   r&   r&   r)   with_recording_path_prefix  r   z%PaddingCut.with_recording_path_prefix)T)FN)F)r#   r   )J__name__
__module____qualname____doc__rZ   __annotations__r   intfloatr   r   r   r   r    r!   r   r"   dictpropertyr*   r-   r.   boolr0   r3   r4   r7   r8   r<   r>   r   r?   r@   ndarrayrF   rH   r	   rL   TensorrQ   r\   r`   r   r   r
   r   rd   ri   rv   rx   r}   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   staticmethodr   r   r   r   r&   r&   r&   r)   r      sD  
 

		

#
-	
-
$%	










	
r   )#ro   dataclassesr   typingr   r   r   r   r   r   r	   r
   numpyr@   rL   lhotse.audior   lhotse.audio.utilsr   lhotse.cut.baser   lhotse.featuresr   lhotse.supervisionr   lhotse.utilsr   r   r   r   r   r   r   r   r   r&   r&   r&   r)   <module>   s    ((