o
    Si                  	   @   sl   d dl mZmZ d dlmZ d dlmZ d dlmZ G dd dZ		dd	ee d
edee defddZ
dS )    )OptionalSequence)CutSet)Cut)Secondsc                	   @   sH   e Zd ZdZ			ddededee ddfdd	Zd
edefddZ	dS )CutConcatenateaE  
    A transform on batch of cuts (``CutSet``) that concatenates the cuts to minimize the total amount of padding;
    e.g. instead of creating a batch with 40 examples, we will merge some of the examples together
    adding some silence between them to avoid a large number of padding frames that waste the computation.
          ?Ngapduration_factormax_durationreturnc                 C   s   || _ || _|| _dS )a\  
        CutConcatenate's constructor.

        :param gap: The duration of silence in seconds that is inserted between the cuts;
            it's goal is to let the model "know" that there are separate utterances in a single example.
        :param duration_factor: Determines the maximum duration of the concatenated cuts;
            by default it's 1, setting the limit at the duration of the longest cut in the batch.
        :param max_duration: If a value is given (in seconds), the maximum duration of concatenated cuts
            is fixed to the value while duration_factor is ignored.
        N)r	   r
   r   )selfr	   r
   r    r   ]/home/ubuntu/.local/lib/python3.10/site-packages/lhotse/dataset/cut_transforms/concatenate.py__init__   s   
zCutConcatenate.__init__cutsc                 C   s8   |j dd}t|| j| jr| jdS |d j| j dS )NF)	ascendingr   )r	   r   )sort_by_durationconcat_cutsr	   r   durationr
   )r   r   r   r   r   __call__#   s   zCutConcatenate.__call__)r   r   N)
__name__
__module____qualname____doc__r   floatr   r   r   r   r   r   r   r   r      s    
r   r   Nr   r	   r   r   c                 C   s   t | dkrt| S t| dd dd} |du r| d jn|}d}	 d}| d	 }t|t | d D ]+}| | }|j| |j |k}|rX||j| || |< | dd	 }  n|d7 }q1|s`nq"t| S )
a  
    We're going to concatenate the cuts to minimize the amount of total padding frames used.
    This means that some samples in the batch will be merged together into one sample,
    separated by an interval of silence.
    This is actually solving a knapsack problem.
    In this initial implementation we're using a greedy approach:
    going from the back (i.e. the shortest cuts) we'll try to concat them to the longest cut
    that still has some "space" at the end.

    :param cuts: a list of cuts to pack.
    :param gap: the duration of silence inserted between concatenated cuts.
    :param max_duration: the maximum duration for the concatenated cuts
        (by default set to the duration of the first cut).
    :return a list of packed cuts.
       c                 S   s   | j S )N)r   )cr   r   r   <lambda>C   s    zconcat_cuts.<locals>.<lambda>T)keyreverseNr   F)lenr   	from_cutssortedr   rangepadappend)r   r	   r   current_idxcan_fitshortestidxcutr   r   r   r   .   s(   


r   )r   N)typingr   r   lhotser   
lhotse.cutr   lhotse.utilsr   r   r   r   r   r   r   <module>   s    '