o
    Si.$                     @   s   d dl Z d dlmZ d dlmZ d dlmZmZmZ d dl	Z
d dlmZ d dlZd dlmZmZmZ d dlmZmZmZ d dlmZ d d	lmZ d d
lmZ d dlmZmZmZmZ G dd deZdede dedefddZ!dS )    N)defaultdict)partial)ListOptionalUnion)tqdm)RecordingSetSupervisionSetdill_enabled)CutSetMixedCutMixTrack)mix)parallel_map)uuid4)MAX_TASKS_WAITINGBaseMeetingSimulatorMeetingSamplerreverberate_cutsc                       s   e Zd ZdZd'dedef fddZdd	 Zed
d(dee	 ddfddZ
dee deej defddZed
								d)dedee dee deeee f deee  dee dee d ed!edefd"d#Zded$edefd%d&Z  ZS )*"SpeakerIndependentMeetingSimulatora  
    This simulator uses the simulation method used in the end-to-end neural diarization (EEND)
    paper: https://arxiv.org/abs/1909.06247 (Algorithm 1). It samples segments of each speaker from the
    input CutSet, and concatenates them into speaker-specific channels, with pauses sampled
    from an exponential distribution. The speaker channels are then mixed together
    (possibly after adding room impulse responses) to create the simulated meeting.
    Since the speakers are simulated independently, the resulting mixtures can contain more
    overlap than is usually present in real meetings.

    In the paper, a single hyper-parameter `beta` is used which is equivalent to the scale
    parameter of the exponential distribution. Here, we use both `loc` and `scale`, where
    `loc` would mean the minimum silence duration between two consecutive utterances from
    the same speaker. These parameters can be either provided in initialization, or learned
    from a dataset using the `fit()` method.
                   @locscalec                    s   t    || _|| _dS )aW  
        :param loc: the minimum silence duration between two consecutive utterances from
            the same speaker. [Default: 0.0]
        :param scale: the scale parameter of the exponential distribution used to sample
            the silence duration between two consecutive utterances from a speaker.
            [Default: 2.0]
        N)super__init__r   r   )selfr   r   	__class__ k/home/ubuntu/.local/lib/python3.10/site-packages/lhotse/workflows/meeting_simulation/speaker_independent.pyr   (   s   

z+SpeakerIndependentMeetingSimulator.__init__c                 C   s   | j jd| j d| j d S )Nz(loc=, scale=))r   __name__r   r   )r   r   r   r    __repr__4   s   z+SpeakerIndependentMeetingSimulator.__repr__TNmeetingsreturnc              
   C   s   |du rt d| j d| j  dS t|tsJ dddlm} ddlm	} |dd	 t
|d
d	 d}g }| D ]&}t
|dd	 d}tdt|D ]}|td|| j||d  j  qLq;||\| _| _td| jdd| jd dS )z
        Learn the distribution of the meeting parameters from a given dataset.
        :param meetings: a SupervisionSet containing the meetings to be used for
        Nz4No meetings provided, using default parameters: loc=r!   z2The meetings must be provided as a SupervisionSet.r   )groupby)exponc                 S      | j | jfS Nrecording_idspeakersr   r   r    <lambda>L       z8SpeakerIndependentMeetingSimulator.fit.<locals>.<lambda>c                 S   r)   r*   r+   r.   r   r   r    r0   M   r1   )keyc                 S   s   | j S r*   )startr.   r   r   r    r0   S   s       zLearned parameters: loc=z.2f)logginginfor   r   
isinstancer	   cytoolz.itertoolzr'   scipy.statsr(   sortedvaluesrangelenappendmaxr3   endfitprint)r   r%   r'   r(   speaker_segmentsinter_speech_intervalssegmentsir   r   r    rA   7   s4    z&SpeakerIndependentMeetingSimulator.fit
utterancessilence_durationsc           
      C   s   g }t t||D ]A\}\}}t|}|d }t|dd |dd D ]\}}	t||	|j| dd}q$t|t||dkr?dn|d d}|| q	tt	t
 |dS )a  
        Create a MixedCut object from a list of speaker-wise MonoCuts and silence intervals.
        Each `track` in the resulting MixedCut represents a different speaker. Each `track`
        itself can be a MonoCut or a MixedCut (if the speaker has multiple utterances).
        r   r4   NT)offsetallow_padding)cuttyperI   )idtracks)	enumerateziplistr   durationr   rL   r>   r   strr   )
r   rG   rH   rN   rF   spk_utterancesspk_silencestracksiluttr   r   r    _create_mixture^   s   "z2SpeakerIndependentMeetingSimulator._create_mixture         4@   r   r4   cutsnum_meetingsnum_repeatsnum_speakers_per_meetingspeaker_count_probsmax_duration_per_speakermax_utterances_per_speakerseednum_jobsc
              
   C   s   |du r|du rt d|durd}t|tr|g}|du r)dt| gt| }t||||||||d}
t|
}tt|| d}g }|	dkrXtt	|||dD ]}|
| qOntt|||	|	t d|d	d
D ]}|
| qgt|S )a  
        Simulate the desired number of multi-speaker meetings.
        :param cuts: CutSet containing the MonoCut objects to be used for simulation.
        :param num_meetings: the number of meetings to simulate.
            [Default: None]
        :param num_repeats: the number of times to repeat the provided cuts. This means that
            the number of simulated meetings depends on how many cuts are available.
        :param num_speakers_per_meeting: the number of speakers per meeting. If a list is
            provided, the number of speakers per meeting is sampled from this list.
            [Default: 2]
        :param speaker_count_probs: the probability of each number of speakers per meeting.
            [Default: None]
        :param max_duration_per_speaker: the maximum duration of a speaker's utterances.
            [Default: 20.0]
        :param max_utterances_per_speaker: the maximum number of utterances per speaker.
            [Default: 5]
        :param seed: the random seed to be used for simulation. [Default: 0]
        :param num_jobs: the number of jobs to use for simulation. Use more jobs to speed up
            simulation when you have large number of source utterances. [Default: 1]
        Nz4Either num_meetings or num_repeats must be provided.g      ?)r_   r^   rb   rc   r`   ra   rd   )rd   	simulatorr4   )total)re   
queue_sizezSimulating meetings)rg   desc)
ValueErrorr7   intr=   r   iterr   _simulate_workerr   mapr>   r   r   r   	from_cuts)r   r]   r^   r_   r`   ra   rb   rc   rd   re   samplersampler_iterworkmixturesmixturer   r   r    simulatev   sN   !




z+SpeakerIndependentMeetingSimulator.simulaterirsc                 G   s   t |g|R  S r*   )r   )r   r]   rv   r   r   r    reverberate   s   z.SpeakerIndependentMeetingSimulator.reverberate)r   r   r*   )NNrZ   Nr[   r\   r   r4   )r#   
__module____qualname____doc__floatr   r$   r
   r   r	   rA   r   r   nparrayr   rY   rk   r   ru   r   rw   __classcell__r   r   r   r    r      sX    &

	
Qr   rG   rd   rf   r&   c                    st   t j| tt}D ]}||jd j | qdd | D  fddt	t
D }|}|S )Nr   c                 S   s   g | ]}t |qS r   )r   ro   ).0r]   r   r   r    
<listcomp>   s    z$_simulate_worker.<locals>.<listcomp>c                    s*   g | ]}j  jjt| d  qS ))r   size)r   exponentialr   r=   )r   rF   nprrf   rG   r   r    r      s    )r|   randomRandomStater   rQ   supervisionsr-   r>   r;   r<   r=   rY   )rG   rd   rf   utts_by_speakerrX   rH   rt   r   r   r    rm      s   
rm   )"r5   collectionsr   	functoolsr   typingr   r   r   numpyr|   r   lhotser   r	   r
   
lhotse.cutr   r   r   lhotse.cut.setr   lhotse.parallelr   lhotse.utilsr   (lhotse.workflows.meeting_simulation.baser   r   r   r   r   rk   rm   r   r   r   r    <module>   s0     6