o
    2wiP<                     @   s   d dl Z d dlmZ d dlmZ d dlmZmZmZm	Z	 d dl
Zd dlmZ d dlmZmZmZ d dlmZmZmZ d dlmZ d d	lmZ d d
lmZmZ d dlmZmZmZm Z  G dd deZ!de"de!fddZ#dS )    N)defaultdict)partial)AnyListOptionalUnion)tqdm)RecordingSetSupervisionSetdill_enabled)CutSetMixedCutMixTrack)mix)parallel_map)add_durationsuuid4)MAX_TASKS_WAITINGBaseMeetingSimulatorMeetingSamplerreverberate_cutsc                       s,  e Zd ZdZ				d0dedededef fd	d
ZdefddZdd Zde	j
defddZedd1dee ddfddZ	d2dededefddZed								 	!d3d"ed#ee d$ee d%eeee f d&eee  d'ee d(ee ded)ed*edefd+d,Zd"ed-edefd.d/Z  ZS )4ConversationalMeetingSimulatora  
    This simulator uses the method described in https://arxiv.org/abs/2204.00890 and
    implemented in https://github.com/BUTSpeechFIT/EEND_dataprep. Note that a similar
    method of meeting simulation is also implemented in
    https://github.com/jsalt2020-asrdiar/jsalt2020_simulate.

    The basic idea is to sample the silence and overlap durations collectively for all
    speakers so that we get similar speech/silence/overlap characteristics as the
    original distribution. In general, this method produces more realistic overlap
    durations than the `SpeakerIndependentMeetingSimulator`. This is done by learning
    the histogram of 3 distributions: same speaker pause, different speaker pause, and
    different speaker overlap. In this implementation, we learn the histograms from
    provided data, otherwise we use the initialization values as shift of a Gamma
    distribution with scale 1.0.
          ?       @      ?same_spk_pausediff_spk_pausediff_spk_overlapprob_diff_spk_overlapc                    sN   t    |||fD ]}|du s|dksJ dq
|| _|| _|| _|| _dS )a  
        :param same_spk_pause: the mean pause duration between utterances of the same
            speaker. [Default: 2.0]
        :param diff_spk_pause: the mean pause duration between utterances of different
            speakers. [Default: 3.0]
        :param diff_spk_overlap: the mean overlap duration between utterances of
            different speakers. [Default: 2.0]
        :param prob_diff_spk_overlap: the probability of overlap between utterances of
            different speakers. [Default: 0.5]
        Nr   zDurations must be > 0.)super__init__r   r   r   r   )selfr   r   r   r   duration	__class__ o/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/lhotse/workflows/meeting_simulation/conversational.pyr    '   s   

z'ConversationalMeetingSimulator.__init__returnc              
   C   s6   | j j d| jdd| jdd| jdd| jdd
S )Nz (same_spk_pause=z.2fz, diff_spk_pause=z, diff_spk_overlap=z, prob_diff_spk_overlap=))r$   __name__r   r   r   r   )r!   r%   r%   r&   __repr__A   s   
z'ConversationalMeetingSimulator.__repr__c                 C   sF   ddl m} |dd| jd| _|dd| jd| _|dd| jd| _d S )Nr   )gammar   )ascaleloc)scipy.statsr+   r   same_spk_pause_distr   diff_spk_pause_distr   diff_spk_overlap_dist)r!   r+   r%   r%   r&   _init_defaultsJ   s   z-ConversationalMeetingSimulator._init_defaultsvaluesc                 C   s,   ddl m} tj|ddd\}}|||fS )z
        Compute the histogram of the given values and return the bin edges and the
        corresponding probabilities.
        r   )rv_histogramd   T)binsdensity)r/   r5   np	histogram)r!   r4   r5   hist	bin_edgesr%   r%   r&   _compute_histogram_distQ   s   z6ConversationalMeetingSimulator._compute_histogram_distTNmeetingsc           	      C   s  |du rt d |   dS t|tsJ dddlm} g }g }g }|dd t|dd d	}| D ]S}t	d
t
|D ]I}|| j||d
  jkr\||| j||d
  j  q>|| j||d
  jkrx||| j||d
  j  q>|||d
  j|| j  q>q5| t|| _| t|| _| t|| _t
|t
| dkrt
|t
|t
|  nd| _| j | _| j | _| j | _td|   dS )z
        Learn the distribution of the meeting parameters from a given dataset.
        :param meetings: a SupervisionSet containing the meetings to be used for
        Nz/No meetings provided, using default parameters.z2The meetings must be provided as a SupervisionSet.r   )groupbyc                 S      | j S N)recording_idsr%   r%   r&   <lambda>q       z4ConversationalMeetingSimulator.fit.<locals>.<lambda>c                 S   s   | j | jfS rA   )rB   startrC   r%   r%   r&   rE   r   s    key   r   zLearned parameters: )logginginfor3   
isinstancer
   cytoolz.itertoolzr?   sortedr4   rangelenspeakerappendrG   endr=   r9   arrayr0   r1   r2   r   meanr   r   r   print)	r!   r>   r?   same_spk_pause_valuesdiff_spk_pause_valuesdiff_spk_overlap_valuesrecording_segmentssegmentsir%   r%   r&   fit[   sf   
z"ConversationalMeetingSimulator.fitF
utterancesallow_3fold_overlapc              
   C   s  |j }t|}dd | jj|dD }dd | jj|dD }dd | jj|dD }| jj| j|d}t|}dg}	|d j	}
|d j
d j}d	d
 |D }|
||< tt| dd}|d j}tdt|D ]}|| j
d j}||d  j
d j}||kr|| }n>|| dkr|| }n3|| }t|dkr|st|t|
||  |dt|
|d  |d}nt|t|
||  |d}| }t|
||d}
|	|
 t|
|| j	|d}
|
||< tt| dd}|d }
qittt||	dd d \}}	tt}t||	D ]\}}||j
d j ||f qg }| D ]3\}}|d \}}|dd D ]\}}t||t|| |ddd}q.t|t||d}|| qt|dd d}ttt |dS )a  
        Create a MixedCut object from a list of MonoCuts (utterances).
        We sample pauses and/or overlaps from the initilized or learned distributions.
        Then, we create a MixedCut where each track represents a different speaker.

        :param utterances: a CutSet containing the utterances to be mixed.
        :param allow_3fold_overlap: if True, allow 3-fold overlaps between speakers.
            [Default: False]
        :return: a MixedCut object.
        c                 S      g | ]}t |d qS    round.0xr%   r%   r&   
<listcomp>       zBConversationalMeetingSimulator._create_mixture.<locals>.<listcomp>)sizec                 S   ra   rb   rd   rf   r%   r%   r&   ri      rj   c                 S   ra   rb   rd   rf   r%   r%   r&   ri      s    
)prk           r   c                 S   s   i | ]}|d qS )rm   r%   )rg   spkrr%   r%   r&   
<dictcomp>   s    zBConversationalMeetingSimulator._create_mixture.<locals>.<dictcomp>T)reverserJ   )sampling_ratec                 S   s   | d S )NrJ   r%   rh   r%   r%   r&   rE      s    z@ConversationalMeetingSimulator._create_mixture.<locals>.<lambda>rH   N)offsetallow_padding)cuttypers   c                 S   r@   rA   )rs   rr   r%   r%   r&   rE     rF   )idtracks)speakersrQ   r0   rvsr1   r2   	bernoullir   listr"   supervisionsrR   rO   r4   rq   rP   minr   rS   zipr   itemsr   r   rv   r   strr   )r!   r_   r`   ry   Nsame_spk_pausesdiff_spk_pausesdiff_spk_overlapsdiff_spk_bernoullioffsets
cur_offset	first_spklast_utt_endlast_utt_end_timessrr]   cur_spkprev_spkot
spk_tracksuttrs   rx   spkspk_uttstrackrG   r%   r%   r&   _create_mixture   s   







z.ConversationalMeetingSimulator._create_mixturerc         4@   r   rJ   cutsnum_meetingsnum_repeatsnum_speakers_per_meetingspeaker_count_probsmax_duration_per_speakermax_utterances_per_speakerseednum_jobsc              
   C   s  ddl m} |du r|du rtd|durd}t|tr |g}|du r/dt| gt| }t| dddu r;|   || _t||||||||	d}t	|}t
t| |d}g }|
d	krntt|||d
dD ]}|| qentt|||
|
t d|d
dD ]}|| q}t|S )a8  
        Simulate the desired number of multi-speaker meetings.
        :param cuts: CutSet containing the MonoCut objects to be used for simulation.
        :param num_meetings: the number of meetings to simulate.
        :param num_repeats: the number of times to repeat the provided cuts. This means that
            the number of simulated meetings depends on how many cuts are available.
        :param num_speakers_per_meeting: the number of speakers per meeting. If a list is
            provided, the number of speakers per meeting is sampled from this list.
            [Default: 2]
        :param speaker_count_probs: the probability of each number of speakers per meeting.
            [Default: None]
        :param max_duration_per_speaker: the maximum duration of each speaker in a meeting.
            [Default: 20.0]
        :param max_utterances_per_speaker: the maximum number of utterances per speaker in a
            meeting. [Default: 5]
        :param allow_3fold_overlap: if True, allow 3-fold overlaps between speakers.
            [Default: False]
        :param seed: the random seed to be used for simulation. [Default: 0]
        :param num_jobs: the number of jobs to use for simulation. Use more jobs to speed up
            simulation when you have large number of source utterances. [Default: 1]
        r   )r{   Nz4Either num_meetings or num_repeats must be provided.r   r0   )r   r   r   r   r   r   r   )	simulatorr`   rJ   zSimulating meetings)totaldesc)r   
queue_size)r/   r{   
ValueErrorrM   intrQ   getattrr3   r   iterr   _simulate_workerr   maprS   r   r   r   	from_cuts)r!   r   r   r   r   r   r   r   r`   r   r   r{   samplersampler_iterworkmixturesmixturer%   r%   r&   simulate	  sb   #





z'ConversationalMeetingSimulator.simulaterirsc                 G   s   t |g|R  S rA   )r   )r!   r   r   r%   r%   r&   reverberatel  s   z*ConversationalMeetingSimulator.reverberate)r   r   r   r   rA   )F)	NNrc   Nr   r   Fr   rJ   )r)   
__module____qualname____doc__floatr    r   r*   r3   r9   ndarrayr   r=   r   r   r
   r^   r   boolr   r   r   r   r   r   r	   r   __classcell__r%   r%   r#   r&   r      s|    	
C
k
	
br   r`   r   c                 C   s   |j | |dS )N)r`   )r   )r_   r`   r   r%   r%   r&   r   p  s   r   )$rK   collectionsr   	functoolsr   typingr   r   r   r   numpyr9   r   lhotser	   r
   r   
lhotse.cutr   r   r   lhotse.cut.setr   lhotse.parallelr   lhotse.utilsr   r   (lhotse.workflows.meeting_simulation.baser   r   r   r   r   r   r   r%   r%   r%   r&   <module>   s(      \