o
    SiO&                     @   s   d Z ddlZddlZddlmZ ddlmZmZmZ ddl	Z
ddlmZ ddlmZmZ ddlmZ ddlmZ dd	lmZmZ d
ZG dd dejZG dd dZdededefddZdS )z
This is an experimental workflow that can be used to simulate multi-speaker meetings from
a CutSet containing MonoCut objects.
    N)groupby)ListOptionalUnion)tqdm)RecordingSetSupervisionSet)CutSet)DynamicCutSampler)fastcopyis_module_availablei  c                
   @   s   e Zd ZdZdd ZdefddZejdde	e
 ddfd	d
Zej		ddede	e de	e defddZejdededefddZdS )BaseMeetingSimulatora>  
    Base class for meeting simulators. A simulator consists of a `fit()`, a `simulate()`,
    and a `reverberate()` method.

    The `fit()` method is used to learn the distribution of the meeting parameters
    (e.g. turn-taking, overlap ratio, etc.) from a given dataset, presented in the form of
    a SupervisionSet. The parameters themselves are simulator specific.

    The `simulate()` method takes a CutSet containing MonoCut objects and simulates the
    desired number of multi-speaker meetings, based on the learned distribution. The output
    is a CutSet containing MixedCut objects, where each track represents a different speaker.

    The `reverberate()` method takes a CutSet containing MixedCut objects (usually the output
    of `simulate()`) and applies a reverberation effect to each track. We can apply single
    or multi-channel room impulse responses (RIRs) to each track. The output is a CutSet
    containing MixedCut objects, where each track represents a different speaker, convolved
    with a different RIR.

    The base class should be inherited from and the different methods should be implemented.

    The output is expected to be a CutSet containing MixedCut objects, where each track
    represents a different speaker, possibly convolved with a different RIR. This is
    analogous to the "mixture model" of speech signals.

    Example usage:
    >>> simulator = MyMeetingSimulator()
    >>> simulator.fit(cuts)
    >>> simulated_cuts = simulator.simulate(mono_cuts, num_meetings=10)
    >>> simulated_cuts = simulator.reverberate(simulated_cuts, rirs)
    c                 C   s(   t | tu r
tdtdstdd S )NzNBaseMeetingSimulator is an abstract base class and should not be instantiated.scipyz!Please 'pip install scipy' first.)typer   	TypeErrorr   ImportErrorself r   \/home/ubuntu/.local/lib/python3.10/site-packages/lhotse/workflows/meeting_simulation/base.py__init__5   s   zBaseMeetingSimulator.__init__returnc                 C   s   t | j dS )Nz())r   __name__r   r   r   r   __repr__>   s   zBaseMeetingSimulator.__repr__Nmeetingsc                 C      dS )zX
        Learn the distribution of the meeting parameters from a given dataset.
        Nr   )r   r   r   r   r   fitA      zBaseMeetingSimulator.fitcutsnum_meetingsnum_repeatsc                 C   r   )zH
        Simulate the desired number of multi-speaker meetings.
        Nr   )r   r   r   r    r   r   r   simulateH   s   
zBaseMeetingSimulator.simulaterirsc                 G   r   )z=
        Apply a reverberation effect to each track.
        Nr   )r   r   r"   r   r   r   reverberateT   r   z BaseMeetingSimulator.reverberateN)NN)r   
__module____qualname____doc__r   strr   abcabstractmethodr   r   r   r	   intr!   r   r#   r   r   r   r   r      s(    	r   c                   @   s~   e Zd ZdZ							ddedee d	ee d
eeee f deee	  dee	 dee defddZ
dd Zdd ZdS )MeetingSamplera>  
    Create a sampler that will be used to sample groups of utterances from the sources.
    The cuts are partitioned into speaker-wise buckets, and a SimpleCutSampler is created
    for each bucket. When we sample a group of utterances, we first sample the number of
    speakers in the meeting, and then sample the utterances of each speaker. This is done
    by sampling a batch from the corresponding SimpleCutSampler.

    :param cuts: a CutSet containing MonoCut objects.
    :param num_repeats: the number of times each cut will be repeated (by default, they
        are repeated infinitely).
    :param num_meetings: the number of meetings to simulate.
    :param num_speakers_per_meeting: the number of speakers per meeting.
    :param speaker_count_probs: the probabilities of the number of speakers per meeting.
    :param max_duration_per_speaker: the maximum duration of a speaker in a meeting.
    :param max_utterances_per_speaker: the maximum number of utterances of a speaker in a meeting.
    :param seed: the random seed.
    :return: a DynamicCutSampler object.
    N         4@   r   r   r    r   num_speakers_per_meetingspeaker_count_probsmax_duration_per_speakermax_utterances_per_speakerseedc	                 C   s  t dd |D sJ d| t dd |D s J d| t|dks-J d| t|t|ks9J di | _ttt|d	d
 ddd
 ddD ]\}	}
tt	t
|
j|dd||d|d}|| j|	< qM|| _|| _tj|| _t|| _|| _d S )Nc                 s       | ]}|d kV  qdS )   Nr   ).0nr   r   r   	<genexpr>|       z*MeetingSampler.__init__.<locals>.<genexpr>z@The number of speakers per meeting must be greater than 1. Got: c                 s   r5   )g        Nr   )r7   pr   r   r   r9      r:   zUThe probabilities of the number of speakers per meeting must be greater than 0. Got: g      ?zLThe probabilities of the number of speakers per meeting must sum to 1. Got: zTThe number of speakers per meeting and the number of probabilities must be the same.c                 S      | j d jS Nr   supervisionsspeakercutr   r   r   <lambda>       z)MeetingSampler.__init__.<locals>.<lambda>)keyc                 S   r<   r=   r>   rA   r   r   r   rC      rD   z%Creating samplers for each speaker...)descF)timespreserve_idT)max_durationmax_cutsshuffler4   )allsumlensamplersr   r   sortedr
   r	   	from_cutslistrepeatr0   r1   nprandomRandomStatenprRandomrng_remaining_meetings)r   r   r    r   r0   r1   r2   r3   r4   spkspk_cutssamplerr   r   r   r   p   sV   
	
zMeetingSampler.__init__c                 C   s   | j  D ]}t| q| S r$   )rO   valuesiter)r   r]   r   r   r   __iter__   s   
zMeetingSampler.__iter__c              	   C   s   | j d ur| j dkrt t| jt| jk rt t| jj| j| jdt| j}| j	
t| j |}tg }|D ]}| j| }z
t|}||7 }W q= ty[   | j|= Y q=w | }| j d url|  j d8  _ t|dkrt|S t| S )Nr   )r;   r6   )rZ   StopIterationrN   rO   minr0   rW   choicer1   rY   samplerR   keysr	   rQ   nextrK   )r   Nthis_batch_spk_ids
utterancesspk_idr]   
this_batchr   r   r   __next__   s.   


zMeetingSampler.__next__)NNr-   Nr.   r/   r   )r   r%   r&   r'   r	   r   r+   r   r   floatr   r`   rl   r   r   r   r   r,   \   s8    
	
<r,   r   r"   r   c           	   	      s   g }t |dkrtdd |D nd}| D ]A}t |j  |krOg }| fdd }t|j|D ]\}}|t||j	|d q3|t||d q||	  qt
|S )a  
    Use provided RIRs to convolve each track of the input CutSet. The cuts here are
    MixedCut objects containing different speakers in different tracks. To reverberate,
    we choose a random RIR containing as many Recording objects as there are tracks
    in the MixedCut.

    If impulse responses are not provided, we use the fast randomized approximation
    method to simulate artificial single-channel RIRs.

    :param cuts: a CutSet containing MixedCut objects.
    :param rirs: one or more RecordingSet (each set is a group of RIRs from the same room).
    :return: a CutSet containing MixedCut objects reverberated with the provided RIRs.
    r   c                 s   s    | ]}t |V  qd S r$   rN   )r7   	rir_groupr   r   r   r9      r:   z#reverberate_cuts.<locals>.<genexpr>c                    s   t |  kS r$   rn   )rnum_speakersr   r   rC      rD   z"reverberate_cuts.<locals>.<lambda>rA   )tracks)rN   maxrs   filterrU   zipappendr   rB   
reverb_rirr	   rQ   )	r   r"   out_cutsmax_sourcesrB   rs   ro   trackrirr   rq   r   reverberate_cuts   s   "

r}   )r'   r)   rU   	itertoolsr   typingr   r   r   numpyrT   r   lhotser   r   
lhotse.cutr	   lhotse.dataset.samplingr
   lhotse.utilsr   r   MAX_TASKS_WAITINGABCr   r,   r}   r   r   r   r   <module>   s    Gx