o
    pi                     @   sp   d dl mZ d dlmZ d dlZd dlmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZ G dd	 d	eZdS )
    )	lru_cache)OptionalN)	rearrange)MFCC)Model)Taskc                       s   e Zd Z			ddededee f fddZed	ed
efddZdded
efddZ	dded
efddZ
ed
efddZdd Zdejd
ejfddZ  ZS )SimpleSegmentationModel>     Nsample_ratenum_channelstaskc                    sP   t  j|||d t| jjddddd| _tj| jj| jj	 ddd	d	d
| _
d S )N)r   r   r   (      orthoF)r   n_mfccdct_typenormlog_mels    r
   T)
num_layersbatch_firstbidirectional)super__init__r   hparamsr   mfccnnLSTMr   r   lstm)selfr   r   r   	__class__ \/home/ubuntu/.local/lib/python3.10/site-packages/pyannote/audio/models/segmentation/debug.pyr   %   s   z SimpleSegmentationModel.__init__num_samplesreturnc                 C   sD   | j jjj}| j jjj}| j jjj}|rd||  S d|| |  S )a|  Compute number of output frames for a given number of input samples

        Parameters
        ----------
        num_samples : int
            Number of input samples

        Returns
        -------
        num_frames : int
            Number of output frames

        Source
        ------
        https://pytorch.org/docs/stable/generated/torch.stft.html#torch.stft

        r
   r   MelSpectrogramspectrogram
hop_lengthn_fftcenter)r    r%   r*   r+   r,   r#   r#   r$   
num_frames=   s   z"SimpleSegmentationModel.num_framesr-   c                 C   s(   | j jjj}| j jjj}||d |  S )a
  Compute size of receptive field

        Parameters
        ----------
        num_frames : int, optional
            Number of frames in the output signal

        Returns
        -------
        receptive_field_size : int
            Receptive field size.
        r
   )r   r(   r)   r*   r+   )r    r-   r*   r+   r#   r#   r$   receptive_field_sizeZ   s   z,SimpleSegmentationModel.receptive_field_sizer   framec                 C   s@   | j jjj}| j jjj}| j jjj}|r|| S || |d  S )zCompute center of receptive field

        Parameters
        ----------
        frame : int, optional
            Frame index

        Returns
        -------
        receptive_field_center : int
            Index of receptive field center.
        r   r'   )r    r/   r*   r+   r,   r#   r#   r$   receptive_field_centerl   s   z.SimpleSegmentationModel.receptive_field_centerc                 C   s0   t | jtr
td| jjr| jjS t| jjS )zDimension of outputz7SimpleSegmentationModel does not support multi-tasking.)
isinstancespecificationstuple
ValueErrorpowersetnum_powerset_classeslenclassesr    r#   r#   r$   	dimension   s
   z!SimpleSegmentationModel.dimensionc                 C   s   t d| j| _|  | _d S )N@   )r   Linearr:   
classifierdefault_activation
activationr9   r#   r#   r$   build   s   zSimpleSegmentationModel.build	waveformsc                 C   s.   |  |}| t|d\}}| | |S )z

        Parameters
        ----------
        waveforms : (batch, time, channel)

        Returns
        -------
        scores : (batch, time, classes)
        zb c f t -> b t (c f))r   r   r   r?   r=   )r    rA   r   outputhiddenr#   r#   r$   forward   s   
zSimpleSegmentationModel.forward)r	   r
   N)r
   )r   )__name__
__module____qualname__intr   r   r   r   r-   r.   r0   propertyr:   r@   torchTensorrD   __classcell__r#   r#   r!   r$   r   $   s&    
r   )	functoolsr   typingr   rJ   torch.nnr   einopsr   torchaudio.transformsr   pyannote.audio.core.modelr   pyannote.audio.core.taskr   r   r#   r#   r#   r$   <module>   s   