o
    pi                     @   st   d dl mZ d dlmZ d dlZd dlmZ d dlmZm	Z	 d dl
mZ d dlmZ d dlmZ G dd	 d	eZdS )
    )	lru_cache)OptionalN)	rearrangereduce)MFCC)Model)Taskc                       s   e Zd Z			ddededee f fddZed	ed
efddZdded
efddZ	dded
efddZ
ed
efddZdejd
ejfddZ  ZS )SimpleEmbeddingModel>     Nsample_ratenum_channelstaskc                    sP   t  j|||d t| jjddddd| _tj| jj| jj	 ddd	d	d
| _
d S )N)r   r   r   (      orthoF)r   n_mfccdct_typenormlog_mels    r   T)
num_layersbatch_firstbidirectional)super__init__r   hparamsr   mfccnnLSTMr   r   lstm)selfr   r   r   	__class__ Y/home/ubuntu/.local/lib/python3.10/site-packages/pyannote/audio/models/embedding/debug.pyr   %   s   zSimpleEmbeddingModel.__init__num_samplesreturnc                 C   sD   | j jjj}| j jjj}| j jjj}|rd||  S d|| |  S )a|  Compute number of output frames for a given number of input samples

        Parameters
        ----------
        num_samples : int
            Number of input samples

        Returns
        -------
        num_frames : int
            Number of output frames

        Source
        ------
        https://pytorch.org/docs/stable/generated/torch.stft.html#torch.stft

        r   r   MelSpectrogramspectrogram
hop_lengthn_fftcenter)r!   r&   r+   r,   r-   r$   r$   r%   
num_frames=   s   zSimpleEmbeddingModel.num_framesr.   c                 C   s(   | j jjj}| j jjj}||d |  S )a
  Compute size of receptive field

        Parameters
        ----------
        num_frames : int, optional
            Number of frames in the output signal

        Returns
        -------
        receptive_field_size : int
            Receptive field size.
        r   )r   r)   r*   r+   r,   )r!   r.   r+   r,   r$   r$   r%   receptive_field_sizeZ   s   z)SimpleEmbeddingModel.receptive_field_sizer   framec                 C   s@   | j jjj}| j jjj}| j jjj}|r|| S || |d  S )zCompute center of receptive field

        Parameters
        ----------
        frame : int, optional
            Frame index

        Returns
        -------
        receptive_field_center : int
            Index of receptive field center.
        r   r(   )r!   r0   r+   r,   r-   r$   r$   r%   receptive_field_centerl   s   z+SimpleEmbeddingModel.receptive_field_centerc                 C   s   dS )zDimension of output@   r$   )r!   r$   r$   r%   	dimension   s   zSimpleEmbeddingModel.dimension	waveformsc                 C   s*   |  |}| t|d\}}t|ddS )z

        Parameters
        ----------
        waveforms : (batch, time, channel)

        Returns
        -------
        embedding : (batch, dimension)
        zb c f t -> b t (c f)zb t f -> b fmean)r   r    r   r   )r!   r4   r   outputhiddenr$   r$   r%   forward   s   
zSimpleEmbeddingModel.forward)r
   r   N)r   )r   )__name__
__module____qualname__intr   r   r   r   r.   r/   r1   propertyr3   torchTensorr8   __classcell__r$   r$   r"   r%   r	   $   s$    r	   )	functoolsr   typingr   r>   torch.nnr   einopsr   r   torchaudio.transformsr   pyannote.audio.core.modelr   pyannote.audio.core.taskr   r	   r$   r$   r$   r%   <module>   s   