o
    i                     @   s6   d dl Z d dl mZ ddlmZ G dd dejZdS )    N)nn   )ISTFTc                	       sR   e Zd ZdZddedededef fddZejj	d	ej
d
ej
fddZ  ZS )	ISTFTHeada  
    ISTFT Head module for predicting STFT complex coefficients.

    Args:
        dim (int): Hidden dimension of the model.
        n_fft (int): Size of Fourier transform.
        hop_length (int): The distance between neighboring sliding window frames, which should align with
                          the resolution of the input features.
        padding (str, optional): Type of padding. Options are "center" or "same". Defaults to "same".
    centerdimn_fft
hop_lengthpaddingc                    s8   t    |d }tj||| _t||||d| _d S )N   )r   r	   
win_lengthr
   )super__init__torchr   Linearoutr   istft)selfr   r   r	   r
   out_dim	__class__ G/home/ubuntu/.local/lib/python3.10/site-packages/soprano/vocos/heads.pyr      s   
zISTFTHead.__init__xreturnc                 C   sv   |  |dddd}|jddd\}}t|}tj|dd}t|}t|}||d|   }| |}|S )ay  
        Forward pass of the ISTFTHead module.

        Args:
            x (Tensor): Input tensor of shape (B, L, H), where B is the batch size,
                        L is the sequence length, and H denotes the model dimension.

        Returns:
            Tensor: Reconstructed time-domain audio signal of shape (B, T), where T is the length of the output signal.
        r   r   )r   g      Y@)maxy              ?)	r   	transposechunkr   expclipcossinr   )r   r   magpySaudior   r   r   forward   s   



zISTFTHead.forward)r   )__name__
__module____qualname____doc__intstrr   r   compilerdisableTensorr'   __classcell__r   r   r   r   r      s
      r   )r   r   spectral_opsr   Moduler   r   r   r   r   <module>   s    