o
    ¡¿¯i%8  ã                   @   s¨   d dl Zd dlZd dlmZ d dlm  mZ d dlm	Z	 G dd„ dej
ƒZG dd„ dej
ƒZG dd„ deƒZd	d
„ ZedkrRedddddddddd	Zeeƒ dS dS )é    N)Údprnnc                       s2   e Zd Z						d‡ fdd„	Zd	d
„ Z‡  ZS )Ú	BF_moduleé   é   éd   Tç        Úifasnetc              	      sþ   t ƒ  ¡  |
dv sJ dƒ‚|| _|| _|| _|| _|| _|| _|| _t	j
d| j| j| j| j |||	d| _d| _|
| _|
dkrJt | j| jd¡| _n$|
dkrnt t | j| jd¡t ¡ ¡| _t t | j| jd¡t ¡ ¡| _|| _tj| j| jdd	d
| _d S )N)Úfasnetr   z'fasnet_type should be fasnet or ifasnetÚlstm)Ú
num_layersÚbidirectionalÚdropoutç:Œ0âŽyE>r   é   r	   F©Úbias)ÚsuperÚ__init__Ú	input_dimÚfeature_dimÚ
hidden_dimÚ
output_dimÚlayerÚsegment_sizeÚnum_spkr   Ú	DPRNN_TACÚdprnn_modelÚepsÚfasnet_typeÚnnÚConv1dÚoutputÚ
SequentialÚTanhÚSigmoidÚoutput_gateÚBN)Úselfr   r   r   r   r   r   r   r   r   r   ©Ú	__class__© úM/home/ubuntu/.local/lib/python3.10/site-packages/espnet2/enh/layers/fasnet.pyr      sB   

ý
ù	ÿÿzBF_module.__init__c                 C   sþ   |j \}}}}| || ||¡}|  |¡}t || j¡\}}	| ||d|j d |j d ¡}|  ||¡ || | j | j| jd¡}
t 	|
|	¡}
| j
dkrh|  |
¡|  |
¡ }| dd¡ ¡  ||| jd| j¡}|S | j
dkr}|  |
¡}| ||| j| jd¡}|S )Néÿÿÿÿr   é   r	   r   r   )ÚshapeÚviewr&   r   Úsplit_featurer   r   r   r   Úmerge_featurer   r!   r%   Ú	transposeÚ
contiguousr   )r'   ÚinputÚnum_micÚ
batch_sizeÚchÚNÚ
seq_lengthÚenc_featureÚenc_segmentsÚenc_restr!   Ú	bf_filterr*   r*   r+   ÚforwardL   s>   
ÿÿü
ÿ
ý
ù
ÿzBF_module.forward)r   r   r   Tr   r   ©Ú__name__Ú
__module__Ú__qualname__r   r>   Ú__classcell__r*   r*   r(   r+   r      s    õ9r   c                       sR   e Zd Z						d‡ fdd„	Zdd	„ Zd
d„ Zdd„ Zdd„ Zdd„ Z‡  Z	S )ÚFaSNet_baseé   r   é   r   é€>  c                    sx   t t| ƒ ¡  || _tt|
| d ƒdƒ| _| jd | _|
| _|| _	|	| _
|| _|| _|| _|| _|| _|| _d| _d S )Néè  r   r   )r   rD   r   Úwin_lenÚmaxÚintÚwindowÚstrideÚsrÚcontext_lenr   Úenc_dimr   r   r   r   r   r   )r'   rP   r   r   r   r   ÚnspkrI   rO   r   rN   r(   r*   r+   r   |   s   
zFaSNet_base.__init__c           
      C   sˆ   |j \}}}| j}||||  |  }|dkr+t |||¡ | ¡ ¡}t ||gd¡}t |||¡ | ¡ ¡}	t |	||	gd¡}||fS )z3Zero-padding input according to window/stride size.r   r   )r.   rM   ÚtorchÚzerosÚtypeÚcat)
r'   r4   rL   r6   ÚnmicÚnsamplerM   ÚrestÚpadÚpad_auxr*   r*   r+   Ú	pad_inputœ   s   zFaSNet_base.pad_inputc                    sö   |   ||¡\‰}ˆj\}}}|d }t |||¡ ˆ ¡ ¡}	t |	ˆ|	gd¡‰d| | d }
t |
¡| ‰ t ˆ ¡ ˆ ¡ ¡ 	¡  
ddd¡‰ ˆ  |||
¡‰ ‡ ‡fdd„td| | ƒD ƒ}t |d¡}|dd…dd…dd…||| …f }|||fS )z¡Segmenting the signal into chunks with specific context.

        input:
            x: size (B, ch, T)
            window: int
            context: int
        r   r   r,   c                    s$   g | ]}t  ˆd ˆ | ¡ d¡‘qS )r   r-   )rR   ÚgatherÚ	unsqueeze)Ú.0Úi©Ú	begin_idxr4   r*   r+   Ú
<listcomp>È   s    ÿÿz2FaSNet_base.seg_signal_context.<locals>.<listcomp>r-   N)r[   r.   rR   rS   rT   rU   ÚnpÚarangeÚ
from_numpyÚlongr/   ÚexpandÚrange)r'   ÚxrL   ÚcontextrX   r6   rV   rW   rM   Úpad_contextÚnchunkÚchunksÚcenter_framer*   r`   r+   Úseg_signal_context­   s     ÿþ$
zFaSNet_base.seg_signal_contextc                 C   s  |j \}}}t |||¡ | ¡ ¡}g }g }t|ƒD ]Q}	| t |dd…dd…|	d…f |dd…dd…d| |	 …f gd¡ d¡¡ | t |dd…dd…|	d d…f |dd…dd…d|	d …f gd¡ d¡¡ qt |d¡}t |d¡}t || d¡|gd¡}
|
S )z¯signal context function

        Segmenting the signal into chunks with specific context.
        input:
            x: size (B, dim, nframe)
            context: int
        Nr   r   )r.   rR   rS   rT   rh   ÚappendrU   r]   )r'   ri   rj   r6   ÚdimÚnframeÚzero_padÚpad_pastÚ
pad_futurer_   Úall_contextr*   r*   r+   Úsignal_contextÓ   s"   	FÿHÿÿzFaSNet_base.signal_contextc              	   C   s–  |  d¡|  d¡ksJ dƒ‚|  d¡|  d¡ksJ dƒ‚|  d¡}|  d¡}|  d¡|  d¡krH| |  d¡|  d¡|  d¡¡ ¡ }|  d¡}n|  d¡|  d¡k rd| |  d¡|  d¡|  d¡¡ ¡ }tj| dd|  d¡¡ d¡t |  d¡|  d¡ d|  d¡¡ 	| 	¡ ¡|| d}| 
¡ | j }|jddd ddd¡| j }tj| dd|  d¡¡| dd|  d¡¡|| d}|||  }| ||d¡S )	z—Cosine similarity between some reference mics and some target mics

        ref: shape (nmic1, L, seg1)
        target: shape (nmic2, L, seg2)
        r   zInputs should have same length.r   z;Reference input should be no smaller than the target input.r   r,   ©Úgroups)rq   )Úsizerg   r3   ÚFÚconv1dr/   ÚpowrR   ÚonesrT   Úsqrtr   Únorm)r'   ÚrefÚtargetr9   Ú	larger_chÚref_normÚtarget_normÚcos_simr*   r*   r+   Úseq_cos_simñ   sH   ÿþ

ÿþÿþ,ýÿýzFaSNet_base.seq_cos_simc                 C   s   dS )zÐabstract forward function

        input: shape (batch, max_num_ch, T)
        num_mic: shape (batch, ), the number of channels for each input.
                 Zero for fixed geometry configuration.
        Nr*   )r'   r4   r5   r*   r*   r+   r>     s   zFaSNet_base.forward)rE   r   rF   rF   r   rG   )
r@   rA   rB   r   r[   ro   rw   r‡   r>   rC   r*   r*   r(   r+   rD   {   s    õ &-rD   c                       s$   e Zd Z‡ fdd„Zdd„ Z‡  ZS )Ú
FaSNet_TACc                    s¨   t t| ƒj|i |¤Ž t| j| j d ƒ| _| jd d | _t| j| j	 | j
| j| j| j| j| j| jdd	| _tjd| j	| jd | j dd| _tjd| j	dd	| _d S )
NrH   r   r   r	   )r   r   Fr   r   )r   )r   rˆ   r   rK   rN   rO   rj   Ú
filter_dimr   rP   r   r   r   r   r   r   Úall_BFr   r    rL   ÚencoderÚ	GroupNormÚenc_LN)r'   ÚargsÚkwargsr(   r*   r+   r   *  s$   
÷ÿzFaSNet_TAC.__init__c              	      sÜ  |  d¡}|  d¡}|  || j| j¡\}}}|  d¡}|  | dd| jd | j ¡¡ || || j¡ dd¡ ¡ }	|  	|	¡ ||| j|¡}	|d d …df  ¡  dd| j¡}
| dd¡ ¡  |d| jd | j ¡}|  
||
¡}| |||| j¡ dddd¡ ¡ }t |	|gd¡}|  |ˆ¡}t | || d|| jd | j ¡g| j d¡}tj| dd| jd | j ¡| dd| j¡|| | j | d}| ||| j|| j¡}| || | j d| jd ¡‰ ˆ d d …d d …d | j…f  ¡  || | j dd¡d d …d d …| jd …f }ˆ d d …d d …| jd …f  ¡  || | j dd¡d d …d d …d | j …f }|| ‰ |dkrCˆ d d …d d …d | …f ‰ ˆ  ||| jd¡‰ ˆ ¡ dkrZˆ  d¡‰ ˆ S ‡ ‡fdd„t|ƒD ƒ‰ t ˆ d¡‰ ˆ S )	Nr   r   r   r,   r-   rx   c                    s,   g | ]}ˆ |d ˆ| …f   d¡ d¡‘qS )Nr   )Úmeanr]   )r^   Úb©Ú	bf_signalr5   r*   r+   rb   š  s    ÿÿz&FaSNet_TAC.forward.<locals>.<listcomp>)rz   ro   rL   rj   r‹   r/   rP   r2   r3   r   r‡   r‰   ÚpermuterR   rU   rŠ   r   r{   r|   rM   rJ   r   rh   )r'   r4   r5   r6   rV   Úall_segÚall_mic_contextrX   r9   Ú
enc_outputÚref_segrv   Úall_cos_simÚinput_featureÚ
all_filterÚmic_contextÚall_bf_outputÚ
bf_signal1Ú
bf_signal2r*   r’   r+   r>   C  sŒ   



ÿ
ü

ÿ 
ýýÿÿûù	ýÿÿ*þÿ,þÿ

ÿ
úþzFaSNet_TAC.forwardr?   r*   r*   r(   r+   rˆ   )  s    rˆ   c                 C   st   t  ddd¡}t  t ddg¡¡ d¡ | ¡ ¡}t  d¡ | ¡ ¡}| || ¡ ƒ}| || ¡ ƒ}t	|j
|j
ƒ d S )Nr   r   i }  r-   r,   r   )rR   Úrandre   rc   Úarrayr/   rT   rS   rf   Úprintr.   )Úmodelri   r5   Únone_micÚy1Úy2r*   r*   r+   Ú
test_model£  s   ÿ
ûr§   Ú__main__é@   é€   r   é2   r   rF   rG   )	rP   r   r   r   r   rQ   rI   rO   rN   )Únumpyrc   rR   Útorch.nnr   Útorch.nn.functionalÚ
functionalr{   Úespnet2.enh.layersr   ÚModuler   rD   rˆ   r§   r@   Ú	model_TACr*   r*   r*   r+   Ú<module>   s0   i /z÷ò