o
    ߥi                      @   s   d Z ddlZddlm  mZ ddlm  mZ	 ddlmZ dd Z
dd
dZG dd dejZG dd dejZG dd dejZG dd dejZG dd dejZG dd dejZG dd dejZG dd dejZdS )zH Some implementations are adapted from https://github.com/yuyq96/D-TDNN
    N)nnc                 C   s   t  }| dD ]C}|dkr|dt jdd q	|dkr(|dt | q	|dkr6|dt | q	|dkrF|dt j|dd	 q	td
||S )N-reluTinplaceprelu	batchnorm
batchnorm_F)affinezUnexpected module ({}).)	r   
Sequentialsplit
add_moduleReLUPReLUBatchNorm1d
ValueErrorformat)
config_strchannels	nonlinearname r   [/home/ubuntu/.local/lib/python3.10/site-packages/modelscope/models/audio/sv/DTDNN_layers.pyget_nonlinear   s   r   FT{Gz?c                 C   s@   | j |d}| j||d}tj||gdd}|r|j|d}|S )Ndim)r   unbiasedr   )meanstdtorchcat	unsqueeze)xr   keepdimr   epsr   r    statsr   r   r   statistics_pooling   s   r(   c                   @   s   e Zd Zdd ZdS )	StatsPoolc                 C   s   t |S N)r(   selfr$   r   r   r   forward'   s   zStatsPool.forwardN)__name__
__module____qualname__r-   r   r   r   r   r)   %   s    r)   c                       s0   e Zd Z					d	 fdd	Zdd Z  ZS )
	TDNNLayer   r   Fbatchnorm-reluc	           	   	      sj   t t|   |dk r |d dksJ d||d d | }tj|||||||d| _t||| _d S )Nr      r2   4Expect equal paddings, but got even kernel size ({})stridepaddingdilationbias)	superr1   __init__r   r   Conv1dlinearr   r   )	r,   in_channelsout_channelskernel_sizer7   r8   r9   r:   r   	__class__r   r   r<   -   s    	zTDNNLayer.__init__c                 C      |  |}| |}|S r*   )r>   r   r+   r   r   r   r-   E      

zTDNNLayer.forward)r2   r   r2   Fr3   r.   r/   r0   r<   r-   __classcell__r   r   rB   r   r1   +   s    r1   c                       s2   e Zd Z	d
 fdd	Zdd Zddd	Z  ZS )CAMLayerr4   c	           	   	      sl   t t|   tj|||||||d| _t||| d| _tjdd| _t|| |d| _	t
 | _d S )Nr6   r2   Tr   )r;   rH   r<   r   r=   linear_locallinear1r   r   linear2Sigmoidsigmoid)	r,   bn_channelsr@   rA   r7   r8   r9   r:   	reductionrB   r   r   r<   M   s   	zCAMLayer.__init__c                 C   sJ   |  |}|jddd| | }| | |}| | |}|| S )Nr   T)r%   )rI   r   seg_poolingr   rJ   rM   rK   )r,   r$   ycontextmr   r   r   r-   d   s
   
zCAMLayer.forwardd   avgc                 C   s   |dkrt j|||dd}n|dkrt j|||dd}ntd|j}|djg ||R  jg |d d dR  }|dd |jd f }|S )NrU   T)rA   r7   	ceil_modemaxzWrong segment pooling type.r   .)F
avg_pool1d
max_pool1dr   shaper#   expandreshape)r,   r$   seg_lenstypesegr[   r   r   r   rP   k   s*   
zCAMLayer.seg_pooling)r4   )rT   rU   )r.   r/   r0   r<   r-   rP   rG   r   r   rB   r   rH   K   s
    
rH   c                       s8   e Zd Z					d
 fdd	Zdd Zdd	 Z  ZS )CAMDenseTDNNLayerr2   Fr3   c
              	      s   t t|   |d dksJ d||d d | }
|	| _t||| _tj||ddd| _	t||| _
t|||||
||d| _d S )Nr4   r2   r5   Fr:   r6   )r;   ra   r<   r   memory_efficientr   
nonlinear1r   r=   rJ   
nonlinear2rH   	cam_layer)r,   r?   r@   rN   rA   r7   r9   r:   r   rc   r8   rB   r   r   r<   }   s$   
zCAMDenseTDNNLayer.__init__c                 C   s   |  | |S r*   )rJ   rd   r+   r   r   r   bn_function   s   zCAMDenseTDNNLayer.bn_functionc                 C   s:   | j r| jrt| j|}n| |}| | |}|S r*   )trainingrc   cp
checkpointrg   rf   re   r+   r   r   r   r-      s
   
zCAMDenseTDNNLayer.forwardr2   r2   Fr3   F)r.   r/   r0   r<   rg   r-   rG   r   r   rB   r   ra   {   s    ra   c                       s0   e Zd Z					d fdd	Zdd Z  ZS )	CAMDenseTDNNBlockr2   Fr3   c                    sV   t t|   t|D ]}t|||  |||||||	|
d	}| d|d  | qd S )N)	r?   r@   rN   rA   r7   r9   r:   r   rc   ztdnnd%dr2   )r;   rl   r<   rangera   r   )r,   
num_layersr?   r@   rN   rA   r7   r9   r:   r   rc   ilayerrB   r   r   r<      s   

zCAMDenseTDNNBlock.__init__c                 C   s$   | D ]}t j|||gdd}q|S )Nr2   r   )r!   r"   )r,   r$   rp   r   r   r   r-      s   zCAMDenseTDNNBlock.forwardrk   rF   r   r   rB   r   rl      s    rl   c                       *   e Zd Z		d fdd	Zdd Z  ZS )TransitLayerTr3   c                    s2   t t|   t||| _tj||d|d| _d S Nr2   rb   )r;   rr   r<   r   r   r   r=   r>   r,   r?   r@   r:   r   rB   r   r   r<      s   zTransitLayer.__init__c                 C   rD   r*   )r   r>   r+   r   r   r   r-      rE   zTransitLayer.forward)Tr3   rF   r   r   rB   r   rr      
    	rr   c                       rq   )
DenseLayerFr3   c                    s2   t t|   tj||d|d| _t||| _d S rs   )r;   rv   r<   r   r=   r>   r   r   rt   rB   r   r   r<      s   zDenseLayer.__init__c                 C   sB   t |jdkr| |jddjdd}n| |}| |}|S )Nr4   r   r   )lenr[   r>   r#   squeezer   r+   r   r   r   r-      s
   

zDenseLayer.forward)Fr3   rF   r   r   rB   r   rv      ru   rv   c                       s*   e Zd ZdZd fdd	Zdd Z  ZS )BasicResBlockr2   c              	      s   t t|   tj||d|dfddd| _t|| _tj||ddddd| _t|| _	t
 | _|dks=|| j| krYt
tj|| j| d|dfddt| j| | _d S d S )N   r2   F)rA   r7   r8   r:   )rA   r7   r:   )r;   ry   r<   r   Conv2dconv1BatchNorm2dbn1conv2bn2r   shortcut	expansion)r,   	in_planesplanesr7   rB   r   r   r<      s6   

zBasicResBlock.__init__c                 C   sB   t | | |}| | |}|| |7 }t |}|S r*   )rX   r   r~   r|   r   r   r   )r,   r$   outr   r   r   r-     s
   
zBasicResBlock.forward)r2   )r.   r/   r0   r   r<   r-   rG   r   r   rB   r   ry      s    ry   )r   FTr   )__doc__r!   torch.nn.functionalr   
functionalrX   torch.utils.checkpointutilsrj   ri   r   r(   Moduler)   r1   rH   ra   
ModuleListrl   rr   rv   ry   r   r   r   r   <module>   s   
	 0)!