o
    ߥi8                     @   s   d dl Zd dlZd dlmZ d dlm  mZ ddlm	Z	m
Z
mZmZmZmZ dZdd Zdd ZG d	d
 d
ejZG dd dejZG dd dejZG dd dejZG dd dejZG dd dejZ					dddZdS )    N   )HEADER_BLOCK_SIZEActivationType	LayerTypef32ToI32printNeonMatrixprintNeonVectorFc                 C   s:   t jt jt jd t| }|dd}|dd}d| S )z function that transform as str numpy mat to standard kaldi str matrix

        Args:
            np_mat:          numpy mat

        Returns:  str
    )	threshold	linewidth[ ]z[ %s ]
)npset_printoptionsinfnanstrreplace)np_matout_str r   ]/home/ubuntu/.local/lib/python3.10/site-packages/modelscope/models/audio/kws/farfield/fsmn.pyto_kaldi_matrix   s
   r   c                 C   s4   d}|     }|t|7 }|d7 }t| dS )zV print torch tensor for debug

    Args:
        torch_tensor:           a tensor
    r   <!EndOfComponent>
N)detachsqueezenumpyr   print)torch_tensorre_strxr   r   r   print_tensor   s
   r!   c                       4   e Zd Z fddZdd Zdd Zdd Z  ZS )	LinearTransformc                    s<   t t|   || _|| _tj||dd| _d| _d | _	d S )NF)bias)
superr#   __init__	input_dim
output_dimnnLinearlineardebugdataoutselfr'   r(   	__class__r   r   r&   ,   s   
zLinearTransform.__init__c                 C      |  |}| jr|| _|S Nr+   r,   r-   r/   inputoutputr   r   r   forward5      
zLinearTransform.forwardc                 C   s   t | jj d S r3   )r   r+   weightr/   r   r   r   print_model=   s   zLinearTransform.print_modelc                 C   sP   d}|d| j | jf 7 }|d7 }|  d }|  }|t|7 }|d7 }|S )Nr   z<LinearTransform> %d %d
z<LearnRateCoef> 1
linear.weightr   r(   r'   
state_dictr   r   r   )r/   r   linear_weightsr    r   r   r   to_kaldi_nnet@   s   zLinearTransform.to_kaldi_nnet__name__
__module____qualname__r&   r8   r<   rA   __classcell__r   r   r0   r   r#   *   s
    	r#   c                       r"   )	AffineTransformc                    s8   t t|   || _|| _t||| _d| _d | _	d S )NF)
r%   rG   r&   r'   r(   r)   r*   r+   r,   r-   r.   r0   r   r   r&   P   s   
zAffineTransform.__init__c                 C   r2   r3   r4   r5   r   r   r   r8   Z   r9   zAffineTransform.forwardc                 C   s   t | jj t| jj d S r3   )r   r+   r:   r   r$   r;   r   r   r   r<   b   s   zAffineTransform.print_modelc                 C   st   d}|d| j | jf 7 }|d7 }|  d }|  }|t|7 }|  d }|  }|t|7 }|d7 }|S )Nr   z<AffineTransform> %d %d
z4<LearnRateCoef> 1 <BiasLearnRateCoef> 1 <MaxNorm> 0
r=   zlinear.biasr   r>   )r/   r   r@   r    linear_biasr   r   r   rA   f   s   zAffineTransform.to_kaldi_nnetrB   r   r   r0   r   rG   N   s
    
rG   c                       sB   e Zd ZdZ				d fdd	Zdd Zdd Zd	d
 Z  ZS )Fsmnz
    FSMN implementation.
    Nc                    s   t t|   || _|d u rd S || _|| _|| _|| _tj	| j| j|df|df| jdd| _
|dkrGtj	| j| j|df|df| jdd| _nd | _d| _d | _d S )Nr   Fdilationgroupsr$   r   )r%   rI   r&   dimlorderrorderlstriderstrider)   Conv2d	conv_left
conv_rightr,   r-   )r/   r'   r(   rN   rO   rP   rQ   r0   r   r   r&   }   s4   



zFsmn.__init__c           	      C   s   t |d}|dddd}t|dd| jd | j dg}| jd urOt|ddd| j| j	 g}|d d d d | j	d d d f }|| 
| | | }n|| 
| }|dddd}|d}| jri|| _|S Nr   r         )torch	unsqueezepermuteFpadrN   rP   rT   rO   rQ   rS   r   r,   r-   )	r/   r6   r    x_pery_lefty_rightoutout1r7   r   r   r   r8      s    
"
zFsmn.forwardc                 C   s   | j j}t|jd |jd }t|jd D ]}||dd d df |d d |f< qt| | jd urc| jj}t|jd |jd }t|jd D ]}||dd d df |d d |f< qJt| d S d S NrW   r   )rS   r:   rX   zerosshaperanger   rT   r/   tmpwtmpwmjr   r   r   r<      s   "
"zFsmn.print_modelc                 C   s   d}|d| j | j f 7 }|dd| j| j| j| jf 7 }|  d }t| 	 j
}|t|7 }| jd urM|  d }| 	 j
}|t|7 }|d7 }|S )Nr   z<Fsmn> %d %d
zQ<LearnRateCoef> %d <LOrder> %d <ROrder> %d <LStride> %d <RStride> %d <MaxNorm> 0
r   zconv_left.weightzconv_right.weightr   )rM   rN   rO   rP   rQ   r?   r   flipudr   r   Tr   rT   )r/   r   lfitersr    rfitersr   r   r   rA      s   
zFsmn.to_kaldi_nnet)NNNN)	rC   rD   rE   __doc__r&   r8   r<   rA   rF   r   r   r0   r   rI   x   s    'rI   c                       s,   e Zd Z fddZdd Zdd Z  ZS )RectifiedLinearc                    s"   t t|   || _t | _d S r3   )r%   ro   r&   rM   r)   ReLUrelur.   r0   r   r   r&      s   zRectifiedLinear.__init__c                 C   s
   |  |S r3   )rq   )r/   r6   r   r   r   r8      s   
zRectifiedLinear.forwardc                 C   s$   d}|d| j | j f 7 }|d7 }|S )Nr   z<RectifiedLinear> %d %d
r   )rM   )r/   r   r   r   r   rA      s   zRectifiedLinear.to_kaldi_nnet)rC   rD   rE   r&   r8   rA   rF   r   r   r0   r   ro      s    ro   c                       sh   e Zd ZdZ							d fdd		Ze	
				dddZdd Zdd Zdd Z	dd Z
  ZS )FSMNNetz'
    FSMN net for keyword spotting
          
   r         c                    st   t t|   || _|| _|| _|| _|| _|| _|| _	t
||| _t||| _| |||||| _t
||| _dS )a  
        Args:
            input_dim:              input dimension
            linear_dim:             fsmn input dimension
            proj_dim:               fsmn projection dimension
            lorder:                 fsmn left order
            rorder:                 fsmn right order
            num_syn:                output dimension
            fsmn_layers:            no. of sequential fsmn layers
        N)r%   rr   r&   r'   
linear_dimproj_dimrN   rO   num_synfsmn_layersrG   linear1ro   rq   _build_repeatsfsmnlinear2)r/   r'   rx   ry   rN   rO   rz   r{   r0   r   r   r&      s   zFSMNNet.__init__   D   rV   rW   c                    &    fddt |D }tj| S )Nc                    s<   g | ]}t t td d t t  qS r   )r)   
Sequentialr#   rI   rG   ro   .0irx   rN   ry   rO   r   r   
<listcomp>  s    z*FSMNNet._build_repeats.<locals>.<listcomp>re   r)   r   rx   ry   rN   rO   r{   repeatsr   r   r   r}     s   
	zFSMNNet._build_repeatsc                 C   s,   |  |}| |}| |}| |}|S r3   )r|   rq   r~   r   )r/   r6   x1x2x3x4r   r   r   r8   '  s
   



zFSMNNet.forwardc                 C   sH   | j   | jD ]}|d   |d   |d   q| j  d S )Nr   r   rW   )r|   r<   r~   r   )r/   layerr   r   r   r<   .  s   

zFSMNNet.print_modelc                 C   s  dgt  d }d|d< d|d< | j|d< | j|d< d|d< d}ttjj|t | d < d|t | d < | j|t | d < | j|t | d < d|t | d < ttj	j|t | d < |d7 }ttj
j|t | d < d|t | d < | j|t | d < | j|t | d < | j|t | d < | j|t | d < | j|t | d	 < d
|t | d < |d7 }ttjj|t | d < d|t | d < | j|t | d < | j|t | d < d|t | d < ttjj|t | d < |D ]}tt| qd S )Ng        rw   r   r   rW   rV   g      ?rv      g         )r   r'   rz   floatr   LAYER_DENSEvaluerx   r   ACTIVATION_RELULAYER_SEQUENTIAL_FSMNry   rN   rO   r{   ACTIVATION_SOFTMAXr   r   )r/   headerhidxhr   r   r   print_header8  sT   

zFSMNNet.print_headerc                 C   s   d}|d7 }|| j  7 }|| j 7 }| jD ]"}||d  7 }||d  7 }||d  7 }||d  7 }q|| j 7 }|d| j| jf 7 }|d7 }|d	7 }|S )
Nr   z<Nnet>
r   r   rW   rV   z<Softmax> %d %d
r   z</Nnet>
)r|   rA   rq   r~   r   rz   )r/   r   r~   r   r   r   rA   n  s   
zFSMNNet.to_kaldi_nnet)rs   rt   rt   ru   r   rv   rw   )r   r   rV   rW   rv   )rC   rD   rE   rn   r&   staticmethodr}   r8   r<   r   rA   rF   r   r   r0   r   rr      s*    $
6rr   c                       s>   e Zd ZdZ						d fdd	Zdd	 Zd
d Z  ZS )DFSMNz
    One deep fsmn layer
    @   rt      r   c                    s   t t|   || _|| _|| _|| _t||| _t	||| _
tj|||df|df|dd| _|dkrEtj|||df|df|dd| _dS d| _dS )a|  
        Args:
            dimproj:                projection dimension, input and output dimension of memory blocks
            dimlinear:              dimension of mapping layer
            lorder:                 left order
            rorder:                 right order
            lstride:                left stride
            rstride:                right stride
        r   FrJ   r   N)r%   r   r&   rN   rO   rP   rQ   rG   expandr#   shrinkr)   rR   rS   rT   )r/   dimproj	dimlinearrN   rO   rP   rQ   r0   r   r   r&     s.   
zDFSMN.__init__c                 C   s   t | |}| |}t|d}|dddd}t |dd| jd | j	 dg}| j
d ur\t |ddd| j| j g}|d d d d | jd d d f }|| | | 
| }n|| | }|dddd}	||	d }
|
S rU   )r[   rq   r   r   rX   rY   rZ   r\   rN   rP   rT   rO   rQ   rS   r   )r/   r6   f1p1r    r]   r^   r_   r`   ra   r7   r   r   r   r8     s   
 
"zDFSMN.forwardc                 C   s   | j   | j  | jj}t|jd |jd }t|jd D ]}||dd d df |d d |f< q!t	| | j
d urm| j
j}t|jd |jd }t|jd D ]}||dd d df |d d |f< qTt	| d S d S rb   )r   r<   r   rS   r:   rX   rc   rd   re   r   rT   rf   r   r   r   r<     s   

"
"zDFSMN.print_model)r   rt   r   r   r   r   )rC   rD   rE   rn   r&   r8   r<   rF   r   r   r0   r   r     s    +r   rt   r   r   r   c                    r   )z
    build stacked dfsmn layers
    Args:
        linear_dim:
        proj_dim:
        lorder:
        rorder:
        fsmn_layers:

    Returns:

    c                    s$   g | ]}t t d d qS r   )r)   r   r   r   r   r   r   r     s    z'build_dfsmn_repeats.<locals>.<listcomp>r   r   r   r   r   build_dfsmn_repeats  s   
r   )rt   r   r   r   r   )r   r   rX   torch.nnr)   torch.nn.functional
functionalr[   	model_defr   r   r   r   r   r   DEBUGr   r!   Moduler#   rG   rI   ro   rr   r   r   r   r   r   r   <module>   s*    $*d Y