o
    iP'                     @   s   d dl Z d dlm  mZ d dl mZmZ d dlmZ dd Zdd Z	dd	 Z
d
d Zdd ZG dd dejZG dd dejZG dd dejZG dd dejZG dd dejZG dd dejZdS )    N)nneinsum	rearrangec                 O   s   | S N )targskwargsr   r   W/home/ubuntu/.local/lib/python3.10/site-packages/funasr/models/mossformer/mossformer.pyidentity   s   r   c                 C   s&   |dkr| S | j g | jd| R  S )Nr      )viewshape)xnum_dimsr   r   r   append_dims   s   r   c                 C   s   | d uS r   r   )valr   r   r   exists   s   r   c                 C   s   t | r| S |S r   )r   )r   dr   r   r   default   s   r   c                 C   s   | | }|dkr
dS || S )Nr   r   )nmult	remainderr   r   r   padding_to_multiple_of   s   r   c                       s.   e Zd ZdZdef fddZdd Z  ZS )	Transposez9Wrapper class of torch.transpose() for Sequential module.r   c                    s   t t|   || _d S r   )superr   __init__r   )selfr   	__class__r   r   r   #   s   
zTranspose.__init__c                 C   s   |j | j S r   )	transposer   )r   r   r   r   r   forward'   s   zTranspose.forward)__name__
__module____qualname____doc__tupler   r#   __classcell__r   r   r    r   r       s    r   c                       sN   e Zd ZdZ			ddedededed	ed
eddf fddZdd Z  ZS )DepthwiseConv1da]  
    When groups == in_channels and out_channels == K * in_channels, where K is a positive integer,
    this operation is termed in literature as depthwise convolution.
    Args:
        in_channels (int): Number of channels in the input
        out_channels (int): Number of channels produced by the convolution
        kernel_size (int or tuple): Size of the convolving kernel
        stride (int, optional): Stride of the convolution. Default: 1
        padding (int or tuple, optional): Zero-padding added to both sides of the input. Default: 0
        bias (bool, optional): If True, adds a learnable bias to the output. Default: True
    Inputs: inputs
        - **inputs** (batch, in_channels, time): Tensor containing input vector
    Returns: outputs
        - **outputs** (batch, out_channels, time): Tensor produces by depthwise 1-D convolution.
    r   r   Fin_channelsout_channelskernel_sizestridepaddingbiasreturnNc              	      s@   t t|   || dksJ dtj|||||||d| _d S )Nr   z7out_channels should be constant multiple of in_channels)r+   r,   r-   groupsr.   r/   r0   )r   r*   r   r   Conv1dconv)r   r+   r,   r-   r.   r/   r0   r    r   r   r   <   s   	zDepthwiseConv1d.__init__c                 C   s
   |  |S r   )r4   r   inputsr   r   r   r#   S   s   
zDepthwiseConv1d.forward)r   r   F)	r$   r%   r&   r'   intboolr   r#   r)   r   r   r    r   r*   +   s*    r*   c                       sF   e Zd ZdZ			ddedededed	d
f
 fddZdd Z  ZS )
ConvModulea  
    Conformer convolution module starts with a pointwise convolution and a gated linear unit (GLU).
    This is followed by a single 1-D depthwise convolution layer. Batchnorm is  deployed just after the convolution
    to aid training deep models.
    Args:
        in_channels (int): Number of channels in the input
        kernel_size (int or tuple, optional): Size of the convolving kernel Default: 31
        dropout_p (float, optional): probability of dropout
    Inputs: inputs
        inputs (batch, time, dim): Tensor contains input sequences
    Outputs: outputs
        outputs (batch, time, dim): Tensor produces by conformer convolution module.
          皙?r+   r-   expansion_factor	dropout_pr1   Nc              
      sd   t t|   |d d dksJ d|dksJ dttddt|||d|d d d| _d S )	Nr   r;   r   z5kernel_size should be a odd number for 'SAME' paddingz+Currently, Only Supports expansion_factor 2)r   r;   )r   )r.   r/   )r   r9   r   r   
Sequentialr   r*   
sequential)r   r+   r-   r=   r>   r    r   r   r   f   s   
zConvModule.__init__c                 C   s   ||  |dd S )Nr   r;   )r@   r"   r5   r   r   r   r#   x   s   zConvModule.forward)r:   r;   r<   )	r$   r%   r&   r'   r7   floatr   r#   r)   r   r   r    r   r9   W   s"    r9   c                       s&   e Zd Zd fdd	Zdd Z  ZS )OffsetScaler   c                    sH   t    tt||| _tt||| _tj	j
| jdd d S )Ng{Gz?)std)r   r   r   	Parametertorchonesgammazerosbetainitnormal_)r   dimheadsr    r   r   r   }   s   
zOffsetScale.__init__c                 C   s    t d|| j| j }|jddS )Nz... d, h d -> ... h drL   )r   rG   rI   unbind)r   r   outr   r   r   r#      s   zOffsetScale.forwardr   )r$   r%   r&   r   r#   r)   r   r   r    r   rB   |   s    rB   c                       s,   e Zd Zejdf fdd	Zdd Z  ZS )FFConvMr<   c              	      s<   t    t||t||t t|t|| _d S r   )	r   r   r   r?   LinearSiLUr9   Dropoutmdl)r   dim_indim_out
norm_klassdropoutr    r   r   r      s   


zFFConvM.__init__c                 C   s   |  |}|S r   )rV   )r   r   outputr   r   r   r#      s   
zFFConvM.forward)r$   r%   r&   r   	LayerNormr   r#   r)   r   r   r    r   rR      s    
rR   c                	       sJ   e Zd Zddddddejdd fd	d

ZddddZdddZ  ZS )FLASH_ShareA_FFConvM      g      ?Fr<   NT)
group_sizequery_key_dimr=   causalrZ   rotary_pos_embrY   shift_tokensc       	            s   t    t|| }
|| _|| _|	| _|| _t|| _	t
||
||d| _t
||||d| _t|dd| _t
|d |||d| _t | _d S )N)rW   rX   rY   rZ      )rM   r;   )r   r   r7   r`   rb   rd   rc   r   rU   rZ   rR   	to_hiddento_qkrB   qk_offset_scaleto_outSigmoidgateActivate)r   rL   r`   ra   r=   rb   rZ   rc   rY   rd   
hidden_dimr    r   r   r      s6   
zFLASH_ShareA_FFConvM.__init__)maskc             	   C   s   |}|}| j r!|jddd\}}tj|ddd}tj||fdd}| |jddd\}}| |}	| |	\}
}}}| 	||
|||||\}}|| | 
||  }|| | }|S )z
        b - batch
        n - sequence length (within groups)
        g - group dimension
        d - feature dimension (keys)
        e - feature dimension (values)
        i - sequence dimension (source)
        j - sequence dimension (target)
        r;   rO   )r   r   r   rn           value)rd   chunkFpadrE   catrf   rg   rh   cal_attentionrk   ri   )r   r   rm   normed_xresidualx_shiftx_passvuqkquad_qlin_qquad_klin_katt_vatt_urQ   r   r   r   r#      s   
zFLASH_ShareA_FFConvM.forwardc	                    sl  |j d |j d |jjf\}	 }
}t|r"t|d}|| d}tjr6tjj||||f\}}}}t	 |dkrltfdd||||||f\}}}}}}t
|tj|	 f|
tjd}tj|dfdd	}tfd
d||||||f\}}}}}}t|rt|d|d}td||| }t|d }|}t|r|| d}jrtj||ftj|
dd}||d}td||}td||}jrtd||| }|jdd}tj|ddd	}td||}td||| }|jdd}tj|ddd	}td||}ntd||  }td||}td||  }td||}t fdd|| || fS )Nr   rN   z... -> ... 1ro   c                    s   t j| ddd fddS )Nr   ro   rp   )rs   rt   r   )r/   r   r   <lambda>  s    z4FLASH_ShareA_FFConvM.cal_attention.<locals>.<lambda>)devicedtypeFrp   c                    s   t | d jdS )Nzb (g n) d -> b g n dr   )r   r`   r   )r   r   r   r     s    zb (g j) -> b g 1 j)jz... i d, ... j d -> ... i jr;   )r   r   r   z... i j, ... j d -> ... i dzb g n d, b g n e -> b g d erO   )r   r   r   r   r   rn   zb g d e, b g n d -> b g n ezb g n d, b g n e -> b d ezb g n d, b d e -> b g n ec                    s   t | dd d d  f S )Nzb g n d -> b (g n) dr   r   r   r   r   r   ;  s    )r   r   r`   r   r   masked_fillrc   maprotate_queries_or_keysr   r   rE   rF   r8   rs   rt   r   relurZ   rb   triucumsum)r   r   r~   r   r   r   r{   r|   rm   br   glin_masksimattncausal_mask
quad_out_v
quad_out_ulin_kv	lin_out_vlin_ku	lin_out_ur   )r   r/   r   r   rv      sb   $






z"FLASH_ShareA_FFConvM.cal_attentionr   )	r$   r%   r&   r   r\   r   r#   rv   r)   r   r   r    r   r]      s    1!r]   )rE   torch.nn.functionalr   
functionalrs   r   einopsr   r   r   r   r   r   Moduler   r*   r9   rB   rR   r]   r   r   r   r   <module>   s    ,%