o
    i                     @   s   d dl Z d dlZd dlmZ d dlm  mZ d dlmZ G dd dej	Z
G dd dejZG dd dejZG d	d
 d
ejZdddZdd ZdejdejfddZdd ZdefddZdd ZdS )    N)MultiheadAttentionc                       $   e Zd Z fddZdd Z  ZS )Fp32LayerNormc                       t  j|i | d S Nsuper__init__selfargskwargs	__class__ P/home/ubuntu/.local/lib/python3.10/site-packages/funasr/models/data2vec/utils.pyr	         zFp32LayerNorm.__init__c                 C   L   t | | j| jd ur| j nd | jd ur| j nd | j}||S r   )F
layer_normfloatnormalized_shapeweightbiasepstype_asr   inputoutputr   r   r   forward      
zFp32LayerNorm.forward__name__
__module____qualname__r	   r   __classcell__r   r   r   r   r          r   c                       r   )Fp32GroupNormc                    r   r   r   r
   r   r   r   r	      r   zFp32GroupNorm.__init__c                 C   r   r   )r   
group_normr   
num_groupsr   r   r   r   r   r   r   r   r   "   r    zFp32GroupNorm.forwardr!   r   r   r   r   r'      r&   r'   c                       &   e Zd Zd fdd	Zdd Z  ZS )TransposeLastNc                    s   t    || _d S r   )r   r	   deconstruct_idx)r   r,   r   r   r   r	   .   s   

zTransposeLast.__init__c                 C   s    | j d ur
|| j  }|ddS )N)r,   	transposer   xr   r   r   r   2   s   

zTransposeLast.forwardr   r!   r   r   r   r   r+   -   s    r+   c                       r*   )SamePadFc                    s6   t    |r|d | _d S |d dkrdnd| _d S )N      r   )r   r	   remove)r   kernel_sizecausalr   r   r   r	   9   s   
zSamePad.__init__c                 C   s,   | j dkr|d d d d d | j  f }|S Nr   )r5   r0   r   r   r   r   @   s   
zSamePad.forward)Fr!   r   r   r   r   r2   8   s    r2   r.   c                 C   sr   | d u rdS |  |}|| }t|| | }| r | dfS dd|  d }tj| g |d|R |d|fS )Nr8   r   )r   r.   r4   )value)sizemathceil
is_integerr   pad)r1   multipledimr9   tszm	remainder
pad_offsetr   r   r   pad_to_multipleF   s   
"rE   c                 C   sH   t tdstdtj t_d|  dttj| dt| d     S )N_ar4   g      ?r3   gHm?   )	hasattrgelu_accurater;   sqrtpirF   torchtanhpowr1   r   r   r   rI   T   s   
,rI   r1   returnc                 C   s   t jj|  | S r   )rL   nn
functionalgelur   r   rO   r   r   r   rS   Z   s   rS   c                   C   s   g dS )N)relurS   	gelu_fastrI   rM   linearr   r   r   r   r   get_available_activation_fns^   s   rW   
activationc                 C   sb   | dkrt jS | dkrtS | dkrtS | dkrtjS | dkr"dd S | dkr*tjjS td		| )
z=Returns the activation function corresponding to `activation`rT   rS   rI   rM   rV   c                 S   s   | S r   r   rO   r   r   r   <lambda>u   s    z#get_activation_fn.<locals>.<lambda>swishz --activation-fn {} not supported)
r   rT   rS   rI   rL   rM   rQ   SiLURuntimeErrorformat)rX   r   r   r   get_activation_fni   s   r^   c                 C   s   dd }t | tjr|| jj | jdur| jj  t | tjr5|| jj | jdur5| jj| j   t | t	rQ|| j
jj || jjj || jjj dS dS )a  
    Initialize the weights specific to the BERT Model.
    This overrides the default initializations depending on the specified arguments.
        1. If normal_init_linear_weights is set then weights of linear
           layer will be initialized using the normal distribution and
           bais will be set to the specified value.
        2. If normal_init_embed_weights is set then weights of embedding
           layer will be initialized using the normal distribution.
        3. If normal_init_proj_weights is set then weights of
           in_project_weight for MultiHeadAttention initialized using
           the normal distribution (to be validated).
    c                 S   s$   |  |  jddd| j d S )Ng        g{Gz?)meanstd)copy_cpunormal_todevice)datar   r   r   rc      s   $z!init_bert_params.<locals>.normal_N)
isinstancerQ   Linearr   rf   r   zero_	Embeddingpadding_idxr   q_projk_projv_proj)modulerc   r   r   r   init_bert_params|   s   


rp   )r.   r   )r;   rL   torch.nnrQ   torch.nn.functionalrR   r   *funasr.models.data2vec.multihead_attentionr   	LayerNormr   	GroupNormr'   Moduler+   r2   rE   rI   TensorrS   rW   strr^   rp   r   r   r   r   <module>   s   
