o
    i0                     @   s   d dl Z d dlZd dlmZmZ d dlZd dlZd dlm	Z	 d dl
m	  mZ d dlmZ d dlmZ G dd de	jZdd ZG d	d
 d
e	jZG dd de	jZdS )    N)ListTuple)utils)MultiheadAttentionc                       sR   e Zd Z				ddeeeeef  dededed	ef
 fd
dZ	dd Z
  ZS )ConvFeatureExtractionModel        defaultF   conv_layersdropoutmode	conv_biasin_dc                    s   t    |dv sJ 			d fdd	}t | _t|D ]0\}}t|dks1J dt| |\ }	}
| j|| |	|
|dk|dkoH|d	k|d
  }qd S )N>   r   
layer_normFc                    s    fdd}|o|dksJ d|r3t | t jdt t tjddt t  S |rJt | t jdtjddt  S t | t jdt  S )	Nc                     s&   t j d} t j| j | S )N)stridebias)nnConv1dinitkaiming_normal_weight)convr   kn_inn_outr    S/home/ubuntu/.local/lib/python3.10/site-packages/funasr/models/data2vec/wav2vec2.py	make_conv)   s   zEConvFeatureExtractionModel.__init__.<locals>.block.<locals>.make_convFz'layer norm and group norm are exclusive)pTelementwise_affine)affine)r   
SequentialDropoutr   TransposeLastFp32LayerNormGELUFp32GroupNorm)r   r   r   r   is_layer_normis_group_normr   r   dimr   r   r   block    s2   	


z2ConvFeatureExtractionModel.__init__.<locals>.block   zinvalid conv definition: r   r   r   )r)   r*   r   )FFF)	super__init__r   
ModuleListr
   	enumeratelenstrappend)selfr
   r   r   r   r   r-   iclr   r   	__class__r+   r   r0      s.   

'
z#ConvFeatureExtractionModel.__init__c                 C   s>   t |jdkr|d}n|dd}| jD ]}||}q|S )N   r	   )r3   shape	unsqueeze	transposer
   )r6   xr   r   r   r   forwardY   s   

z"ConvFeatureExtractionModel.forward)r   r   Fr	   )__name__
__module____qualname__r   r   intfloatr4   boolr0   r@   __classcell__r   r   r9   r   r      s"    Er   c                 C   s   t j| | ||d |d}d}tdd|  ||   }t jj|jd|d t j|jd t j	j
|ddd}t |t	|t  }|S )	Nr;   kernel_sizepaddinggroupsr      g      ?)meanstdr   )namer,   )r   r   mathsqrtr   normal_r   	constant_r   r   weight_normr#   SamePadr'   )er   gpos_convr   rN   r   r   r   make_conv_posd   s   rY   c                       sN   e Zd Zdd Z fddZdddZ			dd	d
Zdd Zdd Z  Z	S )TransformerEncoderc              
   C   sD   | j dkrt| j| j| j| j| j| j| j| j	d}|S t
d |S )Ntransformer)embedding_dimffn_embedding_dimnum_attention_headsr   attention_dropoutactivation_dropoutactivation_fnlayer_norm_firstz.Only transformer is supported for data2vec now)
layer_typeTransformerSentenceEncoderLayerr\   encoder_ffn_embed_dimencoder_attention_headsr   r_   r`   ra   rb   loggingerror)r6   layerr   r   r   build_encoder_layerx   s   

z&TransformerEncoder.build_encoder_layerc                    s   t    | _| _| _|dkr)|}td|| }dd }| j||| _nt j|| _| _|	 _	|
 _
| _| _| _| _| _| _t fddt|D  _tj j _ tj d S )Nr	   r.   c                    s    t j fddt|D  S )Nc                    sR   g | ]%}t t j  d  dtt tj j ddt t  qS )r;   rH   Fr    )	r   r#   r   r   rU   r%   torch	LayerNormr'   .0_rV   rW   r   r   r   
<listcomp>   s"    zHTransformerEncoder.__init__.<locals>.make_conv_block.<locals>.<listcomp>)r   r#   range)rV   r   rW   lr   rp   r   make_conv_block   s
   z4TransformerEncoder.__init__.<locals>.make_conv_blockc                    s   g | ]}   qS r   )rj   rm   r6   r   r   rq          z/TransformerEncoder.__init__.<locals>.<listcomp>)r/   r0   r   r\   required_seq_len_multiplemaxrX   rY   rc   re   rf   r_   r`   ra   rb   	layerdropmax_positionsr   r1   rr   layersrk   rl   r   applyr   init_bert_params)r6   r   encoder_embed_dimrw   pos_conv_depthconv_posconv_pos_groupsrc   encoder_layersre   rf   r_   r`   ra   rb   encoder_layerdroprz   
num_layersr   rt   r9   ru   r   r0      s4   
zTransformerEncoder.__init__Nc                 C   s2   |  |||\}}| jr|d u r| |}||fS N)extract_featuresrb   r   )r6   r?   padding_maskri   layer_resultsr   r   r   r@      s   
zTransformerEncoder.forwardr   c                    s  |d urd||< |  |dd}|dd}|| }| js#| |}tj|| jddd\}  dkrS|d u rS|j|d|dft	j
d}d|d d   d f< ntj|| jddd\}}tj|| j| jd	}|dd}g }d }t| jD ]6\}	}
| jdkrtj nd}| jr|| jkr|
||d
\}\}}|	|kr||||f |	|kr|} nqx|d ur|}|dd} dkr|d d d   f } fddfdd|D }||fS )Nr   r	   r;   )r,   value)dtypeT)r   training)self_attn_padding_maskc                    s4   | d    |d ur|d    n||d    fS r   r   )abc)
pad_lengthr   r   undo_pad  s   z5TransformerEncoder.extract_features.<locals>.undo_padc                    s   g | ]} | qS r   r   )rn   u)r   r   r   rq     rv   z7TransformerEncoder.extract_features.<locals>.<listcomp>)rX   r>   rb   r   r   pad_to_multiplerw   	new_zerossizerk   rF   Fr   r   r2   r{   ry   nprandomr5   )r6   r?   r   	tgt_layer	min_layerx_convro   r   rr7   ri   dropout_probabilityzlrr   )r   r   r   r      sH   
 

z#TransformerEncoder.extract_featuresc                 C   s   | j S )z/Maximum output length supported by the encoder.)rz   ru   r   r   r   rz     s   z TransformerEncoder.max_positionsc                 C   s   |S )z@Upgrade a (possibly old) state dict for new versions of fairseq.r   )r6   
state_dictrO   r   r   r   upgrade_state_dict_named!  s   z+TransformerEncoder.upgrade_state_dict_namedNN)NNr   )
rA   rB   rC   rj   r0   r@   r   rz   r   rG   r   r   r9   r   rZ   w   s    
L
ArZ   c                       sz   e Zd ZdZ								dded	ed
ededededededdf fddZ		dde	j
de	j
de	j
fddZ  ZS )rd   z_
    Implements a Transformer Encoder Layer used in BERT/XLM style pre-trained
    models.
             皙?reluFr\   r]   r^   r   r_   r`   ra   rb   returnNc	           	         s   t    || _|| _|| _t|| _t| j||dd| _	t
|| _t
| j| _t
|| _|| _tj
| j| _t
| j|| _t
|| j| _tj
| j| _d S )NT)r   self_attention)r/   r0   r\   r   r`   r   get_activation_fnra   r   	self_attnr   r$   dropout1dropout2dropout3rb   rk   rl   self_attn_layer_normLinearfc1fc2final_layer_norm)	r6   r\   r]   r^   r   r_   r`   ra   rb   r9   r   r   r0   ,  s&   
z(TransformerSentenceEncoderLayer.__init__r?   self_attn_maskr   c                 C   s  |}| j rE| |}| j|||||dd\}}| |}|| }|}| |}| | |}| |}| |}|}| 	|}|| }n>| j||||dd\}}| |}|| }| |}|}| | |}| |}| |}|}| 	|}|| }| |}|||ffS )z
        LayerNorm is applied either before or after the self-attention/ffn
        modules similar to the original Transformer imlementation.
        F)querykeyr   key_padding_mask	attn_maskneed_weights)r   r   r   r   r   )
rb   r   r   r   r   ra   r   r   r   r   )r6   r?   r   r   residualattnlayer_resultr   r   r   r@   U  sP   















z'TransformerSentenceEncoderLayer.forward)r   r   r   r   r   r   r   Fr   )rA   rB   rC   __doc__rD   rE   r4   rF   r0   rk   Tensorr@   rG   r   r   r9   r   rd   &  sL    	
,rd   )rg   rP   typingr   r   numpyr   rk   torch.nnr   torch.nn.functional
functionalr   funasr.models.data2vecr   *funasr.models.data2vec.multihead_attentionr   Moduler   rY   rZ   rd   r   r   r   r   <module>   s   Q 0