o
    ´©i¤a  ã                   @   s’   d dl Z d dlZd dlmZmZmZmZ d dlZd dlm	  m
Z d dlmZm	Z	 d dlmZ d dlmZ G dd„ de	jƒZG dd	„ d	e	jƒZdS )
é    N)ÚDictÚListÚOptionalÚTuple)ÚTensorÚnn)Ú	Parameter)Úquant_noisec                	       sR   e Zd Zd‡ fdd„	Zddefdd„Z		dded	ed
eee  fdd„Z	‡  Z
S )ÚFairseqDropoutNc                    s    t ƒ  ¡  || _|| _d| _d S )NF)ÚsuperÚ__init__ÚpÚmodule_nameÚapply_during_inference)Úselfr   r   ©Ú	__class__© ú^/home/ubuntu/.local/lib/python3.10/site-packages/funasr/models/data2vec/multihead_attention.pyr      s   

zFairseqDropout.__init__FÚinplacec                 C   s.   | j dkr| js| jrtj|| j d|dS |S )Nr   T)r   Útrainingr   )r   r   r   ÚFÚdropout)r   Úxr   r   r   r   Úforward   s   zFairseqDropout.forwardÚnameÚretain_dropoutÚretain_dropout_modulesc                 K   sn   |r5|d ur| j d u rt d |¡¡ d S |d u s| j |v r+t d |¡¡ d| _d S t d |¡¡ d S d S )NzTCannot enable dropout during inference for module {} because module_name was not setz0Enabling dropout during inference for module: {}Tz Disabling dropout for module: {})r   ÚloggingÚwarningÚformatÚinfor   )r   r   r   r   Úkwargsr   r   r   Úmake_generation_fast_   s   þ

óz$FairseqDropout.make_generation_fast_©N)F)FN)Ú__name__Ú
__module__Ú__qualname__r   Úboolr   Ústrr   r   r#   Ú__classcell__r   r   r   r   r
      s    	üþý
ür
   c                       sÒ  e Zd ZdZ										d4‡ fdd„	Zd	d
„ Zdd„ Zdefdd„Zde	e
eef  fdd„Zdd„ Z							d5dee dee dee deeeeeee f f  dededee dedede
eee f fdd „Zedee d!ee d"ed#ededee fd$d%„ƒZejjdeeeeee f f d&efd'd(„ƒZdeeeeeee f f  deeee f fd)d*„Zdeeeeee f f d+eeee f fd,d-„Zd.ed#ed/efd0d1„Zd2d3„ Z‡  ZS )6ÚMultiheadAttentionzSMulti-headed attention.

    See "Attention Is All You Need" for more details.
    Nç        TFé   c                    st  t ƒ  ¡  || _|d ur|n|| _|d ur|n|| _| j|ko#| j|k| _|| _t|| jj	d| _
|| | _| j| | jksBJ dƒ‚| jd | _|	| _|
| _| jrX| jsXJ dƒ‚ttj| j||d||ƒ| _ttj| j||d||ƒ| _ttj|||d||ƒ| _ttj|||d||ƒ| _|r¥tt dd|¡ƒ| _tt dd|¡ƒ| _nd  | _| _|| _|  ¡  d| _d| _d S )N)r   z(embed_dim must be divisible by num_headsg      à¿zCSelf-attention requires query, key and value to be of the same size)Úbiasé   F)r   r   Ú	embed_dimÚkdimÚvdimÚqkv_same_dimÚ	num_headsr
   r   r%   Údropout_moduleÚhead_dimÚscalingÚself_attentionÚencoder_decoder_attentionr	   r   ÚLinearÚk_projÚv_projÚq_projÚout_projr   Útorchr   Úbias_kÚbias_vÚadd_zero_attnÚreset_parametersÚ
onnx_traceÚskip_embed_dim_check)r   r0   r4   r1   r2   r   r.   Úadd_bias_kvrB   r8   r9   Úq_noiseÚqn_block_sizer   r   r   r   <   sJ   

ÿÿÿÿÿÿ
zMultiheadAttention.__init__c                 C   ó
   d| _ d S ©NT)rD   ©r   r   r   r   Úprepare_for_onnx_export_|   ó   
z+MultiheadAttention.prepare_for_onnx_export_c                 C   sø   | j r1tjj| jjdt d¡ d tjj| jjdt d¡ d tjj| j	jdt d¡ d ntj | jj¡ tj | jj¡ tj | j	j¡ tj | j
j¡ | j
jd ur`tj | j
jd¡ | jd urltj | j¡ | jd urztj | j¡ d S d S )Nr/   é   )Úgainr,   )r3   r   ÚinitÚxavier_uniform_r;   ÚweightÚmathÚsqrtr<   r=   r>   r.   Ú	constant_r@   Úxavier_normal_rA   rK   r   r   r   rC      s    

ÿz#MultiheadAttention.reset_parametersÚnum_heads_to_keepc                    sž  g }g }g }t | jƒD ]z}|| j }|d | j }| t t | jj||…f ¡¡ 	¡ t t | jj
||… ¡¡ 	¡  ¡ | t t | jj||…f ¡¡ 	¡ t t | jj
||… ¡¡ 	¡  ¡ | t t | jj||…f ¡¡ 	¡ t t | jj
||… ¡¡ 	¡  ¡ qg ‰ t | jƒD ]}ˆ  || ||  ||  ¡ qtt | jƒ‡ fdd„dd}g }	t |ƒD ]}|| | j }
|| d | j }|	 |
|f¡ q³|	S )Nr/   c                    s   ˆ |  S r$   r   )Úk©Ú
heads_normr   r   Ú<lambda>¬   s    z<MultiheadAttention._get_reserve_head_index.<locals>.<lambda>T)ÚkeyÚreverse)Úranger4   r6   Úappendr?   ÚsumÚabsr;   rR   Útolistr.   r=   r<   Úsorted)r   rW   Úk_proj_heads_normÚq_proj_heads_normÚv_proj_heads_normÚiÚ	start_idxÚend_idxÚsorted_head_indexÚreserve_head_indexÚstartÚendr   rY   r   Ú_get_reserve_head_index“   s>   
 ÿÿ ÿÿ ÿÿ z*MultiheadAttention._get_reserve_head_indexrk   c                 C   s  g }g }g }g }g }g }g }|D ]Z}	|	\}
}|  | jj|
|…f ¡ |  | jj|
|… ¡ |  | jj|
|…f ¡ |  | jj|
|… ¡ |  | jj|
|…f ¡ |  | jj|
|… ¡ |  | jjd d …|
|…f ¡ qt |¡ 	¡ }t |¡ 	¡ }t |¡ 	¡ }tj|dd 	¡ }d|_
d|_
d|_
d|_
t |¡ 	¡ }d|_
t |¡ 	¡ }d|_
t |¡ 	¡ }d|_
tj |¡| j_tj |¡| j_tj |¡| j_tj |¡| j_tj |¡| j_tj |¡| j_tj |¡| j_t|ƒ| _| j| j | _| j| j_| j| j_| j| j_d S )Néÿÿÿÿ©ÚdimT)r_   r=   rR   r.   r;   r<   r>   r?   ÚcatÚdetachÚrequires_gradr   r   Úlenr4   r6   r0   Úout_features)r   rk   Únew_q_weightÚ
new_q_biasÚnew_k_weightÚ
new_k_biasÚnew_v_weightÚ
new_v_biasÚnew_out_proj_weightÚelerh   ri   r   r   r   Ú_adaptive_prune_heads´   sT    


z(MultiheadAttention._adaptive_prune_headsc                 C   rI   rJ   )rE   rK   r   r   r   Ú_set_skip_embed_dim_checkî   rM   z,MultiheadAttention._set_skip_embed_dim_checkr\   ÚvalueÚkey_padding_maskÚincremental_stateÚneed_weightsÚ	static_kvÚ	attn_maskÚbefore_softmaxÚneed_head_weightsÚreturnc                 C   sH  |
rd}|j jdk}| ¡ \}}}|}| js&|| jks&J d|› d| j› ƒ‚t| ¡ ƒ|||gks3J ‚|dur\| ¡ \}}}tj ¡ s\||ksIJ ‚|dusOJ ‚|s\J ||j	dd… kƒ‚| j
s¹|s¹|du r¹|s¹tj ¡ s¹| js¹|durw|dusyJ ‚tj|||| j| jt dg¡t | jj| jj| jjf¡| j| j| j| jj| jj| jj| jp©| jj|||d| jj| jj| jjdS |durÙ|  |¡}|durØd	|v rØ|rØ| jrÒ| jrÔJ ‚d }}nd}| jrî|  |¡}|  |¡}|  |¡}n@| jr|  |¡}|du r|du sJ ‚d }}n&|  |¡}|  |¡}n|dur|dusJ ‚|  |¡}|  |¡}|  |¡}|| j 9 }| jdur…| jdusAJ ‚t || j !d
|d
¡g¡}t || j !d
|d
¡g¡}|durptj|| "| d¡d
¡gd
d}|dur…tj|| "| d¡d
¡gd
d}| #¡  $||| j | j%¡ &dd
¡}|dur¬| #¡  $d|| j | j%¡ &dd
¡}|durÂ| #¡  $d|| j | j%¡ &dd
¡}|dur{d	|v rý|d	 }|dus×J ‚| $|| j d| j%¡}|rè|}n|dusïJ ‚tj||gd
d}| d
¡}d|v r.|d }|dusJ ‚| $|| j d| j%¡}|r|}n|dus%J ‚tj||gd
d}d}d|v r9|d }|durC|dusEJ ‚t'j(|||| d
¡|d}| $|| jd| j%¡|d	< | $|| jd| j%¡|d< ||d< |dusuJ ‚|  )||¡}|dus‚J ‚| d
¡|ksŒJ ‚|durš| *¡ dkršd}|dur³| d¡|ks©J ‚| d
¡|ks³J ‚| jr!|dus¾J ‚|d
7 }tj|| "| d¡d
f| ¡ dd…  ¡gd
d}tj|| "| d¡d
f| ¡ dd…  ¡gd
d}|dur	tj|| "| d¡d
¡gd
d}|dur!tj|t +| d¡d
¡ ,|¡gd
d}t -|| &d
d¡¡}|  .||||¡}t| ¡ ƒ|| j ||gksDJ ‚|dur`| /d¡}| j
r\| !| d¡d
d
¡}||7 }|dur¢| $|| j||¡}|s„| 0| /d
¡ /d¡ 1tj2¡t3dƒ¡}n| &dd¡}| 0|t3dƒ¡}| &dd¡}| $|| j ||¡}|	r©||fS tj4|dtj5d}| ,|¡}|  |¡}|dusÃJ ‚t -||¡}t| ¡ ƒ|| j || j%gksÛJ ‚| j
rò| d
¡d
krò| #¡  $||| j¡}n| &dd
¡ #¡  $||| j¡}|  |¡}d}|r | $|| j||¡ &d
d¡}|
s |j6dd}||fS )a¥  Input shape: Time x Batch x Channel

        Args:
            key_padding_mask (ByteTensor, optional): mask to exclude
                keys that are pads, of shape `(batch, src_len)`, where
                padding elements are indicated by 1s.
            need_weights (bool, optional): return the attention weights,
                averaged over heads (default: False).
            attn_mask (ByteTensor, optional): typically used to
                implement causal attention, where the mask prevents the
                attention from looking forward in time (default: None).
            before_softmax (bool, optional): return the raw attention
                weights and values before the attention softmax.
            need_head_weights (bool, optional): return the attention
                weights for each head. Implies *need_weights*. Default:
                return the average attention weights over all heads.
        TÚxlaz
query dim z != NrN   r   )Úuse_separate_proj_weightÚq_proj_weightÚk_proj_weightÚv_proj_weightÚprev_keyr/   rp   ro   Ú
prev_valueÚprev_key_padding_mask)r‚   r‘   Ú
batch_sizeÚsrc_lenr…   z-inf)rq   Údtype)7ÚdeviceÚtypeÚsizerE   r0   Úlistr?   ÚjitÚis_scriptingÚshaperD   r   Úmulti_head_attention_forwardr4   Úemptyrr   r=   r.   r;   r<   r@   rA   rB   r5   r   r>   rR   r   r   Ú_get_input_bufferr9   r8   r7   ÚrepeatÚ	new_zerosÚ
contiguousÚviewr6   Ú	transposer+   Ú_append_prev_key_padding_maskÚ_set_input_bufferrq   ÚzerosÚtype_asÚbmmÚapply_sparse_maskÚ	unsqueezeÚmasked_fillÚtor(   ÚfloatÚsoftmaxÚfloat32Úmean)r   Úqueryr\   r   r‚   rƒ   r„   r…   r†   r‡   rˆ   Úis_tpuÚtgt_lenÚbszr0   r“   Úkey_bszÚ_Úsaved_stateÚqrX   ÚvÚ	_prev_keyr   Ú_prev_valuer   r‘   Úattn_weightsÚattn_weights_floatÚ
attn_probsÚattnr   r   r   r   ñ   sr   
ÿþÿü	÷
ë
€










 
þû"
"
"




û
22
 
þû"


þ

$
ÿzMultiheadAttention.forwardr‘   r’   r“   c                 C   s  |d ur
|r
|}|S |d ur!| d ur!t j| ¡ |  ¡ gdd}|S |d urP|| d¡krJt j||| d¡ f|jd}t j| ¡ | ¡ gdd}|S | ¡ }|S | d ur||  d¡kryt j|||  d¡ f| jd}t j| ¡ |  ¡ gdd}|S |  ¡ }|S |}|S )Nr/   rp   )r•   )r?   rr   r­   r—   r¦   r•   )r‚   r‘   r’   r“   r…   Únew_key_padding_maskÚfillerr   r   r   r¤   ô  s@   	ãÿêþÿôõþýÿz0MultiheadAttention._append_prev_key_padding_maskÚ	new_orderc                 C   sn   |   |¡}|dur5| ¡ D ]!}|| }|dur.| jr&| d¡| d¡kr& n	| d|¡||< q|  ||¡}|S )z=Reorder buffered internal state (for incremental generation).Nr   )rž   Úkeysr9   r—   Úindex_selectr¥   )r   rƒ   rÂ   Úinput_bufferrX   Úinput_buffer_kr   r   r   Úreorder_incremental_state  s   
ÿ€z,MultiheadAttention.reorder_incremental_statec                 C   s    |   |d¡}|d ur|S i }|S ©NÚ
attn_state)Úget_incremental_state)r   rƒ   ÚresultÚempty_resultr   r   r   rž   2  s
   z$MultiheadAttention._get_input_bufferÚbufferc                 C   s   |   |d|¡S rÈ   )Úset_incremental_state)r   rƒ   rÍ   r   r   r   r¥   <  s   z$MultiheadAttention._set_input_bufferr³   r´   c                 C   s   |S r$   r   )r   r¼   r³   r“   r´   r   r   r   r©   C  s   z$MultiheadAttention.apply_sparse_maskc                 C   s^  |dkr|d nd}i }g }|  ¡ D ]…}| |d ¡r—t|| jd d ƒ}|| d |… ||d < || |d| … ||d < || d| d … ||d	 < | |¡ |d
 }||  ¡ v r—t|| jd d ƒ}|| d |… ||d < || |d| … ||d < || d| d … ||d < | |d
 ¡ q|D ]}||= qš| ¡ D ]\}	}
|
||	< q¤d S )NÚ Ú.Úin_proj_weightr   é   zq_proj.weightrN   zk_proj.weightzv_proj.weightÚin_proj_biaszq_proj.biaszk_proj.biaszv_proj.bias)rÃ   ÚendswithÚintr›   r_   Úitems)r   Ú
state_dictr   ÚprefixÚitems_to_addÚkeys_to_removerX   rq   Úk_biasr\   r   r   r   r   Úupgrade_state_dict_namedF  s.   
€
ÿz+MultiheadAttention.upgrade_state_dict_named)
NNr,   TFFFFr,   r-   )NNTFNFF)r%   r&   r'   Ú__doc__r   rL   rC   rÕ   rn   r   r   r   r€   r   r   r   r)   r(   r   Ústaticmethodr¤   r?   r™   ÚexportrÇ   rž   r¥   r©   rÜ   r*   r   r   r   r   r+   6   s     	ó@!:õýüûúùø	÷
öõ
ô  ÿþýüûú)þýÿ
þ
þ
ýr+   )r   rS   Útypingr   r   r   r   r?   Útorch.nn.functionalr   Ú
functionalr   r   Útorch.nnr   Ú"funasr.models.data2vec.quant_noiser	   ÚModuler
   r+   r   r   r   r   Ú<module>   s   $