o
    ߥiE                     @   sD   d dl Z d dlmZ G dd dejZdd Zedkr e  dS dS )    Nc                       sD   e Zd ZdZ fddZdddZdd Zd	d
 ZdddZ  Z	S )MultiheadAttentionz%
    Multi head attention layer.
    c                    s   || dksJ t t|   || _|| _|| | _| jd | _t||d | _	t||| _
tj|d| _tjdd| _d S )Nr   g         )pdim)superr   __init__
hidden_dim	num_headshead_dimscalennLinear
linear_qkv
linear_outDropoutdropout_layerSoftmaxsoftmax)selfr
   r   dropout	__class__ k/home/ubuntu/.local/lib/python3.10/site-packages/modelscope/models/nlp/space/modules/multihead_attention.pyr	      s   
zMultiheadAttention.__init__Fc                 C   sL   | |d|d| j| j}|r|dddd}|S |dddd}|S )Nr         r   )reshapesizer   r   permute)r   xis_keyr   r   r   _split_heads   s
    zMultiheadAttention._split_headsc                 C   s0   | dddd}||d|d| j}|S )Nr   r   r   r   )r    r   r   r
   )r   r!   r   r   r   _merge_heads   s   zMultiheadAttention._merge_headsc                 C   s   t ||}|| j }|d ur'|d}|d| jdd}|| td | 	|}| 
|}|d ur>	 || d t ||}|S )Nr   z-infg        )torchmatmulr   	unsqueezerepeatr   masked_fill_boolfloatr   r   )r   querykeyvaluemaskscoresattnoutr   r   r   _attn$   s    



	zMultiheadAttention._attnNc           	      C   s   |  |}tj|| jdd\}}}| |}| j|dd}| |}|durLd|v rDd|v rDtj|d |gdd}tj|d |gdd}||d< ||d< | ||||}| |}| |}|S )	z$ Forward process of self attention. r   r   T)r"   Nr-   r.   r   )	r   r%   splitr
   r#   catr3   r$   r   )	r   inpr/   cacheqkvr,   r-   r.   r2   r   r   r   forwardB   s   




zMultiheadAttention.forward)F)NN)
__name__
__module____qualname____doc__r	   r#   r$   r3   r9   __classcell__r   r   r   r   r      s    
r   c                  C   sr   dd l } tddd}| jdddd}t|}| jddddkd}t|}|||d d}t| d S )Nr   
   r   g      ?r   float32)r/   r7   )numpyr   randomrandastyper%   tensorprint)npmodelr6   r/   r2   r   r   r   main\   s   

rI   __main__)r%   torch.nnr   Moduler   rI   r:   r   r   r   r   <module>   s   U
