o
    ߥi                     @   s   d dl Z d dlZd dlmZ dd ZG dd dejZG dd dejZG dd	 d	ejZG d
d dejZ	G dd dejZ
dS )    Nc                 C   s6   d|  dt tdtj | dt | d     S )Ng      ?      gHm?   )torchtanhmathsqrtpipowx r   Y/home/ubuntu/.local/lib/python3.10/site-packages/modelscope/models/nlp/use/transformer.pygelu	   s   "r   c                       &   e Zd Zd fdd	Zdd Z  ZS )PositionwiseFeedForward皙?c                    s\   t t|   t||| _t||| _tj|dd| _t	| _
t|| _t|| _d S )Nư>eps)superr   __init__nnLinearw_1w_2	LayerNorm
layer_normr   actvDropout	dropout_1	dropout_2)selfd_modeld_ffdropout	__class__r   r   r      s   z PositionwiseFeedForward.__init__c              	   C   s4   |  | | | |}| | |}|| S N)r    r   r   r   r!   r   )r"   r   interoutputr   r   r   forward   s   zPositionwiseFeedForward.forwardr   __name__
__module____qualname__r   r+   __classcell__r   r   r&   r   r      s    	r   c                       s(   e Zd Zd fdd	ZdddZ  ZS )	MultiHeadedAttentionr   c                    s   || dksJ || | _ || _tt|   || _t||| j  | _t||| j  | _	t||| j  | _
tjdd| _t|| _t||| _d S )Nr   )dim)dim_per_head	model_dimr   r2   r   
head_countr   r   linear_klinear_vlinear_qSoftmaxsoftmaxr   r%   linear)r"   r7   r6   r%   r&   r   r   r   !   s   
zMultiHeadedAttention.__init__Nc                    s  | d | j| j fdd} fdd}| | ddd}| | ddd}| | ddd}|t	 }t
||dd	}|d urk|d|}||d
}| |}| |}	t
|	|dd  d }
| |
}|S )Nr   c                    s   |   dddS )z  projection r3   r   r   )view	transposer   
batch_sizer5   r7   r   r   shape5   s   z+MultiHeadedAttention.forward.<locals>.shapec                    s   |  dd  d S )z  compute context r   r   r3   )r?   
contiguousr>   r   r@   r   r   unshape:   s   z-MultiHeadedAttention.forward.<locals>.unshaper3   r   r   r   g    _)sizer5   r7   r8   r>   r?   r9   r:   r   r   r   matmul	unsqueeze	expand_asmasked_fillr<   r%   rC   r=   )r"   keyvaluequerymaskrB   rD   scoresattn	drop_attncontextr*   r   r@   r   r+   0   sH   




zMultiHeadedAttention.forwardr,   r(   r-   r   r   r&   r   r2      s    r2   c                       r   )PositionalEncoding   c                    s   t t|   t||}td|d}ttjd|dtjdt	
d|   }t| | |d d dd df< t| | |d d dd df< |d}| d| d S )Nr   r   r   )dtypeg     @pe)r   rR   r   r   zerosarangerG   expfloatr   logsincosregister_buffer)r"   r4   max_lenrU   positiondiv_termr&   r   r   r   Y   s   $$
zPositionalEncoding.__init__c                 C   s,   | d}| jd d d |f }|| }|S )Nr   )rE   rU   )r"   r   Lpos_embr   r   r   r+   d   s   
zPositionalEncoding.forward)rS   r-   r   r   r&   r   rR   W   s    rR   c                       s$   e Zd Z fddZdd Z  ZS )TransformerEncoderLayerc                    sL   t t|   t|||d| _t|||| _tj|dd| _	t
|| _d S )N)r%   r   r   )r   rc   r   r2   	self_attnr   feed_forwardr   r   r   r   r%   )r"   r#   headsr$   r%   r&   r   r   r   m   s
   z TransformerEncoderLayer.__init__c                 C   sL   |dkr
|  |}n|}|d}| j||||d}| || }| |S )Nr   r   )rM   )r   rG   rd   r%   re   )r"   iterrL   inputsrM   
input_normrQ   outr   r   r   r+   u   s   

zTransformerEncoderLayer.forwardr-   r   r   r&   r   rc   k   s    rc   c                       r   )TransformerEncoderr   c                    sX   t t|   | _|| _t| _t fddt	|D | _
t| _d S )Nc                    s   g | ]	}t  qS r   )rc   ).0_r$   r#   r%   rf   r   r   
<listcomp>   s    z/TransformerEncoder.__init__.<locals>.<listcomp>)r   rk   r   r#   layersrR   rb   r   
ModuleListrangetransformer_interr   r%   )r"   r#   r$   rf   rp   r%   r&   rn   r   r      s   

zTransformerEncoder.__init__c                 C   sB   |  |}| |}t| jD ]}| j| ||||d}q|S )Nr   )rb   r%   rr   rp   rs   eq)r"   r   rM   ir   r   r   r+      s
   

zTransformerEncoder.forwardr,   r-   r   r   r&   r   rk      s    rk   )r   r   torch.nnr   r   Moduler   r2   rR   rc   rk   r   r   r   r   <module>   s   8