o
    wi                     @  sp   d dl mZ d dlZd dlZd dlmZ ddd
dZdd Z										ddddZ	G dd deZ
dS )    )annotationsN)OpRunx
np.ndarrayaxisintreturnc                 C  s6   t j| |dd}t | | }t j||dd}|| S )NT)r   keepdims)npmaxexpsum)r   r   x_maxtmps r   \/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/onnx/reference/ops/op_attention.py_softmax   s   r   c                 C  s&   |dkr| | }t |}|| S | S )Nr   )r   tanh)XsoftcapYr   r   r   _softcap   s
   
r   FQKV	attn_masknp.ndarray | Nonepast_key
past_valuec           )      C  s  t | jt |j  krt |jksJ  J t | j}| jd }t | jdkr| jd }|jd }|jd }|d ur?|	d usAJ t|| }||| jd |g}t| |} t||	 }||	|jd |g}t||}t||	 }||	|jd |g}t||}t | jdkrt |jdkrt |jdksJ |d u r| jd }dt| }t|}|d urtj||fdd}n|}|d urtj||fdd}n|}|}|}| jd }|jd }tj||f| jd}|dkr|d u sJ tj	||ft
d}tj|dd}t|}tjj||d	}|jtd
d}|d ur@|dks J |jt
kr<t|}tjj||d	}|jtd
d}n||7 }|d u rJ| jd }|	d u rZ|jd } |jd }!n|	} |	}!|| kr||  dkr| |!krt||  }"d|"ddg}#t||#}t||#}t|d}$t| | |$| }%|%| }&|dkr|%| }%|d urt|&|}&|dkr|&}%|
d ur|&tj|
}&t|&}'|dkr|'}%|%| j}%t|'|| j}(|dkrt|(d}(t|(|(jd |(jd df}(|(|||%fS )Nr               )r   )dtype)k)maskz-inf)
fill_value)r   r#   r!   r"   )r   r"   r#   r!   r   )lenshaper   r   reshapesqrtconcatenatezerosr%   onesbooltrillogical_notmaarrayfilledfloattile	transposematmulr   astypeonnxhelpertensor_dtype_to_np_dtyper   ))r   r   r   r   r   r    scale	is_causalq_num_headskv_num_headssoftmax_precisionr   qk_matmul_output_modeinput_shape_len
batch_sizehidden_size_qhidden_size_khidden_size_vhead_size_qnew_shape_qhead_size_knew_shape_khead_size_vnew_shape_vq_head_sizepresent_keypresent_valueq_sequence_lengthkv_sequence_length	attn_bias	temp_maskattn_bias_mak_num_headsv_num_headsseq_repsrepsk_transposeqk_matmul_outputqk_with_bias
qk_softmaxoutputr   r   r   _compute_attention   s   .




.





















r`   c                   @  s,   e Zd Z										ddddZdS )	AttentionNFr   r   r   r   r   r   r   r    r	   c                 C  s&   t |||||||||	|
|||d}|S )N)
r   r   r    r>   r?   r@   rA   rB   r   rC   )r`   )selfr   r   r   r   r   r    r>   r?   r@   rA   rB   r   rC   resr   r   r   _run   s    zAttention._run
NNNNFNNNNNr   r   r   r   r   r   r   r   r   r   r    r   r	   r   )__name__
__module____qualname__rd   r   r   r   r   ra      s    ra   )r   )r   r   r   r   r	   r   re   rf   )
__future__r   numpyr   r;   onnx.reference.op_runr   r   r   r`   ra   r   r   r   r   <module>   s&    