o
    toi%                     @  sz   d dl mZ d dlZd dlZd dlmZ ddd
dZdd Zdd Z												dd ddZ
G dd deZdS )!    )annotationsN)OpRunx
np.ndarrayaxisintreturnc                 C  s6   t j| |dd}t | | }t j||dd}|| S )NT)r   keepdims)npmaxexpsum)r   r   x_maxtmps r   S/home/ubuntu/.local/lib/python3.10/site-packages/onnx/reference/ops/op_attention.py_softmax   s   r   c                 C  s&   |dkr| | }t |}|| S | S )Nr   )r   tanh)XsoftcapYr   r   r   _softcap   s
   
r   c                 C  sf   | j dd \}}tjtj||| f| jddd}tj ||dk< | ddd|df  |7  < | S )zApplies a causal mask on the input `mask`:
    ``mask[i, j] = -inf if past_sequence_length + i > j else 0``.
    Because a softmax is applied on the mask, -inf becomes 0 and 0 becomes 1.
    The modification is done inplace.
    Ndtype   )k.)shaper   triuonesr   inf)maskpast_sequence_lengthq_sequence_lengthtotal_sequence_lengthr    r   r   r   _apply_causal   s   
r'   FQKV	attn_masknp.ndarray | Nonepast_key
past_valuenonpad_kv_seqlenc           ,      C  s  t | jt |j  krt |jksJ  J t | j}| jd }t | jdkr| jd }|jd }|jd }|	d ur?|
d usAJ t||	 }|| jd |	|g}t| |} t| d} t||
 }||jd |
|g}t||}t|d}t||
 }||jd |
|g}t||}t|d}t | jdkrt |jdkrt |jdksJ |d u r| jd }dt| }t|}|d urtj||fdd}n|}|d urtj||fdd}n|}|}|}| jd }|jd }tj||f| j	d}|d ur*||jd	  }|dkr*d
g|j
d  d|fg } |j	tjkrdntj }!tj|| d|!d}|rs|d u rMtj||f| j	d}"t|"|d urH|jd ndd}nF|j	tjkr`d| | j	tj  }t| |d urn|jd ndd}n |d ur|j	tjkrd| | j	}tj ||dk< || }|d ur|dd|j
  |j }t||d d tjf k }#|#|dd|}#t|#dtj }#||#7 }|	d u r| jd }	|
d u r|jd }$|jd }%n|
}$|
}%|	|$kr|	|$ dkr|$|%kr|	|$ }&tj||&dd}tj||&dd}t|d}'t| | |'| }(|(| })|dkr(|) }(|d ur9t|)|})|dkr9|)}(|d urG|)tj|})t|)}*|dkrR|*}(|(| j	}(t|*|| j	}+|dkr|t|+d}+t|+|+jd |+jd d	f}+|+|||(fS )Nr         r   )r   r1   r   r0      )r   r   r   )r   r   Fconstant)modeconstant_values)r$   )r   )repeatsr   )r   r   r0   r1   )lenr   r   r   reshape	transposesqrtconcatenatezerosr   ndimbool_r"   padr'   astypecopyarangenewaxiswhererepeatmatmulr   onnxhelpertensor_dtype_to_np_dtyper   ),r(   r)   r*   r+   r-   r.   r/   scale	is_causalq_num_headskv_num_headssoftmax_precisionr   qk_matmul_output_modeinput_shape_len
batch_sizehidden_size_qhidden_size_khidden_size_vhead_size_qintermediate_shape_qhead_size_kintermediate_shape_khead_size_vintermediate_shape_vq_head_sizepresent_keypresent_valuer%   kv_sequence_length	attn_bias	pad_width	pad_shape	pad_value	temp_maskpadding_maskk_num_headsv_num_headsseq_repsk_transposeqk_matmul_outputqk_with_bias
qk_softmaxoutputr   r   r   _compute_attention.   s   .




.






















rm   c                   @  s.   e Zd Z											ddddZdS )	AttentionNFr(   r   r)   r*   r+   r,   r-   r.   r/   r	   c                 C  s$   t |||||||||	|
||||dS )N)r+   r-   r.   r/   rJ   rK   rL   rM   rN   r   rO   )rm   )selfr(   r)   r*   r+   r-   r.   r/   rJ   rK   rL   rM   rN   r   rO   r   r   r   _run   s    zAttention._runNNNNNFNNNNNr(   r   r)   r   r*   r   r+   r,   r-   r,   r.   r,   r/   r,   r	   r   )__name__
__module____qualname__rp   r   r   r   r   rn      s    rn   )r   )r   r   r   r   r	   r   rq   rr   )
__future__r   numpyr   rG   onnx.reference.op_runr   r   r   r'   rm   rn   r   r   r   r   <module>   s*    :