o
    ۗi5                     @  s  d dl mZ ddlmZ ddlmZ ddlmZ d_d	d
Zd_ddZej	edd Z
ej	eeddd Zej	eedd`ddZej	edd Zedd Zedd Zedd Zedd Zedd Zed d! Zed"d# Zej	eejd$d%d&d'dad*d+Zej	eejd,d-d.dbd/d0Zed1d2 Zed3d4 Zed5d6 Zed7d8 Zej	eejd9d%d&d'dad:d;Zej	eejd<d-d.dbd=d>Zed?d@ Zej	eedAdcdBdAZ edCdD Z!ej	ej"edEdddFdGZ#ej	ee$dHdedIdHZ%edJdK Z&ej	ee$dLdedMdLZ'edfdOdPZ(edgdSdTZ)ej	ed(ej*fdhdWdXZ+dYdZ Z,ej	edid[d\Z-ed]d^ Z.d(S )j    )annotations   )jit   )core)mathicore.constexprc                 C  s4   d}| j }|dkr|dL }|d7 }|dks	t|S )Nr   r   valuer   	constexpr)r   log2n r   V/home/ubuntu/transcripts/venv/lib/python3.10/site-packages/triton/language/standard.py_log2
   s   
r   c                 C  s$   | j }t||d @ dko|dkS )Nr   r   r
   )r   r   r   r   r   _is_power_of_two   s   r   c                 C  s   | | d | S )z
    Computes the ceiling division of :code:`x` by :code:`div`

    :param x: the input number
    :type x: Block
    :param div: the divisor
    :type div: Block
    r   r   )xdivr   r   r   cdiv   s   r   sigmoidc                 C  s   ddt |    S )Nr   )r   expr   r   r   r   r   +   s   softmaxFc                 C  s0   | t | d }t|}t|d}t|||S )Nr   )maxr   r   sumfdiv)r   ieee_roundingznumdenr   r   r   r   2   s   

c                 C  s   t j| | jgddS )zn
    Returns a contiguous flattened view of :code:`x`.

    :param x: the input tensor
    :type x: Block
    T)can_reorder)r   reshapenumelr   r   r   r   ravel<   s   	r$   c                 C  sX   | | | }|| }|| }|| }t || |}|| }|||  }	|| }
|	|
fS )a  
    Transforms the indices of a row-major `size_i * size_j` matrix into
    the indices of a column-major matrix for each group of `size_g` rows.

    For example, for :code:`size_i = size_j = 4` and :code:`size_g = 2`, it will
    transform ::

        [[0 , 1 , 2 , 3 ],
         [4 , 5 , 6 , 7 ],
         [8 , 9 , 10, 11],
         [12, 13, 14, 15]]

    into ::

        [[0, 2,  4 , 6 ],
         [1, 3,  5 , 7 ],
         [8, 10, 12, 14],
         [9, 11, 13, 15]]
    r   minimum)r   jsize_isize_jsize_gijsize_gjgroup_idoff_inew_inew_jr   r   r   	swizzle2dH   s   r1   c                 C  s   t | d|S )a'  
    Returns a tensor filled with the scalar value 0 for the given :code:`shape` and :code:`dtype`.

    :param shape: Shape of the new array, e.g., (8, 16) or (8, )
    :type shape: tuple of ints
    :param dtype: Data-type of the new array, e.g., :code:`tl.float16`
    :type dtype: DType
    r   )r   full)shapedtyper   r   r   zerosp   s   
r5   c                 C  s   t | j| jS )z
    Returns a tensor of zeros with the same shape and type as a given tensor.

    :param input: input tensor
    :type input: Tensor
    )r5   r3   r4   )inputr   r   r   
zeros_like}   s   r7   c           	      C  sJ   |r| |ko	||k }nd}| |kp|}t || |}t |||}||fS NFr   where)	value1index1value2index2tie_break_lefttiegtv_reti_retr   r   r   _argmax_combine      rD   c                 C     t | |||dS NTrD   r;   r<   r=   r>   r   r   r   _argmax_combine_tie_break_left      rJ   c                 C  rF   r8   rH   rI   r   r   r   _argmax_combine_tie_break_fast   rK   rL   c                 C     t | |S N)r   maximumabr   r   r   _elementwise_max      rS   rO   return_indicesreturn_indices_tie_break_left)return_indices_argtie_break_argNTc                 C  s   t | } |r|rt j| |t|dS t j| |t|dS t | jjt dk rEt | j r6| 	t j
} n| j s?J d| 	t j} t j| |t|dS N	keep_dims    z"Expecting input to be integer type)r   _promote_bfloat16_to_float32_reduce_with_indicesrJ   rL   r   r4   primitive_bitwidthis_floatingtofloat32is_intint32reducerS   r6   axisrU   rV   r[   r   r   r   r      s   
r   zmaximum indexr?   )rX   c                 C     t | |d||d\}}|S NT)rU   rV   r[   )r   r6   rg   r?   r[   _retr   r   r   argmax      rm   c           	      C  sJ   |r| |ko	||k }nd}| |k p|}t || |}t |||}||fS r8   r9   )	r;   r<   r=   r>   r?   r@   lt	value_ret	index_retr   r   r   _argmin_combine   rE   rr   c                 C  rF   rG   rr   rI   r   r   r   _argmin_combine_tie_break_left   rK   rt   c                 C  rF   r8   rs   rI   r   r   r   _argmin_combine_tie_break_fast   rK   ru   c                 C  rM   rN   r%   rP   r   r   r   _elementwise_min   rT   rv   r&   c                 C  s   t | } |r|rt j| |t|dS t j| |t|dS t | jjdk rBt | j r3| 	t j
} n| j s<J d| 	t j} t j| |t|dS rY   )r   r]   r^   rt   ru   r   r4   r_   r`   ra   rb   rc   rd   re   rv   rf   r   r   r   min   s   
rw   zminimum indexc                 C  rh   ri   )rw   rj   r   r   r   argmin   rn   rx   c                 C  s   | | S rN   r   rP   r   r   r   _sum_combine      ry   r   c                 C  s   t | } t j| |t|dS )NrZ   )r   r]   re   ry   )r6   rg   r[   r   r   r   r     s   
c                 C  s   | |A S rN   r   rP   r   r   r   _xor_combine  rz   r{   zxor sumc                 C  s<   | j j}| stdtj| |d} tj| |t|||dS )Nz#xor_sum only supported for integers)_builder)r[   r|   
_generator)typescalarrc   
ValueErrorr   r]   re   r{   )r6   rg   r[   r|   r}   	scalar_tyr   r   r   xor_sum  s
   r   cumsumc                 C     t | } t | |t|S rN   )r   r]   associative_scanry   r6   rg   reverser   r   r   r   %     
c                 C  s   | | S rN   r   rP   r   r   r   _prod_combine1  rz   r   cumprodc                 C  r   rN   )r   r]   r   r   r   r   r   r   r   6  r   n_dimsc                 C  s<  | j |? }|d|  dd|| d  g}t| |}tddd d d d f }tt|d|  dd d d d d f ||j}tt|| dd d d d d f ||j}	t|| j}t|	| j}	tj	| jj
dd}
|j|
dd}|	j|
dd}| j|
dd}|t||	k|k||A t|A }|j| jddS )Nr   r   r   T)bitwidthsigned)bitcast)r#   r   r"   arangebroadcast_tor   ra   r4   r3   get_int_dtyper_   r:   r7   )r   flipr   r   n_outerr3   ymaskleftrightidtypeileftirightixrl   r   r   r   _compare_and_swapB  s   
40"r   stageorderc                 C  s   | j |? }t||k |dkr6|d|d |   dd| g}tttddddddf || j}n|}t|D ]}t| ||||  |} q=| S )zb
    order_type 0 == ascending
    order_type 1 == descending
    order_type 2 == alternating
    r   r   r   N)	r#   r   static_assertr"   r   r   r3   static_ranger   )r   r   r   r   r   r3   r   r   r   r   r   _bitonic_mergeV  s   
.r   dim
descendingc                 C  sv   |du rt | jd n|}t|t | jd kd t| j| }td|d D ]}t| |||k r4dn||} q)| S )a  
    Sorts a tensor along a specified dimension.

    :param x: The input tensor to be sorted.
    :type x: Tensor
    :param dim: The dimension along which to sort the tensor. If None, the tensor is sorted along the last dimension. Currently, only sorting along the last dimension is supported.
    :type dim: int, optional
    :param descending: If set to True, the tensor is sorted in descending order. If set to False, the tensor is sorted in ascending order.
    :type descending: bool, optional
    Nr   z+only minor dimension is currently supportedr   )lenr3   r   r   r   r   r   )r   r   r   _dimr   r   r   r   r   sorto  s   r   c                 C  sJ   t | } t |}| d u rt|d } | t|d ks J dt | S )Nr   z2Currently only support flipping the last dimension)r   _unwrap_if_constexprr   r   )r   r3   r   r   r   _get_flip_dim  s   


r   c           	      C  s  t t| jt|| j  t t| j t| j}t| jt| jt|| j  }t | dg| }t ||}t 	dddddf dt 	dd k}t 
||D ]*}|}t 
d|d D ]}||krr||d krrt ||}q`t|| |d dd}qTt || j} | S )z
    Flips a tensor `x` along the dimension `dim`.

    :param x: the first input tensor
    :type x: Block
    :param dim: the dimension to flip along (currently only final dimension supported)
    :type dim: int
    r   r   Nr   TrZ   )r   r   r   r3   r   r#   r   r"   expand_dimsr   r   r   )	r   r   stepsstartr   r   r   flip2r'   r   r   r   r     s    
 (r   c                 C  sD   t | |}t|jdkr|S t ||jdd d|jd  g S )a7  
    Interleaves the values of two tensors along their last dimension. The two tensors must have the same shape.
    Equivalent to `tl.join(a, b).reshape(a.shape[:-1] + [2 * a.shape[-1]])`

    :param a: The first input tensor.
    :type a: Tensor
    :param b: The second input tensor.
    :type b: Tensor
    r   Nr   )r   joinr   r3   r"   )rQ   rR   cr   r   r   
interleave  s   &r   )r   r	   )F)NFTF)TFr8   )NFNN)r   F)r   r	   r   r	   )r   r	   r   r	   r   r	   )r   r	   r   r	   rN   )/
__future__r   runtime.jitr    r   r   r   r   _tensor_member_fnr   _add_math_1arg_docstrr   r   r$   r1   r5   r7   rD   rJ   rL   rS   _add_reduction_docstrr   rm   rr   rt   ru   rv   rw   rx   ry   r   r{   builtinr   _add_scan_docstrr   r   r   r   r   CONSTEXPR_0r   r   r   r   r   r   r   r   <module>   s    

	


'











	
		