o
    پi                     @   s(  d Z ddlZddlZddlmZ dZdZdZdZ	ej
dejfdd	Zej
d
ejdejfddZej
dejdejdejdejdejdejdejfddZej
dejfddZdejdejfddZdedefdd Ze d!d"d#d$d%d%d&d'd(ejd)ed*ed+ed,ed-ed.ed/edefd0d1ZdS )2zLogits processing.    Nl   kJ l   5.e i-'igVrc                 C   s   | |> | d| ? B S )N     )xr   r   r   P/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/layers/multimodal.py_rotl32   s   r   C1C2c                 C   sX   t d|t j}t d|t j}| | d? N } | | } | | d? N } | | } | | d? N } | S )Nr         )tlfulluint32)r   r   r	   c1c2r   r   r   _fmix32   s   r   FM_C1FM_C2POS_APOS_BTILEBLOCKUSE_CGc                  C   s
  t jdd}||	 }t d|t j}t d|t j}t d|t j}t d|t j}t jdt jd}t jdt jd}t d|	|
D ]}|| t d|
 }||k }|r_t j| | |ddd}n
t j| | |dd}|t j}|t j}|| | t	|dA }|| | t	|d	A }t
||A ||d
}t
||A ||d
}t |}t |||}t |||}|t j|ddt j7 }|t j|ddt j7 }qAt d|d t j}||N }||N }t
|||d
}		 t
|||d
}|t jd> |t jB }t || | d S )Nr   axisr   dtypez.cg)maskothercache_modifierr   r      r   )r   r	      Fr   )r   
program_idr   r   zerosstatic_rangearangeloadtor   r   
zeros_likewheresum	FMIX32_C1	FMIX32_C2uint64store) in_ptrout_ptrn_u32seed1seed2r   r   r   r   r   r   r   pidbases1s2posAposBh1h2offidxmviup1p2k1k2zero32nbytesoutr   r   r   hash_tiles32_kernel_blocked+   sF   
rI   CHUNKc                 C   s~   t jdd}|| }t jdt jd}t d|D ]}|| }||k }	t j| | |	ddt j}
||
7 }qt || | d S )Nr   r   r   r   r    )r   r#   r$   r.   r%   r'   r(   r/   )r0   r1   n_elemsrJ   r5   starthir>   r?   r@   r   r   r   add_tree_reduce_u64_kernelk   s   
rO   treturnc                 C   s   | j sJ d|  tj}| }d|d@  d@ }|r<tj|| tj|jd}|d | | ||d  	  |}|tj
S )NUse .cuda() firstr"      r   device)is_cuda
contiguousviewtorchuint8numelemptyrU   copy_zero_r   )rP   tbrG   padtb_pr   r   r   _as_uint32_wordsx   s   rb   r   c                 C   sL   d}| |M } | | d? N } | d |@ } | | d? N } | d |@ } | | d? N } | S )Nl       l   e9z    l   b&&&	    r   )r   r   r   r   r   _final_splitmix64   s   rf   ij?$i       i   r"   T)seed
tile_wordsblock_wordsreduce_chunk	num_warps
num_stagesuse_cgtensorrh   ri   rj   rk   rl   rm   rn   c                C   s   | j sJ dt| }| }	|	dkrdS t|	|f}
tj|
d tj|jd}t	|
 |||	|d@ |d dA d@ t
ttt|||||d |}| dkru| }t||f}tj|d tj|jd}t| ||||d	 |}| dksLtt| S )
NrR   r   rT   l    l   yn< l   >[= )r3   r4   r   r   r   r   r   r   r   rl   rm      )rJ   )rV   rb   r[   tritoncdivrY   r\   r.   rU   rI   r,   r-   POS_C1POS_C2rO   rf   intitem)ro   rh   ri   rj   rk   rl   rm   rn   u32ngrid1partialscurrK   grid2nxtr   r   r   gpu_tensor_hash   s@   r~   )__doc__rY   rq   triton.languagelanguager   r,   r-   rs   rt   jit	constexprr   r   rI   rO   Tensorrb   ru   rf   inference_modeboolr~   r   r   r   r   <module>   sv   	
?	
