o
    -i                     @   s   d dl Z d dlZd dlmZ d dlmZ d dlmZmZ d dl	m
Z
mZ ddd	Zd
d ZG dd de jjZ					ddeee jf dedededede
e j de
e fddZdS )    N)_topk_forward)_topk_backward)Tensor	Bitmatrix)OptionalUnionT   c                 C   s  t | ts&|d u r| jd n|| jd g}| jd | jd g}t| ||d} dd }	d}
d}d}t| jdks9J | jd	 d
k sBJ |dksHJ |sLJ | j\}}| j\}}| j}tj||f| j|d}|d urkd}ntj||ftj	|d}d}|	||| }|d }tj||	|dd ftj
|d}t|ddd | }|	||}|| }tj|ftj|d}t|	||
|}t|f | | d|||d|||d|d||||||
||||d ||d g}|d g}t||||d}|||fS )Nr   r   )shape	shape_maxc                 S   s   | | d | S )Nr    )abr   r   a/home/ubuntu/veenaModal/venv/lib/python3.10/site-packages/vllm/third_party/triton_kernels/topk.py<lambda>   s    ztopk_forward.<locals>.<lambda>          i   )dtypedeviceTF)BLOCK_MBLOCK_NAPPLY_SOFTMAXN_EXPTS_PADN_EXPTS_ACT)r	   r
   
scratchpad)
isinstancer   r	   lenr
   r   torchemptyr   int16uint32	transposeint32maxr   strider   )xkapply_softmaxdimreturn_bitmatrixy_indxn_rowsx_shapex_shape_maxcdivr   r   BLOCK_Sn_cols
n_rows_max_devy_valsuse_provided_indx
n_cols_padn_cols_words	bitmatrixs_blockss_colsr   pidsbitmatrix_shapebitmatrix_shape_maxr   r   r   topk_forward	   sR   
 

 

	
r?   c                 C   s   |j d |ks	J t| j d }t| }t|j d f ||d||d| | d||d| j d || j d |||d |S )Nr   r   )r   r   r   )r	   tritonnext_power_of_2r   
empty_liker   r%   )r&   r+   dy_valsr'   r,   r(   n_expts_paddxr   r   r   topk_backward9   s   
rF   c                   @   s$   e Zd Zedd Zedd ZdS )TopKc           
      C   sB   t |||||||\}}}	| || || _|| _|| _|||	fS N)r?   save_for_backwardr(   r'   r,   )
ctxr&   r'   r(   r)   r*   r+   r,   r5   r9   r   r   r   forwardG   s   
zTopK.forwardc                 C   s4   | j \}}t|||| j| j| j}|d d d d d d fS rH   )saved_tensorsrF   r'   r,   r(   )rJ   rC   _0_1r&   r+   rE   r   r   r   backwardP   s   
zTopK.backwardN)__name__
__module____qualname__staticmethodrK   rO   r   r   r   r   rG   E   s
    
rG   r&   r'   r(   r)   r*   r+   r,   c              	   C   s   t | ||||||}|S )a  
    Computes the top-k values and indices along a specified dimension of a tensor.
    Note that the input can be either a `Tensor` or a `torch.Tensor`, but the output will always be a `torch.Tensor`.

    Parameters
    ----------
    x : Union[triton_kernels.Tensor, torch.Tensor]
        Input tensor of shape (n_tokens, n_expts).
    k : int
        Number of top elements to retrieve.
    apply_softmax : bool, default True
        Whether to apply softmax to the input tensor before computing top-k.
    dim : int, default 1
        Dimension along which to compute top-k.
    return_bitmatrix : bool, default True
        A bitmatrix of shape (n_tokens, cdiv(n_expts, 32)).
        Each bit on [t, b] indicates whether the b-th expert was selected for the t-th token.
    y_indx : torch.Tensor, optional
        Pre-allocated tensor for storing indices of top-k elements with shape (n_tokens, k).
        If provided, we skip the computation of top-k indices and use this tensor instead.
    n_rows : int, optional
        Number of rows to apply top-k on. If None, we consider all rows in `x`.

    Returns
    -------
    (expt_scal, expt_indx, bitmatrix) : Tuple[torch.Tensor, torch.Tensor, Bitmatrix]
    )rG   apply)r&   r'   r(   r)   r*   r+   r,   retr   r   r   topkW   s   $rV   )Tr   TNN)r   r@   )triton_kernels.topk_details._topk_forwardr   *triton_kernels.topk_details._topk_backwardr   triton_kernels.tensorr   r   typingr   r   r?   rF   autogradFunctionrG   intboolrV   r   r   r   r   <module>   s:    
0