o
    پi                     @  s   d Z ddlmZ ddlmZmZmZmZ ddlZddlm	Z	 ddl
mZ ddlmZ ddlmZ er8dd	lmZ G d
d de	jZedgde dddZdS )zRadix linear attention.    )annotations)TYPE_CHECKINGOptionalTupleUnionN)nn)register_split_op)get_forward_context)register_custom_op)ForwardBatchc                      s8   e Zd ZdZ					dd  fddZd!ddZ  ZS )"RadixLinearAttentionz4
    The Linear Attention Layer Implementation.
    Nsilulayer_idintnum_q_headsnum_k_headsnum_v_heads
head_q_dim
head_k_dim
head_v_dimconv_weights7Optional[Union[torch.Tensor, Tuple[torch.Tensor, ...]]]bias
activationstrA_logOptional[torch.Tensor]dt_biasc                   st   t    || _|| _|| _|| _|| _|| _|| _|| | _	|| | _
|| | _|| _|	| _|
| _|| _|| _d S )N)super__init__r   r   r   r   r   r   r   q_dimk_dimv_dimr   r   r   r   r   )selfr   r   r   r   r   r   r   r   r   r   r   r   	__class__ \/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/layers/radix_linear_attention.pyr   $   s    




zRadixLinearAttention.__init__forward_batchr   	mixed_qkvtorch.Tensorabreturnc                 C  sj   |j  r*t d ur*|jd }tjd|| j| jf|j|j	d}t
||||| j |S |jj| ||||dS )Nr      )dtypedevicelayerr(   r)   r+   r,   )forward_mode	is_extendr	   shapetorchemptyr   r   r/   r0   $unified_linear_attention_with_outputr   attn_backendforward)r#   r(   r)   r+   r,   seq_lenoutputr&   r&   r'   r:   G   s,   
zRadixLinearAttention.forward)NNr   NN)r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   )
r(   r   r)   r*   r+   r*   r,   r*   r-   r*   )__name__
__module____qualname____doc__r   r:   __classcell__r&   r&   r$   r'   r      s    #r   r<   )mutates_argsr)   r*   r+   r,   r   r   r-   Nonec           
      C  sr   t  }|j}|j}|| }|jj||| ||d}	| |	 ks.J d|  d|	  ||	j|	 dS )zB
    Custom op wrapper for linear attention computation only.
    r1   z Output tensor element mismatch: z != N)	r	   r(   attention_layersr9   r:   numelviewr5   copy_)
r)   r+   r,   r<   r   contextr(   rD   attention_layerretr&   r&   r'   r8   h   s    	r8   )r)   r*   r+   r*   r,   r*   r<   r*   r   r   r-   rC   )r@   
__future__r   typingr   r   r   r   r6   r   )sglang.srt.compilation.compilation_configr   0sglang.srt.compilation.piecewise_context_managerr	   sglang.srt.utils.custom_opr
   ,sglang.srt.model_executor.forward_batch_infor   Moduler   r8   r&   r&   r&   r'   <module>   s   
I