o
    پi&                    @  sX  U d Z ddlmZ ddlZddlZddlmZmZmZm	Z	m
Z
mZ ddlZddlmZ ddlm  mZ ddlZddlmZ ddlmZ ddlmZ ddlmZmZmZmZmZm Z m!Z!m"Z"m#Z#m$Z$ e  Z%e! Z&edome&Z'e# Z(e Z)e Z*e$ Z+e" Z,e%rdd	l-m.Z.m/Z/ ndZ.e'rdd
l0m1Z2 e# rddl3Z3dZ4dZ5dddZ6dddZ7dddZ8G dd deZ9G dd de9Z:G dd de9Z;	 	!ddd(d)Z<	 	!	*ddd/d0Z=	ddd7d8Z>ddd;d<Z?G d=d> d>e9Z@G d?d@ d@ejAZBdddBdCZCG dDdE dEejAZDG dFdG dGe9ZEG dHdI dIe9ZFG dJdK dKe9ZGG dLdM dMe9ZHddPdQZIejJdd^d_ZKddidjZLG dkdl dle9ZMG dmdn dneMZNejJddsdtZOddudvZPG dwdx dxeMZQG dydz dzeZRi ZSd{eTd|< 	*			}	ddddZ1dd ZUejVd*e d	9ddddZW	9ddddZXe(reXZYneWZY	*			}	ddddZZ	*			}	ddddZ[dS )zRotary Positional Embeddings.    )annotationsN)AnyDictListOptionalTupleUnion)MultiPlatformOp)get_global_server_args)
cpu_has_amx_supportget_bool_env_varget_compiler_backend
get_deviceis_cpuis_cudais_hipis_musais_npuis_xpuSGLANG_USE_AITER)FusedSetKVBufferArg%apply_rope_with_cos_sin_cache_inplace)get_ropei  i  xtorch.Tensorreturnc                 C  sH   | dd | j d d f }| d| j d d d f }tj| |fddS )N.   dimshapetorchcatr   x1x2 r'   V/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/layers/rotary_embedding.py_rotate_neox:   s   r)   c                 C  sB   | dd d df }| ddd df }t j| |fdd} | dS )N.r      r   r   )r"   stackflattenr$   r'   r'   r(   _rotate_gptj@   s   
r.   cossinis_neox_styleboolc                 C  s   | d| j}| d| j}|rtj| ddd\}}n| ddddf }| ddddf }|| ||  }|| ||  }|rLtj||fddS tj||fdddS )z
    Args:
        x: [num_tokens, num_heads, head_size]
        cos: [num_tokens, head_size // 2]
        sin: [num_tokens, head_size // 2]
        is_neox_style: Whether to use the Neox-style or GPT-J-style rotary
            positional embeddings.
    r+   r   r   r   .Nr*   )	unsqueezetodtyper"   chunkr#   r,   r-   )r   r/   r0   r1   r%   r&   o1o2r'   r'   r(   _apply_rotary_embG   s   r9   c                      s   e Zd ZdZd4 fddZd5ddZd6ddZd7ddZdd Zd8ddZ			d9d:d'd(Z
		d9d:d)d*Z		d9d:d+d,Z		d9d:d-d.Zd;d0d1Z		d9d:d2d3Z  ZS )<RotaryEmbeddingz%Original rotary positional embedding.	head_sizeint
rotary_dimmax_position_embeddingsbaser1   r2   r5   torch.dtyper   Nonec           	        s   t    || _|| _|| _|| _|| _|| _|  }t	s"|
|}t	r)| jdvrPtsPtsPtsPtsPt	r:ddlm} ntrCddlm} nddlm} d| _|| _nd| _|  | jd|dd t| _t jd urt| j| _tjdd| j| _d	\| _| _ d S )
N)@         i   r   )rotary_embeddingTFcos_sin_cache
persistentdynamicNN)!super__init__r;   r=   r>   r?   r1   r5   _compute_cos_sin_cache_is_cudar4   _is_cpu_is_xpu_is_npu_is_musasglang.jit_kernel.pos_encrE   _is_hip
sgl_kernelvllm._custom_opsuse_fallback_kernelfallback_rotary_embeddingregister_bufferr9   _apply_rotary_emb_wrappedr
   rl_on_policy_targetforward_native_forward_methodr"   compileposition_cosposition_sin)	selfr;   r=   r>   r?   r1   r5   cacherE   	__class__r'   r(   rM   g   sL   
	


zRotaryEmbedding.__init__Union[int, float]r   c              	   C  sR   t  jdurdnd}d|tjd| jdtj|d| j   }t  jdur'| }|S )Compute the inverse frequency.Ncpu      ?r   r   r5   device)r
   r\   r"   aranger=   floatcuda)rb   r?   init_deviceinv_freqr'   r'   r(   _compute_inv_freq   s   	z!RotaryEmbedding._compute_inv_freqc                 C  sR   |  | j}tj| jtjd}td||}| }| }tj	||fdd}|S )Compute the cos and sin cache.r5   	i,j -> ijr   r   )
rq   r?   r"   rl   r>   rm   einsumr/   r0   r#   rb   rp   tfreqsr/   r0   rc   r'   r'   r(   rN      s   z&RotaryEmbedding._compute_cos_sin_cacheneeded_max_posc                 C  s   ddl m} t| jjd }||k rdS |j }|| | | }| jj}| jj}| 	| j
j|d}|}	tj|	||j|d}
|
 dkrGdS td|
|}| }| }tj||fddj|d	}tj| j|fddj||d
| _dS )z-Ensure cos_sin_cache length > needed_max_pos.r   )envsNrk   rj   zi,j->ijr   r   rs   rk   r5   )sglang.srt.environrz   r<   rF   r!   SGLANG_ROPE_CACHE_ALIGNgetrk   r5   rq   r?   r4   r"   rl   numelru   r/   r0   r#   )rb   ry   rz   cur_lenalignnew_lenrk   r5   rp   startt_new	freqs_newcos_newsin_newnew_rowsr'   r'   r(   _ensure_cos_sin_cache_length   s(   
z,RotaryEmbedding._ensure_cos_sin_cache_lengthc                 C  s~  |j dks|j dksJ |j dkrN| jd| }| d }|dd|d dddjddd\}}|ddd|	 |ddd|	 | _
| _d S | jsSJ | j| }| d }|jddd\}}| jrwt|| j}t|| j}n(tjdd t|j| jddD dd}tjd	d t|j| jddD dd}|ddddd|	 | _
|ddddd|	 | _d S )
Nr*   r   r   r   r+   r   c                 S     g | ]\}}|| qS r'   r'   .0imr'   r'   r(   
<listcomp>       z=RotaryEmbedding.get_cos_sin_with_position.<locals>.<listcomp>c                 S  r   r'   r'   r   r'   r'   r(   r      r   )ndimrF   index_selectr-   sizereshaperepeatr6   view
contiguousr`   ra   mrope_sectionmrope_interleavedapply_interleaved_roper"   r#   	enumeratesplit)rb   	positionscos_sinlast_dimr/   r0   r'   r'   r(   get_cos_sin_with_position   s4   
$

"z)RotaryEmbedding.get_cos_sin_with_positionseqlen!tuple[torch.Tensor, torch.Tensor]c                 C  s(   | j d | }|jddd\}}||fS )Nr   r   r   )rF   r6   )rb   r   r   r/   r0   r'   r'   r(   get_cos_sin  s   zRotaryEmbedding.get_cos_sinNr   querykeyoffsetsOptional[torch.Tensor]fused_set_kv_buffer_argOptional[FusedSetKVBufferArg]!Tuple[torch.Tensor, torch.Tensor]c                 C  s"  |du sJ d|dur|| }|  }|jd }| jd|}|jddd\}}	|j}
||d| j}|dd| jf }|d| jdf }| |||	| j	}t
j||fdd|
}|j}||d| j}|dd| jf }|d| jdf }| |||	| j	}t
j||fdd|}||fS )z-A PyTorch-native implementation of forward().NzBfused_set_kv_buffer_arg is not supported for native implementationr   r   r   r   .)r-   r!   rF   r   r6   r   r;   r=   r[   r1   r"   r#   r   )rb   r   r   r   r   r   
num_tokensr   r/   r0   query_shape	query_rot
query_pass	key_shapekey_rotkey_passr'   r'   r(   r]     s0   



zRotaryEmbedding.forward_nativec           
   	   C  sz   |du sJ d|j tjkr| jj tjkr| ||||S | jr#d}nd}g d}tj|||| j| j	||d\}}	||	fS )z*A PyTorch-npu implementation of forward().N?fused_set_kv_buffer_arg is not supported for npu implementationhalf
interleaver   r   r   r   rotary_mode)
r5   r"   bfloat16rF   rm   r]   r1   	torch_npu	npu_mroper;   )
rb   r   r   r   r   r   r   r   	query_outkey_outr'   r'   r(   forward_npu+  s&   


	zRotaryEmbedding.forward_npuc                 C  s\   |d u sJ d|d urt ||n|}tr%t jj|||| j| j| jS | 	|||||S )Nz?fused_set_kv_buffer_arg is not supported for cpu implementation)
r"   add_is_cpu_amx_availableopsrV   rotary_embedding_cpur;   rF   r1   r]   rb   r   r   r   r   r   r'   r'   r(   forward_cpuI  s    
		
zRotaryEmbedding.forward_cpuc              	   C  s   | j s!td|||| j| j| jd|d urt|dni  ||fS |d u s)J d| jj|j|jd| _| 	|||| j| j| j ||fS )Nr   r   r   r;   rF   is_neox)r   z=save kv cache is not supported for fallback_rotary_embedding.rs   r'   )
rX   r   r;   rF   r1   dictr4   rk   r5   rY   r   r'   r'   r(   forward_cudad  s6   


zRotaryEmbedding.forward_cudastrc                 C  s@   d| j  d| j }|d| j 7 }|d| j d| j 7 }|S )N
head_size=, rotary_dim=, max_position_embeddings=, base=, is_neox_style=)r;   r=   r>   r?   r1   rb   sr'   r'   r(   
extra_repr  s   zRotaryEmbedding.extra_reprc                 C  sF   |d u sJ d|d urt ||n|}t jj|||| j| j| jS )Nz?fused_set_kv_buffer_arg is not supported for xpu implementation)r"   r   r   rV   rE   r;   rF   r1   r   r'   r'   r(   forward_xpu  s   
	zRotaryEmbedding.forward_xpu)r;   r<   r=   r<   r>   r<   r?   r<   r1   r2   r5   r@   r   rA   r?   rf   r   r   r   r   )ry   r<   )r   r<   r   r   rK   )r   r   r   r   r   r   r   r   r   r   r   r   r   r   )__name__
__module____qualname____doc__rM   rq   rN   r   r   r   r]   r   r   r   r   r   __classcell__r'   r'   rd   r(   r:   d   s0    
7

!
!
*# 
&r:   c                      s:   e Zd ZdZd fddZdddZedddZ  ZS )LinearScalingRotaryEmbeddinga.  RotaryEmbedding extended with linear scaling.

    It supports multiple scaling factors. Since multiple LoRA adapters may have
    different scaling factors, we need multiple cos/sin caches. In this way,
    instead of running rotary embedding kernel per lora, we can run multiple
    lora in a batched way.

    In addition to that, we also keep the cos/sin cache for the scaling factor
    of 1 (default) at all times.

    Exemplary for two scaling factors x=1, y and z with embeddings
    [[x11, x12, ... x1m], ..., [xn1, xn2, ..., xnm]] and
    [[y11, y12, ... y1o], ..., [yn1, yn2, ..., yno]], and
    [[z11, z12, ... z1p], ..., [zn1, zn2, ..., znp]],

    we construct the cos/sin cache as follows:
    [[x11, x12, ... x1m, y11, y12, ... y1o, z11, z12, ... z1p],
        ...
     [xn1, xn2, ... xnm, yn1, yn2, ... yno, zn1, zn2, ... znp]]

    We then use offsets to index into the cos/sin cache for
    the respective scaling factors.

    The offset to cache can be accessed via `scaling_factor_to_offset` API.

    Credits to the Reddit user /u/kaiokendev
    r;   r<   r=   r>   r?   r1   r2   scaling_factorsUnion[List[float], float]r5   r@   r   rA   c                   s4   t |tr|g}|| _t |||||| |  d S N)
isinstancerm   r   rL   rM   )rb   r;   r=   r>   r?   r1   r   r5   rd   r'   r(   rM     s   

z%LinearScalingRotaryEmbedding.__init__r   c                   s   |  | j}g }g  | jD ]I}| j| }tj|tjd}|| }td||}| }|	 }tj
||fdd}	|s=d}
n d }|d jd }|| }
 |
 ||	 q fddt| jD | _t| jt ksoJ tj
|ddS )Nrs   rt   r   r   r   c                   s   i | ]\}}t | | qS r'   rm   )r   r   scaling_factorr   r'   r(   
<dictcomp>  s    zGLinearScalingRotaryEmbedding._compute_cos_sin_cache.<locals>.<dictcomp>)rq   r?   r   r>   r"   rl   rm   ru   r/   r0   r#   r!   appendr   _scaling_factor_to_offsetlen)rb   rp   
cache_listr   max_lenrw   rx   r/   r0   rc   offsetlast_offsetnext_max_lenr'   r   r(   rN     s.   



z3LinearScalingRotaryEmbedding._compute_cos_sin_cacheDict[float, int]c                 C     | j S r   )r   rb   r'   r'   r(   scaling_factor_to_offset  s   z5LinearScalingRotaryEmbedding.scaling_factor_to_offset)r;   r<   r=   r<   r>   r<   r?   r<   r1   r2   r   r   r5   r@   r   rA   r   )r   r   )	r   r   r   r   rM   rN   propertyr   r   r'   r'   rd   r(   r     s    
"r   c                      ,   e Zd ZdZd fddZdddZ  ZS ) DynamicNTKScalingRotaryEmbeddingrRotaryEmbedding extended with Dynamic NTK scaling.

    Credits to the Reddit users /u/bloc97 and /u/emozilla
    r;   r<   r=   r>   r?   r1   r2   r   rm   r5   r@   r   rA   c                       || _ t |||||| d S r   )r   rL   rM   )rb   r;   r=   r>   r?   r1   r   r5   rd   r'   r(   rM        
z)DynamicNTKScalingRotaryEmbedding.__init__r   c           	      C  s   | j | j }| j| j| | j  | jd  | j| jd    }| |}tj|tjd}td||}|	 }|
 }tj||fdd}|S )Nr*   r   rs   rt   r   r   )r>   r   r?   r=   rq   r"   rl   rm   ru   r/   r0   r#   	rb   r   r?   rp   rw   rx   r/   r0   rc   r'   r'   r(   rN     s   
z7DynamicNTKScalingRotaryEmbedding._compute_cos_sin_cache)r;   r<   r=   r<   r>   r<   r?   r<   r1   r2   r   rm   r5   r@   r   rA   r   r   r   r   r   rM   rN   r   r'   r'   rd   r(   r         r   '     num_rotationsr<   r   r?   rm   r>   c                 C  s*   |t || d t j   dt |  S Nr   )mathlogpi)r   r   r?   r>   r'   r'   r(   _yarn_find_correction_dim(  s   r   Tlow_rothigh_rottruncateTuple[int, int]c                 C  sL   t | |||}t ||||}|rt|}t|}t|dt||d fS )Nr   r*   )r   r   floorceilmaxmin)r  r  r   r?   r>   r  lowhighr'   r'   r(   _yarn_find_correction_range4  s   

r  r	  r
  r5   r@   rk   torch.devicec                 C  s>   | |kr|d7 }t j|||d|  ||   }t |dd}|S )NgMbP?rj   r   r*   )r"   rl   clamp)r	  r
  r   r5   rk   linear_func	ramp_funcr'   r'   r(   _yarn_linear_ramp_maskD  s
   r  r*   scalec                 C  s   | dkrdS dt |  d S Nr*   ri   皙?r   r   )r  r'   r'   r(   _yarn_get_mscaleO  s   r  c                      sD   e Zd ZdZddddddd fddZd ddZd!ddZ  ZS )"YaRNScalingRotaryEmbeddingfRotaryEmbedding extended with YaRN method.

    Credits to Peng et al. github.com/jquesnelle/yarn
    r*       T)extrapolation_factorattn_factor	beta_fast	beta_slowr  r;   r<   r=   r>   r?   r1   r2   r   rm   r5   r@   r  r  r  r  r  r   rA   c                  sR   || _ || _|	| _|
| _|| _|| _tt| j |	 | _t	 
|||||| d S r   r   r  r  r  r  r  rm   r  mscalerL   rM   )rb   r;   r=   r>   r?   r1   r   r5   r  r  r  r  r  rd   r'   r(   rM   [  s   z#YaRNScalingRotaryEmbedding.__init__r   c           	      C     | j tjd| jdtjd| j  }d| }d||  }t| j| j| j| j | j| j	\}}dt
||| jd tjd | j }|d|  ||  }|S Nr   r   rs   ri   r*   r?   r"   rl   r=   rm   r  r  r  r>   r  r  r  	rb   r   	pos_freqsinv_freq_extrapolationinv_freq_interpolationr	  r
  inv_freq_maskrp   r'   r'   r(   rq   w  s.   

z,YaRNScalingRotaryEmbedding._compute_inv_freqc                 C  d   |  | j}tj| j| j tjd}td||}| | j }|	 | j }tj
||fdd}|S Nrs   rt   r   r   rq   r   r"   rl   r>   float32ru   r/   r  r0   r#   rv   r'   r'   r(   rN        z1YaRNScalingRotaryEmbedding._compute_cos_sin_cache)r;   r<   r=   r<   r>   r<   r?   r<   r1   r2   r   rm   r5   r@   r  rm   r  rm   r  r<   r  r<   r  r2   r   rA   r   rm   r   r   r   r   r   r   r   rM   rq   rN   r   r'   r'   rd   r(   r  U  s    
r  c                      sJ   e Zd ZdZ		d&d' fddZd(ddZd)ddZ	d*d+d$d%Z  ZS ),!Phi3LongRoPEScaledRotaryEmbeddingznPhi3 family of models scaled rotary embedding.

    Based on the original RotaryEmbedding implementation.
    Nr;   r<   r=   r>    original_max_position_embeddingsr?   r1   r2   r5   r@   short_factorList[float]long_factorshort_mscaleOptional[float]long_mscalec                   s  t    |du rtd|| _|| _|| _|| _|| _|| _|	| _	| j| j }|dkr/d}nt
dt
|t
| j  }|
d u rE|}
|d u rK|}|
| _|| _| |||
}||}| jd|dd | ||	|}||}| jd|dd tj| j| jgdd	}| jd
|dd d S )NFz=`Phi3LongRoPEScaledRotaryEmbedding` only supports neox_style.ri   r*   short_cos_sin_cacherG   long_cos_sin_cacher   r   long_short_cos_sin_cache)rL   rM   
ValueErrorr=   r;   r>   r/  r?   r0  r2  r   sqrtr   r3  r5  rN   r4   rZ   r"   r#   r6  r7  )rb   r;   r=   r>   r/  r?   r1   r5   r0  r2  r3  r5  r  r   short_cache
long_cachelong_short_cacherd   r'   r(   rM     sP   



z*Phi3LongRoPEScaledRotaryEmbedding.__init__rescale_factorsr   r   c              	   C  s>   t j|t jd}d|| jt jd| jdt jd| j    }|S )Nrs   ri   r   r   )r"   tensorr*  r?   rl   r=   rm   )rb   r>  rp   r'   r'   r(   rq     s   
z3Phi3LongRoPEScaledRotaryEmbedding._compute_inv_freqr  rm   c           
      C  sV   |  |}tj|tjd}td||}| | }| | }tj||fdd}	|	S r(  )rq   r"   rl   rm   ru   r/   r0   r#   )
rb   r>   r>  r  rp   rw   rx   r/   r0   rc   r'   r'   r(   rN     s   
z8Phi3LongRoPEScaledRotaryEmbedding._compute_cos_sin_cacher   r   r   r   r   r   c                 C  sz  | dd| jf}| dd| jf}| j}t||k t||  }|d ur0t||n|}| j	
|j| _	|d urDt||n|}t| j	d|}|jddd\}	}
|	ddd}	|
ddd}
|dd | jf }|d| jd f }||	 t||
  }tj||fdd}|dd | jf }|d| jd f }||	 t||
  }tj||fdd}|d|dfS )Nr*   r   r   r   r   r+   .)	unflattenr;   r/  r"   anyrm   	full_likelongr   r8  r4   rk   r   r6   r   r3   r=   r)   r#   r-   )rb   r   r   r   r   klong_prompt_offsetidxr   r/   r0   r   r   r   r   r'   r'   r(   forward  s6   z)Phi3LongRoPEScaledRotaryEmbedding.forwardrK   )r;   r<   r=   r<   r>   r<   r/  r<   r?   r<   r1   r2   r5   r@   r0  r1  r2  r1  r3  r4  r5  r4  )r>  r1  r   r   )r>   r<   r>  r1  r  rm   r   r   r   
r   r   r   r   r   r   r   r   r   r   )	r   r   r   r   rM   rq   rN   rG  r   r'   r'   rd   r(   r.    s    
?
r.  r  c                 C  s"   | dkrdS d| t |  d S r  r  )r  r  r'   r'   r(   yarn_get_mscale(  s   rI  c                      sZ   e Zd ZdZdddddd, fddZd-ddZd.dd Z	d/d0d'd(Zd1d*d+Z  Z	S )2FourierRotaryEmbeddingz!Fourier RotaryEmbedding extended.r  TNrn   )fope_init_factorfope_sep_headnum_inv_freqrk   r;   r<   r=   r>   r?   r1   r2   r5   r@   num_kv_headsrK  rm   rL  rM  rk   Optional[str]r   rA   c                  s
  || _ |	| _|
| _|| _|| _t   || _|| _|| _	|| _
|| _|| _|| _ |	| _|
| _|| _|  | jd| | j
dd | jjd | _| jjd | _tjtj| j| j| jtjddd| _tjtj| j| j| jtjddd| _|  | jd|  dd d| _d S )Nrp   FrG   r   rs   )requires_gradrF   )rK  rL  rM  rN  rk   rL   rM   r;   r=   r>   r?   r1   r5   rZ   rq   rp   r!   	input_dim
output_dimnn	Parameterr"   emptyr*  cos_coefsin_coefrN   update_buffer)rb   r;   r=   r>   r?   r1   r5   rN  rK  rL  rM  rk   rd   r'   r(   rM   1  sN   


zFourierRotaryEmbedding.__init__rf   r   c                 C  s   d|t jd| jdt jdj| jt jd| j   }|dd |dd k s+J d	t j|t j	d}| j
dur@d
|| j
d< n
|dt j | j k}|| }|S )rg   ri   r   r   rs   r|   Nr   r*   z+Expected inv_freq to be in decreasing orderFg       @)r"   rl   r=   int64r4   rk   rm   all	ones_liker2   rM  r   r>   )rb   r?   rp   inv_freq_idx_selectedr'   r'   r(   rq   l  s,   
z(FourierRotaryEmbedding._compute_inv_freqc                 C  sP  t j| jt j| jd}t d|| j}| jr1| 	d
| jdd}| 	d
| jdd}n| }| }| jrQt d|| j }t d|| j }nt d|| j }t d|| j }tj|d| jd |d fdd	d
}tj|d| jd |d fdd	d
}t j||fdd}t j||fdd}t j||fdd}|S )rr   rj   rt   r   r   zhtD, hDd -> thdztD, Dd -> tdr   constantr*   )inputpadmodevaluer   )r"   rl   r>   rm   rk   ru   rp   rL  r/   r3   expandrN  r0   rW  rV  Fr_  r;   r   r#   )rb   rw   rx   pos_cospos_sinr0   r/   rc   r'   r'   r(   rN     s<   z-FourierRotaryEmbedding._compute_cos_sin_cacher   r   r   r   r   r   c                 K  sn  | j s|  | _d| _ |dd| jf}|dd| jf}|d ur't||n|}t| jd|j|j	d}|j
ddd\}}	| |   krPdksUJ d J d| dkra|	 dkscJ d	}
| dkrd}
|j}|j}|dd
}|	dd
}	|d}||d|d}||d|d}t||||	\}}|
r||}||}|d|dfS )NTr   r   rs   r   r      z-Expected query key (seq_len, heads, head_dim)Fr*   r+   )rX  rN   rF   r@  r;   r"   r   r   r4   r5   r6   r   r!   r-   r   r   apply_rotary_pos_emb_native)rb   r   r   r   r   kwargspositions_with_offsetsr   r/   r0   need_reshaper   r   seq_lenr'   r'   r(   rG    s@   



zFourierRotaryEmbedding.forwardr   c                 C  sp   d| j  d| j }|d| j 7 }|d| j d| j 7 }|d| j d| j 7 }|d| j d	| j 7 }|S )
Nr   r   r   r   r   z, fope_init_factor=z, fope_sep_head=z, num_inv_freq=z, num_kv_heads=)	r;   r=   r>   r?   r1   rK  rL  rM  rN  r   r'   r'   r(   r     s   z!FourierRotaryEmbedding.extra_repr)r;   r<   r=   r<   r>   r<   r?   r<   r1   r2   r5   r@   rN  r<   rK  rm   rL  r2   rM  r<   rk   rO  r   rA   r   r   r   rH  r   )
r   r   r   r   rM   rq   rN   rG  r   r   r'   r'   rd   r(   rJ  .  s    
;
..rJ  c                      s   e Zd ZdZddddddddd5 fddZd6dd Zd7d!d"Zd#d$ Zd%d& Z	d8d9d)d*Z		d8d:d/d0Z
	d8d:d1d2Z	d8d:d3d4Z  ZS );DeepseekScalingRotaryEmbeddingr  r*   r  r   N)r  r  r  r  r  mscale_all_dimrk   r;   r<   r=   r>   r?   r1   r2   r   rm   r5   r@   r  r  r  r  r  rm  rk   rO  r   rA   c                  s   || _ || _|	| _|
| _|| _tt| j t|t| j t| |	 | _d | _d | _	d | _
d | _|d ur6|nt | _t |||||| trM| j| _d S d S r   )r   r  r  r  r  rm   rI  r  cos_cached_totalsin_cached_total
cos_cached
sin_cachedr   rk   rL   rM   rU   r]   r^   )rb   r;   r=   r>   r?   r1   r   r5   r  r  r  r  r  rm  rk   rd   r'   r(   rM     s.   z'DeepseekScalingRotaryEmbedding.__init__r   c           	      C  s   | j tjd| jdtj| jd| j  }d| }d||  }t| j| j| j| j | j	\}}dt
||| jd tj| jd | j }|d|  ||  }|S )Nr   r   rj   ri   r*   )r?   r"   rl   r=   rm   rk   r  r  r  r>   r  r  r"  r'   r'   r(   rq     s4   	
z0DeepseekScalingRotaryEmbedding._compute_inv_freqc                 C  s   |  | j}tj| j| j | jtjd}td||}| | j	 }|
 | j	 }tj||fdd}tj||fdd}t|| j	 | _t
|| j	 | _|S )Nr|   rt   r   r   )rq   r   r"   rl   r>   rk   r*  ru   r/   r  r0   r#   rn  ro  )rb   rp   rw   rx   r/   r0   rc   embr'   r'   r(   rN   3  s   
z5DeepseekScalingRotaryEmbedding._compute_cos_sin_cachec                 C  r   r   )rn  r   r'   r'   r(   get_cos_cached_totalD     z3DeepseekScalingRotaryEmbedding.get_cos_cached_totalc                 C  r   r   )ro  r   r'   r'   r(   get_sin_cached_totalG  rt  z3DeepseekScalingRotaryEmbedding.get_sin_cached_totalr   r   c                 C  s   | j |d urt||n| dd|| _| j|d ur%t||n| dd|| _| j|j}| j|j}||fS )Nr+   )	rn  r"   r   r3   r4   rp  ro  rq  rk   )rb   r   r5   r   r/   r0   r'   r'   r(   get_cos_sin_cacheJ  s"   	z0DeepseekScalingRotaryEmbedding.get_cos_sin_cacher   r   r   r   c                 C  sf  |j }|dd| jf }|dd| jf }| j| jk r-|d| jdf }|d| jdf }	| j|dur9t||n| }
|
jddd\}}| jr]|ddd	d}|ddd	d}n|j
ddd	d}|j
ddd	d}| jrvtnt}|| |||  }|| |||  }| j| jk rtj||fdd}tj||	fdd}n|}|}||||fS )z6PyTorch-native implementation equivalent to forward()..Nr   r   r   r*   r+   )r5   r=   r;   rF   r"   r   r6   r1   r   r3   repeat_interleaver)   r.   r#   r4   )rb   r   r   r   r   r5   r   r   r   r   r   r/   r0   	rotate_fnr'   r'   r(   r]   a  s0   z-DeepseekScalingRotaryEmbedding.forward_nativec                 C  s  |j \}}}|j d }| ||j|\}	}
|dd | jf }|dd | jf }| j| jk r?|d| jd f }|d| jd f }t|||d| j|	|
}t|||d| j|	|
}||d| j}||d| j}| j| jk rtj	||fdd}tj	||fdd}||fS |}|}||fS )Nr*   .r   r   )
r!   rv  r5   r=   r;   r   npu_interleave_roper   r"   r#   )rb   r   r   r   r   r   num_q_heads_num_k_headsr/   r0   r   r   r   r   r'   r'   r(   r     s6   
z*DeepseekScalingRotaryEmbedding.forward_npuc                 C  sH   |d ur
t ||n|}trt jj|||| j| jdS | ||||S NF)	r"   r   r   r   rV   r   r;   rF   r]   )rb   r   r   r   r   r'   r'   r(   r     s   z*DeepseekScalingRotaryEmbedding.forward_cpu)r;   r<   r=   r<   r>   r<   r?   r<   r1   r2   r   rm   r5   r@   r  rm   r  rm   r  r<   r  r<   r  rm   rm  rm   rk   rO  r   rA   r,  r   r   )r   r   rH  )r   r   r   r   rM   rq   rN   rs  ru  rv  r]   r   r   r   r'   r'   rd   r(   rl    s,    
*
-,rl  c                      s,   e Zd Zd fddZd fddZ  ZS )Llama3RotaryEmbeddingr;   r<   r=   r>   r?   r1   r2   r5   r@   r   rm   low_freq_factorhigh_freq_factororig_max_positionr   rA   c                   s2   || _ || _|	| _|
| _t |||||| d S r   )r   r  r  r  rL   rM   )rb   r;   r=   r>   r?   r1   r5   r   r  r  r  rd   r'   r(   rM     s   zLlama3RotaryEmbedding.__init__rf   r   c                   s   t  |}| j| j }| j| j }dtj | }| j| jkr.| j| | j | j| j  }nd}t||k |t||k|| j	 d| | | j	 ||  }|S )Nr   r   r*   )
rL   rq   r  r  r  r   r   r"   wherer   )rb   r?   	inv_freqslow_freq_wavelenhigh_freq_wavelenwave_lensmooth	new_freqsrd   r'   r(   rq     s&   
	z'Llama3RotaryEmbedding._compute_inv_freq)r;   r<   r=   r<   r>   r<   r?   r<   r1   r2   r5   r@   r   rm   r  rm   r  rm   r  r<   r   rA   r   )r   r   r   rM   rq   r   r'   r'   rd   r(   r~    s    r~  c                      s@   e Zd Zd fd
dZd fddZdddZdddZ  ZS )Llama4VisionRotaryEmbeddingr;   r<   r=   r>   r?   r1   r2   r5   r@   c                   s   t  |||||| d S r   )rL   rM   )rb   r;   r=   r>   r?   r1   r5   rd   r'   r(   rM     s   	z$Llama4VisionRotaryEmbedding.__init__rf   r   r   c                   s"   t  |}|d | jd  }|S r   )rL   rq   r=   )rb   r?   r  rd   r'   r(   rq     s   z-Llama4VisionRotaryEmbedding._compute_inv_freqc                 C  s$  |  | j}| j}tj|tjd|d}tj||d d gdd}d|d< tt	
|}|| }|| }|d d |d d d d f  jdd	d}|d d |d d d d f  jdd	d}tj||gd	d  d
d d df }	|	|d	dddk d}	ttjt|	t|	gd	d}
|
S )Nrs   r*   r   r   r+   )r   r   ).Nr   r   .)rq   r?   r>   r"   rl   int32r   r#   r<   r   r:  rw  rm   r   masked_fillview_as_complexr,   r/   r0   )rb   rp   num_patchesimg_idxnum_patches_single_dimfrequencies_xfrequencies_yfreqs_xfreqs_yrx   rc   r'   r'   r(   rN     s(   (z2Llama4VisionRotaryEmbedding._compute_cos_sin_cacher   r   r   c                   s   | j |j| _ t| jg |jd d ddR   t| jg |jd d ddR  } fddt jD }| j j	| }t
 | d}t
|| d}||||fS )Nr   r   c                   s.   g | ]\}}|d ks| j d  kr|nd qS r*   )r   )r   r   dquery_r'   r(   r   $  s    z7Llama4VisionRotaryEmbedding.forward.<locals>.<listcomp>rf  )rF   r4   rk   r"   r  rm   r   r!   r   r   view_as_realr-   type_as)rb   r   r   key_broadcast_shapefreqs_cir   r   r'   r  r(   rG    s   ,,
z#Llama4VisionRotaryEmbedding.forward)r;   r<   r=   r<   r>   r<   r?   r<   r1   r2   r5   r@   r   r   )r   r   r   r   r   r   )r   r   r   rM   rq   rN   rG  r   r'   r'   rd   r(   r    s
    
r  c                      r   )DynamicNTKAlphaRotaryEmbeddingr   r;   r<   r=   r>   r?   r1   r2   scaling_alpharm   r5   r@   r   rA   c                   r   r   )r  rL   rM   )rb   r;   r=   r>   r?   r1   r  r5   rd   r'   r(   rM   4  r   z'DynamicNTKAlphaRotaryEmbedding.__init__r   c           	      C  sp   | j }| j| j| j| jd    }| |}tj|tjd}td||}|	 }|
 }tj||fdd}|S )Nr   rs   rt   r   r   )r>   r?   r  r=   rq   r"   rl   rm   ru   r/   r0   r#   r   r'   r'   r(   rN   C  s   
z5DynamicNTKAlphaRotaryEmbedding._compute_cos_sin_cache)r;   r<   r=   r<   r>   r<   r?   r<   r1   r2   r  rm   r5   r@   r   rA   r   r   r'   r'   rd   r(   r  .  r   r  r   	list[int]c                 C  st   | d   }| ddd|d d df |dd|d d df< | ddd|d d df |dd|d d df< |S )zApply interleaved MRoPE to 3D rotary embeddings.
    Reorganizes frequency layout from chunked [TTT...HHH...WWW] to
    interleaved [THTHWHTHW...TT], preserving frequency continuity.
    r   r*   .rf  r   )clone)r   r   x_tr'   r'   r(   r   S  s   22r   n_qhtl.constexprn_khhdrdpad_n_qhpad_n_khpad_hdmrope_section_tmrope_section_hmrope_section_wis_interleavedc           K      C  s  t d}| ||  } |||  }|
d }t |d|  | }t |d|  | }t |d|  | }|||
  }|||
  }|||
  }|| }|| }|| }t d|d }|r||d dk|d| k@ }|d dk|d| k@ } || B  }!n|}"|"| }#||k }!|"|k||#k @ }|#|k||k @ } t j|| |!dd}$t j|| |!dd}%t j|| |dd}&t j|| |dd}'t j|| | dd}(t j|| | dd})|$|& |( }*|%|' |) }+|rt d|d d d f |	 t d|d d d d f  },t d|d d d f |	 t d|d d d d f  }-t d|d d d f |k t d|d d d d f |
d k @ }.t d|d d d f |k t d|d d d d f |
d k @ }/t j| |, |.dd|+j}0t j||- |/dd|+j}1|,|
d  }2|-|
d  }3|.}4|/}5t j| |2 |4dd|+j}6t j||3 |5dd|+j}7|0|* |6|+  }8t j| |, |8|.d |6|* |0|+  }9t j| |2 |9|4d |1|* |7|+  }:t j||- |:|/d |7|* |1|+  };t j||3 |;|5d d S t d|d d d f |	 }<t d|d d d f |	 }=dt d|d d d d f  }>|>d }?|<|> }@|<|? }A|=|> }B|=|? }Ct d|d d d d f |
d k }Dt d|d d d f |k }Et d|d d d f |k }F|E|D@ }G|E|D@ }H|F|D@ }I|F|D@ }Jt j| |@ |Gdd|+j}0t j||B |Idd|+j}1t j| |A |Hdd|+j}6t j||C |Jdd|+j}7|0|* |6|+  }8t j| |@ |8|Gd |6|* |0|+  }9t j| |A |9|Hd |1|* |7|+  }:t j||B |:|Id |7|* |1|+  };t j||C |;|Jd d S )Nr   r   r*   rf  maskotherr  )tl
program_idloadrl   r4   r5   store)Kq_ptrk_ptrcos_sin_cache_ptrpositions_ptrq_stridek_stridepositions_strider  r  r  r  r  r  r  r  r  r  r  r1   pidhalf_rdrw   hwt_cosh_cosw_cost_sinh_sinw_sincos_offsetsh_maskw_maskt_maskt_endh_end	t_cos_row	t_sin_row	h_cos_row	h_sin_row	w_cos_row	w_sin_rowcos_rowsin_rowfirst_half_q_offsetsfirst_half_k_offsetsfirst_q_maskfirst_k_maskq_tile_1k_tile_1second_half_q_offsetssecond_half_k_offsetssecond_q_masksecond_k_maskq_tile_2k_tile_2new_q_tile_1new_q_tile_2new_k_tile_1new_k_tile_2base_qbase_keven_idxodd_idxeven_q_offsetsodd_q_offsetseven_k_offsetsodd_k_offsetsidx_maskqn_maskkn_maskeven_q_mask
odd_q_maskeven_k_mask
odd_k_maskr'   r'   r(   _triton_mrope_forward_fused^  s   
66""

 $r  qrD  rF   r   	List[int]r;   r=   r   rA   c	                 C  s0  | j \}	}
|j \}}|d dksJ ||ksJ ||	ksJ |
| dks&J || dks.J t|dks6J | ddkrU|ddkrU|ddkrU| dkrU| sWJ |
| }|| }t|}t|}t|}t|	f | |||| d|d|d||||||||d |d |d || dS )a	  The mrope triton kernel.

    Args:
        q: [num_tokens, num_heads * head_size]
        k: [num_tokens, num_kv_heads * head_size]
        cos_sin_cache: [max_position_embeddings, head_size]
        positions: [3, num_tokens]
        mrope_section: [t, h, w]
    r   r   rf  r*   N)r!   r   strider   is_contiguoustritonnext_power_of_2r  )r  rD  rF   r   r   r;   r=   r   r1   r   n_q_dimk_first_dimn_k_dimr  r  r  r  r  r'   r'   r(   triton_mrope_fused  sP   




r  c                      s   e Zd ZdZ		dDdE fddZdFddZ	dGdHddZ	dGdHdd ZdId!d"Z	dGdHd#d$Z	e
					dJdKd3d4Ze
					dJdLd5d6Ze
dMd<d=Ze
dNd>d?Ze
d@dA Ze
dBdC Z  ZS )OMRotaryEmbeddingz*Rotary Embedding with Multimodal Sections.NFr;   r<   r=   r>   r?   r1   r2   r5   r@   r   Optional[List[int]]r   r   rA   c	                   s0  t  |||||| || _|| _| jr|d }	t| j}
|
|	krtd|	 d|
 d|	  |
dkrV|	|
   fdd| jD | _t| j}||	krU| jd  |	| 7  < n%|	t| j gt| j | _|	t| j }t|D ]}| j|  d	7  < qotd
| j dt| j d t j	d ur| j
| _d S d S )Nr   z%MRoPE section sum mismatch: expected z, got z5. Adjusting mrope_section to match rotary_dim // 2 = r   c                   s   g | ]}t d t|  qS r  )r  r<   )r   sectionscale_factorr'   r(   r   h  s    z-MRotaryEmbedding.__init__.<locals>.<listcomp>r   r*   zCorrected mrope_section: z (sum=))rL   rM   r   r   sumprintr   ranger
   r\   r]   r^   )rb   r;   r=   r>   r?   r1   r5   r   r   expected_sum
actual_sumcurrent_sum	remainderr   rd   r  r(   rM   L  sH   


zMRotaryEmbedding.__init__r   r   c                 C  s:   | j j|jks| j j|jkr| j j|j|jd| _ d S d S )Nrs   )rF   rk   r5   r4   )rb   r   r'   r'   r(   _match_cos_sin_cache_dtype  s   z+MRotaryEmbedding._match_cos_sin_cache_dtyper   r   r   r   r   c                 C  s  |du sJ d|j dks|j dksJ |jd }| j| }|jddd\}}|j dkri| js1J | jrAt|| j}t|| j}n(tjdd t	|j
| jddD dd}tjd	d t	|j
| jddD dd}|jd
 }	|j}
||	d| j}|dd| jf }|d| jdf }t|||| j}tj||fdd|
}|jd
 }|j}||d| j}|dd| jf }|d| jdf }t|||| j}tj||fdd|}||fS )aJ  PyTorch-native implementation equivalent to forward().

        Args:
            positions:
                [num_tokens,] (text only) or
                [3, num_tokens] (T/H/W positions with multimodal inputs)
            query: [num_tokens, num_heads * head_size]
            key: [num_tokens, num_kv_heads * head_size]
        Nz4save kv cache is not supported for MRotaryEmbedding.r*   r   r   r   c                 S  r   r'   r'   r   r'   r'   r(   r     r   z3MRotaryEmbedding.forward_native.<locals>.<listcomp>c                 S  r   r'   r'   r   r'   r'   r(   r     r   r   .)r   r!   rF   r6   r   r   r   r"   r#   r   r   r   r;   r=   r9   r1   r   )rb   r   r   r   r   r   r   r/   r0   	seq_len_qr   r   r   	seq_len_kr   r   r   r'   r'   r(   r]     sF   






zMRotaryEmbedding.forward_nativec                 C  sF   |j dks|j dksJ |j dkr| jr| |||S | ||||S aI  Forward pass with optional Triton kernel acceleration.
        Args:
            positions:
                [num_tokens,] (text only) or
                [3, num_tokens] (T/H/W positions with multimodal inputs)
            query: [num_tokens, num_heads * head_size]
            key: [num_tokens, num_kv_heads * head_size]
        r*   r   )r   r   forward_tritonr]   rb   r   r   r   r   r'   r'   r(   r     s   zMRotaryEmbedding.forward_cudac              
   C  s@   | j sJ | | t||| j|| j | j| j| j| j	 ||fS r   )r   r  r  rF   r;   r=   r   r1   rb   r   r   r   r'   r'   r(   r    s   

zMRotaryEmbedding.forward_tritonc           	   	   C  sr   |d u sJ d|j d dkr| ||||S d}| jrd}nd}g d}tj|||| j| j||d\}}||fS )Nr   r*   i   r   r   r   r   )r!   r]   r1   r   r   rF   r;   )	rb   r   r   r   r   r   r   r   r   r'   r'   r(   r     s(   
	
	zMRotaryEmbedding.forward_npuspatial_merge_sizeimage_token_idvideo_token_idvision_start_token_id
model_typer   tokens_per_secondOptional[int]	input_idsOptional[torch.LongTensor]image_grid_thwvideo_grid_thwsecond_per_grid_tsr   c
           4   
   K  s  |dkrt j| ||||||||	f	i |
S |ds$|ds$|dr>|d ur>tj||d d df dd}d|d d df< g }|d urM|d usN|d urM|}tjd|jd |jd |j|jd	}d
\}}t	|D ]\}}d
\}}t
||kd}||d  }||k }||k }| }g }d}||}}t|| D ]B}||v r|dkr|||}nt|d }||v r|dkr|||}nt|d }||k r|| d || d || d }}}d} |d7 }|d8 }|}!n*|| d || d || d }}}|	d ur|	| } nd} |d7 }|d8 }|}!t|t|t|}"}#}$|"}%|#|  }&|$|  }'|!| }(t|dkrI|d  d nd})|t|(dddd|)  |dv rt|%dd}*|*d|&|' }+|+|  | },|, }-|- }.n"|dv rtj|%|jddd|%|&|' d}.ntd| tj|&|jdddd|%|&|'d}/tj|'|jdddd|%|&|'d}0|t|.|/|0g|( |)  |!|%|& |'  }q|t|k rt|dkr|d  d nd})t|| }(|t|(dddd|)  tj|dddd}1|1|j|d|d d f< ||1 d t||   qjtj||jdd}||fS |jd }2t|2}|dddd|j}|j ddd}3|3j dddd |2 }||fS )Nqwen3_omni_moeqwen3_vlqwen3_vl_moeqwen3_5r   r   r*   rf  rj   r   r   r   ri   r   )
qwen2_5_vlpaddleocr_vl)qwen2_vlr   r!  r"  qwen3_5_moer{   zUnimplemented model type: .Fr   keepdimTr)  )!r  get_rope_index_qwen3_omni
startswithr"   rw  onesr!   r5   rk   r   argwheresqueezer  tolistr  indexr   r<   r  r   rl   r   rb  rC  r-   r   RuntimeErrorr,   r#   r4   r?  r3   amax)4r  r  r  r  r  r  r  r  r  r  rh  mrope_position_deltastotal_input_idsposition_idsimage_indexvideo_indexr   
image_nums
video_numsvision_start_indicesvision_tokensinput_tokensllm_pos_ids_liststremain_imagesremain_videosr{  ed_imageed_videorw   r  r  second_per_grid_tedt_inth_intw_int
llm_grid_t
llm_grid_h
llm_grid_wtext_lenst_idxrange_tensorexpanded_rangetime_tensortime_tensor_longt_indexh_indexw_indexllm_positionsr   max_position_idsr'   r'   r(   get_rope_index  s$  






















zMRotaryEmbedding.get_rope_indexc	           ;      K  s  |	d }
|	d }|	d }|	 dd}|	 dd }|}g }|d ur|d us*|d ur|}tjd|jd |jd	 tj|jd
}d\}}}t|D ]k\}}d\}}}t||kd	}|	 dkr{||d	  }||k
 }|ru||k
 n||k
 }t
||k}| }g }d}|||} }!}"|r|| n|| | }#t|#D ]}$t|dkr|d  d	 nd}%||v s||v r|!dks| dkr|||nt|d	 }&|
|v r|"dkr|||nt|d	 }'t|&|'}(|(| })|)dkr	|t|)d	ddd|%  |%|)7 }%|(|&kr|&d	 |'krd\}*}+nd\}*}+|t|*d	ddd|%  |%|*7 }%|(|'krjt|| },t|,d	ddd|% }-||- |t|)|* |, |+ 7 }|d	7 }|"d	8 }"n|(|&kr||&d	  |kr|| d }.|d d d	f }/|d d df }0t|.d	 |  }1t|%|| |1|/|0|j}-||  | d  }2||- |t|)|* |2 |+ 7 }|d	7 }| d	8 } nk|(|&kr0||&d	  |kr0|| d }.|d d d	f }/|d d df }0t|.||    |  }1t|%|| |1|/|0|j}-||  | d  }3||- |t|)|* |3 |+ 7 }|d	7 }|!d	8 }!n|(|&kr5|&d	 |'kr5t|| },t|,d	ddd|% }4|| d }.|d d d	f }/|d d df }0t|.||    |  }1t|%|| |1|/|0|j}5d\}6}7|6|5jd k r|7|4jd k r|5d |6 |4d |7 kr||5d d |6|6d	 f  |6d	7 }6n||4d d |7|7d	 f  |7d	7 }7|6|5jd k r|7|4jd k s|6|5jd k r||5d d |6|5jd f  |7|4jd k r||4d d |7|4jd f  ||  | d  }3|t|)|* |, |3 |+ 7 }|d	7 }|d	7 }|!d	8 }!|"d	8 }"t|dkrD|d  d	 nd}%|t|+d	ddd|%  q|t|k rt|dkro|d  d	 nd}%t|| })|t|)d	ddd|%  tjdd |D d	ddd}8|8|j|d|d d f< ||8 d	 t|  qGtj||jdd	}||fS |jd	 }9t|9}|dddd|j}|jdddd jdddd }:|:d	 |9 }||fS )Naudio_token_idaudio_start_token_idposition_id_per_secondsuse_audio_in_videoFaudio_seqlensrf  r   r*   rj   r   r   )r   r   r*   r*   r   r#  c                 S  s   g | ]}|  qS r'   r   )r   itemr'   r'   r(   r     s    z>MRotaryEmbedding.get_rope_index_qwen3_omni.<locals>.<listcomp>r   .r{   r*  T) r   r"   zerosr!   rm   rk   r   r.  r/  r   r  r0  r  r   r  r1  r  r   rl   r   rb  r   _get_feat_extract_output_lengthsr<   _get_llm_pos_ids_for_visionprodrh   r#   r   r4   r?  r3   );r  r  r  r  r  r  r  r  r  rh  rX  rY  rZ  r[  r\  second_per_gridsr4  r5  r6  	image_idx	video_idx	audio_idxr   current_input_idsr9  r:  
audio_numsr;  r<  r=  r>  r?  r@  rA  remain_audiosmultimodal_numsr{  rM  ed_vision_started_audio_startmin_edrL  bos_leneos_len	audio_lenllm_pos_idsgrid_tgrid_hsgrid_wsrR  	image_len	video_lenaudio_llm_pos_idsvideo_llm_pos_idsvideo_data_indexaudio_data_indexrU  r   rV  r'   r'   r(   r+    s  






	









	


	







z*MRotaryEmbedding.get_rope_index_qwen3_omni	hf_configr   $Union[list[list[int]], torch.Tensor]attention_maskr   c           :   
   K  sh  |j }|j}|j}|jj}	g }
| durI|dus|durI| }|du r(t|}tjd| jd | jd | j	| j
d}d\}}d}||j
}t|D ]\}}|| }||dk }| }dgt| }d}t|D ]+\}}||krud	}n||kr{d}||kr|sd
||< qj||kr|rd||< qjd||< qjg }tt|dd D ]\}}t|}|d d }|d d d }||||f qg }d}|D ]G\} }!}"|r|d   d }#nd}#| d
kr`|| d || d || d }$}%}&t|$t|%t|&}'}(})|'}*|(|	 }+|)|	 },tj|*|j
ddd|*|+|, d}-tj|+|j
dddd|*|+|,d}.tj|,|j
dddd|*|+|,d}/|t|-|.|/g|#  |d7 }d}q| dkr|}$|| d }%|| d }&t|%t|&}(})|(|	 }+|)|	 },t|$D ]O}0tj|0|j
dddd|+|, d}-tj|+|j
ddddd|+|,d}.tj|,|j
ddddd|+|,d}/|t|-|.|/g|#  q|d7 }||| d kr|d7 }d}|d7 }q|"|! }1tj|1|j
d}2|2ddd|1|# }3||3 d}qtj|dddd}4|dk}5|4|j
|d||5f< |
|4 d t||   qJtj|
| j
dd}
||
fS |dur|  dd }|!|dkd |dddd|j
}|j"ddd}6|6j"dd	dd |jd  }
||
fS | jd }7| jd }8tj|7| j
dddd}9|9d|8|7}tj#|8dg| j
| j	d}
||
fS )z4Get mrope input positions and delta value for GLM4V.Nrf  r   r*   rj   r#   FTimagevideotextc                 S     | d S Nr*   r'   r   r'   r'   r(   <lambda>7	      z7MRotaryEmbedding.get_rope_index_glm4v.<locals>.<lambda>r   r   r{   r   .r(  r*  r|   )$r  video_start_token_idvideo_end_token_idvision_configr  r"   r[  r-  r!   r5   rk   r4   r   r0  r   	itertoolsgroupbylistr   r  r^  r<   rl   r   rb  r   r,   r  r?  r#   r3   rC  cumsummasked_fill_r3  r_  ):r  r{  r  r  r}  rh  r  r  r  r  r4  r5  r6  r7  r8  video_group_indexr   ids	curr_mask
ids_maskedr=  input_token_typevideo_check_flgjtokeninput_type_groupr   groupstart_index	end_indexr>  video_frame_nummodality_type	start_idxend_idxrM  rw   r  r  rF  rG  rH  rI  rJ  rK  rR  rS  rT  t_idxrL  
text_rangetext_posrU  r  rV  length
batch_size
arange_idsr'   r'   r(   get_rope_index_glm4v  s4  

























z%MRotaryEmbedding.get_rope_index_glm4vc           -   
   K  s`  |j }|j}|j}|j}|j}	g }
| dur|dus|dur| }tjd| jd | jd | j| j	d}d\}}t
|D ]\}} |  }g }d}|D ],}||krSd}n||krYd}||kre|se|d	 qJ||krq|rq|d
 qJ|d qJg }tt
|dd D ]\}}t|}|d d }|d d d }||||f qg }d}|D ]\}}}t|dkr|d  d nd}|d	kr0|| d || d || d }} }!| |  | |! | }"}#}$t|"ddd|#|$  }%t|#ddd|"d|$ }&t|$ddd|"|#d }'|t|%|&|'g|  |d7 }d}q|d
kr|| d || d || d }} }!| |	 |  | |! | }"}#}$t|"D ]C}(t|(ddd|#|$  }%t|#ddddd|$ }&t|$dddd|#d }'|t|%|&|'g|  qa|d7 }|d7 }q|| })|t|)dddd|  d}qtj|dddd}*|*|j	|d|ddf< |
|* d t||   q;tj|
| j	dd}
||
fS | jd }+t|+}|dddd| j	}|jdddd jdddd },|,d |+ }
||
fS )z7Get mrope input positions and delta value for Ernie VL.Nrf  r   r*   rj   r#  FTr  r  r  c                 S  r  r  r'   r  r'   r'   r(   r  	  r  z9MRotaryEmbedding.get_rope_index_ernie45.<locals>.<lambda>r   r   r   .r{   r*  )im_patch_idr  r  spatial_conv_sizetemporal_conv_sizer"   r-  r!   r5   rk   r   r0  r   r  r  r  r   r  r^  rl   r   rb  r-   r,   r  r?  r#   r   r4   r3   )-r  r{  r  r  rh  r  r  r  r  r  r4  r5  r6  r7  r8  r   r=  r  r  r  r  r   r  r  r  r>  r  r  r  r  rM  rw   r  r  rI  rJ  rK  rR  rS  rT  r  rL  rU  r   rV  r'   r'   r(   get_rope_index_ernie45	  s  





























z'MRotaryEmbedding.get_rope_index_ernie45c                 C  sD   | d }|d d d }|d d d d d d | d d  }|S )zs
        Computes the output length of the convolutional layers and the output length of the audio encoder
        d   r*   r      r'   )input_lengthsinput_lengths_leavefeat_lengthsoutput_lengthsr'   r'   r(   r`  v
  s
   &z1MRotaryEmbedding._get_feat_extract_output_lengthsc                 C  s   || | }|| | }t j||ddddt|d| }	t j||ddddt||d }
|ddd||  }t j||	|
gdd|  }|S )Nr{   r*   r   r   r   )r"   rl   r   rb  r   r-   r,   )rM  
vision_idxr  rR  rs  rt  rk   grid_hgrid_wrS  rT  rq  r'   r'   r(   ra  
  s   

z,MRotaryEmbedding._get_llm_pos_ids_for_visionr}  r;   r<   r=   r<   r>   r<   r?   r<   r1   r2   r5   r@   r   r   r   r2   r   rA   )r   r   r   rA   r   
r   r   r   r   r   r   r   r   r   r   )r   r   r   r   r   r   r   r   )NNNNN)r  r<   r  r<   r  r<   r  r<   r  r   r  r  r  r  r  r  r  r  r  r   r   r   )r  r<   r  r<   r  r<   r  r<   r  r  r  r  r  r  r  r  r  r   r   r   )r  r   r{  r   r  r|  r  r|  r}  r   r   r   )
r  r   r{  r   r  r|  r  r|  r   r   )r   r   r   r   rM   r  r]   r   r  r   staticmethodrW  r+  r  r  r`  ra  r   r'   r'   rd   r(   r  I  sP    

5>
 >  , T ,
r  c                      sH   e Zd ZdZddddddddd$ fddZd%d d!Zd&d"d#Z  ZS )'YaRNScalingMRotaryEmbeddingz9MRoPE-enabled rotary embedding with YaRN context scaling.NFr*   r  T)r   r   r  r  r  r  r  r;   r<   r=   r>   r?   r1   r2   r   rm   r5   r@   r   r   r   r  r  r  r  r  r   rA   c             
     sX   || _ |
| _|| _|| _|| _|| _tt| j | | _t	 j
||||||||	d d S )Nr   r   r  )rb   r;   r=   r>   r?   r1   r   r5   r   r   r  r  r  r  r  rd   r'   r(   rM   
  s"   
z$YaRNScalingMRotaryEmbedding.__init__r   c           	      C  r  r   r!  r"  r'   r'   r(   rq   
  s.   	
z-YaRNScalingMRotaryEmbedding._compute_inv_freqc                 C  r'  r(  r)  rv   r'   r'   r(   rN   
  r+  z2YaRNScalingMRotaryEmbedding._compute_cos_sin_cache)r;   r<   r=   r<   r>   r<   r?   r<   r1   r2   r   rm   r5   r@   r   r   r   r2   r  rm   r  rm   r  r<   r  r<   r  r2   r   rA   r,  r   r-  r'   r'   rd   r(   r  
  s    
$r  	q_stride0	k_stride0pos_stride0
section_hwc           >      C  s  t d}| ||  } |||  }|
d }t |d|  | t j}t |d|  | t j}t |d|  | t j}t d|d }||k }||k }|d@ dk}t |t ||||}t j|||
  | |dd}t j|||
  ||  |dd}|rat d|d d d f }t d|d d d f }t d|d d d d f }||k ||k @ }||k ||k @ } ||	 | }!||	 | }"|!| }#|"| }$t j| |! |dd|j}%t j| |# |dd|j}&t j||" | dd|j}'t j||$ | dd|j}(|d d d f })|d d d f }*|%|) |&|*  }+|&|) |%|*  },|'|) |(|*  }-|(|) |'|*  }.t j| |! |+|d t j| |# |,|d t j||" |-| d t j||$ |.| d d S t d|d d d f }t d|d d d f }t d|d d d d f }/||k |/|k @ }||k |/|k @ } d|/ }0|0d }1||	 |0 }2||	 |1 }3||	 |0 }4||	 |1 }5t j| |2 |dd|j}6t j| |3 |dd|j}7t j||4 | dd|j}8t j||5 | dd|j}9|d d d f })|d d d f }*|6|) |7|*  }:|7|) |6|*  };|8|) |9|*  }<|9|) |8|*  }=t j| |2 |:|d t j| |3 |;|d t j||4 |<| d t j||5 |=| d d S )Nr   r   r*   g        r  r  )	r  r  r  r4   r  rl   r  r5   r  )>r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r1   r  r  tposhposwposridxrmaskuse_hwuse_hposr/   r0   q_headk_headr  q_maskk_maskq_off0k_off0q_off1k_off1q0q1k0k1cos_bsin_bnq0nq1nk0nk1pevenodd
q_even_off	q_odd_off
k_even_off	k_odd_offq_evenq_oddk_evenk_oddnq_evennq_oddnk_evennk_oddr'   r'   r(   _triton_ernie45_rope_qk_fused
  s   
r  c                 C  s   | j r|j r|j r|j sJ |  dkr| dksJ | dkr)|jd dks+J | ddkr9|ddks;J |ddksDJ | dkrN| sPJ | jd }|jd |ks^J | jd |jd   kro|ksrJ  J | jd }	|jd }
|	| dkr|
| dksJ |	| }|
| }|}|d dksJ ||ksJ |\}}}||ksJ d|| | |d ksJ d|j| jks|j| jkr|j| j| jd}t	|}t	|}t	|}|| dkrd	nd
}t
|f | |||| d|d|d||||||||| ||d d S )Nr   r   rf  r*   z.Ernie4.5 layout assumes section_h == section_wz'mrope_section must sum to rotary_dim//2r|   i          )
r  r  r  r  r  r  r  r  r1   	num_warps)r   r   r!   r  r  r5   rk   r4   r  r  r  )r  rD  rF   r   r   r;   r=   r1   r   r  r  r  r  r  	section_h	section_w	section_tr  r  r  r  r'   r'   r(   !triton_ernie45_rope_fused_inplaceh  sb   
 
(







r  c                      sR   e Zd ZdZ		d#d$ fddZ	d%d&ddZ	d%d&ddZ	d%d'd!d"Z  ZS )(Ernie4_5_VLRotaryEmbeddingz=3D rotary positional embedding. [h w h w h w h w... t t t...]NFr;   r<   r=   r>   r?   r1   r2   r5   r@   r   r   r   r   rA   c	           	   
     sb   t  j||||||||d || _|| _|| _|| _|| _|| _|| _|| _	t
jddt| _d S )Nr  TrI   )rL   rM   r;   r=   r>   r?   r1   r5   r   r   r"   r_   r9   r[   )	rb   r;   r=   r>   r?   r1   r5   r   r   rd   r'   r(   rM     s&   
z#Ernie4_5_VLRotaryEmbedding.__init__r   r   r   r   torch.Tensor | None(tuple[torch.Tensor, torch.Tensor | None]c                 C  s  |j dks|j dksJ |d usJ |jd }| j| }|jddd\}}|j dkr| js/J | jd }| jd }	| jd }
||	ksDJ |d|
 d f }|dd ||	 df }|dd||	 df }|d |d |d }}}tj||gdd|jd d |jd d f }tj||gdd}|d|
 d f }|dd ||	 df }|dd||	 df }|d |d |d }}}tj||gdd|jd d |jd d f }tj||gdd}|j}|	|d| j
}|dd | jf }|d| jd f }| |||| j}tj||fdd|}|j}|	|d| j
}|dd | jf }|d| jd f }| |||| j}tj||fdd|}||fS )Nr*   r   r   r   r   .)r   r!   rF   r6   r   r"   r,   r   r#   r   r;   r=   r[   r1   )rb   r   r   r   r   r   r/   r0   r  r  r  section_cos_tsection_cos_hsection_cos_wcos_tcos_hcos_wcos_hwsection_sin_tsection_sin_hsection_sin_wsin_tsin_hsin_wsin_hwr   r   r   r   r   r   r'   r'   r(   r]     sT   







z)Ernie4_5_VLRotaryEmbedding.forward_nativec              
   C  s   |d usJ |j dv sJ | | |j dkr3| jd usJ t||| j|| j| j| j| jd ||fS trJt	d urJt	|||| j| j| jd ||fS | 
|||S )N)r*   r   r   )r  rD  rF   r   r   r;   r=   r1   r   )r   r  r   r  rF   r;   r=   r1   rO   r   r]   r  r'   r'   r(   r   
  s6   


z'Ernie4_5_VLRotaryEmbedding.forward_cudar   r   r   c                 C  s&   |j dks|j dksJ | |||S r  )r   r   r  r'   r'   r(   rG  4  s   z"Ernie4_5_VLRotaryEmbedding.forwardr}  r  r   )r   r   r   r   r   r  r   r  r  )	r   r   r   r   rM   r]   r   rG  r   r'   r'   rd   r(   r    s    
#=/r  c                      sV   e Zd ZdZd% fddZd&ddZd'ddZ	d(d)ddZd d! Zd*d#d$Z	  Z
S )+DualChunkRotaryEmbeddingz5Rotary positional embedding for Dual Chunk Attention.r;   r<   r=   r>   r?   r1   r2   r5   r@   
chunk_size
local_sizer   rA   c	                   s   t    || _|| _|| _|| _|| _|| _|| _|| _	t
dt
j  | _|  \}	}
}}}| jd|	dd | jd|
dd | jd|dd | jd|dd | jd|dd d S )	Nzcuda:cos_sin_q_cacheFrG   cos_sin_qc_cachecos_sin_k_cachecos_sin_qc_no_clamp_cachecos_sin_q_inter_cache)rL   rM   r;   r=   r>   r?   r1   r  r   r5   r"   rk   rn   current_devicerN   rZ   )rb   r;   r=   r>   r?   r1   r5   r  r   q_cacheqc_cachek_cacheqc_no_clamp_cacheq_inter_cacherd   r'   r(   rM   J  s&   
z!DualChunkRotaryEmbedding.__init__rf   r   c                 C  s(   d|t jd| jdt jd| j   }|S )rg   ri   r   r   rs   )r"   rl   r=   rm   )rb   r?   rp   r'   r'   r(   rq   k  s   z*DualChunkRotaryEmbedding._compute_inv_freqc                 C  s  |  | j}| j| j }tj|tjd}tj|tjd| j| jd}tj| jtjd| }tj|tjd| }tj|tjd| j }t	||}t	||}	t	||}
t	||}t	||}|
 }| }|	
 }|	 }|

 }|
 }|
 }| }|
 }| }tj||fddj| j| jd}tj||fddj| j| jd}tj||fddj| j| jd}tj||fddj| j| jd}tj||fddj| j| jd}|||||fS )rr   rs   )r  r   r   rj   )rq   r?   r  r   r"   rl   rm   r  r>   outerr/   r0   r#   r4   r5   rk   )rb   rp   	chunk_lenq_tqc_tk_tqc_no_clamp_t	q_inter_tq_freqsqc_freqsk_freqsqc_no_clamp_freqsq_inter_freqsq_cosq_sinqc_cosqc_sink_cosk_sinqc_no_clamp_cosqc_no_clamp_sinq_inter_cosq_inter_sinr  r  r	  r
  r  r'   r'   r(   rN   ~  sP   z/DualChunkRotaryEmbedding._compute_cos_sin_cacheNr   r   r   r   r   r   c                 C  s  |j g |jd d d| jR  }|j g |jd d d| jR  }|dd | jf }|dd | jf }| j| jk rO|d| jd f }|d| jd f }nd }d }|d ur]t||n|}	| | j|	 ||}| j| j	 }
| | j
|	|
  ||}| | j|	|
  ||}| | j|
d  |jd d||}| | j|	|
  ||}| | j|	|
  ||}tj|||||fdd}||fS )Nr   .r*   r   r   )r   r!   r;   r=   r"   r   _apply_rotary_embeddingr  r  r   r  r  r   r  r  r#   )rb   r   r   r   r   r   r   r   r   ri  r  
query_succquery_interquery_succ_criticalquery_inter_criticalr'   r'   r(   rG    sd   $$
z DualChunkRotaryEmbedding.forwardc                 C  s   |j ddd\}}| jr!|dddd}|dddd}n|jdddd}|jdddd}| jr:tnt}|| |||  }| j| jk rVt	j
||fdd}n|}|ddS )Nr   r   r   r*   r+   r   )r6   r1   r   r3   rw  r)   r.   r=   r;   r"   r#   r-   r/  )rb   r   
hidden_rothidden_passr/   r0   rx  hiddenr'   r'   r(   r"    s   z0DualChunkRotaryEmbedding._apply_rotary_embeddingr   c                 C  sX   d| j  d| j }|d| j 7 }|d| j d| j 7 }|d| j d| j 7 }|S )Nr   r   r   r   r   z, chunk_size=z, local_size=)r;   r=   r>   r?   r1   r  r   r   r'   r'   r(   r     s
   z#DualChunkRotaryEmbedding.extra_repr)r;   r<   r=   r<   r>   r<   r?   r<   r1   r2   r5   r@   r  r<   r   r<   r   rA   r   r   r   rH  r   )r   r   r   r   rM   rq   rN   rG  r"  r   r   r'   r'   rd   r(   r  G  s    
!
7@r  zDict[Tuple, RotaryEmbedding]
_ROPE_DICTri   max_positionrope_scalingOptional[Dict[str, Any]]Optional[torch.dtype]partial_rotary_factordual_chunk_attention_configc	                 C  s  |d u rt  }|d urdd | D }	t|	 }
nd }
|d ur2dd | D }t| }nd }|dk r>t|| }| |||||
||f}|tv rPt| S |d urldd | D }t| |||||fi |}np|d u r{t| |||||}nad|v r|d }nd|v r|d }ntd| |d	kr|d
 }|d }|d }|d }t	| |||||||||
}n%|dkrd|v rt
| ||||||d |ddd}n|ddrt| ||||||d |dd|dd|dd d
}nt| |||||}n|dkr|d
 }t| ||||||}n|dkr;|d
 }d|v r0t| |||||d |}nt| ||||||}n|dkr|d
 }|d }dd | D }|d d|d < d|v rvt| ||||||f|d |ddd|}nft| ||||||fi |}nW|d!kr|d
 }|d }d"d | D }t| ||||||fi |}n2|d#kr|d$ }|d% }|d }d&d | D }t| ||||||||f	i |}ntd'| |t|< |S )(Nc                 S  (   i | ]\}}|t |trt|n|qS r'   r   r  tupler   rD  vr'   r'   r(   r         zget_rope.<locals>.<dictcomp>c                 S  s0   i | ]\}}|d kr|t |trt|n|qS )sparse_attention_configr2  r4  r'   r'   r(   r   %  s
    ri   c                 S     i | ]\}}|d v r||qS ))r  r   r'   r4  r'   r'   r(   r   >  
    	rope_typetypez+Unknown RoPE scaling type, rope_scaling is llama3factorr  r  r/  defaultr   r   Fr  use_foperN  rK  r  rL  TrM  )rN  rK  rL  rM  linearrJ   alphayarnc                 S  r8  ))r  r  r  r  r'   r4  r'   r'   r(   r         r  deepseek_yarnc                 S  r8  )r  r  r  r  r  rm  r'   r4  r'   r'   r(   r     rC  longroper0  r2  c                 S  r8  ))r3  r5  r'   r4  r'   r'   r(   r     r9  zUnknown RoPE scaling type )r"   get_default_dtypeitemsr3  r<   r*  r  r:   r9  r~  r  r   rJ  r   r  r   r  r  rl  r.  )r;   r=   r+  r?   r1   r,  r5   r/  r0  rope_scaling_tuplerope_scaling_argsdual_chunk_attention_tupledual_chunk_attention_argsr   extra_kwargs
rotary_embscaling_typer   r  r  original_max_positionr0  r2  r'   r'   r(   r     s  

	










	


	







r   c                 C  sH   | dd| j d d f }| d| j d d df }tj| |fddS )z*Rotates half the hidden dims of the input..Nr   r   r   r    r$   r'   r'   r(   rotate_half  s   rQ  )rJ   backendr   c           	      C  s~   | j }|j }|  | } }|| }|| }| | t| |  }|| t||  }||}||}||fS r   )r5   rm   r3   rQ  r4   )	r  rD  r/   r0   unsqueeze_dimorig_q_dtypeorig_k_dtypeq_embedk_embedr'   r'   r(   rg    s   

rg  c                 C  s   |  dks|   dks| jd tks| jd tkr"t| ||||S ||d}||d}| d} |d}t| ||}t|||}|d}|d}||fS )zAscend implementation equivalent to apply_rotary_pos_emb_native.

    Args:
        q: [num_tokens, num_heads, head_size]
        k: [num_tokens, num_kv_heads, head_size]
        cos: [num_tokens, head_size]
        sin: [num_tokens, head_size]
    r   rf  r*   r   )	r   r!   NPU_ROTARY_MUL_MAX_NUM_HEADSNPU_ROTARY_MUL_MAX_HEAD_SIZErg  r3   r   npu_rotary_mulr/  )r  rD  r/   r0   rS  rV  rW  r'   r'   r(   apply_rotary_pos_emb_npu&  s   



r[  rO  c	                 C  s   |d u rt  }|d urdd | D }	t|	 }
nd }
|dk r(t|| }| |||||
|f}|tv r9t| S |d us?J |d }|dksKJ d|d }|d }d	d | D }||d
< t| ||||||fi |}|t|< |S )Nc                 S  r1  r'   r2  r4  r'   r'   r(   r   ]  r6  z get_rope_cpu.<locals>.<dictcomp>ri   r:  rD  z/Only deepseek_yarn is supported for CPU for nowr=  r/  c                 S  r8  rE  r'   r4  r'   r'   r(   r   y  rC  rk   )r"   rG  rH  r3  r<   r*  rl  )r;   r=   r+  r?   r1   r,  r5   r/  rk   rI  rJ  r   rO  r   rP  rM  rN  r'   r'   r(   get_rope_cpuN  sX   	
r\  c	           
   
   C  sB   |dkrt rtnt}	|	| |||||||S t| ||||||||	S )Nrh   )
_use_aiteraiter_get_roper   r\  )
r;   r=   r+  r?   r1   r,  r5   r/  rk   wrapperr'   r'   r(   get_rope_wrapper  s.   r`  )r   r   r   r   )
r   r   r/   r   r0   r   r1   r2   r   r   )r   r   )
r   r<   r   r<   r?   rm   r>   r<   r   rm   )r   r   T)r  r<   r  r<   r   r<   r?   rm   r>   r<   r  r2   r   r  r   )r	  rm   r
  rm   r   r<   r5   r@   rk   r  r   r   r  )r  rm   r   rm   r]  )r  rm   r  rm   r   rm   )r   r   r   r  r   r   )r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r1   r  )r  r   rD  r   rF   r   r   r   r   r  r;   r<   r=   r<   r   r2   r1   r2   r   rA   )r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r1   r  )r  r   rD  r   rF   r   r   r   r   r  r;   r<   r=   r<   r1   r2   r   rA   )TNNri   N)r;   r<   r=   r<   r+  r<   r?   r<   r1   r2   r,  r-  r5   r.  r/  rm   r0  r-  r   r:   )
r  r   rD  r   r/   r   r0   r   r   r   )r;   r<   r=   r<   r+  r<   r?   r<   r1   r2   r,  r-  r5   r.  r/  rm   rk   rO  r   r:   )r;   r<   r=   r<   r+  r<   r?   r<   r1   r2   r,  r-  r5   r.  r/  rm   rk   rO  )\r   
__future__r   r  r   typingr   r   r   r   r   r   r"   torch.nnrS  torch.nn.functional
functionalrc  r  triton.languagelanguager  sglang.srt.layers.utilsr	   sglang.srt.server_argsr
   sglang.srt.utilsr   r   r   r   r   r   r   r   r   r   rO   rU   r]  rR   r   rP   rQ   rS   sglang.jit_kernel.roper   r   aiter.rotary_embeddingr   r^  r   rX  rY  r)   r.   r9   r:   r   r   r   r  r  r  r  Moduler.  rI  rJ  rl  r~  r  r  r   jitr  r  r  r  r  r  r  r  r*  __annotations__rQ  r_   rg  r[  apply_rotary_pos_embr\  r`  r'   r'   r'   r(   <module>   s    0


  EW-H  : Z/?
% 
'D        \L
~G  E z"M