"""Fused operators for activation layers."""

import logging
import math
from typing import Optional

import torch
import torch.nn as nn
import torch.nn.functional as F
from transformers import PretrainedConfig

from sglang.srt.distributed import (
    divide,
    get_tensor_model_parallel_rank,
    get_tensor_model_parallel_world_size,
)
from sglang.srt.environ import envs
from sglang.srt.layers.quantization.base_config import QuantizationConfig
from sglang.srt.layers.utils import MultiPlatformOp
from sglang.srt.server_args import get_global_server_args
from sglang.srt.utils import (
    cpu_has_amx_support,
    is_cpu,
    is_cuda,
    is_hip,
    is_npu,
    is_xpu,
    set_weight_attrs,
)
from sglang.utils import resolve_obj_by_qualname

# Platform availability flags, resolved once at import time.
_is_cuda = is_cuda()
_is_npu = is_npu()
_is_cpu_amx_available = cpu_has_amx_support()
_is_cpu = is_cpu()
_is_hip = is_hip()
_is_xpu = is_xpu()

if _is_cuda or _is_xpu:
    from sgl_kernel import gelu_and_mul, gelu_tanh_and_mul, silu_and_mul
elif _is_hip:
    from sgl_kernel import gelu_and_mul, gelu_quick, gelu_tanh_and_mul, silu_and_mul

if _is_npu:
    import torch_npu

logger = logging.getLogger(__name__)


class SiluAndMul(MultiPlatformOp):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        if get_global_server_args().rl_on_policy_target is not None:
            self._forward_method = self.forward_native

    def forward_native(self, x: torch.Tensor) -> torch.Tensor:
        d = x.shape[-1] // 2
        return F.silu(x[..., :d]) * x[..., d:]

    def forward_cuda(self, x: torch.Tensor) -> torch.Tensor:
        d = x.shape[-1] // 2
        output_shape = x.shape[:-1] + (d,)
        out = torch.empty(output_shape, dtype=x.dtype, device=x.device)
        silu_and_mul(x, out)
        return out

    def forward_cpu(self, x: torch.Tensor) -> torch.Tensor:
        if _is_cpu_amx_available:
            out = torch.ops.sgl_kernel.silu_and_mul_cpu(x)
            return out
        else:
            return self.forward_native(x)

    def forward_npu(self, x: torch.Tensor) -> torch.Tensor:
        out = torch_npu.npu_swiglu(x)
        return out

    def forward_xpu(self, x: torch.Tensor) -> torch.Tensor:
        d = x.shape[-1] // 2
        output_shape = x.shape[:-1] + (d,)
        out = torch.empty(output_shape, dtype=x.dtype, device=x.device)
        silu_and_mul(x, out)
        return out


class GeluAndMul(MultiPlatformOp):
    def __init__(self, approximate="tanh"):
        super().__init__()
        self.approximate = approximate

    def _forward_impl(self, x: torch.Tensor) -> torch.Tensor:
        d = x.shape[-1] // 2
        output_shape = x.shape[:-1] + (d,)
        out = torch.empty(output_shape, dtype=x.dtype, device=x.device)
        if self.approximate == "tanh":
            gelu_tanh_and_mul(x, out)
        elif self.approximate == "none":
            gelu_and_mul(x, out)
        else:
            raise RuntimeError("GeluAndMul only support tanh or none")
        return out

    def forward_native(self, x: torch.Tensor) -> torch.Tensor:
        d = x.shape[-1] // 2
        return F.gelu(x[..., :d], approximate=self.approximate) * x[..., d:]

    def forward_cpu(self, x: torch.Tensor) -> torch.Tensor:
        if _is_cpu_amx_available and self.approximate == "tanh":
            return torch.ops.sgl_kernel.gelu_tanh_and_mul_cpu(x)
        elif _is_cpu_amx_available and self.approximate == "none":
            return torch.ops.sgl_kernel.gelu_and_mul_cpu(x)
        else:
            return self.forward_native(x)

    def forward_cuda(self, x: torch.Tensor) -> torch.Tensor:
        return self._forward_impl(x)

    def forward_xpu(self, x: torch.Tensor) -> torch.Tensor:
        return self._forward_impl(x)

    def forward_npu(self, x: torch.Tensor) -> torch.Tensor:
        if envs.SGLANG_NPU_FORWARD_NATIVE_GELUTANH.get():
            return self.forward_native(x)
        y_npu, gelu_npu = torch_npu.npu_geglu(
            x,
            dim=-1,
            approximate=1 if self.approximate == "tanh" else 0,
            activate_left=True,
        )
        return y_npu


class NewGELU(MultiPlatformOp):
    def forward_native(self, x: torch.Tensor) -> torch.Tensor:
        c = math.sqrt(2.0 / math.pi)
        return 0.5 * x * (1.0 + torch.tanh(c * (x + 0.044715 * torch.pow(x, 3.0))))

    def forward_cuda(self, x: torch.Tensor) -> torch.Tensor:
        return self.forward_native(x)


class ReLU2(nn.Module):
    """
    Applies the squared Rectified Linear Unit function.
    y = max(0, x)^2
    """

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x = F.relu(x)
        return x * x


class QuickGELU(MultiPlatformOp):
    def forward_native(self, x: torch.Tensor) -> torch.Tensor:
        return x * torch.sigmoid(1.702 * x)

    def forward_cuda(self, x: torch.Tensor) -> torch.Tensor:
        return self.forward_native(x)

    def forward_hip(self, x: torch.Tensor) -> torch.Tensor:
        out = torch.empty(x.shape, dtype=x.dtype, device=x.device)
        gelu_quick(x, out)
        return out

    def forward_npu(self, x: torch.Tensor) -> torch.Tensor:
        return torch_npu.npu_fast_gelu(x)


class XIELU(MultiPlatformOp):
    """
    Applies the xIELU activation function introduced in https://arxiv.org/abs/2411.13010
    If the user has installed the nickjbrowning/XIELU, we import xIELU CUDA
    Otherwise, we emit a single warning and use xIELU Python
    """

    def __init__(
        self,
        alpha_p_init: float = 0.8,
        alpha_n_init: float = 0.8,
        beta: float = 0.5,
        eps: float = -1e-6,
        dtype: torch.dtype = torch.bfloat16,
        with_vector_loads: bool = False,
    ):
        super().__init__()
        self.alpha_p = nn.Parameter(
            torch.log(torch.exp(torch.tensor(alpha_p_init, dtype=dtype)) - 1).unsqueeze(0)
        )
        self.alpha_n = nn.Parameter(
            torch.log(
                torch.exp(torch.tensor(alpha_n_init - beta, dtype=dtype)) - 1
            ).unsqueeze(0)
        )
        self.register_buffer("beta", torch.tensor(beta, dtype=dtype))
        self.register_buffer("eps", torch.tensor(eps, dtype=dtype))
        self.with_vector_loads = with_vector_loads
        # Plain Python scalars so the CUDA path never calls .item() under compile.
        self._beta_scalar = float(self.beta.detach().cpu().item())
        self._eps_scalar = float(self.eps.detach().cpu().item())

        self._xielu_cuda_obj = None
        try:
            import xielu.ops  # noqa: F401

            self._xielu_cuda_obj = torch.classes.xielu.XIELU()
            msg = "Using experimental xIELU CUDA."
            try:
                from torch._dynamo import allow_in_graph

                self._xielu_cuda_fn = allow_in_graph(self._xielu_cuda)
                msg += " Enabled torch._dynamo for xIELU CUDA."
            except Exception as err:
                msg += (
                    f" Could not enable torch._dynamo for xIELU ({err})"
                    " - this may result in slower performance."
                )
                self._xielu_cuda_fn = self._xielu_cuda
            logger.warning_once(msg)
        except Exception:
            # Fall back silently to the Python implementation.
            pass

    def _xielu_python(self, x: torch.Tensor) -> torch.Tensor:
        alpha_p = nn.functional.softplus(self.alpha_p)
        alpha_n = self.beta + nn.functional.softplus(self.alpha_n)
        return torch.where(
            x > 0,
            alpha_p * x * x + self.beta * x,
            (torch.expm1(torch.min(x, self.eps)) - x) * alpha_n + self.beta * x,
        )

    def _xielu_cuda(self, x: torch.Tensor) -> torch.Tensor:
        """Firewall function to prevent torch.compile from seeing .item()"""
        assert self._xielu_cuda_obj is not None, "XIELU CUDA object must not be None"
        original_shape = x.shape
        # The CUDA kernel expects a 3-dimensional input.
        while x.dim() < 3:
            x = x.unsqueeze(0)
        if x.dim() > 3:
            x = x.view(-1, 1, x.size(-1))
        if original_shape != x.shape:
            logger.warning_once(
                "Warning: xIELU input tensor expects 3 dimensions"
                " but got (shape: %s). Reshaping to (shape: %s).\n"
                "Note: For SGLang this may be expected if sending"
                " [B*S,D] instead of [B,S,D].",
                original_shape,
                x.shape,
            )
        result = self._xielu_cuda_obj.forward(
            x,
            self.alpha_p,
            self.alpha_n,
            self._beta_scalar,
            self._eps_scalar,
            self.with_vector_loads,
        )
        return result.view(original_shape)

    def forward(self, input: torch.Tensor) -> torch.Tensor:
        if self._xielu_cuda_obj is not None and input.is_cuda:
            if not torch._dynamo.is_compiling():
                return self._xielu_cuda_fn(input)
            else:
                logger.warning_once(
                    "torch._dynamo is compiling, using Python version of xIELU."
                )
        return self._xielu_python(input)


class ScaledActivation(nn.Module):
    """An activation function with post-scale parameters.

    This is used for some quantization methods like AWQ.
    """

    def __init__(
        self,
        act_module: nn.Module,
        intermediate_size: int,
        input_is_parallel: bool = True,
        params_dtype: Optional[torch.dtype] = None,
    ):
        super().__init__()
        self.act = act_module
        self.input_is_parallel = input_is_parallel
        if input_is_parallel:
            tp_size = get_tensor_model_parallel_world_size()
            intermediate_size_per_partition = divide(intermediate_size, tp_size)
        else:
            intermediate_size_per_partition = intermediate_size
        if params_dtype is None:
            params_dtype = torch.get_default_dtype()
        self.scales = nn.Parameter(
            torch.empty(intermediate_size_per_partition, dtype=params_dtype)
        )
        set_weight_attrs(self.scales, {"weight_loader": self.weight_loader})

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.act(x) / self.scales

    def weight_loader(self, param: nn.Parameter, loaded_weight: torch.Tensor):
        param_data = param.data
        if self.input_is_parallel:
            tp_rank = get_tensor_model_parallel_rank()
            shard_size = param_data.shape[0]
            start_idx = tp_rank * shard_size
            loaded_weight = loaded_weight.narrow(0, start_idx, shard_size)
        assert param_data.shape == loaded_weight.shape
        param_data.copy_(loaded_weight)


_ACTIVATION_REGISTRY = {
    "gelu": nn.GELU(),
    "gelu_pytorch_tanh": nn.GELU(approximate="tanh"),
    "gelu_new": NewGELU(),
    "relu2": ReLU2(),
    "xielu": XIELU(),
}


def get_act_fn(
    act_fn_name: str,
    quant_config: Optional[QuantizationConfig] = None,
    intermediate_size: Optional[int] = None,
    input_is_parallel: bool = True,
    params_dtype: Optional[torch.dtype] = None,
) -> nn.Module:
    """Get an activation function by name."""
    act_fn_name = act_fn_name.lower()
    if act_fn_name not in _ACTIVATION_REGISTRY:
        raise ValueError(f"Activation function {act_fn_name!r} is not supported.")

    act_fn = _ACTIVATION_REGISTRY[act_fn_name]
    if quant_config is not None and act_fn_name in quant_config.get_scaled_act_names():
        if intermediate_size is None:
            raise ValueError(
                "intermediate_size must be specified for scaled activation functions."
            )
        return ScaledActivation(
            act_fn, intermediate_size, input_is_parallel, params_dtype
        )
    return act_fn


def get_cross_encoder_activation_function(config: PretrainedConfig):
    if (
        hasattr(config, "sbert_ce_default_activation_function")
        and config.sbert_ce_default_activation_function is not None
    ):
        function_name = config.sbert_ce_default_activation_function
        assert function_name.startswith("torch.nn.modules."), (
            "Loading of activation functions is restricted to"
            " torch.nn.modules for security reasons"
        )
        return resolve_obj_by_qualname(function_name)()
    else:
        return nn.Identity()
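

if __name__ == "__main__":
    # Illustrative smoke test: a minimal sketch added for exposition, not part
    # of the packaged module. It exercises only the pure-PyTorch forward_native
    # paths, so no sgl_kernel / NPU / XPU backend is required, and it assumes
    # get_global_server_args() yields usable defaults outside a running server.
    gate_up = torch.randn(4, 2 * 128)  # hypothetical [num_tokens, 2 * intermediate]

    # SiluAndMul halves the last dim: silu(gate) * up -> shape [4, 128].
    assert SiluAndMul().forward_native(gate_up).shape == (4, 128)

    # GeluAndMul applies (tanh-approximated) GELU to the gate half instead.
    assert GeluAndMul(approximate="tanh").forward_native(gate_up).shape == (4, 128)

    # get_act_fn maps an HF-style activation name to a shared module instance.
    act = get_act_fn("gelu_new")
    assert act.forward_native(torch.randn(4, 128)).shape == (4, 128)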