o
    پi                     @   s   d dl mZ d dlmZmZ d dlZd dlmZmZ d dl	m
Z
mZmZmZmZmZmZ d dlmZmZmZ d dlmZ dd	 ZG d
d deZG dd deZG dd deZG dd deZdS )    )partial)CallableOptionalN)is_nsa_enable_prefill_cpnsa_use_prefill_cp)CommunicateContextCommunicateSimpleFnCommunicateSummableTensorPairFn&CommunicateWithAllReduceAndLayerNormFnLayerCommunicatorLayerScatterModesScatterMode)attn_cp_all_gather_into_tensorattn_cp_reduce_scatter_tensorget_local_dp_buffer)ForwardBatchc                   C   s   t  S N)r    r   r   Y/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/layers/communicator_nsa_cp.pynsa_enable_prefill_cp*   s   r   c                       sR   e Zd Z			ddedejjdejjdededee	 f fd	d
Z
dd Z  ZS )NSACPLayerCommunicatorFNlayer_scatter_modesinput_layernormpost_attention_layernormallow_reduce_scatteris_last_layerqkv_latent_funcc                    s   t  |||||| d S r   )super__init__)selfr   r   r   r   r   r   	__class__r   r   r   2   s   
zNSACPLayerCommunicator.__init__c                 C   s~   | j jtjkr| jjdksJ dtjtjtj| jd| _t	jtjtj| j jtj| jd| _
tj| j jtjtj| jd| _d S )N   z3dp_size should be 1 when moe_runner_backend is none
input_modeoutput_modecontexthidden_states_input_moderesidual_input_modehidden_states_output_moderesidual_output_moder&   r(   r)   r%   r&   )r   mlp_moder   	SCATTERED_contextattn_dp_sizeNSACPCommunicateSimpleFnget_fn_communicate_simple_fn+NSACPCommunicateWithAllReduceAndLayerNormFn._communicate_with_all_reduce_and_layer_norm_fn$NSACPCommunicateSummableTensorPairFn$_communicate_summable_tensor_pair_fn)r   r   r   r   _post_init_communicateE   s,   z-NSACPLayerCommunicator._post_init_communicate)FFN)__name__
__module____qualname__r   torchnnModuleboolr   r   r   r8   __classcell__r   r   r    r   r   1   s$    r   c                   @   s&   e Zd ZedededefddZdS )r1   r$   r%   r&   c                 C   s&   | | |r	tjS td| d|)Nzinput_mode= output_mode=)is_same_group_sizer1   _trivialNotImplementedErrorr#   r   r   r   r2   `   s   zNSACPCommunicateSimpleFn.get_fnN)r9   r:   r;   staticmethodr   r   r2   r   r   r   r   r1   _   s    r1   c                   @   s\   e Zd ZdZedededededef
ddZed	ej	d
ej	de
dejjdef
ddZdS )r4   zpBesides communication, needs to
    1. All reduce in tp_attn_group on hidden_states
    2. Apply layer norm
    r(   r)   r*   r+   r&   c              	   C   sr   | t jksJ |t jksJ |t jksJ |t jkrtjS |t jkr)ttj|dS td| d|d|d|)N)r)   hidden_states_input_mode= residual_input_mode=z hidden_states_output_mode=z residual_output_mode=)r   r.   r4   _simpleFULLr   "_gather_hidden_states_and_residualrD   r'   r   r   r   r2   t   s   

z2NSACPCommunicateWithAllReduceAndLayerNormFn.get_fnhidden_statesresidualforward_batch	layernormc                C   sP   | j d dkr|| |\} }t|r$|jdksJ t | } }t| | | |fS )Nr   r"   )shaper   r0   r   r   )rK   rL   rM   rN   r&   r)   local_hidden_statesr   r   r   rJ      s   
zNNSACPCommunicateWithAllReduceAndLayerNormFn._gather_hidden_states_and_residualN)r9   r:   r;   __doc__rE   r   r   r2   r<   Tensorr   r=   r>   rJ   r   r   r   r   r4   l   s4    r4   c                   @   sX   e Zd ZdZededededefddZe	dd	ej	d
ej	de
dedef
ddZdS )r6   z^It is allowed to make (hidden_states, residual) := (hidden_states + residual, None) if needed.r(   r)   r%   r&   c                 C   s\   | | |r| ||rtjS | tjkr!|tjkr!|tjkr!tjS td| d|d|)NrF   rG   rA   )rB   r6   rC   r   rI   r.   _scatter_hidden_statesrD   r,   r   r   r   r2      s   



z+NSACPCommunicateSummableTensorPairFn.get_fnFrK   rL   rM   r   c                 C   s>   t |r|jdksJ | }| |j|j } t| | | |fS )Nr"   )r   r0   tensor_splitattn_cp_sizeattn_cp_rankr   )rK   rL   rM   r&   r   input_hidden_statesr   r   r   rS      s   


z;NSACPCommunicateSummableTensorPairFn._scatter_hidden_statesN)F)r9   r:   r;   rQ   rE   r   r   r2   r<   rR   r   r?   rS   r   r   r   r   r6      s2    r6   )	functoolsr   typingr   r   r<   %sglang.srt.layers.attention.nsa.utilsr   r   sglang.srt.layers.communicatorr   r   r	   r
   r   r   r   sglang.srt.layers.dp_attentionr   r   r   ,sglang.srt.model_executor.forward_batch_infor   r   r   r1   r4   r6   r   r   r   r   <module>   s   $	.
;