o
    پi                  	   @   s   d dl Z d dlmZ d dlmZmZmZ 	dde jdej	de jfddZ
	dde jd	edej	de jfd
dZ	dde jdedede jfddZ	dde jd	ede jfddZ	dde jd	edede jfddZe jjjjfde jde jjjde jfddZdS )     N)get_cfg_groupget_sp_groupget_tp_groupinput_tp_groupreturnc                 C   s   |pt  }|| S )z8All-reduce the input tensor across model parallel group.)r   
all_reduce)r   r    r	   n/home/ubuntu/.local/lib/python3.10/site-packages/sglang/multimodal_gen/runtime/distributed/communication_op.py tensor_model_parallel_all_reduce   s   

r   dimc                 C   s   |pt  }|| |S z8All-gather the input tensor across model parallel group.)r   
all_gather)r   r   r   r	   r	   r
    tensor_model_parallel_all_gather   s   
r         scatter_dim
gather_dimc                 C      t  | ||S )zZAll-to-all communication of 4D tensors (e.g. QKV matrices) across sequence parallel group.)r   all_to_all_4D)r   r   r   r	   r	   r
   %sequence_model_parallel_all_to_all_4D!      r   c                 C   s   t  | |S r   )r   r   )r   r   r	   r	   r
   "sequence_model_parallel_all_gather(   s   r   Fseparate_tensorsc                 C   r   r   )r   r   )r   r   r   r	   r	   r
   cfg_model_parallel_all_gather/   r   r   opc                 C   s   t  j| |dS )z6All-reduce the input tensor across CFG parallel group.)r   )r   r   )r   r   r	   r	   r
   cfg_model_parallel_all_reduce6   s   r   )N)r   N)r   r   )r   )r   F)torchtorch.distributeddistributeddist8sglang.multimodal_gen.runtime.distributed.parallel_stater   r   r   TensorProcessGroupr   intr   r   r   boolr   _C_distributed_c10dReduceOpSUMr   r	   r	   r	   r
   <module>   sr   
	





	