o
    }oi                     @   sF   d dl Z d dl mZ d dlmZmZmZ dedededefdd	ZdS )
    N)Tensor)ProcessGroup
all_gatherget_world_sizexseq_dimcp_groupreturnc              
      sf   t |} fddt|D }z	t| |d W n ty+ } ztd| d}~ww tj||dS )a  
    Concatenates tensors from multiple processes along a specified dimension.

    This function gathers tensors from all processes in the given process group
    and concatenates them along the specified dimension.

    Args:
        x (Tensor): The input tensor to be gathered and concatenated.
        seq_dim (int): The dimension along which to concatenate the gathered tensors.
        cp_group (ProcessGroup): The process group containing all the processes involved in the gathering.

    Returns:
        Tensor: A tensor resulting from the concatenation of tensors from all processes.

    Raises:
        RuntimeError: If the gathering of tensors fails.
    c                    s   g | ]}t  qS  )torch
zeros_like).0_r   r
   g/home/ubuntu/.local/lib/python3.10/site-packages/nemo/collections/diffusion/sampler/context_parallel.py
<listcomp>*   s    z"cat_outputs_cp.<locals>.<listcomp>)groupzGathering failed: N)dim)r   ranger   RuntimeErrorr   cat)r   r   r   
world_sizegathered_tensorser
   r   r   cat_outputs_cp   s   r   )r   r   torch.distributedr   r   r   intr   r
   r
   r
   r   <module>   s   