o
     i                  	   @   s  d dl mZmZ d dlZd dlZdejdejjddfddZdejdejjdeejeejj	 f fd	d
Z
dejdejjdeejeejj	 f fddZdejdejjdejfddZdejdejjdejfddZG dd dejjZdejdejjdejfddZG dd dejjZdejdejjdejfddZG dd dejjZdejdejjdejfddZG dd dejjZdejdejjdejfdd ZdS )!    )OptionalTupleNxprocess_groupreturnc                C   s0   |  }|dkr
d S tjj| tjjj|d d S )N   )tensoropgroup)sizetorchdistributed
all_reduceReduceOpSUM)r   r   mp_size r   [/home/ubuntu/.local/lib/python3.10/site-packages/xformers/ops/differentiable_collectives.pyr      s   
r   input_c                C   sX   |  }|dkr| d fS | | jd | f| jdd   }tjj|| |dd}||fS )Nr   r   T)output_tensorinput_tensorr
   async_op)r   	new_emptyshaper   r   all_gather_into_tensorr   r   r   outputhandler   r   r   gather_along_first_dim_async   s   $r   c                C   sv   |  }|dkr| d fS | jd | dksJ | | jd | f| jdd   }tjj|| tjjj|dd}||fS )Nr   r   T)r   inputr	   r
   r   )r   r   r   r   r   reduce_scatter_tensorr   r   r   r   r   r   $reduce_scatter_along_first_dim_async+   s   $r!   c                C   $   t | |d\}}|d ur|  |S Nr   )r   waitr   r   r   r   r   r   r   gather_along_first_dim?   s   r'   c                C   r"   r#   )r!   r%   r&   r   r   r   reduce_scatter_along_first_dimH   s   
r(   c                   @   P   e Zd ZedejdejjdejfddZedejde	ejdf fdd	Z
dS )
_CopyToModelParallelRegionr   r   r   c                 C   s
   || _ |S Nr$   ctxr   r   r   r   r   forwardT   s   z"_CopyToModelParallelRegion.forwardgrad_outputNc                 C   s   t || jd |d fS r#   )r   r   r-   r/   r   r   r   backward[   s   z#_CopyToModelParallelRegion.backward__name__
__module____qualname__staticmethodr   Tensorr   ProcessGroupr.   r   r1   r   r   r   r   r*   S       r*   c                 C      t | |S r+   )r*   applyr   r   r   r   r   copy_to_model_parallel_regionc      r=   c                   @   r)   )
_ReduceFromModelParallelRegionr   r   r   c                 C   s   t ||d | | |S r#   )r   
mark_dirtyr,   r   r   r   r.   j   s   
z&_ReduceFromModelParallelRegion.forwardr/   Nc                 C   s   |d fS r+   r   r0   r   r   r   r1   r   s   z'_ReduceFromModelParallelRegion.backwardr2   r   r   r   r   r?   i   s    r?   c                 C   r:   r+   )r?   r;   r<   r   r   r   !reduce_from_model_parallel_regiony   r>   rA   c                   @   r)   )
!_GatherFromSequenceParallelRegionr   r   r   c                 C      || _ t||dS r#   )r   r'   r-   r   r   r   r   r   r.         z)_GatherFromSequenceParallelRegion.forwardr/   Nc                 C      t || jdd fS r#   )r(   r   r0   r   r   r   r1      s
   z*_GatherFromSequenceParallelRegion.backwardr2   r   r   r   r   rB      r9   rB   c                 C   r:   r+   )rB   r;   r<   r   r   r   $gather_from_sequence_parallel_region   r>   rG   c                   @   r)   )
 _ScatterToSequenceParallelRegionr   r   r   c                 C   rC   r#   )r   r(   rD   r   r   r   r.      rE   z(_ScatterToSequenceParallelRegion.forwardr/   Nc                 C   rF   r#   )r'   r   r0   r   r   r   r1      s   z)_ScatterToSequenceParallelRegion.backwardr2   r   r   r   r   rH      r9   rH   c                 C   r:   r+   )rH   r;   r<   r   r   r   #scatter_to_sequence_parallel_region   r>   rI   )typingr   r   r   torch.distributedr7   r   r8   r   Workr   r!   r'   r(   autogradFunctionr*   r=   r?   rA   rB   rG   rH   rI   r   r   r   r   <module>   s   



	



