o
    Ti                     @   sx   d dl Z d dlZd dlmZmZmZ dddZdddZG dd de jj	Z
G d	d
 d
e jj	ZdddZdddZdS )    N)$bwc_tensor_model_parallel_world_sizebwc_tensor_model_parallel_rankbwc_tensor_model_parallel_groupc                    s   t jjj} t|}|dkrS tj|  j	j
d t jj t|d |dkrEt }|d | |d<  |}|S  fddt|D }tj||d }|S )z5Gather tensors and concatenate them along a dimension   )dtypedevice)groupr   c                    s,   g | ]}  d  |  qS r   )narrownumelview_as).0igather_bufferinput_ J/home/ubuntu/.local/lib/python3.10/site-packages/deepspeed/moe/mappings.py
<listcomp>.   s    z"_gather_tokens.<locals>.<listcomp>)dim)	deepspeedutilsgroupsmpu
contiguousr   torchemptyr   r   r   commall_gather_into_tensorr   listsizeviewrangecat)r   r   r   
world_sizeshapeoutputtensor_listr   r   r   _gather_tokens   s"   

	r(   c                 C   s|   t jjj}t|}|dkr| S t|}| j| | dks-J d| d| j|  d| d| j| | }t| ||| |S )z/Divide a tensor among the tensor parallel ranksr   r   zinput dimension z (z2) is not divisible by tensor parallel world size ())	r   r   r   r   r   r   r%   r   r
   )r   r   r   total_chunks
this_chunk
chunk_sizer   r   r   _drop_tokens8   s    
r-   c                   @   4   e Zd ZdZedd Zedd Zedd ZdS )	_GatherTokensz1All gather tokens among the tensor parallel ranksc                 C   
   t ||S N)r(   graphr   r   r   r   r   symbolicJ      
z_GatherTokens.symbolicc                 C      || _ t||S r1   )r   r(   ctxr   r   r   r   r   forwardN      
z_GatherTokens.forwardc                 C      t || jd fS r1   )r-   r   )r8   grad_outputr   r   r   backwardS      z_GatherTokens.backwardN__name__
__module____qualname____doc__staticmethodr4   r9   r=   r   r   r   r   r/   G       

r/   c                   @   r.   )	_DropTokensz5Divide tokens equally among the tensor parallel ranksc                 C   r0   r1   )r-   r2   r   r   r   r4   [   r5   z_DropTokens.symbolicc                 C   r6   r1   )r   r-   r7   r   r   r   r9   _   r:   z_DropTokens.forwardc                 C   r;   r1   )r(   r   )r8   r   r   r   r   r=   d   r>   z_DropTokens.backwardNr?   r   r   r   r   rF   X   rE   rF   c                 C   .   t jjj}|d u st|dkr| S t| |S Nr   )r   r   r   r   r   r/   applyr   r   r   r   r   r   gather_tokensi      
rK   c                 C   rG   rH   )r   r   r   r   r   rF   rI   rJ   r   r   r   drop_tokensq   rL   rM   r	   )r   r   deepspeed.utils.bwcr   r   r   r(   r-   autogradFunctionr/   rF   rK   rM   r   r   r   r   <module>   s   


