o
    Ti!                     @   sf   d Z ddlZddlmZ ddlmZ ddlmZ ddlmZ dd	 Z	G d
d dZ
G dd deZdS )z-
Copyright 2021 The Microsoft DeepSpeed Team
    N)get_accelerator)NotImplementedBuilder   )ReduceOp)TorchBackendc                  C   sB   t  d} | d u st| trd S |  }td|   d |S )NCCLCommBuilderz
DeepSpeed z built successfully)r   create_op_builder
isinstancer   loadprintabsolute_name)builderccl_cpp_module r   F/home/ubuntu/.local/lib/python3.10/site-packages/deepspeed/comm/ccl.pybuild_ccl_op   s   r   c                   @   s   e Zd ZdddZdd ZdS )
CCLHandlerNc                 C   s
   || _ d S N)ccl_comm_op)selfr   r   r   r   __init__   s   
zCCLHandler.__init__c                 C   s   d S r   r   r   r   r   r   wait   s   zCCLHandler.waitr   )__name__
__module____qualname__r   r   r   r   r   r   r      s    
r   c                       s  e Zd Zd0 fdd	Zdd Zdd	 Zejdd
fddZejdfddZ	d1ddZ
d1ddZd1ddZd1ddZd1ddZd2ddZd2ddZd1ddZd1d d!Zd1d"d#Zd3d$d%Zejdd
fd&d'Zejdd
fd(d)Z fd*d+Z fd,d-Z fd.d/Z  ZS )4
CCLBackendcclNc           	         s   t  | _| jd u rd| _d S tt| jdd||||d d| _|  }|  }| j	|}t
|t
jt  }tt| |d | j||| d| _tt|  g| _| j | _d S )NFr   torch)backendnamerank
world_sizetimeoutinit_methodr   T)r   r   initializedsuperr   r   r!   get_world_sizeget_rankget_kvs_addrr   tensortouint8r   current_device_name	broadcast
initializetuplerangegroupsget_available_collavailable_coll)	r   r!   r"   r#   mpur$   r%   sizemain_kvs	__class__r   r   r   %   s*   
zCCLBackend.__init__c                 C   s   | j S r   )r&   r   r   r   r   is_initialized<   s   zCCLBackend.is_initializedc                 K   s   || j v rAd|v r| |d |d< d|v r!|d |d |d< d|v r0|d |d |d< d| }t||   t| jS d| }t||   t| jS )Ngroupdstsrczself.ccl_comm_op.zsuper(CCLBackend, self).)r5   get_all_ranks_from_groupindexevalvaluesr   r   )r   r!   kwargsfuncr   r   r   run_collective?   s   


zCCLBackend.run_collectiveFc                 C   s   d}|r.|   d| }d}|| jv r#| |}| j|||||S | j||||||dS d}|| jv rC| |}| j||||S | j|||||dS )NF-all_reduce_caching)r!   r+   opmatch_idr<   async_op
all_reducer!   r+   rH   r<   rJ   )r7   r5   r?   r   rG   rE   rK   )r   r+   rH   r<   rJ   use_cachingrI   r!   r   r   r   rK   O   s&   



zCCLBackend.all_reducec                 C   s0   d}|| j v r| j||S | j|||d ddS )Ninference_all_reduceFrL   )r5   r   rN   rE   )r   r+   rH   r<   r!   r   r   r   rN   f   s   
zCCLBackend.inference_all_reducec                 C      | j d||||dS )Nr/   )r!   r+   r>   r<   rJ   rE   )r   r+   r>   r<   rJ   r   r   r   r/   m      zCCLBackend.broadcastc                 C   rO   )N
all_gather)r!   tensor_listr+   r<   rJ   rP   )r   rS   r+   r<   rJ   r   r   r   rR   p      zCCLBackend.all_gatherc                 C   rO   )Nreduce_scatter_tensor)r!   output_tensorinput_tensorrH   r<   rP   )r   rV   rW   rH   r<   rJ   r   r   r   rU   w   rT   z CCLBackend.reduce_scatter_tensorc                 C   s   | j d|||dS )Nall_gather_into_tensor)r!   rV   rW   r<   rP   )r   rV   rW   r<   rJ   r   r   r   rX   ~   s
   z!CCLBackend.all_gather_into_tensorc                 C      | j d|||||dS )Nall_to_all_single)r!   outputinputoutput_split_sizesinput_split_sizesr<   rP   )r   r[   r\   r]   r^   r<   rJ   r   r   r   rZ         zCCLBackend.all_to_all_singler   c                 C   rO   )Nsend)r!   r+   r=   r<   tagrP   )r   r+   r=   r<   ra   r   r   r   r`      rQ   zCCLBackend.sendc                 C   rO   )Nrecv)r!   r+   r>   r<   ra   rP   )r   r+   r>   r<   ra   r   r   r   rb      rQ   zCCLBackend.recvc                 C   rO   )Ngatherr!   r+   gather_listr=   r<   rP   r   r+   re   r=   r<   rJ   r   r   r   rc      rQ   zCCLBackend.gatherc                 C   rO   )Nscatterrd   rP   rf   r   r   r   rg      rQ   zCCLBackend.scatterc                 C   s   | j d||dS )Nbarrier)r!   r<   rJ   rP   )r   r<   rJ   r   r   r   rh         zCCLBackend.barrierc                 C   s   | j d|dS )Nmonitored_barrier)r!   r<   rP   )r   r<   r$   wait_all_ranksr   r   r   rj      s   zCCLBackend.monitored_barrierc                 C   rY   )Nreduce_scatter)r!   r[   
input_listrH   r<   rJ   rP   )r   r[   rm   rH   r<   rJ   r   r   r   rl      r_   zCCLBackend.reduce_scatterc                 C   rY   )Nreduce)r!   r+   r=   rH   r<   rJ   rP   )r   r+   r=   rH   r<   rJ   r   r   r   rn      s   zCCLBackend.reducec                    s   t t| |S r   )r'   r   	new_group)r   ranksr9   r   r   ro      ri   zCCLBackend.new_groupc                    s   t |}|  }| j||d k}t|tjt 	 }t
t| ||d | | j||||| | jt| d S Nr   )lenr)   r   get_sub_kvs_addrr   r+   r,   r-   r   r.   r'   r   r/   initialize_sub_commr@   r3   appendr1   )r   rp   r<   r7   r"   sub_main_kvsr9   r   r   
_new_group   s   zCCLBackend._new_groupc              	      sz   |d u rt t|  S d}g }z	 |tt| || |d7 }q ttfy-   Y nw t	|| j
vr;| || |S )Nr   Tr   )listr2   r(   ru   r'   r   get_global_rank
ValueErrorRuntimeErrorr1   r3   rw   )r   r<   r"   resultsr9   r   r   r?      s   z#CCLBackend.get_all_ranks_from_group)r   r   r   NNN)NFrq   )NNF)r   r   r   r   r;   rE   r   SUMrK   rN   r/   rR   rU   rX   rZ   r`   rb   rc   rg   rh   rj   rl   rn   ro   rw   r?   __classcell__r   r   r9   r   r   #   s,    










	r   )__doc__r   deepspeed.acceleratorr   deepspeed.ops.op_builderr   	reduce_opr   r   r   r   r   r   r   r   r   <module>   s   	
