o
    Ti                     @   sH   d dl Zd dlZd dlmZ d dlmZ d dlm	Z	 G dd de
ZdS )    N)get_accelerator)PackbitsBuilderc                   @   sF   e Zd ZdddZdd Zdd Zdd	 Zd
d Zdej	fddZ
dS )CompressedBackendNc                 C   sb   |d u rt jtt  d| _n	|| _| j | _t j| jd| _t j| jd| _	t
  | _d S )N)ranksgroup)dist	new_grouprangeget_world_sizeworld_groupmpuget_data_parallel_groupsizeget_rankrankr   loadpacker)selfr    r   U/home/ubuntu/.local/lib/python3.10/site-packages/deepspeed/runtime/comm/compressed.py__init__   s   zCompressedBackend.__init__c           	      C   sd   g }||kr%t |D ]}||kr|tj|| ||d q
|||< q
|S |tj|||d |S N)srcr   )r   dst)r
   appendr   irecvisend)	r   r   r   r   sendbufrecvbufrootreqidxr   r   r   
my_igather   s   
zCompressedBackend.my_igatherc                 C   sT   ||kr t |D ]}||krtj|| ||d q|||< qd S tj|||d d S r   )r
   r   recvsend)r   r   r   r   r   r   r    r"   r   r   r   	my_gather%   s   
zCompressedBackend.my_gatherc                 C   s&   | j | | | j}||dS N)r   packbitsfloatnumelr   reshape)r   bufferr   packedr   r   r   pack/   s   zCompressedBackend.packc                 C   s(   | j || | j}||d|S r'   )r   
unpackbitsr+   r   r,   to)r   r-   r   dtypeunpackedr   r   r   unpack4   s   zCompressedBackend.unpackbuffer_mc                    s  |  }t|dkrt|}| }| }||kr,tj|| |jd}t||g}|| tj	
|tt| ||| d  dd   | || j tjtj| j t| j gd jjd}	fddt| j D }
fd	dt| j D }tj|	t|
| jd
 tj|| jd
 |	tj }| || j tjt|d| j   d}|| tj	
|t|  }|||| d  dd   | |dtj}tj| j t|d g|	j|jdfddt| j D }tj| j dgj|jd  fddt| j D }tj||d | jd
 tj||| jd
 t|}|tj }|j!"| || j tj  j! ||krl|d| }t|dkrx|#|}|S )N   )deviceg      g       @r   r2   r7   c                       g | ]} | qS r   r   .0r"   )sign_list_packed_tmpr   r   
<listcomp>P       z:CompressedBackend.compressed_allreduce.<locals>.<listcomp>c                    s$   g | ]}t jd  jt  dqS )r6   r8   )torchzerosr2   r   current_device_name)r;   _)worker_scaler   r   r=   R   s    r   c                    r9   r   r   r:   )recvbuf_sign_server_tmpr   r   r=   o   r>   c                    r9   r   r   r:   )recvbuf_scale_server_tmpr   r   r=   v   r>   )$r   lenr?   flattenr+   r@   r7   catadd_linalgnormnpsqrtset_signboolr*   mul_r/   typeint8r   r2   r
   r   all_to_all_singlestackr   
all_gatheruint8r4   float32sumdatacopy_r,   )r   r5   worker_errorserver_error
local_rankoriginal_shapeoriginal_sizeworker_error_sizeempty_tensorrecvbuf_signsign_list_packedrecvbuf_scaleflattened_recvbuf_signcompensated_server_mserver_scaleserver_sign_packedrecvbuf_sign_serverrecvbuf_scale_serverflattened_recvbuf_sign_serverr   )rE   rD   r<   rC   r   compressed_allreduce9   sr   

0

$



z&CompressedBackend.compressed_allreduce)N)__name__
__module____qualname__r   r#   r&   r/   r4   r?   tensorrm   r   r   r   r   r      s    


r   )numpyrL   r?   deepspeed.commcommr   deepspeed.acceleratorr   deepspeed.ops.op_builderr   objectr   r   r   r   r   <module>   s   