o
    Ti                     @   sf   d dl Z d dlZd dlmZ ddlmZmZ ddlmZm	Z	m
Z
 ddlmZmZmZ dZd
dd	ZdS )    N)get_accelerator   )zero1_compilezero3_compile)make_backendlaunch_compile_passesinit_schedule)get_deepcompile_handleadd_pre_backward_hookis_backend_inductor   c                    s  | j d_jD ]}|  q	j  t }|| j|  |j	|j
t||j|jdd	 i  tjD ]x\}}jj| j| j| jt  dd |< dd  | D  |< d}	d}
|D ]H}|}||_j| }|r | |	 }|
rj| nd}||j|j||t| |	d7 }	d}
qd||j|j|tjdg|j|j dd qdq5 fd	d
}t!| |d u rg }|"dt#j$gf n|D ]}t%j&|d v rt'dqt(| t)| _)t*||d|j+dS )NFT)dtypedevicereturn_tensor_listc                 S   s   g | ]}|   qS  )clonedetach).0pr   r   M/home/ubuntu/.local/lib/python3.10/site-packages/deepspeed/compile/init_z1.py
<listcomp>*   s    zinit_z1.<locals>.<listcomp>r   r   )r   r   c                      s   t   _d S N)copyaveraged_gradientsr   grad_buffer	optimizerr   r   set_grad_buffer>   s   z init_z1.<locals>.set_grad_bufferz9A pass for ZeRO3 is not specified though ZeRO1 is enabled)compile_kwargsfree_activation	debug_log),r   contiguous_gradients_grad_acc_hooksremoveclearr	   initdata_parallel_groupzero_reduce_bucket_sizedouble_buffersymmetric_memoryr   sync_before_reducesync_after_reduce	enumeratebit16_groupsget_flat_partitionparams_in_partitionfirst_offsetpartition_sizegradient_accumulation_dtyper   current_device_nameget_param_idparam_idis_param_in_current_partitionregister_z1_paramshapeinttorchemptyr   r   r
   appendr   add_z1_reducer   add_z3_gather_release
ValueErrorr   r   r   r    )enginebackendcompile_configr   schedulehookdcigroupindex_in_partitionfirst_in_partitionr   r5   in_partitionbufoffsetr   optr   r   r   init_z1   sf   





*rN   r   )r   r:   deepspeed.acceleratorr   passesr   r   rA   r   r   r   utilr	   r
   r   WARMUPrN   r   r   r   r   <module>   s   