o
    Ti                     @   s   d dl Z d dlmZ d dlmZ d dlmZ ddlm	Z	m
Z
mZmZ ddlmZmZmZ ddlmZ dd	lmZmZmZ d
ZdddZdS )    N)comm)get_accelerator)&InsertPostInitMethodToModuleSubClasses   )zero3_compileprefetchselective_gatheroffload_parameters)make_backendlaunch_compile_passesinit_schedule)patch_fake_tensor)get_deepcompile_handleadd_pre_backward_hookis_backend_inductor   c                    s  | j   d urt drd  _t   t }|| j|  |j	|j
t||j|j|j|j	 | j D ]}|j|_q4 j   jD ]}|  qC j  ttdrZtjtjj_|j
rf| jj}t !| | j" D ]}	 j#|	j$ }
d|	_%|&|	j$|	j'|	j(|
|	j% qk fdd}t)| |d u rg }|j*r|+dt,j-t*j.gf n|+dt,j-gf |+t/t,j-t0j1t2j2gf t3| ddl4m5}m6}m7} |D ]\}}||v s||v r| | qt8| _8t9  |j:ot| }dtj;j<_=t>||||j?d	S )
N6_DeepSpeedZeroOptimizer_Stage3__ipg_bucket_flat_buffer	linear_bkFc                     s0   t  jD ]\} } fdd|D  j| < qd S )Nc                    s*   g | ]}|j r j|j nt|jqS  )requires_grad:_DeepSpeedZeroOptimizer_Stage3__param_id_to_grad_partitionds_idtorch
zeros_like	ds_tensor).0param	optimizerr   M/home/ubuntu/.local/lib/python3.10/site-packages/deepspeed/compile/init_z3.py
<listcomp>;   s
    
z4init_z3.<locals>.set_grad_buffer.<locals>.<listcomp>)	enumeratefp16_groupsaveraged_gradients)i	sub_groupr   r   r   set_grad_buffer9   s
   
z init_z3.<locals>.set_grad_bufferr   r   )move_opt_statesmove_opt_states_syncinit_offload_opt_states)compile_kwargsfree_activation	debug_log)@r   hasattrr   r   empty_cacher   initdata_parallel_groupzero_reduce_bucket_sizedouble_buffersymmetric_memoryr   sync_before_reducesync_after_reducesync_before_allgathersync_after_allgathermodulemodules_original_parameters_parametersparameter_offload_remove_module_hooks_grad_acc_hooksremoveclearr   r   r   nn
functionallinear
group_namedistenable_symm_mem_for_group
parametersr   r   
ds_persistregister_z3_paramds_shaper   r   r	   appendr   add_z3_gather_releaseoffload_parameter_fwdWARMUPr   schedule_prefetchr   r   passes.offload_adam_statesr'   r(   r)   r   r   r+   	_inductorconfigsize_assertsr
   r,   )enginebackendcompile_configr*   scheduledcmhookrD   pgrad_bufferr&   r'   r(   r)   _passesr+   r   r   r   init_z3   sh   









r_   )N)r   	deepspeedr   rE   deepspeed.acceleratorr   +deepspeed.runtime.zero.partition_parametersr   r^   r   r   r   r	   rU   r
   r   r   r   utilr   r   r   rN   r_   r   r   r   r   <module>   s   