o
    TiZ                     @   s   d dl mZ d dlmZ d dlZd dlmZ d dlmZ	 d dl
mZ ddlmZ ddlmZ d	Zd Zd Zd
ededee dedededefdd	ZdS )    )defaultdict)ListN)GraphModule)get_accelerator   )get_deepcompile_handle)DSGraphParamManagerselective_gathergmgraph_idgraph_order
mem_budgetparam_managerbwdreturnc           '   
      s  |s| S d }|D ]
\}	}
|
r|	} nq|d u s||kr| S d}|  D ]=\}}tdd |jD }t|jdkrBtdd |jD nd}t|||}t dkr`td| d| d| d|  q#t |  D ]\}}|j	  D ]\}}|j
jr|j|  qqqhi  tttt}tt}|  D ]\}}|j	}|  D ]\}}|j| }|j|jj  |< q|| }|jjD ]F}|jtjjjjkrd	|jv sJ |jd	  |jd
 < d|jv sJ |jd
   |jd 7  < |jd ||jd
 < |jd ||jd
 < q|jd urB|jjD ]5}|jtjjjjkr@d	|jv s J |jd	  |jd
 < d|jv s2J |jd
   |jd 7  < qqfdd D }|j fdddd  fdd|D }t }|  }tj!|gt"t # d}t$|tj%j& |d ' }d}|d|  | } t dkrtd| d| d| d|   i }!|  D ]\}	}"|"j	  D ]\}}|j
|!|"j| < qqd}#t( }$|  D ]4\}}%|#|% | kr | S |#|%7 }#|!| }&|$)| t dkr td| d|% d|# d|&j*  q| S ) Nr   c                 s       | ]}|d  V  qdS    N .0mr   r   ]/home/ubuntu/.local/lib/python3.10/site-packages/deepspeed/compile/passes/selective_gather.py	<genexpr>+       z#selective_gather.<locals>.<genexpr>c                 s   r   r   r   r   r   r   r   r   ,   r   zselective_gather graph_id=z	 max_mem=z fwd_max_mem=z bwd_max_mem=tensor_sizer   device_time	wall_timec                    s   g | ]}| vr|qS r   r   r   ds_id)persistent_ds_idsr   r   
<listcomp>W   s    z$selective_gather.<locals>.<listcomp>c                    s   |   |   S )Nr   )r   )ds_id_to_sizeds_id_to_timer   r   <lambda>X   s    z"selective_gather.<locals>.<lambda>T)keyreversec                    s   i | ]}| | qS r   r   r   )r"   r   r   
<dictcomp>f   s    z$selective_gather.<locals>.<dictcomp>)deviceg?   zselective_gather max_mem=z total_mem=z MEM_MARGIN=z available_mem=zSet persistent: z size: z persistent_mem: z shape: )+itemsmaxfwd_memlenbwd_memdistget_rankprintsetparamsparam
ds_persistaddds_idsr   floatnumeldtypeitemsize	fwd_graphnodestargettorchopsdcallgather_paramdefaultmetaargs	bwd_graphsortr   total_memorytensorr(   current_device
all_reduceReduceOpMINitemr   set_persistentds_shape)'r
   r   r   profiling_resultscreate_inputs_fnr   r   r   last_backward_graph_idg_id	needs_bwdpeak_memproffwd_max_membwd_max_mempmnameds_paramds_id_to_prof_dtimeds_id_to_prof_wtimer3   
param_namer4   r   profilenr7   sorted_ds_idsaccelerator	total_memvals_to_bcast
MEM_MARGINavailable_memds_id_to_paramg_pmpersistent_memnz3size	param_objr   )r"   r#   r    r   r	      s   &
	
")collectionsr   typingr   r?   torch.fxr   deepspeed.commcommr/   deepspeed.acceleratorr   utilr   graph_paramr   NAMEmax_alloc_memlast_optimize_stepintr8   boolr	   r   r   r   r   <module>   s(   