o
    Ti                     @   s   d dl mZ d dlZd dlmZmZmZ d dlmZ d dl	m
Z ddlmZ ddlmZ dZd	Zd
ZdZdZdZdd ZdefddZdededee dedededefddZdS )    )ListN)GraphNodeGraphModule)get_accelerator   )create_predictor)DSGraphParamManagerprefetchg?g?g    eAg    eAFc                 C   s   t  dkrt|  d S d S )Nr   )distget_rankprint)message r   U/home/ubuntu/.local/lib/python3.10/site-packages/deepspeed/compile/passes/prefetch.pyprint_rank_0   s   r   nodec                 C   s    | j tjjjjksJ | jd S )Nr   )targettorchopsdcallgather_paramdefaultargs)r   r   r   r   	get_ds_id    s   
r   gmgraph_idgraph_order
mem_budgetparam_managerbwdreturnc           +         s  t   dt  }tj|gtt   d}	t|	tj	j
 |	d  }|r,|| jn|| j}
|r8|| jn|| j}|rD|| jn|| j}dd |
D }dd |D }dd |D | j}tfdd	|jD }td
| d| dt    dt    dt    d| dt  d}d}|jD ]&}|j|v r||j d }||j d }qtd|j d ||f||j< qt }tt|j}g }g }g }d}t|D ];\}}|jdkr|t|d k sJ |j|v sJ ||d  }||j \}}|| |ks|t krNt|dkr2|!d}tfdd	|D }||8 }|"| t|dks1J nt|dkrA|"| g }nn|| |ks|t ks|j#tj$j%j&j'krtfdd	|D } || }!||j }"|| |j  }#t(|!|"d |#ko| |j  t)k }$t|dkr|$s|"| g }|"| ||j 7 }|"| |jdkr|j#tj$j%j*kr||d  jdkr|D ] }%t|%dksJ |"|% tfdd	|%D }||8 }qt|dkr|"| |tfdd	|D 8 }|dksJ |dksJ qt+ }&i  t|D ]?}t,|t-r2|&.| fdd}'|' |j< qdd	 |D }( fdd	|(D })dd	 |D }*|&j/tj$j%j0j'||)|*fd q|&1  |&| _| S )N   )devicer   c                 S   s   i | ]\}}}}|||fqS r   r   ).0name	alloc_memdeltapeakr   r   r   
<dictcomp>1   s    z%schedule_prefetch.<locals>.<dictcomp>c                 S   s   i | ]
\}}}|||fqS r   r   )r$   r%   device_time	wall_timer   r   r   r)   2   s    c                 S   s   i | ]\}}||qS r   r   )r$   r%   sizer   r   r   r)   3       c                    s(   g | ]}|j tjjjjkr |j qS r   )r   r   r   r   r   r   r%   )r$   ntensor_size_dictr   r   
<listcomp>7   s   ( z%schedule_prefetch.<locals>.<listcomp>zschedule_prefetch graph_id=z	 max_mem=z available_memory=z memory_allocated=z max_allocated=z total_param_size=z margin=znode z not in mem_dictplaceholderc                       g | ]} |j  qS r   r%   r$   ag_noder/   r   r   r1   _   r-   c                    r3   r   r4   r5   r/   r   r   r1   q   r-   g333333?c                    r3   r   r4   r5   r/   r   r   r1      r-   c                    r3   r   r4   r5   r/   r   r   r1      r-   c                    s
    | j  S )Nr4   )r.   envr   r   <lambda>   s   
 z#schedule_prefetch.<locals>.<lambda>c                 S   s   g | ]}|j d  qS )r   r   r5   r   r   r   r1      r-   c                    r3   r   r4   )r$   
param_noder7   r   r   r1      r-   c                 S   s   g | ]}t |qS r   )r   r5   r   r   r   r1      s    r:   )2r   total_memoryMARGINr   tensorr#   current_devicer   
all_reduceReduceOpMINitembwd_memfwd_membwd_timefwd_timebwd_tensor_sizesfwd_tensor_sizesgraphsumnodesr   available_memorymemory_allocatedmax_memory_allocatedr%   r   listreversed	enumerateoplenMAX_BUFFERED_SIZEpopappendr   r   r   r   r   maxMAX_FUSE_SIZEreload_parameterr   
isinstancer   	node_copycall_functionprefetch_params_fusedlint)+r   r   r   profiling_resultscreate_inputs_fnr   r   r    max_memvals_to_bcastmemop_timetensor_sizesmem_dict	time_dictrJ   total_param_sizeprev_mem	prev_peakr   comm_predictor	order_revnew_order_revprefetch_agsprefetch_ag_groupsag_tensor_size_sumi	next_nodenext_alloc_mem	next_peakfused_ag_nodestotal_ag_tensor_sizecurrent_ag_sizepred_time_currentpred_time_nextpred_time_fuseddo_fuseag_group	new_graphnew_nodeparam_nodesparam_nodes_copyds_idsr   )r8   r0   r   schedule_prefetch%   s   >







&


r   )typingr   r   torch.fxr   r   r   deepspeed.acceleratorr   deepspeed.commcommr   profilers.comm_profiler   graph_paramr	   NAMEFUSE_FACTORr=   rY   rU   run_prefetch_passr   r   intfloatboolr   r   r   r   r   <module>   s0   