o
    Tig+                     @   s  U d dl mZmZmZ d dlZd dlZd dlZd dlmZm	Z	 z%d dl
m  mZ d dlZd dlZd dlmZ d dlmZ d dlmZ W n	 eyM   Y nw d dlmZ dd	lmZ dd
lmZ ddlmZ ddlm Z  ddl!m!Z!m"Z"m#Z# ddl$m%Z%m&Z&m'Z'm(Z(m)Z) ddl*m+Z+ ddl,m-Z-m.Z. da/da0da1dZ2i Z3ee4ef e5d< g a6i a7ee4ef e5d< g Z8i Z9g a:d a;de<fddZ=dd Z>de4fddZ?defddZ@	d+d ee d!e	d"e4d#ee4 d$eAd%eBd&dfd'd(ZCi ddfd)d*ZDdS ),    )DictListCallableN)GraphGraphModule)make_boxed_func)aot_module_simplified)unset_fake_temporarily)get_accelerator   )add_free_activations)DSGraphParamManager)ProfilingResult)MemoryProfilingInterpreter)patch_compiled_funcunpatch_compiled_funcget_backward_inputs)get_input_nodesget_activation_node_namesget_index_by_graph_idget_deepcompile_handle	log_rank0)get_wrapped_partitioner)register_custom_ops$patch_create_aot_dispatcher_functionparam_managerprofiling_resultsnamec                 C   s   |t | < d S N)
opt_passes)r   opt_pass_fn r"   M/home/ubuntu/.local/lib/python3.10/site-packages/deepspeed/compile/backend.pyregister_compile_pass2   s   r$   c                 C   sj   t | tsJ dt|  | D ] \}}t |ts"J dt| t |ts0J dt| q| ad S )Nz#schedule should be a list, but got z4Each step in schedule should be an integer, but got z3Passes at a certain step should be a list, but got )
isinstancelisttypeintremaining_schedule)schedulesteppassesr"   r"   r#   init_schedule6   s
   r-   global_stepsc                 C   s~   t tdkr;| td d kr=td\}atd|  dt d tj  t   t	  t
  t  t  d S d S d S )Nr   z'Launching compile passes: global_steps=z passes=T)lenr)   popnext_passesr   torch_dynamoresetr   r   graph_orderclearr   r   )r.   _r"   r"   r#   launch_compile_passesB   s   

r8   graphc                 C   s   g }g }|j D ]1}||jd|jv r|jd ndd|jv r"|jd ndf ||jd|jv r4|jd ndf q|rQ|||  _|||  _|||  _|||  _d S |||  _|||  _	|||  _
|||  _d S )Ndevice_time        	wall_timetensor_sizer   )nodesappendr   meta	bwd_graphbwd_timebwd_tensor_sizesbwd_mem	fwd_graphfwd_timefwd_tensor_sizesfwd_mem)graph_idr9   membwdr   	node_timetensor_sizesnr"   r"   r#   set_time_and_tensor_sizeQ   s    
&





rO   Fr    gmrI   r5   
mem_budgetrK   returnc
              
   C   s&  t   t   t  t   W d    n1 sw   Y  t| D ]k\}
}td|
 d| d| |	d |||||||||}|d uro|}|j	  |
  t||	d}|j|   dd |jD }t||j||| t   t   t  t   W d    n1 sw   Y  q%d S )NzRunning opt pass z for graph z. bwd=enable)	debug_logc                 S   s    g | ]\}}}}||||fqS r"   r"   ).0r   current_allocdeltapeakr"   r"   r#   
<listcomp>   s     z"run_opt_passes.<locals>.<listcomp>)r	   r
   synchronizegccollectempty_cache	enumerater   r9   lint	recompiler   run
mem_recordrO   )r    rP   rI   r5   r   create_inputs_fnrQ   r   rK   rU   ir!   gm_newmem_profrJ   r"   r"   r#   run_opt_passesf   s2   


rh   c                    s"   t   dtf fdd}|S )NrP   c                    sf  t | j tdd |t f tdd |D }|r)dd t|D ntdd |D s6J dd	d t|D t	|  t
vrWt t
 < t
  _t
  _ fd
d} fdd}dkrdd }t| |||||td}tjjdi |S dkrt ||||t ddlm} |tjj_tj| |S td )Nc                 S   s   t | r| jS dS )NF)r2   	is_tensorrequires_grad)xr"   r"   r#   <lambda>   s    z2make_backend.<locals>.backend_fn.<locals>.<lambda>c                 s   s    | ]}t |d V  qdS )ds_idN)hasattrrV   vr"   r"   r#   	<genexpr>   s    z3make_backend.<locals>.backend_fn.<locals>.<genexpr>c                 S   ,   g | ]\}}t |tjjr||j|jfqS r"   )r%   r2   nn	Parameterrm   ds_shaperV   re   	input_valr"   r"   r#   rZ          z4make_backend.<locals>.backend_fn.<locals>.<listcomp>c                 s   s(    | ]}t |tjjrt|d V  qdS )param_idN)r%   r2   rs   rt   rn   ro   r"   r"   r#   rq      s    z%All param inputs should have param_idc                 S   rr   r"   )r%   r2   rs   rt   ry   shaperv   r"   r"   r#   rZ      rx   c                    s   t   }ttd }td}t| j|t< ||t|d    tt	| tt
 fdddtdd
 r:td7 atd|t   | f td	| d
 dt   d| j d | jS )Nr   r   c                      s    S r   r"   r"   real_inputs_with_rngr"   r#   rl      s    zImake_backend.<locals>.backend_fn.<locals>.make_fw_graph.<locals>.<lambda>r;   F
r    rP   rI   r5   r   rd   rQ   r   rK   rU   fwdzFwd end 
 graph_id= alloc_mem= graph=rS   )timer/   r5   fwd_real_inputsr0   r   r9   r   rh   r1   r   remaining_bwd_compile_countopt_pass_timesr?   r   r
   memory_allocated)rP   sample_inputs
time_startgraph_indexreal_inputs)rU   rI   needs_backwardparam_indicesr{   r#   make_fw_graph   s2   

"z7make_backend.<locals>.backend_fn.<locals>.make_fw_graphc                    s@  t   }tt}td| d dt   d| j d t }t|dkr*d S |	  t
t| tt fddd	td
d
 rkt | j\}}dd |D fddt| jD }t| jt| j|| td8 atdkrvt  td| d dt   d| j d |   td|t   | f | jS )Nz
Bwd start r   r   r   rS   r   c                      s   t  S r   )tupler"   )bwd_real_inputsr"   r#   rl      s    zImake_backend.<locals>.backend_fn.<locals>.make_bw_graph.<locals>.<lambda>r;   Tr}   c                 S   s   g | ]}|j qS r"   r   rV   rN   r"   r"   r#   rZ      s    zKmake_backend.<locals>.backend_fn.<locals>.make_bw_graph.<locals>.<listcomp>c                    s   g | ]
}|j  vr|j qS r"   r   r   )param_namesr"   r#   rZ      s    r   zBwd end rK   )r   r   r5   r   r
   r   r9   r   r/   r0   rh   r1   r   r   get_bwd_mappingr   r   r   r   r   ra   r   r?   )rP   r   r   r   bwd_inputs_stackparam_nodes_bwr7   non_param_input_names)rU   free_activationrI   )r   r   r#   make_bw_graph   sN   
"
"z7make_backend.<locals>.backend_fn.<locals>.make_bw_grapheagerc                    s    fdd}|S )Nc                    s    | |d u r	d S t | jS r   )r   forward)rP   r   make_graph_fnr"   r#   compiler_fn  s   zOmake_backend.<locals>.backend_fn.<locals>.make_compiler_fn.<locals>.compiler_fnr"   )r   r   r"   r   r#   make_compiler_fn   s   z:make_backend.<locals>.backend_fn.<locals>.make_compiler_fn)fw_compilerbw_compilerpartition_fninductorr   )#get_wrapped_choose_saved_values_setzUnsupported backend r"   )idr9   pytreetree_anyr5   r?   anyr_   allr   r   r   r   r   r   r   r2   r3   optimizer   r   partitionerr   
_functorchpartitionerschoose_saved_values_set	_inductorcompile
ValueError)rP   r   z3_partitionr   r   r   aot_modr   backendcompile_kwargsrU   r   )rI   r   r   r#   
backend_fn   sD   




 4z make_backend.<locals>.backend_fn)r   r   )r   r   r   rU   r   r"   r   r#   make_backend   s    r   )F)Etypingr   r   r   r   r\   r2   torch.fxr   r   torch.utils._pytreeutils_pytreer   torch._dynamotorch._inductor.schedulerfunctorch.compiler   torch._functorch.aot_autogradr   torch._subclasses.fake_tensorr	   ImportErrordeepspeed.acceleratorr
   fxr   graph_paramr   	profilersr   profilers.graph_profiler   r   r   r   utilr   r   r   r   r   r   r   r   r   r   r)   next_pass_stepr1   current_passesr   r(   __annotations__r5   r   r   r    r   r   strr$   r-   r8   rO   floatboolrh   r   r"   r"   r"   r#   <module>   sl   	
&