o
    Ti5                     @   sZ  d dl Z d dlZd dlmZmZmZ d dlmZ d dlZd dl	m
Z
mZ d dlmZmZmZ zd dlmZ W n	 ey?   Y nw d dlmZ d dlmZ d dlmZ d d	lmZ d
efddZdae ryejej ej!ej"ej#ej$ej%j&j'j(ej)hZ*dd Z+dd Z,dZ-g Z.dd Z/dd Z0dEde1defddZ2dd Z3ded
ee
 fddZ4dedeee(e(f  d
ee
 fd d!Z5d"e
d
efd#d$Z6d"e
d
efd%d&Z7d'ej8d
e(fd(d)Z9d
e(fd*d+Z:G d,d- d-Z;dFd.d/Z<defd0d1Z=defd2d3Z>ded4e1fd5d6Z?ded7ee
 d8ee1 fd9d:Z@G d;d< d<ZAded=e1fd>d?ZBd@e
d
efdAdBZCdCdD ZDdS )G    N)ListTupleDict)defaultdict)NodeGraph)map_aggregateArgumentmap_arg)unset_fake_temporarily)get_accelerator)required_torch_version)DeepCompileBuilderreturnc                   C   s   t dddot  dkS )Ng@g@)min_versionmax_versioncuda)r   r   device_name r   r   J/home/ubuntu/.local/lib/python3.10/site-packages/deepspeed/compile/util.pyis_deepcompile_supported   s   r   c                   C   s   t d u r	t  a t S N)	dc_handler   loadr   r   r   r   get_deepcompile_handle.   s   
r   c                 C   s   | dkS )Ninductorr   )backendr   r   r   is_backend_inductor5   s   r   Fc                 C   s   t |  d S r   )pre_backward_hooksappend)hookr   r   r   add_pre_backward_hook=   s   r!   c                 C   s$   t D ]}|  qt }||  d S r   )r   r   start_backward)!is_gradient_accumulation_boundaryr    dcr   r   r   deepcompile_backward_prologueA   s   r%   msgenablec                 C   s$   t  dkr|rt|  d S d S d S )Nr   )distget_rankprint)r&   r'   r   r   r   	log_rank0J   s   r+   c                   C   s<   t   tjjjjtjjjjtjjjjtjjjjtjj	j
jhS r   )r   torchopsatentdefaultviewdetachpermuter$   wait_allgatherr   r   r   r   get_no_copy_opsO   s   r5   graphc                 C   s   dd | j D S )Nc                 S   s   g | ]	}|j d kr|qS )placeholder)op.0nr   r   r   
<listcomp>Y       z#get_input_nodes.<locals>.<listcomp>)nodesr6   r   r   r   get_input_nodesX   s   r@   index_to_ds_idsc                    s   t |   fdd|D S )Nc                    s   g | ]	\}}} | qS r   r   )r:   i_all_input_nodesr   r   r<   ^   r=   z#get_param_nodes.<locals>.<listcomp>)r@   )r6   rA   r   rD   r   get_param_nodes\   s   rF   nodec                 C   s   d| j v o	| j d S )NcommmetarG   r   r   r   
is_comm_opa   s   rL   c                 C   s
   | j tv S r   )targetsym_size_opsrK   r   r   r   exclude_from_act_offloade      
rO   dtypec                 C   sD   | t jkr	d}|S | t jkrd}|S | t jkrd}|S td|  )N         Unsupported dtype: )r,   float32float64float16
ValueError)rQ   	elem_sizer   r   r   dtype_to_elem_sizei   s   


r[   c                 C   s   t | jdkr	dnttj| j}| j}|tjkrd}|| S |tj	ks)|tj
kr/d}|| S |tjks9|tjkr?d}|| S |tjkrJd}|| S td| )Nr      rR   rS   rT   rU   )lenshape	functoolsreduceoperatormulrQ   r,   rV   rW   int64rX   bfloat16boolrY   )tensor_metanumelrQ   rZ   r   r   r   tensor_meta_sizeu   s   "


rh   c                   @   sz   e Zd Zdd Zdd Zdd Zdeded	d
fddZded	efddZ	ded	efddZ
ded	efddZdddZd
S )NodeValueOffloadHelperc                 C   s   || _ i | _i | _d S r   )device
env_valuesoriginal_device)selfrj   r   r   r   __init__   s   
zNodeValueOffloadHelper.__init__c                 C   sV   t |r)t  |j}|d }|| j|< |W  d    S 1 s$w   Y  |S )Ncpu)r,   	is_tensorr   rj   tor2   rl   )rm   vrj   	offloadedr   r   r   _to_cpu   s   

 zNodeValueOffloadHelper._to_cpuc                 C   s(   t |r|| jv r|| j| S |S r   )r,   rp   rl   rq   )rm   rr   r   r   r   	_from_cpu   s   z NodeValueOffloadHelper._from_cpunamerr   r   Nc                    s   t | fddj|< d S )Nc                    s    r | S | S r   )rt   xoffloadrm   r   r   <lambda>       z-NodeValueOffloadHelper.save.<locals>.<lambda>r   rk   )rm   rv   rr   rz   r   ry   r   save   s   zNodeValueOffloadHelper.savec                    s   t  j|  fddS )Nc                    s
     | S r   )ru   rw   rm   r   r   r{         
 z-NodeValueOffloadHelper.load.<locals>.<lambda>r}   rm   rv   r   r   r   r      s   zNodeValueOffloadHelper.loadc                 C   s
   | j | S r   rk   r   r   r   r   get_offloaded_value   rP   z*NodeValueOffloadHelper.get_offloaded_valuec                 C   s
   || j v S r   r   r   r   r   r   	has_value   rP   z NodeValueOffloadHelper.has_valuec                 C   s   | j   | j  d S r   )rk   clearrl   r   r   r   r   r      s   
zNodeValueOffloadHelper.clear)r   N)__name__
__module____qualname__rn   rt   ru   strr	   r~   r   r   re   r   r   r   r   r   r   ri      s    	ri   c                    s,   ddl m fdd t|  fddS )Nr   )is_fakec              	      s   | rMt  > |  r+tj| j| j d u r| jn | j| j| 	 dW  d    S tj
| j| j d u r8| jn | jdW  d    S 1 sHw   Y  | S )N)rQ   rj   layoutrequires_grad
pin_memory)rQ   rj   r   )r   is_floating_pointr,   randnr^   rQ   rj   r   r   	is_pinnedzeros)r/   )rj   r   r   r   convert   s$   	 z!materialize_fake.<locals>.convertc                    s    | S r   r   rw   )r   r   r   r{      s    z"materialize_fake.<locals>.<lambda>)torch._subclasses.fake_tensorr   r   )rr   rj   r   )r   rj   r   r   materialize_fake   s   r   c                    s   dd t | jD i i t  dtdtf fddt| jD ]tjfdd tjfd	d q$fS )
Nc                 S      i | ]\}}||qS r   r   r:   rB   rG   r   r   r   
<dictcomp>       z!get_last_uses.<locals>.<dictcomp>r;   userc                    s   d}d }|j  v r!| v r!| }| }|  }| }||k}| vs'|rH|j  v r0| }|| < |g |  |rJ| |  d S d S d S )NF)rM   
setdefaultr   remove)r;   r   updateknown_last_use	last_userlast_use_positionknown_last_use_position)no_copy_opsnode_to_last_usepositionuser_to_last_usesr   r   register_last_uses   s"   
z)get_last_uses.<locals>.register_last_usesc                    
   |  S r   r   r;   rG   r   r   r   r{      r   zget_last_uses.<locals>.<lambda>c                    r   r   r   r   r   r   r   r{      r   )	enumerater>   r5   r   reversedr
   argskwargsr?   r   )r   rG   r   r   r   r   r   get_last_uses   s   r   c                    sf   t tt  dtdtf fddt| jD ]tjfdd tjfdd qS )Nr;   r   c                    sD   |j dkrd S |j  v r| }|  | d S |  | d S )Noutput)rM   extendr   )r;   r   users)r   node_to_usesr   r   r      s   

z)get_real_uses.<locals>.register_last_usesc                    r   r   r   r   r   r   r   r{      r   zget_real_uses.<locals>.<lambda>c                    r   r   r   r   r   r   r   r{      r   )	r   listr5   r   r   r>   r
   r   r   r?   r   )r   rG   r   r   r   get_real_uses   s   
r   	file_pathc                    s  dd t | jD }t| \}}d}t }|}g }g d}	||	 ddlm}
 |
| }tdd |jd D  | jD ]}|| ||v rR|| D ]}|	| qJd	|j
v s_J d
| dtdd |D }t fdd|D }||v r|||  ||  nd}|j|j
d	 ||dd |jD t|j ||v r|| nd|||v r|| ndt|g
}|| t||}q;dd l}t|ddd}||}|| W d    n1 sw   Y  td|  td|  d S )Nc                 S   r   r   r   r   r   r   r   r      r   z)count_inflight_values.<locals>.<dictcomp>r   )
r   tensor_sizeinflight_sizeinflight_size_in_outputr   r   r   lifetimer   inflight_valuesr\   )get_output_nodec                 S   s   g | ]	}t |tr|qS r   )
isinstancer   r9   r   r   r   r<     r=   z)count_inflight_values.<locals>.<listcomp>r   zNode z does not have tensor_sizec                 s   s    | ]}|j d  V  qdS r   NrI   r9   r   r   r   	<genexpr>  s    z(count_inflight_values.<locals>.<genexpr>c                 3   s"    | ]}| v r|j d  V  qdS r   rI   r9   values_in_outputr   r   r     s     c                 S   s   g | ]
}t |tr|jqS r   )r   r   rv   )r:   ar   r   r   r<   !  s    NAw )modenewlinezMax inflight size: zData successfully written to )r   r>   r   setr   fxr   r   addr   rJ   sumrv   r   r   keysmaxcsvopenwriter	writerowsr*   )r6   r   r   r   r   max_inflight_sizer   csv_filenamecsv_dataheaderr   output_noderG   	to_deleter   r   r   rowr   filer   r   r   r   count_inflight_values   sF   


  

r   param_nodes_bwfwd_output_namesc                 C   sL   t | }tdd |D }g }|D ]}|j|v r#|j|vr#||j q|S )Nc                 S   s   g | ]}|j qS r   rv   r9   r   r   r   r<   9  r|   z-get_activation_node_names.<locals>.<listcomp>)r@   r   rv   r   )r6   r   r   input_nodesparam_node_namesactivation_node_namesin_noder   r   r   get_activation_node_names6  s   

r   c                   @   s$   e Zd Zdd Zdd Zdd ZdS )TensorOffloadHelperc                 C   s(   i | _ i | _i | _g | _i | _i | _d S r   )devicesbase_tensorsviewsarg_listrs   
non_tensorr   r   r   r   rn   F  s   
zTensorOffloadHelper.__init__c                    s    fdd}t  }|D ] | r|t  q|D ]! t }| r1 j| j|<  | j|< n | j|< | j| q|D ] | rL j	
d _	q?d S )Nc                    s    t  o   ot| d S )Nds_id)r,   rp   _is_viewhasattr)tensorr   r   r   is_base_tensorP  s    z3TensorOffloadHelper.offload.<locals>.is_base_tensorro   )r   r   idrj   r   r   r   r   r   datarq   )rm   argumentr   base_tensor_idsa_idr   r   r   rz   N  s$   
zTensorOffloadHelper.offloadc                 C   s   i }| j D ],}|| jv r1| j| }|r'| j| || j| _| j| ||< q| j| |||< qg }| j D ]}|| jv rF|||  q7|| jv rS|| j|  q7|S r   )r   r   r   rq   r   r   r   )rm   in_placeloaded_base_tensorsr   rj   resultsr   r   r   reloadq  s"   





zTensorOffloadHelper.reloadN)r   r   r   rn   rz   r   r   r   r   r   r   D  s    #r   prefixc              	      s   dt f fdd}t| j}|D ]4}|jdkrq| | d|j }d|j }| jd||fi |d W d    n1 s?w   Y  qd S )	Nlabelc              	      s>   t  dkrt  d|  dt   dt    d S d S )Nr    z alloc_mem=z	 max_mem=)r(   r)   r*   r   memory_allocatedmax_memory_allocated)r   r   r   r   show_memory  s
   $z*add_mem_profile_nodes.<locals>.show_memoryr   zMem show_memory_call_functionr   )r   r   r>   r8   inserting_afterrv   create_node)r6   r   r   r>   rG   r&   rv   r   r   r   add_mem_profile_nodes  s   

r  r;   c                 C   s   | j tjjjjkS r   )rM   r,   r-   r$   release_paramr0   r   r   r   r   is_release_node  s   r  c                 C   s*   t | D ]\}\}}||kr|  S qdS )N)r   )graph_ordertarget_graph_idindexgraph_idrC   r   r   r   get_index_by_graph_id  s
   r	  )Fr   )Er_   ra   typingr   r   r   collectionsr   r,   torch.fxr   r   torch.fx.noder   r	   r
   r   r   ImportErrordeepspeed.commrH   r(   deepspeed.acceleratorr   deepspeed.utils.torchr   deepspeed.ops.op_builder.dcr   re   r   r   geleeqnegtltr-   r.   sym_sizeintgetitemrN   r   r   backward_startedr   r!   r%   r   r+   r5   r@   rF   rL   rO   rQ   r[   rh   ri   r   r   r   r   r   r   r  r  r	  r   r   r   r   <module>   sh   
		&
&$8N