o
    ߗi^                     @   s   d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
Z
d dlZd dlZd dlmZmZmZmZmZmZmZmZmZ d dlmZ d dlZd dlmZmZmZ d dlmZ d dlm Z  d dl!m"Z" d dl#m$Z$ d d	l%m&Z&m'Z' d d
l(m)Z) d dl*m+Z+ ddl,m-Z-m.Z. ddl/m0Z0m1Z1m2Z2m3Z3m4Z4 ddl5m6Z6 e7e8Z9ee Z:e ;dddgZ<g dZ=e>dde?fddZ@		dAdee0 de?deeA ddfddZBdee0 dejCfddZD		 dBdee: d eeAeAf d!eeA d"eEddf
d#d$ZFd eeAeAf deeAe<f fd%d&ZGd'ejj$de:ddfd(d)ZHejIded fd*d+ZJG d,d- d-ZKG d.d/ d/ZLejMG d0d1 d1ZNeO ZPd2ed3eddfd4d5ZQd6eAdefd7d8ZRdd9d:ed;eAf d<ejSjTd=eeAef d>eeeAejUf  deAf
d?d@ZVdS )C    N)	AnyCallableDictIOIteratorListOptionalTypeUnion)patch)
draw_graphget_aot_graph_nameget_graph_being_compiled)fx)save_graph_repro)get_debug_dir)GraphModule)_extract_tensor_metadataTensorMetadata)legalize_graph)tree_map   )configir)BaseSchedulerNodeFusedSchedulerNodeNopKernelSchedulerNode
OutputNodeSchedulerNode)VBufMetanamen_origin)dotz-Gnslimit=2z-Gnslimit1=2z-Gmaxiter=5000returnc                   C   s2   zt jddgt jd W dS  t jy   Y dS w )Nwhichr#   )stderrTF)
subprocesscheck_outputPIPESubprocessError r+   r+   S/home/ubuntu/transcripts/venv/lib/python3.10/site-packages/torch/_inductor/debug.pyhas_dot-   s   r-   Fnodesprint_graphfnamec           	   	   C   s   t  s
td dS |du rt }t| }|jD ]?}d|jvr q|jd j}t|t	r<t|d t
r8|d f}n|d }d}t|tjrH|jj}t||ddddd}||jd< q|r^t| ti |}t| |j  t||dtjjd dS )z$
    Draw a graph in fname.svg.
    z*draw_buffers() requires `graphviz` packageNfusion_metar   tensor_metaF)
clear_metadot_graph_shape)r-   logwarningr   create_fx_from_snodesr.   metagroup
isinstancetupleintr   ComputedBufferdatadtyper   printr   r   graphlintr   r   tracer4   )	r.   r/   r0   rA   noder9   r?   metadatagmr+   r+   r,   draw_buffers6   s6   






rG   snodesc              
      sf  dt dtdtf fdd}tdg d}i }i }tj }d}g }d}| D ]}	|	 r1d	}
|
}n-|		 r:d
}
|
}n$t
|	trDd}
|
}nt
|	trOd}
|	j}nt
|	trZd}
|	j}ntdtjj|	 d}|
 d| }||}i }t|	drd|	 i}|j|d|d}dtttf dtf fdd  |	r|| |	 }||_|||	|
|jd< |||< |	 D ]}||| < q|du r|}q&| D ]Q}	|	 }|	jj }|| }g }|D ]9}|j|v r||j }n!|!| |"|j}|||j< W d   n	1 sw   Y  ||krq|| qt#||_$q|%t&|dkr,|d  |S t#| |S )B
    Creates a FX Graph from a list of SchedulerNode objects.
    r!   r$   .c                 S   s   dt dtfdd}| |_|S )Nargsr$   c                  W   s   dS Nr   r+   )rJ   r+   r+   r,   func1j      z;create_fx_from_snodes.<locals>.get_fake_func.<locals>.func1)r   r<   __name__)r!   rL   r+   r+   r,   get_fake_funci   s   z,create_fx_from_snodes.<locals>.get_fake_func
FusionMeta)r9   snodetypeNexterntemplatenopcomputefusedzUnknown node typeoriginal_atenz: 
get_devicedevicer+   rJ   kwargsrQ   c                    s8   t | trt fdd| jD S tdd |  D S )Nc                 3   s    | ]} |V  qd S Nr+   ).0x	in_outputr+   r,   	<genexpr>   s    z;create_fx_from_snodes.<locals>.in_output.<locals>.<genexpr>c                 s   s(    | ]}|j D ]	}t|jtV  qqd S r]   )usersr:   rD   r   )r^   bufuserr+   r+   r,   rb      s    
)r:   r   anyrH   get_outputs)rQ   r`   r+   r,   ra      s
   
z(create_fx_from_snodes.<locals>.in_outputr1   r   r   )'strr   r<   collections
namedtupletorchr   Graph	is_externis_templater:   r   r   r9   r   RuntimeError	_inductorutilsget_fused_kernel_name	get_nodeshasattrrY   call_functionr
   r   boolappendget_namer!   r8   rg   read_writesreadsinserting_beforeplaceholderr;   rJ   outputlen)rH   rO   rP   buf_to_fx_nodenode_to_fx_noderA   
first_nodeoutputsr9   rQ   	node_type
fused_name	func_name	node_funcr\   fx_noder!   rd   depsnew_argsdepdep_noder+   r`   r,   r7   d   s   




	



r7   node_name_to_buf_nameparent_buf_name	n_originsc           
      C   s   | d u rd S | D ]S}|  }| }|d ur)t|dkr)t|||d u r%|n| qt|dkr5|d |ks7J |j}|d u sC|jd u rDq|jD ]}|j}	|	|vrZ|d u rV|n|||	< qGqd S )Nr   r   )rx   rs   r~   $update_orig_fx_node_name_to_buf_namerD   originsr!   )
r.   r   r   r   rD   buf_namechildren_nodesir_nodeorigin	node_namer+   r+   r,   r      s0   
r   c                 C   sl   i }|   D ]\}}||vr|h||< q|| | qi }|   D ]\}}t|| }t||||< q"|S r]   )itemsaddr~   r    )r   buf_name_to_n_noder   r   node_name_to_buf_metan_noder+   r+   r,   get_node_name_to_buf_meta   s   r   rF   c                 C   sP   i }t || |du rdS t|}| jjD ]}|j|v r%||j|jd< qdS )rI   Nbuf_meta)r   r   rA   r.   r!   getr8   )rF   rH   r   r   rD   r+   r+   r,   annotate_orig_fx_with_snodes   s   

r   c               	   c   s    t jdddk} dd l}t|jjj}t	
 }| s,z
d V  W |  d S |  w |tdd t jt d}t j|dd tt j|d	t  d
}|tj |td || zd V  W || |  d S || |  w )NTORCH_COMPILE_DEBUG01r   z*functorch.compile.config.debug_partitionerTtorchinductor)exist_okaot_z
_debug.log3[%(filename)s:%(lineno)d %(levelname)s] %(message)s)osenvironr   torch._functorch.aot_autogradlogging	getLogger
_functorchaot_autogradrN   
contextlib	ExitStackcloseenter_contextr   pathjoinr   makedirsFileHandlerr   setLevelDEBUGsetFormatter	Formatter
addHandlerremoveHandler)compile_debugrk   r5   stackr   fhr+   r+   r,   enable_aot_logging  s>   




r   c                   @   s*  e Zd Ze Zededee fddZ	d)ddZ
deddfd	d
Z	d*dededededee f
ddZej	d*dededededeee  f
ddZdedefddZd)ddZd)ddZdededdfddZdeee  dee d ee ddfd!d"Zd)d#d$Zd%edeed&  fd'd(ZdS )+DebugContextfolder_namer$   c                 C   sV   t jjpt }tjD ]}tj|d|  d| }tj	|s(t
| |  S q
d S )Nr   .)r   rC   	debug_dirr   r   _counterr   r   r   existsr   )r   r   ndirnamer+   r+   r,   create_debug_dir7  s   

zDebugContext.create_debug_dirNc                 C   s   d | _ d | _t | _d S r]   )_prof_pathr   r   _stack)selfr+   r+   r,   __init__E  s   zDebugContext.__init__new_pathc                 C   s   | j sd S |dsJ |ddlm} z.|| d tj|r(t| t	| j | W d    W d S 1 s;w   Y  W d S  t
yT   td| j | Y d S w )Nz.debugr   )FileLockz.lockz(Failed to copy debug files from %s to %s)r   endswithfilelockr   r   r   r   shutilrmtreecopytreeOSErrorr5   r6   )r   r   r   r+   r+   r,   copyJ  s   
&
zDebugContext.copywfilename
write_moderJ   r\   c                 O   s.   | j sJ ttj| j ||g|R i |S r]   r   openr   r   r   )r   r   r   rJ   r\   r+   r+   r,   fopenZ  s   
$zDebugContext.fopenc                 o   s\    | j sJ ttj| j ||g|R i |}|V  W d    d S 1 s'w   Y  d S r]   r   )r   r   r   rJ   r\   fr+   r+   r,   fopen_contextd  s
   
&"zDebugContext.fopen_contextsuffixc                 C   s   | j sJ tj| j |S r]   )r   r   r   r   )r   r   r+   r+   r,   r   p  s   
zDebugContext.filenamec                 C   s   t jjd urJdd l}| jsJ tj| jtj| j d}|	|d}|j
| jtj| jd W d    n1 s=w   Y  t j| d S d S )Nr   z.tar.gzzw:gz)arcname)r   rC   
upload_tartarfiler   r   r   r   basenamer   r   )r   r   tar_filetarr+   r+   r,   r   t  s   
zDebugContext.upload_tarc                    s   t jr#td  j} tj dtdd f fdd}| j	|| | j
t|  t jjs2d S | t | _t jjrD| dtj t jjrQ| dtj d S d S )Nztorch._dynamolevelr$   c                    s     |  d S r]   )r   )r   r5   r+   r,   reset_log_level  s   z/DebugContext.__enter__.<locals>.reset_log_levelz	debug.logzinfo.log)r   debugr   r   r   r   r   r   r   callbackr   r   set_debug_handlerrC   enabledr   r   r   	debug_log_setup_log_captureinfo_logINFO)r   
prev_levelr   r+   r   r,   	__enter__  s   
zDebugContext.__enter__r   c                 C   sp   t d}| j| |}t |}|| |t d |	| |t
|j| | j|j| d S )Nztorch._inductorr   )r   r   r   r   r   StreamHandlerr   r   r   r   minr   r   r   )r   r   r   r5   fdchr+   r+   r,   r     s   



zDebugContext._setup_log_captureexc_typeexc_valexc_tbc                 C   sF   | j r| j   |   | jr|   tdt | j | j	  d S )Nz%s debug trace: %s)
r   disable_save_profile_datar   r   r5   r6   r   r   r   )r   r   r   r   r+   r+   r,   __exit__  s   
zDebugContext.__exit__c                 C   s   | j sJ | j | d | d)}tj| j |d}|  |d |d |d |d W d    d S 1 s?w   Y  d S )Nzcompile.profzcompile.stats)streamcumtimed   tottime)	r   
dump_statsr   r   pstatsStats
strip_dirs
sort_statsprint_stats)r   r   statsr+   r+   r,   r     s   



"zDebugContext._save_profile_datar!   ).Nc                 C   sb   t jjr$tt j|r$ztt| |W S  ty#   tjddd Y d S w dtdtdd fdd}|S )	Nz Ignoring exception in debug codeT)exc_inforJ   r\   r$   c                  _   s   d S r]   r+   r[   r+   r+   r,   ignored  rM   z)DebugContext.__getattr__.<locals>.ignored)	r   rC   r   getattrDebugFormatter	Exceptionr5   r6   r   )r   r!   r  r+   r+   r,   __getattr__  s   zDebugContext.__getattr__)r$   N)r   )rN   
__module____qualname__	itertoolscountr   staticmethodrh   r   r   r   r   r   r   r   r   contextmanagerr   r   r   r   r   r<   r   r	   BaseExceptionr   r   r   r  r+   r+   r+   r,   r   4  sh    









r   c                   @   s  e Zd ZdeddfddZdejjdeej	 ddfdd	Z
dejjdeej	 ddfd
dZdeddfddZdeddfddZdededdfddZdeddfddZdejjdeddfddZdeddfddZdedeej dedef dededdfd d!ZdS )"r
  handlerr$   Nc                 C   s"   |j | _ |j| _|j| _|| _d S r]   )r   r   r   r  )r   r  r+   r+   r,   r     s   
zDebugFormatter.__init__rF   inputsc              
   C   s   |  dH}d }tjjjjrtjj|}t	j
|j}tjjjj}tjjddd t|||d||d W d    n1 sAw   Y  W d    n1 sPw   Y  |  d}||jdd W d    d S 1 sow   Y  d S )Nzfx_graph_runnable.pyF)ztrace.enabledztrace.save_real_tensorsinductor)save_dirstable_hashzfx_graph_readable.pyprint_output)r   rk   rp   r   rC   save_real_tensors_subclasses
fake_utilstry_convert_fake_to_realr   r   r   r!   r   r   writeprint_readable)r   rF   r  r   r  r  r+   r+   r,   fx_graph  s.   "zDebugFormatter.fx_graphc                 C   sB   |  d}||jdd W d    d S 1 sw   Y  d S )Nzfx_graph_transformed.pyFr  )r   r  r   )r   rF   r  r   r+   r+   r,   fx_graph_transformed  s   "z#DebugFormatter.fx_graph_transformedr.   c                 C      |  d| d S )Nzir_pre_fusion.txt	_write_irr   r.   r+   r+   r,   ir_pre_fusion     zDebugFormatter.ir_pre_fusionc                 C   r#  )Nzir_post_fusion.txtr$  r&  r+   r+   r,   ir_post_fusion   r(  zDebugFormatter.ir_post_fusionr   c                 C   s`   |  |!}td|j |D ]}||  |d qW d    d S 1 s)w   Y  d S )NzWriting debug ir to  %sz


)r   r5   infor!   r  	debug_str)r   r   r.   r   rD   r+   r+   r,   r%    s   "zDebugFormatter._write_irc                 C   s   t || dd d S )Nzgraph_diagram.svg)r0   )rG   r   r&  r+   r+   r,   graph_diagram     zDebugFormatter.graph_diagramc                 C   s,   t || t|| ddtdtjjd d S )Nzorig_fx_graph_diagram.svgFT)r0   r3   progparse_stack_tracer4   )r   r   r   GRAPHVIZ_COMMAND_SCALABLEr   rC   r4   )r   rF   r.   r+   r+   r,   draw_orig_fx_graph  s   

z!DebugFormatter.draw_orig_fx_graphc                 C   s   t || d d S )Nzoutput_code.py)r   r   r   )r   r   r+   r+   r,   output_code   r-  zDebugFormatter.output_coder!   input_nodestimingsChoiceCallerelapseprecompile_elapsec                    s   dd l }ddlm  dtjdtttf f fdd|tj tj	 fdd	|D ||d
}| j
dddd,}| D ]\}	}
t|	 }|| |
|d< ||| |d q>W d    d S 1 shw   Y  d S )Nr   r   )FixedLayoutrD   r$   c                    st  t | dr	| j}nd}|t| jd}z\|  }t| rhd}zt|j}W n tyE   zt	j
jj|jdd}W n	 tyB   Y nw Y nw  |j|jg t	j
j|jg t	j
j|j|d}t||d< nt||d< W n ty } zW Y d }~nd }~ww z
t|  |d< W n ty } zW Y d }~nd }~ww z
t|  |d	< W n ty } zW Y d }~nd }~ww ztt	j
j|  |d
< W n ty } zW Y d }~nd }~ww ztt	j
j|  |d< W n ty } zW Y d }~nd }~ww ztt	j
j|  |d< W n ty" } zW Y d }~nd }~ww t | dr8t| jtjr8| j|d< |S )Nr!    )r!   rR   r   )fallback)r?   sizestrideoffsetlayoutr?   rZ   r<  r;  numelr>   )rt   r!   rR   rN   get_output_specr:   r<   r=  r  r   rA   sizevars	size_hintrZ   r?   
size_hintsr;  r<  rh   	get_dtyperY   
get_strideget_size	get_numelr>   r   IRNode)rD   r   	node_infor>  r=  static_layouter8  build_node_infor+   r,   rM  /  s   


z>DebugFormatter.log_autotuning_results.<locals>.build_node_infoc                    s   g | ]} |qS r+   r+   )r^   rD   )rM  r+   r,   
<listcomp>o  s    z9DebugFormatter.log_autotuning_results.<locals>.<listcomp>)op_namecuda_device_namecuda_device_countr3  autotuning_timeprecompile_timezautotuning_result_json_list.txtatzutf-8)encodingbenchmark_result
)jsonr   r8  rH  r   rh   rk   cudaget_device_namedevice_countr   r   dict	info_dictupdatedumpr  )r   r!   r3  r4  r6  r7  rX  general_propertiesr   callertimer]  r+   rL  r,   log_autotuning_results#  s,   "=
"z%DebugFormatter.log_autotuning_results)rN   r  r  r   r   rk   r   r   r   Tensorr!  r"  SchedulerNodeListr'  r)  rh   r%  r,  r1  r2  r   rH  r   floatrc  r+   r+   r+   r,   r
    s^    




r
  c                   @   s    e Zd ZU eed< ejed< dS )TensorMetadataHoldertensor_metadatarZ   N)rN   r  r  r   __annotations__rk   rZ   r+   r+   r+   r,   rg  ~  s   
 rg  rJ   r\   c            
      O   s   d}t j|st | dtdtfdd}t|| |f\}}d}| d| dtt d	}t|d
}t	
||f| W d   n1 sFw   Y  ttjr`d| d|d}	t|	 dS dS )z
    This function is used to save arguments for a compile_fx_inner function call
    to the file system.  Later on one can replay the compile_fx_inner call
    with the saved arguments using load_args_and_run_compile_fx_inner.
    z/tmp/inductor_saved_argsr_   r$   c                 S   s    t | tjrtt| | jS | S )z
        Pickle FakeTensor will result in error:
        AttributeError: Can't pickle local object 'WeakValueDictionary.__init__.<locals>.remove'

        Convert all Tensor to metadata. This may also makes pickle faster.
        )r:   rk   rd  rg  r   rZ   r_   r+   r+   r,   handle_tensor  s   z5save_args_for_compile_fx_inner.<locals>.handle_tensorcompile_fx_inner/_z.pklwbNz3
Arguments for a compile_fx_inner call is saved to z. To replay the call,
run the following:

from torch._inductor.debug import load_args_and_run_compile_fx_inner
load_args_and_run_compile_fx_inner(z
)
        )r   r   r   mkdirr   r   nextsave_args_cntr   pickler_  r5   isEnabledForr   r   r@   )
rJ   r\   folderrk  args_to_savekwargs_to_savefn_namer   r   messager+   r+   r,   save_args_for_compile_fx_inner  s$   
rz  r   c              	   C   s   ddl m} t| d}t|\}}W d    n1 sw   Y  dtdtfdd}tjjdd	}|6 t	
d
d t|||f\}}||i |W  d    W  d    S 1 s]w   Y  W d    d S 1 smw   Y  d S )Nr   )rl  rbr_   r$   c                 S   s0   t | trtjj| jj| jj| jj	| j
S | S r]   )r:   rg  rk   _dynamotestingrand_stridedrh  shaper<  r?   rZ   rj  r+   r+   r,   rk    s   
z9load_args_and_run_compile_fx_inner.<locals>.handle_tensorT)allow_non_fake_inputs	save_argsF)torch._inductor.compile_fxrl  r   rs  loadr   rk   r  FakeTensorModer   r   r   )r   rl  r   rJ   r\   rk  	fake_moder+   r+   r,   "load_args_and_run_compile_fx_inner  s   Rr  )package_pathfunc.exported_programinductor_configsr  c             
   C   s   ddl m} |jj}| }t|tjjsJ |j	\}}z| ||||||dW S  t
yD }	 z|r>ddlm}
 |
|d|d |	d }	~	ww )Nr   )r   )r  r  load_and_run)dump_to_minifycompile_fx_aot)options)torch._inductorr   aot_inductordump_aoti_minifiermoduler:   rk   r   r   example_inputsr  torch._dynamo.repro.aotir  )r  r  r  r  r   use_minifierrF   rJ   r\   rK  r  r+   r+   r,   aot_inductor_minifier_wrapper  s2   
r  )FNrK   )Wri   r   dataclasses	functoolsior  r   r   os.pathrs  r  r   r'   typingr   r   r   r   r   r   r   r	   r
   unittest.mockr   rk   functorch.compiler   r   r   r   torch._dynamo.repro.after_aotr   torch._dynamo.utilsr   torch.fx.graph_moduler   torch.fx.passes.shape_propr   r   torch.fx.passes.tools_commonr   torch.utils._pytreer   r9  r   r   	schedulerr   r   r   r   r   virtualizedr   r   rN   r5   re  rj   r    r0  	lru_cacherv   r-   rh   rG   rl   r7   r<   r   r   r   r  r   r   r
  	dataclassrg  r  rr  rz  r  exportExportedProgramBytesIOr  r+   r+   r+   r,   <module>   s    ,


.e

#



(  /.

