o
    i1                     @   s  U d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
Z	d dlZd dlZd dlZd dlZd dlmZmZ d dlmZmZmZmZmZ d dlmZ d dlZd dlmZmZmZ d dlmZ d dl m!Z! d dl"m#Z# d d	l$m%Z% d d
l&m'Z' d dl(m)Z) d dl*m+Z+ d dl,m-Z- d dl.m/Z/m0Z0 d dl1m2Z2 d dl3m4Z4 d dl5m6Z6 d dl7m8Z8 ddl9m:Z:m;Z; ddl;m<Z< ddl=m>Z>m?Z?m@Z@mAZAmBZB ddlCmDZD eEeFZGdaHeeIeJeKeLf   eMd< daNeOeMd< daPeeJeQeeK f  eMd< e'eFdZRe'eFdZSeIe ZTe Uddd gZVg d!ZWejXd"eOfd#d$ZY		dxd%eIe> d&eOd'eeK d"dfd(d)ZZd*eIe> d"ej[fd+d,Z\		 dyd%eeT d-eJeKeKf d.eeK d/eQd"df
d0d1Z]d-eJeKeKf d"eJeKeVf fd2d3Z^d4ejj-d*eTd"dfd5d6Z_ej`d"ed fd7d8Zai abeJeKeJeKeIeK f f eMd9< i aceJeKeIeK f eMd:< dadeeQ eMd;< i aeeJeKeKf eMd<< i afeJeKeIeK f eMd=< d ageQeMd>< dzd?d@Zhej`d"ed fdAdBZiG dCdD dDZjG dEdF dFZkd%eTd"dfdGdHZld%eTd"dfdIdJZmdKeIeeKdf  d"dfdLdMZnd%ee> d"dfdNdOZodPeepeeqf  d"dfdQdRZrdzdSdTZsej`d"ed fdUdVZtejuG dWdX dXZvew ZxdYeeQ dZeJeKef d"eJeKeJeKeIeK f f fd[d\Zyd]eJeKef d"eJeKeJeKef f fd^d_Zzd"eJeKef fd`daZ{d"eJeKeJeKeIeK f f fdbdcZ|	d{ddeee> e<f deeKdfeOd"eeQ fdgdhZ}diedjed"dfdkdlZ~dmeKd"efdndoZddpdqedreKf dsejjdteJeKef duee4 d"eKf
dvdwZdS )|    N)IteratorSequence)AnyCallableIOOptionalUnion)patch)
draw_graphget_aot_graph_nameget_graph_being_compiled)fx)save_graph_repro)get_debug_dir)utils)getArtifactLogger)trace_structured)signpost_event)GraphModule)_extract_tensor_metadataTensorMetadata)legalize_graph)FileLike)
OrderedSet)tree_map   )configir)ExternKernel)BaseSchedulerNodeFusedSchedulerNodeNopKernelSchedulerNode
OutputNodeSchedulerNode)VGRAPH_EXECUTION_ORDERFRECORD_GRAPH_EXECUTIONGRAPH_COMPILE_IDSir_pre_fusionir_post_fusionBufMetanamen_origin)dotz-Gnslimit=2z-Gnslimit1=2z-Gmaxiter=5000returnc                   C   s   t dd uS )Nr-   )shutilwhich r1   r1   K/home/ubuntu/vllm_env/lib/python3.10/site-packages/torch/_inductor/debug.pyhas_dot>   s   r3   nodesprint_graphfnamec           	   	   C   s   t  s
td dS |du rt }t| }|jD ]?}d|jvr q|jd j}t|t	r<t|d t
r8|d f}n|d }d}t|tjrH|jj}t||ddddd}||jd< q|r^t| ti |}t| |j  t||dtjjd dS )z$
    Draw a graph in fname.svg.
    z*draw_buffers() requires `graphviz` packageNfusion_metar   tensor_metaF)
clear_metadot_graph_shape)r3   logwarningr   create_fx_from_snodesr4   metagroup
isinstancetupleintr   ComputedBufferdatadtyper   printr   r   graphlintr
   r   tracer:   )	r4   r5   r6   rG   noder?   rE   metadatagmr1   r1   r2   draw_buffersC   s6   






rM   snodesc              
      sf  dt dtdtf fdd}tdg d}i }i }tj }d}g }d}| D ]}	|	 r1d	}
|
}n-|		 r:d
}
|
}n$t
|	trDd}
|
}nt
|	trOd}
|	j}nt
|	trZd}
|	j}ntdtjj|	 d}|
 d| }||}i }t|	drd|	 i}|j|d|d}dtttf dtf fdd  |	r|| |	 }||_|||	|
|jd< |||< |	 D ]}||| < q|du r|}q&| D ]Q}	|	 }|	jj }|| }g }|D ]9}|j|v r||j }n!|!| |"|j}|||j< W d   n	1 sw   Y  ||krq|| qt#||_$q|%t&|dkr,|d  |S t#| |S )B
    Creates a FX Graph from a list of SchedulerNode objects.
    r+   r.   .c                 S   s   dt dtfdd}| |_|S )Nargsr.   c                  W   s   dS Nr   r1   )rP   r1   r1   r2   func1w      z;create_fx_from_snodes.<locals>.get_fake_func.<locals>.func1)r   rB   __name__)r+   rR   r1   r1   r2   get_fake_funcv   s   z,create_fx_from_snodes.<locals>.get_fake_func
FusionMeta)r?   snodetypeNexterntemplatenopcomputefusedzUnknown node typeoriginal_atenz: 
get_devicedevicer1   rP   kwargsrW   c                    s8   t | trt fdd| jD S tdd |  D S )Nc                 3   s    | ]} |V  qd S Nr1   ).0x	in_outputr1   r2   	<genexpr>   s    z;create_fx_from_snodes.<locals>.in_output.<locals>.<genexpr>c                 s   s(    | ]}|j D ]	}t|jtV  qqd S rc   )usersr@   rJ   r"   )rd   bufuserr1   r1   r2   rh      s    
)r@   r    anyrN   get_outputs)rW   rf   r1   r2   rg      s
   
z(create_fx_from_snodes.<locals>.in_outputr7   r   r   )'strr   rB   collections
namedtupletorchr   Graph	is_externis_templater@   r!   r#   r?   r    RuntimeError	_inductorr   get_fused_kernel_name	get_nodeshasattrr_   call_functionr   r   boolappendget_namer+   r>   rm   read_writesreadsinserting_beforeplaceholderrA   rP   outputlen)rN   rU   rV   buf_to_fx_nodenode_to_fx_noderG   
first_nodeoutputsr?   rW   	node_type
fused_name	func_name	node_funcrb   fx_noder+   rj   depsnew_argsdepdep_noder1   rf   r2   r=   q   s   




	



r=   node_name_to_buf_nameparent_buf_name	n_originsc           
      C   s   | d u rd S | D ]S}|  }| }|d ur)t|dkr)t|||d u r%|n| qt|dkr5|d |ks7J |j}|d u sC|jd u rDq|jD ]}|j}	|	|vrZ|d u rV|n|||	< qGqd S )Nr   r   )r}   rx   r   $update_orig_fx_node_name_to_buf_namerJ   originsr+   )
r4   r   r   r   rJ   buf_namechildren_nodesir_nodeorigin	node_namer1   r1   r2   r      s0   
r   c                 C   sp   i }|   D ]\}}||vrt|g||< q|| | qi }|   D ]\}}t|| }t||||< q$|S rc   )itemsr   addr   r*   )r   buf_name_to_n_noder   r   node_name_to_buf_metan_noder1   r1   r2   get_node_name_to_buf_meta   s   r   rL   c                 C   sP   i }t || |du rdS t|}| jjD ]}|j|v r%||j|jd< qdS )rO   Nbuf_meta)r   r   rG   r4   r+   getr>   )rL   rN   r   r   rJ   r1   r1   r2   annotate_orig_fx_with_snodes  s   

r   c               	   c   s    t jdddk} dd l}t|jjj}t	
 }| s,z
d V  W |  d S |  w |tdd t jt d}t j|dd tt j|d	t  d
}|tj |td || zd V  W || |  d S || |  w )NTORCH_COMPILE_DEBUG01r   z*functorch.compile.config.debug_partitionerTtorchinductor)exist_okaot_z
_debug.log3[%(filename)s:%(lineno)d %(levelname)s] %(message)s)osenvironr   torch._functorch.aot_autogradlogging	getLogger
_functorchaot_autogradrT   
contextlib	ExitStackcloseenter_contextr	   pathjoinr   makedirsFileHandlerr   setLevelDEBUGsetFormatter	Formatter
addHandlerremoveHandler)compile_debugrq   r;   stackr   fhr1   r1   r2   enable_aot_logging  s>   




r    _inductor_post_to_pre_grad_nodes._inductor_triton_kernel_to_post_grad_node_info_pre_grad_graph_id#_inductor_pre_grad_node_stack_trace_inductor_kernel_stack_trace(_inductor_kernel_provenance_debug_handlec                   C   s   da d S rQ   )r   r1   r1   r1   r2   -reset_inductor_kernel_provenance_debug_handleL  s   r   c                  c   sr    t } t }t }t }t }da i ai ai ai azdV  W | a |a|a|a|adS | a |a|a|a|aw )zzContext manager that resets provenance tracking globals upon entering
    and restores their original values when exiting.N)r   r   copyr   r   r   )original_pre_grad_graph_idoriginal_post_to_pre_grad_nodes-original_triton_kernel_to_post_grad_node_info+original_inductor_pre_grad_node_stack_trace$original_inductor_kernel_stack_tracer1   r1   r2   reset_provenance_globalsQ  s:   r   c                   @   s*  e Zd Ze Zededee fddZ	d)ddZ
deddfd	d
Z	d*dededededee f
ddZej	d*dededededeee  f
ddZdedefddZd)ddZd)ddZdededdfddZdeee  dee d ee ddfd!d"Zd)d#d$Zd%edeed&  fd'd(ZdS )+DebugContextfolder_namer.   c                 C   sV   t jjpt }tjD ]}tj|d|  d| }tj	|s(t
| |  S q
d S )Nr   .)r   rI   	debug_dirr   r   _counterr   r   r   existsr   )r   r   ndirnamer1   r1   r2   create_debug_dir  s   

zDebugContext.create_debug_dirNc                 C   s   d | _ d | _t | _d S rc   )_prof_pathr   r   _stack)selfr1   r1   r2   __init__  s   zDebugContext.__init__new_pathc                 C   s   | j sd S |dsJ |ddlm} z.|| d tj|r(t| t	| j | W d    W d S 1 s;w   Y  W d S  t
yT   td| j | Y d S w )Nz.debugr   )FileLockz.lockz(Failed to copy debug files from %s to %s)r   endswithfilelockr   r   r   r   r/   rmtreecopytreeOSErrorr;   r<   )r   r   r   r1   r1   r2   r     s   
&
zDebugContext.copywfilename
write_moderP   rb   c                 O   s.   | j sJ ttj| j ||g|R i |S rc   r   openr   r   r   )r   r   r   rP   rb   r1   r1   r2   fopen  s   
$zDebugContext.fopenc                 o   s\    | j sJ ttj| j ||g|R i |}|V  W d    d S 1 s'w   Y  d S rc   r   )r   r   r   rP   rb   fr1   r1   r2   fopen_context  s
   
&"zDebugContext.fopen_contextsuffixc                 C   s   | j sJ tj| j |S rc   )r   r   r   r   )r   r   r1   r1   r2   r     s   
zDebugContext.filenamec                 C   s   t jjd urJdd l}| jsJ tj| jtj| j d}|	|d}|j
| jtj| jd W d    n1 s=w   Y  t j| d S d S )Nr   z.tar.gzzw:gz)arcname)r   rI   
upload_tartarfiler   r   r   r   basenamer   r   )r   r   tar_filetarr1   r1   r2   r     s   
zDebugContext.upload_tarc                    s   t jr#td  j} tj dtdd f fdd}| j	|| | j
t|  t jjs2d S | t | _t jjrD| dtj t jjrQ| dtj d S d S )Nztorch._dynamolevelr.   c                    s     |  d S rc   )r   )r   r;   r1   r2   reset_log_level  s   z/DebugContext.__enter__.<locals>.reset_log_levelz	debug.logzinfo.log)r   debugr   r   r   r   r   r   r   callbackr   r$   set_debug_handlerrI   enabledr   r   r   	debug_log_setup_log_captureinfo_logINFO)r   
prev_levelr   r1   r   r2   	__enter__  s   
zDebugContext.__enter__r   c                 C   sp   t d}| j| |}t |}|| |t d |	| |t
|j| | j|j| d S )Nztorch._inductorr   )r   r   r   r   r   StreamHandlerr   r   r   r   minr   r   r   )r   r   r   r;   fdchr1   r1   r2   r     s   



zDebugContext._setup_log_captureexc_typeexc_valexc_tbc                 C   sF   | j r| j   |   | jr|   tdt | j | j	  d S )Nz%s debug trace: %s)
r   disable_save_profile_datar   r   r;   r<   r   r   r   )r   r  r	  r
  r1   r1   r2   __exit__  s   
zDebugContext.__exit__c                 C   s   | j sJ | j | d | d)}tj| j |d}|  |d |d |d |d W d    d S 1 s?w   Y  d S )Nzcompile.profzcompile.stats)streamcumtimed   tottime)	r   
dump_statsr   r   pstatsStats
strip_dirs
sort_statsprint_stats)r   r  statsr1   r1   r2   r    s   



"zDebugContext._save_profile_datar+   ).Nc                 C   sb   t jjr$tt j|r$ztt| |W S  ty#   tjddd Y d S w dtdtdd fdd}|S )	Nz Ignoring exception in debug codeTexc_inforP   rb   r.   c                  _   s   d S rc   r1   ra   r1   r1   r2   ignored  rS   z)DebugContext.__getattr__.<locals>.ignored)	r   rI   r   getattrDebugFormatter	Exceptionr;   r<   r   )r   r+   r  r1   r1   r2   __getattr__	  s   zDebugContext.__getattr__r.   N)r   )rT   
__module____qualname__	itertoolscountr   staticmethodrn   r   r   r   r   r   r   r   r   contextmanagerr   r   r   r   r  rB   r   rX   BaseExceptionr  r  r   r  r1   r1   r1   r2   r   |  sh    









r   c                   @   s  e Zd ZdeddfddZdejjdeej	 ddfdd	Z
dejjdeej	 ddfd
dZdeddfddZdeddfddZededefddZdeddfddZdejjdeddfddZd%dededdfddZdedeej dedef d ed!ed"ee ddfd#d$ZdS )&r  handlerr.   Nc                 C   s"   |j | _ |j| _|j| _|| _d S rc   )r   r   r   r(  )r   r(  r1   r1   r2   r     s   
zDebugFormatter.__init__rL   inputsc              
   C   s   |  dH}d }tjjjjrtjj|}t	j
|j}tjjjj}tjjddd t|||d||d W d    n1 sAw   Y  W d    n1 sPw   Y  |  d}||jdd W d    d S 1 sow   Y  d S )Nzfx_graph_runnable.pyF)ztrace.enabledztrace.save_real_tensorsinductor)save_dirstable_hashzfx_graph_readable.pyprint_output)r   rq   rv   r   rI   save_real_tensors_subclasses
fake_utilstry_convert_fake_to_realr   r   r   r+   r	   r   writeprint_readable)r   rL   r)  r  r+  r,  r1   r1   r2   fx_graph  s.   "zDebugFormatter.fx_graphc                 C   sB   |  d}||jdd W d    d S 1 sw   Y  d S )Nzfx_graph_transformed.pyFr-  )r   r3  r4  )r   rL   r)  r  r1   r1   r2   fx_graph_transformed=  s   "z#DebugFormatter.fx_graph_transformedr4   c                 C   @   |  d}|| | W d    d S 1 sw   Y  d S )Nzir_pre_fusion.txtr   r3  	_write_irr   r4   r  r1   r1   r2   r(   E     "zDebugFormatter.ir_pre_fusionc                 C   r7  )Nzir_post_fusion.txtr8  r:  r1   r1   r2   r)   I  r;  zDebugFormatter.ir_post_fusionc                 C   s2   t  }| D ]}||  |d q| S )Nz


)ioStringIOr3  	debug_strgetvalue)r4   rj   rJ   r1   r1   r2   r9  M  s
   zDebugFormatter._write_irc                 C   s   t || dd d S )Nzgraph_diagram.svg)r6   )rM   r   )r   r4   r1   r1   r2   graph_diagramU  s   zDebugFormatter.graph_diagramc                 C   s,   t || t|| ddtdtjjd d S )Nzorig_fx_graph_diagram.svgFT)r6   r9   progparse_stack_tracer:   )r   r
   r   GRAPHVIZ_COMMAND_SCALABLEr   rI   r:   )r   rL   r4   r1   r1   r2   draw_orig_fx_graphX  s   

z!DebugFormatter.draw_orig_fx_graphpyr   	extensionc                 C   s   t || d|  d S )Nzoutput_code.)r/   r   r   )r   r   rF  r1   r1   r2   output_codeg  s   zDebugFormatter.output_coder+   input_nodestimingsChoiceCallerelapseprecompile_elapseprescreening_elapsec                    s   ddl m  dt jdtttf f fdd|tj tj fdd|D |||d	}| j	d
ddd,}|
 D ]\}	}
t|	 }|| |
|d< t|| |d q;W d    d S 1 sew   Y  d S )Nr   )FixedLayoutrJ   r.   c                    s  t | dr	| j}nd}|t| jd}z\|  }t| rhd}zt|j}W n tyE   zt	j
jj|jdd}W n	 tyB   Y nw Y nw  |j|jg t	j
j|jg t	j
j|j|d}t||d< nt||d< W n	 tyx   Y nw z
t|  |d< W n	 ty   Y nw z
t|  |d	< W n	 ty   Y nw ztt	j
j|  |d
< W n	 ty   Y nw ztt	j
j|  |d< W n	 ty   Y nw ztt	j
j|  |d< W n	 ty   Y nw t | drt| jtjr| j|d< |S )Nr+    )r+   rX   r   )fallback)rE   sizestrideoffsetlayoutrE   r`   rR  rQ  numelrD   )ry   r+   rX   rT   get_output_specr@   rB   rS  r  r$   rG   sizevars	size_hintr`   rE   
size_hintsrQ  rR  rn   	get_dtyper_   
get_strideget_size	get_numelrD   r   IRNode)rJ   r   	node_inforT  rS  static_layoutrN  build_node_infor1   r2   rb  u  s   


z>DebugFormatter.log_autotuning_results.<locals>.build_node_infoc                    s   g | ]} |qS r1   r1   rd   rJ   )rb  r1   r2   
<listcomp>      z9DebugFormatter.log_autotuning_results.<locals>.<listcomp>)op_namecuda_device_namecuda_device_countrH  autotuning_timeprecompile_timeprescreening_timezautotuning_result_json_list.txtatzutf-8)encodingbenchmark_result
)r   rN  r^  dictrn   rq   cudaget_device_namedevice_countr   r   	info_dictupdatejsondumpr3  )r   r+   rH  rI  rK  rL  rM  general_propertiesr  callertimert  r1   ra  r2   log_autotuning_resultsj  s,   	"=	
"z%DebugFormatter.log_autotuning_results)rE  )rT   r!  r"  r   r   rq   r   r   listTensorr5  r6  SchedulerNodeListr(   r)   r%  rn   r9  r@  rD  rG  r   r^  rp  floatr   r{  r1   r1   r1   r2   r    sX    



r  c                 C   .   t tjrt dt|  tj	|  d S )NzBEFORE FUSION
%s)
ir_pre_fusion_logisEnabledForr   r  infor  r9  r$   r   r(   r4   r1   r1   r2   log_ir_pre_fusion     r  c                 C   r  )NzAFTER FUSION
%s)
ir_post_fusion_logr  r   r  r  r  r9  r$   r   r)   r  r1   r1   r2   log_ir_post_fusion  r  r  schedulec                    sD   zt ddd  fddd W d S  ty!   tjddd Y d S w )	Nartifactc                   S   
   dddS )Ninductor_collective_schedulerv  r+   rm  r1   r1   r1   r1   r2   <lambda>     z+_dump_collective_schedule.<locals>.<lambda>c                      s    S rc   r1   r1   r  r1   r2   r    s    metadata_fn
payload_fnzAFailed to log inductor_collective_schedule via structured loggingTr  )r   r  r;   r   r  r1   r  r2   _dump_collective_schedule  s   
r  c                    s&    fdd| D }|rt | d S d S )Nc                    s0   g | ]}t t|d d  tjrt ddqS )rJ   Npython_kernel_name)r@   r  r   _CollectiveKernelrc  opr1   r2   rd    s    
z+log_collective_schedule.<locals>.<listcomp>)r  )r4   r  r1   r  r2   log_collective_schedule  s   
r  node_runtimesc              	      sJ  zt jjjdttt  dtt ffdd}dtdtt fdd}g  | D ]]\}}t	|j
d| }t|j
r;d	nd
}g }z0| D ])}|j
}	|	 }
t|	jtjrX|	 nd}|	 }|||
||||d qDW n	 tyx   Y nw  ||||d q&tddd  fddd W dS  ty   tjddd Y dS w )zDLog per-op runtime estimates and output tensor metadata for TLParse.re   r.   c                    s   | d ur
t  | S g S rc   )r|  re   )to_size_hintsr1   r2   to_list  s   z,log_runtime_and_tensor_meta.<locals>.to_listrE   c                 S   s"   | d u rd S t | }|d}|S )Nztorch.)rn   removeprefix)rE   sr1   r1   r2   dtype_to_str  s
   
z1log_runtime_and_tensor_meta.<locals>.dtype_to_strr  
collectiver\   N)shaperR  rE   )r+   rX   estimated_runtime_nsr   r  c                   S   r  )N inductor_runtime_and_tensor_metarv  r  r1   r1   r1   r1   r2   r  &  r  z-log_runtime_and_tensor_meta.<locals>.<lambda>c                      s   d iS )Nopsr1   r1   )r  r1   r2   r  *      r  z.Failed to log inductor_runtime_and_tensor_metaTr  )r$   rG   rW  rY  r   r   r   r|  rn   r  rJ   r}   r   is_collectiverm   maybe_get_sizer@   rT  r   Layoutr[  maybe_get_dtyper|   r  r   r;   r   )r  r  r  r  
runtime_nsr+   op_typer   rj   irnoder  rR  rE   r1   )r  r  r2   log_runtime_and_tensor_meta  sV   
"
	
r  c                   C   sH   t sdS ztddd dd d W dS  ty#   tjddd	 Y dS w )
z:Emit a structured artifact with the graph execution order.Nr  c                   S   r  )Ngraph_executionrv  r  r1   r1   r1   r1   r2   r  7  r  z%log_graph_execution.<locals>.<lambda>c                   S   s   dt iS )Ngraph_execution_order)r%   r1   r1   r1   r2   r  ;  r  r  zFailed to log graph_executionTr  )r%   r   r  r;   r   r1   r1   r1   r2   log_graph_execution0  s   r  c                   c   sB    g a i adazdV  W t  dada dadS t  dada daw )z5Record graph execution order and log it once on exit.TNF)r%   r'   r&   r  r1   r1   r1   r2   $record_and_log_graph_execution_orderA  s   r  c                   @   s    e Zd ZU eed< ejed< dS )TensorMetadataHoldertensor_metadatar`   N)rT   r!  r"  r   __annotations__rq   r`   r1   r1   r1   r2   r  Q  s   
 r  pre_grad_graph_idpost_to_pre_grad_nodes_jsonc              
      s  i i d}t |tstd |S t | ts|S tt}tt}zdttt	f dt
fdd}| D ]\ }t |tsGtd |  W S |D ]q}||sV|    W S |d| kro||d	    |  |d	   fd
d|dg D }|r| \}	||	s|    W S |	d| kr||	d	   | |	d	  |fdd|	dg D  |s~qIq4dttt	f ddfdd}
|
| |
| ||dW S  ty } z"tdddt|t d td| td|  |W  Y d}~S d}~ww )zx
    Create bidirectional mappings between pre_grad graph nodes
    and post_grad graph code nodes, and vice versa.
    )	preToPost	postToPrezCProvenance tacking error: post_to_pre_grad_nodes_json is not a dictrJ   r.   c                 S   sB   t | tstd dS d| vsd| vsd| vrtd dS dS )NzVProvenance tacking error: node provenance in post_to_pre_grad_nodes_json is not a dictFgraph_idr+   	from_nodezYProvenance tacking error: node provenance in post_to_pre_grad_nodes_json has wrong formatT)r@   rp  r;   error)rJ   r1   r1   r2   check_formatv  s   
z8create_mapping_pre_post_grad_nodes.<locals>.check_formatzIProvenance tacking error: post_to_pre_grad_nodes_json value is not a listr  r+   c                    s   g | ]}| fqS r1   r1   rd   r   )	outer_keyr1   r2   rd    re  z6create_mapping_pre_post_grad_nodes.<locals>.<listcomp>r  c                 3   s    | ]}| fV  qd S rc   r1   r  )
parent_keyr1   r2   rh     s    
z5create_mapping_pre_post_grad_nodes.<locals>.<genexpr>dNc                 S   &   | D ]
}t | | | |< qt| } d S rc   r|  rp  r  keyr1   r1   r2   convert_sets_to_lists     zAcreate_mapping_pre_post_grad_nodes.<locals>.convert_sets_to_listsr*  provenance_tracking_error"create_mapping_pre_post_grad_nodesfunction	error_msgstack_tracez post_to_pre_grad_nodes_json:  %szpre_grad_graph_id:  %s)r@   rp  r;   r  rB   ro   defaultdictr   rn   r   r{   r   r|  r   r   popextendr  r   	traceback
format_exc)r  r  empty_returnpre_to_postpost_to_prer  
node_arrayrJ   r   current_noder  er1   )r  r  r2   r  Z  st   








	r  triton_kernel_to_post_grad_jsonc              
   C   s   i i d}t | tstd |S tt}z;|  D ]\}}t |ts.td |  W S |D ]	}|| 	| q0qdtt
tf ddfdd}|| | |dW S  tyy } ztd	d
dt
|t d td|  |W  Y d}~S d}~ww )zqCreate bidirectional mappings between triton kernel name and post_grad
    graph code nodes, and vice versa.
    )cppCodeToPostpostToCppCodezGProvenance tacking error: triton_kernel_to_post_grad_json is not a dictzMProvenance tacking error: triton_kernel_to_post_grad_json value is not a listr  r.   Nc                 S   r  rc   r  r  r1   r1   r2   r    r  zFcreate_node_mapping_kernel_to_post_grad.<locals>.convert_sets_to_listsr*  r  "create_mapping_kernel_to_post_gradr  z$triton_kernel_to_post_grad_json:  %s)r@   rp  r;   r  ro   r  r   r   r|  r   rn   r   r  r   r  r  )r  r  post_to_cpp_coder  r  	curr_noder  r  r1   r1   r2   'create_node_mapping_kernel_to_post_grad  sN   	



	r  c               
   C   s   z6i } t r0tt}i t|} tjjr0tj	dd}t
| | W d    n1 s+w   Y  d| d< | W S  tyX } ztdddt|t d i W  Y d }~S d }~ww )	Nz/inductor_provenance_tracking_node_mappings.jsonr   g       @versionr*  r  dump_inductor_provenance_infor  )r   r  r   r   r   rI   r   r$   r   r   rv  rw  r  r   rn   r  r  )node_mappingnode_mapping_kernelr  r  r1   r1   r2   r    s@   	r  c               
   C   s   zAt di } tt tt B }i }|D ]'}t|g }t }|D ]}|| |g  q$t|g |t|d||< q|W S  tyc } zt	dddt
|t d i W  Y d}~S d}~ww )zCreate kernel information JSONr  )stack_tracespost_grad_nodespre_grad_nodesr*  r  create_kernel_information_jsonr  N)r   r   r   r   keysr   ru  r|  r  r   rn   r  r  )r  all_kernelsresultkernel_namer  r  	post_noder  r1   r1   r2   r    s<   
	r  node_scheduler  rs   c           
   
      sb  zddl m}m} td7 ag }| dt }|rJt| tsJ t|g  | jr6| jj	}| vr5 
| n  fdd| jD  t|  }n:t| tsQJ t }| D ])}|||fvr|jdurt|g  ||j    fdd|jjD  qVt|}t|g | tW S  ty }	 ztdd	d
t|	t d W Y d}	~	dS d}	~	ww )z
    Set the mapping between `kernel_name` and the post_grad nodes in `node_schedule`.

    Returns a unique int debug handler for each call to this function.
    r   )DisableReductionEnableReduction:c                 3        | ]}|j  vr|j V  qd S rc   r+   rd   r   curr_node_infor1   r2   rh   i      
z:set_kernel_post_grad_provenance_tracing.<locals>.<genexpr>Nc                 3   r  rc   r  r  r  r1   r2   rh   {  r  r*  r  'set_kernel_post_grad_provenance_tracingr  )codegen.simd_kernel_featuresr  r  r   r@   r   r   
setdefaultorigin_noder+   r|   r  r   r|  get_stack_tracesr   rJ   ru  r   r  r   rn   r  r  )
r  r  rs   r  r  r  origin_node_namestack_traces_setrW   r  r1   r  r2   r  G  s`   

	r  rP   rb   c            
      O   s   d}t j|st | dtdtfdd}t|| |f\}}d}| d| dtt d	}t|d
}t	
||f| W d   n1 sFw   Y  ttjr`d| d|d}	t|	 dS dS )z
    This function is used to save arguments for a compile_fx_inner function call
    to the file system.  Later on one can replay the compile_fx_inner call
    with the saved arguments using load_args_and_run_compile_fx_inner.
    z/tmp/inductor_saved_argsre   r.   c                 S   s    t | tjrtt| | jS | S )z
        Pickle FakeTensor will result in error:
        AttributeError: Can't pickle local object 'WeakValueDictionary.__init__.<locals>.remove'

        Convert all Tensor to metadata. This may also makes pickle faster.
        )r@   rq   r}  r  r   r`   r  r1   r1   r2   handle_tensor  s   z5save_args_for_compile_fx_inner.<locals>.handle_tensorcompile_fx_inner/_z.pklwbNz3
Arguments for a compile_fx_inner call is saved to z. To replay the call,
run the following:

from torch._inductor.debug import load_args_and_run_compile_fx_inner
load_args_and_run_compile_fx_inner(z
)
        )r   r   r   mkdirr   r   nextsave_args_cntr   picklerw  r;   r  r   r   rF   )
rP   rb   folderr  args_to_savekwargs_to_savefn_namer   r   messager1   r1   r2   save_args_for_compile_fx_inner  s$   
r  r   c              	   C   s   ddl m} t| d}t|\}}W d    n1 sw   Y  dtdtfdd}tjjdd	}|6 t	
d
d t|||f\}}||i |W  d    W  d    S 1 s]w   Y  W d    d S 1 smw   Y  d S )Nr   )r  rbre   r.   c                 S   s0   t | trtjj| jj| jj| jj	| j
S | S rc   )r@   r  rq   _dynamotestingrand_stridedr  r  rR  rE   r`   r  r1   r1   r2   r    s   
z9load_args_and_run_compile_fx_inner.<locals>.handle_tensorT)allow_non_fake_inputs	save_argsF)torch._inductor.compile_fxr  r   r  loadr   rq   r0  FakeTensorModer   r	   r   )r   r  r   rP   rb   r  	fake_moder1   r1   r2   "load_args_and_run_compile_fx_inner  s   Rr  )package_pathfunc.exported_programinductor_configsr  c             
   C   s  ddl m} ddlm} ddlm} ddlm} |jj	}|j
dd}	t|	tjjs+J |j\}
}z[|r@|jjdkr@||d	|d
 |r|jjdkrt|	}t|j}t|}|||d |d |d
\}}t|}tjj||dd}| |j
dd|||ddd | |	|
||||dW S  |y } z||dd|d td |d }~w ty } z|rd}|jjdkrd}||d	||d |d }~ww )Nr   )AccuracyError)dump_to_minify)r   )_aoti_flatten_inputsF)check_guards   aot_inductor)options   r   )strictTaccuracy)r  r  load_and_runcheck_accuracy)r  r  r&  aot_inductor_accuracyminify)commandr"  zAccuracy failedrun)torch._dynamo.debug_utilsr  torch._dynamo.repro.aotir  torch._inductorr   r  r  r!  dump_aoti_minifiermoduler@   rq   r   r   example_inputsrepro_levelr   deepcopyrA   exportr;   r<   r  )r  r  r  r  r  r  r   r  use_minifierrL   rP   rb   gm_copyexample_inputs_copyconfig_copyflat_example_inputstuple_inputsflattened_epr  r*  r1   r1   r2   aot_inductor_minifier_wrapper  s   




	
r<  )FNrQ   r   )F)ro   r   r   dataclasses	functoolsr<  r#  rv  r   r   os.pathr  r  r/   r  collections.abcr   r   typingr   r   r   r   r   unittest.mockr	   rq   functorch.compiler
   r   r   r   torch._dynamo.repro.after_aotr   torch._dynamo.utilsr   r.  r   torch._loggingr   torch._logging._internalr   torch._utils_internalr   torch.fx.graph_moduler   torch.fx.passes.shape_propr   r   torch.fx.passes.tools_commonr   torch.typesr   torch.utils._ordered_setr   torch.utils._pytreer   rO  r   r   r   	schedulerr   r    r!   r"   r#   virtualizedr$   r   rT   r;   r%   r|  rp  rn   objectr  r&   r{   r'   rB   r  r  r~  rp   r*   rC  cacher3   rM   rr   r=   r   r   r   r&  r   r   r   r   r   r   r   r   r   r   r  r  r  r  r  rA   r  r  r  r  	dataclassr  r$  r  r  r  r  r  r  r  r  r4  ExportedProgramr<  r1   r1   r1   r2   <module>   s*  
 



.e

#



 +
*  .
@

a

<"&-
K.

