o
    "i                     @  s  U d dl mZ d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
Z
d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlmZ d dlmZ d dlmZ d dlmZmZmZmZmZm Z m!Z!m"Z"m#Z#m$Z$m%Z%m&Z&m'Z'm(Z( d dl)m*Z*m+Z+ d dlm,Z, d dl-Z-d dl.Z.d dl/Z.d dl0m1  m2Z3 d d	l4m5Z5 d d
l6m7Z7 d dl8m9Z9 d dl:m;Z; d dl<m=Z= d dl>m?Z?m@Z@mAZAmBZB d dlCmDZDmEZE d dlFmGZGmHZH ddlImJZJ ddlKmLZLmMZN eOePZQe&dZRee-jSe-jSf ZTdZUdZVeVeVd @ d kreVdksJ ddd ZWdKddZXG dd  d e-jYZZdLdMd'd(Z[e\ddNd*d+Z]dOd/d0Z^d1d2 Z_d3d4 Z`dPd8d9ZadQd=d>ZMd?d@ ZbdRdDdEZcdSdHdIZddTdLdMZedNdO ZfdPdQ ZgdUdVdTdUZh	RdWdXdZd[Zi	RdYdVd_d`ZjdZdddeZkd[dhdiZld\djdkZmdldm Zndndo Zoe+dpZpe&dqdrdsZqG dtdu due#eepeqf Zrd]dxdyZsdzd{ Ztd|d} Zud~d Zv	d^d_ddZwdd Zxd`ddZydd ZzdaddZ{dd Z|dbddZ}dcddZ~ddddZdeddZdd Zdd ZdfddZg Zded< dgddZdd Zejd^ddZdhddZe\ddd ZG dd de!ZG dd dZG dd deZejdd ZG dd dZe\ddNddZdNddÄZdiddǄZdjddʄZdd̜dd΄ZddЄ Zdd҄ ZddԄ Zddք ZG dd؄ d؃Zddڄ Zdd܄ Zddބ Zdd Zejdd Zd^ddZdd Zdd Zdd Zdd Zdd ZdkddZejdd Zdd Ze\ddd Ze\ddd Zdd Zdd Zdd  ZdNddZdlddZdd ZG d	d
 d
ejZdd Zdd Zdd ZdmddZdnddZejG dd dZejdd ZdoddZdVddZdVd d!Zd"d# Zdpd%d&Zd'd( Zdqd+d,Zdrd.d/Zd0d1 ZÐdsd3d4ZĐdtd6d7ZŐdud:d;Zddddːd<dvdGdHZǐdIdJ ZdS (w      )annotationsN)datetime)StringIO)Path)AnyCallableDictGenericIterableList
NamedTupleOptionalProtocolSetTupleTypeVarUnion
ValuesView)Concatenate	ParamSpec)mock)get_interface_for_device)detect_fake_mode)
DeviceType)	EventList)	ShapeProp)CeilDivCleanDivFloorDivModularIndexing)make_symbolSymT)bound_sympyValueRanges   )config)	cache_dirceildiv_T   @      zmust be power of 2c                 C  s   | t  d t  @ S )z/Round up to the nearest multiple of ALIGN_BYTESr$   )ALIGN_BYTES)nbytes r.   S/home/ubuntu/SoloSpeech/.venv/lib/python3.10/site-packages/torch/_inductor/utils.py_alignI      r0   v
sympy.Exprc                 C  s<   t | tjtjfrttt| jS t | tpt	| t
t
kS )z:v can be statically proven to be a multiple of ALIGN_BYTES)
isinstancesympyAddMaxallmap_is_alignedargsaligngcdr,   )r2   r.   r.   r/   r:   N   s   r:   c                   @  s$   e Zd ZdZdZdZedd ZdS )r<   z<Symbolically round up to the nearest multiple of ALIGN_BYTESr$   Tc                 C  s,   t |ttjfrtt|S t|r|S d S N)r4   intr5   Integerr0   r:   )clsvaluer.   r.   r/   eval[   s
   z
align.evalN)__name__
__module____qualname____doc__nargs
is_integerclassmethodrD   r.   r.   r.   r/   r<   U   s    r<      d   fnCallable[[], Any]returnfloatc                   s  |   t j  t jtdt jdd}t jjdd}t jjdd}|  tdD ]	}|  |   q)|  t j  |	|d }t
dt|| }t
dt|| }	t|D ]}|   qYt jjt jjjgd}
t|	D ]	}|  |   qot j  W d	   n1 sw   Y  td
 t|
 jddd tdd |
 D }t||	 dkrtdt||	t||	  t fddt|D }|  | }td t|jdd tdd |D d |	 }td| |S )aR  
    Returns benchmark results by examining torch profiler events.
    This could be more accurate as it doesn't count CPU side overhead.
    However, this also requires manually excluding irrelevant event, e.g.
    vectorized_elementwise_kernel which is used to fill L2 cache,
    various CUDA events, etc, so could also be fragile.
    g    Acuda)dtypedeviceT)enable_timing   r$   )
activitiesNz
raw eventsself_cuda_time_total)sort_by	row_limitc                 S  s&   g | ]}|j tjkr|jd kr|qS )zContext Sync)device_typer   CUDAname.0eventr.   r.   r/   
<listcomp>   s
    z,do_bench_using_profiling.<locals>.<listcomp>r   zYFailed to divide all profiling events into #repeat groups. #CUDA events: %d, #repeats: %sc                   s    g | ]\}}|  d kr|qS r   r.   )r`   ira   num_event_per_groupr.   r/   rb      s
    zprofiling time breakdown)r[   c                 s  s    | ]}|j V  qd S r?   )device_time_totalr_   r.   r.   r/   	<genexpr>   s    z+do_bench_using_profiling.<locals>.<genexpr>g     @@zprofiling results: %s ms)torchrR   synchronizeemptyr@   Eventrecordrangezero_elapsed_timemaxprofilerprofileProfilerActivityr]   logdebugkey_averagestabler   eventslenRuntimeError	enumerate_build_treesum)rN   warmuprepcachestart_event	end_event_estimate_msn_warmupn_repeatprd   filtered_eventsactual_eventsresr.   re   r/   do_bench_using_profilingc   sh   	




r   boolc               
   C  s   zddl m}  tjdd | d uotttjdd dW S  ty&   Y dS  t	y@ } zdt
|v s5J W Y d }~dS d }~ww )	Nr   )	roi_alignztorchvision::nmsMetatorchvisionr   Fztorchvision::nms does not exist)torchvision.opsr   ri   _C%_dispatch_has_kernel_for_dispatch_keyhasattrgetattropsImportErrorr{   str)r   er.   r.   r/   has_torchvision_roi_align   s   
r   rT   "Union[Optional[torch.device], str]torch.devicec                 C  s`   | d u r
t djS t| trt | } | jdvr.| jd u r.t| j}t j| j|j	 dS | S )Ng        )cpumeta)index)
ri   tensorrT   r4   r   typer   r   Workercurrent_devicerT   device_interfacer.   r.   r/   decode_device   s   


r   c                 C  s   t tj| tdS Nr$   )	functoolsreduceoperatormulr5   rA   itr.   r.   r/   sympy_product      r   c                 C  s2   t | t |ks
J ttdd t| |D S )Nc                 s  s    | ]	\}}|| V  qd S r?   r.   )r`   abr.   r.   r/   rh      s    zsympy_dot.<locals>.<genexpr>)rz   r5   expandr~   zip)seq1seq2r.   r.   r/   	sympy_dot   s   r   r   Iterable[_T]ValuesView[_T]c                 C  s   dd | D   S )Nc                 S  s   i | ]}t ||qS r.   )idr`   xr.   r.   r/   
<dictcomp>   s    zunique.<locals>.<dictcomp>)valuesr   r.   r.   r/   unique      r   numerUnion[int, sympy.Expr]denomc              	   C  sr   t | tjst |tjrtt| t|S t | tr!t |ts4J |  dt|  d| dt| t| |S )Nz: , )r4   r5   Exprr   sympifyr@   r   runtime_ceildiv)r   r   r.   r.   r/   r'      s    
r'   c                 C  s   | d u rdS t | dd }i dddddd	d
ddddddd	ddddddddddddddddd d!d"d#d$d%d&}t| D ]}|||< qOt| t r]| S d'||  S )(Nz*i8.rY   r   i1
float8e4nvfp8e4nvfloat8e5fp8e5float8e4b15fp8e4b15float8e4b15x4
fp8e4b15x4float8_e4m3fnfloat8_e5m2float16fp16bfloat16bf16float32fp32float64fp64int8i8int16i16int32i32int64i64uint8u8uint16u16u32u64)uint32uint64*)r   splitlistr   r4   )key	dtype_strtysr2   r.   r.   r/   _type_of   sX   	

r   lst"Iterable[Union[int, torch.SymInt]]List[sympy.Expr]c                 C  s   dd | D S )z
    Gets the shape and stride of a tensor. For non-symbolic tensors, this is
    trivial. But for symbolic tensors, we need to map from SymIntNode into
    sympy.Expr.
    c                 S  s*   g | ]}t |tjr|jjnt|qS r.   )r4   ri   SymIntnodeexprr5   rA   r`   rd   r.   r.   r/   rb     s    z-convert_shape_to_inductor.<locals>.<listcomp>r.   r   r.   r.   r/   convert_shape_to_inductor  s   r    Iterable[Union[int, sympy.Expr]]List[Union[int, torch.SymInt]]c                   s   ddl m   fdd| D S )zz
    Takes a list of shapes from Inductor and converts them into symints (or just
    ints if all shapes are static).
    r$   Vc                   sB   g | ]}t |tr|nt |tjrt|n	 jjjj|d dqS )N)hint)r4   r@   r5   rA   graphsizevars	shape_envcreate_symintnoder   r   r.   r/   rb   $  s    

z+convert_shape_to_symint.<locals>.<listcomp>)virtualizedr   r   r.   r   r/   convert_shape_to_symint  s   
r  optorch._ops.OpOverloadc                 C  s(   t | tjjs	J tdd | jjD S )z-
    Does this op overload have aliasing
    c                 s  s    | ]}|j d uV  qd S r?   )
alias_infor`   r   r.   r.   r/   rh   3  s    zis_view.<locals>.<genexpr>)r4   ri   _ops
OpOverloadany_schema	argumentsr  r.   r.   r/   is_view.  s   r  c                 C  sh   | j dksdS t| jtjjs| jtju sdS | jtju s"t| jr,t	dd | j
D S tjj| jjv S )Ncall_functionFc                 s      | ]}t |V  qd S r?   )is_pointwise_use)r`   ur.   r.   r/   rh   @      z#is_pointwise_use.<locals>.<genexpr>)r  r4   targetri   r
  r  r   getitemr  r8   usersTag	pointwisetags)user.   r.   r/   r  6  s   
r  c           
      C  s   t j }g }g }t|D ] \}}t|t jr(||d|  || q|| qtdd |	 D s;J |
| t||}t| jjdkrZt| jjd jdkrZ|f}|| t ji |}	|	|fS )Nargc                 s  s    | ]
}t |tj V  qd S r?   r4   ri   Tensorr   r.   r.   r/   rh   O  s    z$gen_gm_and_inputs.<locals>.<genexpr>r$   r   r  )ri   fxGraphr|   r4   r  appendplaceholderr8   r   r  tuplerz   r  returnsr   r   outputGraphModule)
r  r;   kwargsgg_argsa_argsnr  r   gmr.   r.   r/   gen_gm_and_inputsE  s    

r.  rR   r   c                 C  s,   | dkrd S t | }| r|  d S d S Nr   )r   is_availablerj   r   r.   r.   r/   rj   \  s   rj   modelCallable[..., Any]timesr@   c                 C  sT   t | td t }t|D ]
}| | }t | qt }|d us&J || S )Ni9  )rj   ri   manual_seedtimeperf_counterrn   )r1  example_inputsr3  rT   t0r   resultt1r.   r.   r/   timedd  s   

r;  r.   
         ?c                   sD   t  fddt|D }t | }t|| d |S )Nc                   s   g | ]	}t  qS r.   )r;  )r`   r   r;   rT   rN   r3  r.   r/   rb   v      z%print_performance.<locals>.<listcomp>z.6f)ri   r   rn   medianprint)rN   r;   r3  repeatbaselinerT   timingstookr.   r>  r/   print_performances  s   "rF  objr   methodc                   s$   t | |  t| | fdd dS )zKReplace obj.method() with a new method that returns a precomputed constant.c                     s    S r?   r.   r.   r9  r.   r/   <lambda>  s    z#precompute_method.<locals>.<lambda>N)r   setattr)rG  rH  r.   rI  r/   precompute_method|  s   rL  methods	List[str]c                 C  s   |D ]}t | | qdS )zFReplace methods with new methods that returns a precomputed constants.N)rL  )rG  rM  rH  r.   r.   r/   precompute_methods  s   rO  c                 C  s   t | |kt | |k  S r?   )r@   )r   r   r.   r.   r/   cmp     rP  c                 C  s&   t | dkrt| | d g| S | S )Nr$   r   )rz   r   )r   sizer.   r.   r/   pad_listlike  s   rS  c                 C  s$   t | dkrg S dd }t| |dS )Nr   c                 S  s   t | tr| S |  S r?   )r4   r   get_name)elemr.   r.   r/   	sort_func  s   
ztuple_sorted.<locals>.sort_funcr   )rz   sorted)r   rV  r.   r.   r/   tuple_sorted  s   rY  PRVT)	covariantc                   @  s$   e Zd ZedddZdd
dZdS )CachedMethodrP   Nonec                 C     d S r?   r.   selfr.   r.   r/   clear_cache     zCachedMethod.clear_cacher;   P.argsr(  P.kwargsr[  c                 O  r_  r?   r.   ra  r;   r(  r.   r.   r/   __call__     zCachedMethod.__call__N)rP   r^  )r;   rd  r(  re  rP   r[  )rE   rF   rG   staticmethodrb  rg  r.   r.   r.   r/   r]    s    r]  !Callable[Concatenate[Any, P], RV]CachedMethod[P, RV]c                   s<   d j  dt  fdd}fdd}||_|S )N___cachec                   s$   t | st|  |  t| S r?   )r   rK  r   r`  rN   r   r.   r/   wrapper  s   

zcache_on_self.<locals>.wrapperc                   s   t |  rt|   d S d S r?   )r   delattrr`  rW  r.   r/   rb    s   
z"cache_on_self.<locals>.clear_cache)rE   r   wrapsrb  )rN   ro  rb  r.   rn  r/   cache_on_self  s   rr  c                 C  sJ   ddl m} t| trttjdd | D t S t| |j	r"| j
S t S )Nr$   irc                 S  s$   g | ]}t |d r|jr|jjqS )r   )r   r   origins)r`   r   r.   r.   r/   rb     s    z%aggregate_origins.<locals>.<listcomp>) rt  r4   r   r   r   r   or_setExternKernelru  )node_schedulert  r.   r.   r/   aggregate_origins  s   
	r{  c                 C  s   t | }|dkrdd |D }tt|}nH|dkrPg }|D ]*}|jdkrHd|jv rH|jd d }t|d tr@||d  q||d j qtt|}n|d	kr\d
d |D }nt	|}d
dg| S )Noriginal_atenc                 S  s<   g | ]}|j d krd|jv r|jd dur|jd jjqS )r  r|  N)r  r   _overloadpacketrE   r`   originr.   r.   r/   rb     s    

z)get_fused_kernel_name.<locals>.<listcomp>ri   r  source_fn_stackrY   r$   inductor_nodec                 S  s   g | ]
}|j d kr|jqS r  )r  r^   r~  r.   r.   r/   rb     s    r   fused)r{  rX  rx  r  r   r4   r   r"  rE   NotImplementedErrorjoin)rz  descriptive_namesall_originssourcesr  	source_fnr.   r.   r/   get_fused_kernel_name  s.   r  c                 C  s  t | }dd |D }tt}tt}|D ]4}d|jv r5|jd d ur5t|jd j}|| |j d|jv rK|jd d d }|| |j q|j	 dd
t|  dd
t|  d	}g }	t| D ]\}
}|	|j	 d
|
 dd
t|  qn|d
|	fS )Nc                 S  s   g | ]	}|j d kr|qS r  r  r~  r.   r.   r/   rb     r?  z'get_kernel_metadata.<locals>.<listcomp>r|  	from_noder   z Source Nodes: [r   z], Original ATen: [] z => 
)r{  collectionsdefaultdictr   r   r   r}  r"  r^   commentr  rX  keysitems)rz  ro  r  inductor_nodesfrom_node_dictoriginal_aten_dictr   r   metadatadetailed_metadataoriginal_nodenodesr.   r.   r/   get_kernel_metadata  s,   


r  initial_queueIterable[torch.fx.Node]Set[torch.fx.Node]c                 C  sZ   t | } t| }| r+|  }|jD ]}|r||rq||vr(|| | | q| s
|S )zJReturns the set of nodes whose values depend on those within initial_queue)r   rx  popr  addr"  )r  skip_filterdominated_setr   userr.   r.   r/   dominated_nodes  s   


	r  c                   sb   dd l }ddlm   fddfdd| D }fdd| D }t|jg ||R  S )	Nr   r$   rs  c                   sD   t |  jr| jS t |  jr| jS t |  jo!t |  jS r?   )r4   	TensorBoxdata
StorageBoxIRNode	Pointwise)r,  rt  is_unrealized_noder.   r/   r  (  s
   

z*gather_origins.<locals>.is_unrealized_nodec                      g | ]	} |r|j qS r.   ru  )r`   valr  r.   r/   rb   /  r?  z"gather_origins.<locals>.<listcomp>c                   r  r.   r  )r`   r  r  r.   r/   rb   0  r?  )	itertoolsrv  rt  r   rx  chain)r;   r(  r  kwarg_originsarg_originsr.   r  r/   gather_origins#  s   r  r   c                 C  s   t | tjr	| jS t | tjrdtt| jS t | tj	r'dtt| jS t | t
ttfr@| jj ddtt| j dS t| S )z
    Normal sympy str is very slow, this is a lot faster.  The result are
    somewhat worse, as it doesn't do as much simplification.  So don't
    use this for final codegen.
    z + z * (r   ))r4   r5   Symbolr^   r6   r  r9   	sympy_strr;   Mulr   r   r   funcrE   r   )r   r.   r.   r/   r  4  s   "r  c                 C  s>   ddl m} tjrt|jdd  }r|jdkrt| S t	 S )Nr$   r   current_node
index_expr)
r  r   r%   compute_all_boundsr   interpreterr  r"   r#   unknown)r   r   fx_noder.   r.   r/   get_bounds_index_exprF  s   
r  prefixr!   idxsympy.Symbolc                 C  s   | t jksJ t| |dddS )9
    Used to generate an integer-nonnegative symbol.
    Tintegernonnegative)r!   SIZEr    )r  r  r.   r.   r/   sympy_index_symbol_with_prefixT  s   r  c                 C  s   | st jot jS r?   )r%   debug_index_assertsassert_indirect_indexing)checkr.   r.   r/   generate_assert`     r  r^   c                 C  s    | d dksJ t j| dddS )r  r   sTr  )r5   r  r^   r.   r.   r/   sympy_index_symbold  s   r  replacementsDict[sympy.Expr, Any]c                   s*   dd  t |  fdd| D S )z
    When the passed replacement symbol v is a string, it is converted to a symbol with name v that
    have the same replaced expression integer and nonnegative properties.
    c                 S  s2   t | tjsJ t |trtj|| j| jdS |S )Nr  )r4   r5   r   r   r  rJ   is_nonnegative)replacedreplacementr.   r.   r/   	to_symbolv  s   
zsympy_subs.<locals>.to_symbolc                   s   i | ]
\}}| ||qS r.   r.   )r`   kr2   r  r.   r/   r     s    zsympy_subs.<locals>.<dictcomp>)r5   r   xreplacer  )r   r  r.   r  r/   
sympy_subsp  s   
r  r   c                 C  s:   t | tjpt | tjotdd t|  |  D S )Nc                 s  r  r?   is_symbolicr   r.   r.   r/   rh     r  zis_symbolic.<locals>.<genexpr>)	r4   ri   r   r  r  r  r  rR  stride)r   r.   r.   r/   r    s    r  r;   c                  G     t dd | D S )Nc                 s  r  r?   r  r	  r.   r.   r/   rh     r  z"any_is_symbolic.<locals>.<genexpr>r  )r;   r.   r.   r/   any_is_symbolic  r   r  c                 C  sv   ddl m} h d}t r|h d | jjD ]}t|j|v r&|  S |j	
d }d ur8||r8|  S qd S )Nr   )free_unbacked_symbols>	   aten._assert_scalaraten._local_scalar_denseaten.multinomial.defaultfbgemm.dense_to_jagged.default%fbgemm.jagged_to_padded_dense.default,aten._fused_moving_avg_obs_fq_helper.default7aten._fused_moving_avg_obs_fq_helper_functional.defaultrun_with_rng_staterun_and_save_rng_state>   aten.scatter.srcaten.scatter_add_aten.scatter.reduceaten.index_put.defaultaten.index_put_.defaultaten.scatter_reduce.twoaten.scatter_add.defaultaten.scatter_reduce_.twoaten.scatter.value_reduceaten.scatter_reduce.two_outaten._unsafe_index_put.defaultr  )%torch.fx.experimental.symbolic_shapesr  ri   $are_deterministic_algorithms_enabledupdater   r  r   r  r   get)r-  r  forbidden_setr   r  r.   r.   r/   %get_first_incompatible_cudagraph_node  s   r  c                 C  s   t | d uS r?   )r  )r-  r.   r.   r/   has_incompatible_cudagraph_ops     r  r-  torch.fx.GraphModulec                 C  s&   t tt| jj}|jdksJ |S )z$Get the output node from an FX graphr&  )nextiterreversedr   r  r  )r-  	last_noder.   r.   r/   output_node  s   r  z	List[Any]_registered_cachesc                 C  s0   t | dr
t| jst|  dt|  | S )zq
    Use this decorator to register any caches that should be cache_clear'd
    with fresh_inductor_cache().
    cache_clearz# does not have a cache_clear method)r   callabler  AttributeErrorr  r"  rG  r.   r.   r/   clear_on_fresh_inductor_cache  s   
r  c                  C  s   t D ]} |   qdS )z&
    Clear all registered caches.
    N)r  r  r
  r.   r.   r/   clear_inductor_caches  s   
r  c              	   #  s   t   t }zzktjtjd|iR tj	|d tjtjd i1 dV  t
| trPt| dks9J dtj rPt }|  fdd|D  W d   n1 sZw   Y  W d   n1 siw   Y  t| W n ty   td	|  w W t   dS t   w )
z
    Contextmanager that provides a clean tmp cachedir for inductor.

    Optionally, pass a dict as 'cache_entries' to get a list of filenames and sizes
    generated with this cache instance.
    TORCHINDUCTOR_CACHE_DIRtritonTRITON_CACHE_DIRNr   z!expected empty cache_entries dictc              	     s,   i | ]}d |vr|t jt j |qS ).lock)ospathgetsizer  )r`   ftriton_cache_dirr.   r/   r     s
    z(fresh_inductor_cache.<locals>.<dictcomp>z(on error, temporary cache dir kept at %s)r  tempfilemkdtempr   patchdictr  environr  r  r4   rz   existslistdirr  shutilrmtree	Exceptionru   warning)cache_entriesinductor_cache_dirfilesr.   r  r/   fresh_inductor_cache  s<   



r%  	List[int]c                 C  s(   | j }tt| }ttt||ddS )NT)r   reverse)__getitem__rn   rz   r   r  rX  )seqgettera_rr.   r.   r/   argsort  s   r,  c                 C  s   t jd| d S )Nr.   rS   )ri   rk   element_sizer-  r.   r.   r/   get_dtype_size
  r1   r/  c                   @  s   e Zd ZU ded< dS )LineContextr   contextN)rE   rF   rG   __annotations__r.   r.   r.   r/   r0    s   
 r0  c                   @  s   e Zd ZdZd-ddZd.ddZd/d
dZd/ddZdd Zdd Z	dd Z
dd Zdd Zdd Zd0ddZd0ddZd0dd Zd1d"d#Zd2d&d'Zd(d) Zd*d+ Zd,S )3IndentedBuffer   r   c                 C  s   g | _ || _d S r?   )_lines_indent)ra  initial_indentr.   r.   r/   __init__  s   
zIndentedBuffer.__init__rP   )tuple[str, list[tuple[int, LineContext]]]c                 C  s   t  }d}g }| jD ]8}t|tr| }|d u rq
nt|tr(|||jf q
t|ts/J || |d |d|	d 7 }q
|
 |fS )Nr$   r  )r   r5  r4   DeferredLineBaser0  r"  r1  r   writecountgetvalue)ra  bufr   linemapliner.   r.   r/   getvaluewithlinemap  s"   




z"IndentedBuffer.getvaluewithlinemapr   c                 C  s   |   \}}|S r?   )rA  )ra  r2   r   r.   r.   r/   r=  ,  s   zIndentedBuffer.getvaluec                 C  s   t  }| jD ]6}t|tr| }|d u rqnt|trqt|ts#J |dr2||d d  q|| |d q| S )N\rY   r  )	r   r5  r4   r:  r0  r   endswithr;  r=  )ra  r>  r@  r.   r.   r/   getrawvalue0  s   




zIndentedBuffer.getrawvaluec                 C  s   | j   d S r?   )r5  clearr`  r.   r.   r/   rE  B     zIndentedBuffer.clearc                 C  
   t | jS r?   )r   r5  r`  r.   r.   r/   __bool__E     
zIndentedBuffer.__bool__c                 C  s   d| j | j  S )Nr  )r6  tabwidthr`  r.   r.   r/   r  H  r  zIndentedBuffer.prefixc                 C  s   |  d d S )Nr  	writeliner`  r.   r.   r/   newlineK  rF  zIndentedBuffer.newlinec                 C  sr   t |tr| j| d S t |tr| j||   d S | r1| j|   |  d S | jd d S Nrv  )r4   r0  r5  r"  r:  with_prefixr  stripra  r@  r.   r.   r/   rL  N  s   

zIndentedBuffer.writelinec                 C  s   |D ]}|  | qd S r?   rK  )ra  linesr@  r.   r.   r/   
writelinesX  s   zIndentedBuffer.writelinesr$   c                   s   t j fdd}| S )Nc                	   3  s<     j  7  _ zd V  W  j  8  _ d S  j  8  _ w r?   r6  r.   offsetra  r.   r/   ctx]  s
   "z"IndentedBuffer.indent.<locals>.ctx)
contextlibcontextmanager)ra  rV  rW  r.   rU  r/   indent\  s   zIndentedBuffer.indentc                 C  s   |  j |7  _ d S r?   rT  ra  rV  r.   r.   r/   	do_indentg  r   zIndentedBuffer.do_indentc                 C  s   |  j |8  _ d S r?   rT  r[  r.   r.   r/   do_unindentj  r   zIndentedBuffer.do_unindentFc                 C  s   t |trJtd}|jD ]}t |ts"|r"t|t|t|  }qt	|r*d}|jD ]}t |tr;| j
| q-t| |t|d   q-d S t|}|rU| }|sYd S | }|dD ]}| | qbd S )Ninfr   r  )r4   r3  rQ   r5  r0  minrz   lstripmathisinfr"  rL  r@   textwrapdedentrstripr   )ra  
other_coderP  rd  r@  r.   r.   r/   splicem  s,   





zIndentedBuffer.splicer  Callable[[Any], Any]c                   s&   t | jd} fdd| jD |_|S )Nr7  c                   s   g | ]} |qS r.   r.   )r`   r@  r  r.   r/   rb     s    z&IndentedBuffer.map.<locals>.<listcomp>)r3  r6  r5  )ra  r  r   r.   rj  r/   r9     s   zIndentedBuffer.mapc                 C  s   t |  d|   dS )Nr  r  )r   r=  r`  r.   r.   r/   __repr__  rQ  zIndentedBuffer.__repr__c                 C  s8   | j |j ksJ t| j d}|| j ||j |S )Nri  )r6  r3  rS  r5  )ra  otherr   r.   r.   r/   __add__  s
   zIndentedBuffer.__add__Nrc   )rP   r9  )rP   r   r>   )F)r  rh  rP   r3  )rE   rF   rG   rJ  r8  rA  r=  rD  rE  rH  r  rM  rL  rS  rZ  r\  r]  rg  r9   rk  rm  r.   r.   r.   r/   r3    s&    









r3  c                      s$   e Zd Z fddZdd Z  ZS )FakeIndentedBufferc                   s   t    d S r?   )superr8  r`  	__class__r.   r/   r8    rF  zFakeIndentedBuffer.__init__c                 C  s$   |dkr
t | |S td| d)Nrq  zTried to call self.z on FakeIndentedBuffer. This bufferis currently used on TritonTemplateKernel to prevent actualwrites to the body without explicitly specifying the body with`TritonTemplateKernel.set_subgraph_body(name)`)object__getattribute__r{   )ra  r^   r.   r.   r/   rs    s
   
z#FakeIndentedBuffer.__getattribute__)rE   rF   rG   r8  rs  __classcell__r.   r.   rp  r/   rn    s    rn  c                 c  s*    zd V  W | t _|t _d S | t _|t _w r?   )sysstdoutstderr)initial_stdoutinitial_stderrr.   r.   r/   restore_stdout_stderr  s   
rz  c                   @  sT   e Zd ZdZdd ZdddZdd
dZdd Zdd Zdd Z	dd Z
dd ZdS )r:  z.A line that can be 'unwritten' at a later timec                 C  s   |  sd}|| _d S rN  )rP  r@  rQ  r.   r.   r/   r8    s   
zDeferredLineBase.__init__rP   Optional[str]c                 C     t )zJReturns either self.line or None to indicate the line has been 'unwritten'r  r`  r.   r.   r/   rg    rc  zDeferredLineBase.__call__r@  r   c                 C  r|  )z3Returns a new deferred line with the same conditionr}  rQ  r.   r.   r/   	_new_line  rc  zDeferredLineBase._new_linec                 C  s   |  | | j S r?   r~  r@  )ra  r  r.   r.   r/   rO    s   zDeferredLineBase.with_prefixc                 C  s   |  | j S r?   )r~  r@  r`  r`  r.   r.   r/   r`    r  zDeferredLineBase.lstripc                 C  s   |  | j| S r?   r  )ra  r   r.   r.   r/   r(    r  zDeferredLineBase.__getitem__c                 C  rG  r?   )r   r@  r`  r.   r.   r/   rH    rI  zDeferredLineBase.__bool__c                 C  rG  r?   )rz   r@  r`  r.   r.   r/   __len__  rI  zDeferredLineBase.__len__N)rP   r{  )r@  r   rP   r:  )rE   rF   rG   rH   r8  rg  r~  rO  r`  r(  rH  r  r.   r.   r.   r/   r:    s    

r:  c                 C  s6   d}t j| j}||k rtjd||dd dS dS )ND   z,Not enough SMs to use max_autotune_gemm mode)min_sms	avail_sms)extraFT)ri   rR   get_device_propertiesmulti_processor_countru   r!  )r   r  r  r.   r.   r/   
is_big_gpu  s   r  c                   C  s   t jpt jpt jS r?   )r%   max_autotunemax_autotune_gemmsearch_autotune_cacher.   r.   r.   r/   use_max_autotune  s   r  allowed_layout_dtypesList[torch.dtype]c                 C  s,   t  o| jjdko| j|v ot| jjpdS )NrR   r   )r  rT   r   rS   r  r   )layoutr  r.   r.   r/   _use_template_for_cuda  s   
r  backendc                 C  s"   |   dd tj  dD v S )Nc                 S  s   g | ]}|  qS r.   )rP  r   r.   r.   r/   rb     s    z)_use_autotune_backend.<locals>.<listcomp>,)upperr%   max_autotune_gemm_backendsr   )r  r.   r.   r/   _use_autotune_backend  s   r  F)enable_int32c                C  s:   t jt jt jg}|rt jt jt jt jg}t| |otdS )NTRITON)ri   r   r   r   r   r  r  )r  r  layout_dtypesr.   r.   r/   use_triton_template  s   r  c           	      C  s   ddl m} |jjj|| | dd}|dks|tjjk rdS ddlm	} t
jjr+dS t
jt
jt
jt
jg}t| |o=td}|rJ| sJtd	 dS |S )
Nr$   r   rY   )fallbackr   F)try_import_cutlassCUTLASSzFailed to import CUTLASS lib. Please check whether _inductor.config.cuda.cutlass_dir is set correctly. Skipping CUTLASS backend for now.)r  r   r   r  	size_hintr%   rR   cutlass_backend_min_gemm_sizecodegen.cuda.cutlass_utilsr  ri   versionhipr   r   r   r   r  r  ru   r!  )	r  mr,  r  r   	gemm_sizer  r  r   r.   r.   r/   use_cutlass_template  s$   r  c                 C  s   t  o| jjdkS r/  )r  rT   r   )r  r.   r.   r/   _use_template_for_cpu  r   r  c                 C  s   ddl m} ddlm} ddlm} t| rtdsdS tj	j
s"dS tjg}|||\}}}	} }}t||	fr9dS t||jrC| }|d|||	| jt d}
| j|v or|
d uor||
jd  d	kor| d
 dkort||jor| S )Nr$   rs  )create_micro_gemm)mm_argsCPPF
micro_gemm)num_threadsr   rY   )rv  rt  codegen.cpp_micro_gemmr  kernel.mm_commonr  r  r  r%   cppweight_prepackri   r   has_free_symbolsr4   BaseViewunwrap_viewrS   parallel_num_threadsregister_blocking
get_strider  is_module_buffer)r  mat1mat2rt  r  r  r  r  r,  r  r  r.   r.   r/   use_cpp_packed_gemm_template  s6   

r  c                   C  s   t   ptdS )NATEN)r  r  r.   r.   r.   r/   use_aten_gemm_kernels7  r  r  c                   @  s8   e Zd ZU edZded< dd Zdd Zdd	 Z	d
S )DebugDirManagerr   r   prev_debug_namec                 C  s   t tj| _d S r?   )r  r  counterr   r`  r.   r.   r/   r8  ?  r  zDebugDirManager.__init__c                 C  s0   t jjj| _| j d| j | _| jt jj_d S )N_tmp_)ri   _dynamor%   debug_dir_rootr  r   new_namer`  r.   r.   r/   	__enter__B  s   zDebugDirManager.__enter__c                 G  s   t | j | jtjj_d S r?   )r  r  r  r  ri   r  r%   r  )ra  r;   r.   r.   r/   __exit__G  s   zDebugDirManager.__exit__N)
rE   rF   rG   r  r<  r  r2  r8  r  r  r.   r.   r.   r/   r  ;  s   
 
r  c              	     s   ddl m} |j g  fdd}tddi9 tj|d| tj	  | |i |}W d    n1 s9w   Y  W d    |fS W d    |fS 1 sUw   Y  |fS )Nr$   GraphLoweringc                   sF    | }t |j}|  W d    |S 1 sw   Y  |S r?   )open__file__r"  read)ra  modr  compile_to_modulesource_codesr.   r/   patched_compile_to_moduleR  s   
z3run_and_get_code.<locals>.patched_compile_to_modulefx_graph_cacheFr  )
r   r  r  r%   r  r   rr  ri   r  reset)rN   r;   r(  r  r  r9  r.   r  r/   run_and_get_codeL  s$   


r  c              	     s   ddl m} g  d fdd}tddi5 tj|d	| tj  | |i |}W d
   n1 s6w   Y  W d
    S W d
    S 1 sNw   Y   S )zLGet the inductor-generated code, but skip any actual compilation or running.r$   r  ra  r  c                   s8   G dd d}| j r|  n|  \}} | | S )Nc                   @  s    e Zd ZdZdd Zdd ZdS )z@get_code.<locals>.patched_compile_to_module.<locals>.DummyModulez4This is empty to replace the generated triton modulec                 S  r_  r?   r.   r`  r.   r.   r/   r8  l  rh  zIget_code.<locals>.patched_compile_to_module.<locals>.DummyModule.__init__c                 _  r_  r?   r.   rf  r.   r.   r/   callo  rc  zEget_code.<locals>.patched_compile_to_module.<locals>.DummyModule.callN)rE   rF   rG   rH   r8  r  r.   r.   r.   r/   DummyModulei  s    r  )cpp_wrappercodegen_with_cpp_wrappercodegenr"  )ra  r  coder   r  r.   r/   r  h  s
   
z+get_code.<locals>.patched_compile_to_moduler  Fr  N)ra  r  )	r   r  r%   r  r   rr  ri   r  r  )rN   r;   r(  r  r  r   r.   r  r/   get_codeb  s"   


r  c                 O  sJ   t | g|R i |}dt|  krdks!n J dt| |d S Nr$      z%expected one or two code outputs got r   )r  rz   )rN   r;   r(  r  r.   r.   r/   get_triton_code  s
   r  c                 O  sN   t | g|R i |\}}dt|  krdks#n J dt| |d S r  )r  rz   )rN   r;   r(  r   r  r.   r.   r/   run_and_get_triton_code  s
   r  c              	   c  sN    ddl m} |j|  }zt|||j| < dV  W ||j| < dS ||j| < w )z
    Override the lowering of aten_op with override_fn.
    The first argument of override_fn is the original lowering fn.
    r   )loweringN)torch._inductorr  	loweringsr   partial)aten_opoverride_fnr  orig_fnr.   r.   r/   override_lowering  s   
r  c                   s4   ddl m} |j  fdd}tjj|d|S )zr
    Add hook functions to be called at the beginning and end of Scheduler.__init__.
    Used for unit tests.
    r   )	Schedulerc                   s&   | |  | |}r| | |S r?   r.   )	schedulerr  outr  post_fnpre_fnr.   r/   ro    s
   


z(add_scheduler_init_hook.<locals>.wrapperr8  )torch._inductor.schedulerr  r8  unittestr   r  rr  )r  r  r  ro  r.   r  r/   add_scheduler_init_hook  s   r  c                 C  s"   t jr
t|  dS t|  dS )z
    Warnings that will be actionable for PyTorch developers, but not
    end users.  Allows us to easily disable them in stable releases but
    keep them on for nightly builds.
    N)r%   developer_warningsru   r!  info)msgr.   r.   r/   developer_warning  s   r  c                  C  s   z/t jd} | d tt jk r.tt j| d  dkr.t j| d  d dkr.t j| d  W S W n	 ty8   Y nw t jD ]}|drM|tdd   S q<dS )a  
    An experimental API used only when config.benchmark_kernel is true.

    The benchmark name is only available at codegen time. So we can not
    directly call it in benchmark_all_kernels which is run after codegen.

    The function assumes the argument after --only is the benchmark name.
    It works for torchbench.py/hugginface.py/timm_models.py. But for ad-hoc
    scripts, this function may return None.

    There are 2 flavors of --only argument we need handle:
    1. --only model_name
    2. --only=model_name
    z--onlyr$   r   -z--only=N)ru  argvr   rz   
ValueError
startswith)r  r  r.   r.   r/   get_benchmark_name  s   

r  c                 C  r  )Nc                 s      | ]}|d kV  qdS )r$   Nr.   r   r.   r.   r/   rh     r  zis_ones.<locals>.<genexpr>r8   r  r.   r.   r/   is_ones  r   r  c                 C  r  )Nc                 s  r  )r   Nr.   r   r.   r.   r/   rh     r  zis_zeros.<locals>.<genexpr>r  r  r.   r.   r/   is_zeros  r   r  c                 C  r  )Nc                 s  s,    | ]}t |tjr|jtd kV  qdS )r   N)r4   ri   r  rT   )r`   itemr.   r.   r/   rh     s    

z is_cpu_device.<locals>.<genexpr>r  )inputsr.   r.   r/   is_cpu_device  s   r  r  torch.dtypec                 C  s&   t | tjs
J d| jrtjS tjS )Nz8only support sympy.Expr as input to get_sympy_Expr_dtype)r4   r5   r   rJ   ri   r   r   )r  r.   r.   r/   get_sympy_Expr_dtype  s   r  c                 o  sN    | r"t jj|i |}|V  W d    d S 1 sw   Y  d S d V  d S r?   )ri   rr   rs   )should_profiler;   r(  r   r.   r.   r/   maybe_profile  s   "
r  c                  C  s   t jj} | dk rt } | S r   )r%   r  threadsri   get_num_threads)r  r.   r.   r/   r  
  s   r  c                 C  s   ddl m}m} | tjtjtjfv sJ t|j	
drEddlm} | }| tjtjfv r3|| |S tjjjjr?|tj|S |tj|S | tjtjfv rQ|| S tjjjjr\|tjS |tjS )Nr   )get_max_simd_tflopsget_max_tensorcore_tflops
clock_rate)max_clock_rate)triton.testingr  r  ri   r   r   r   inspect	signature
parametersr  torch._utils_internalr  backendsrR   matmul
allow_tf32)rS   r  r  r  sm_clockr.   r.   r/   get_device_tflops  s   


r  c                  C  s   ddl m}  |  S )Nr   get_dram_gbps)r  r  r  r.   r.   r/   get_gpu_dram_gbps-  s   r  c                  C  s"   ddl m}  | jjdddS )Nr   drivermax_shared_mem)triton.runtimer  activeutilsr  r  r  r.   r.   r/   get_gpu_shared_memory4  s   r  c                 C  s
   |  dS )Nwelford)r  reduction_typer.   r.   r/   is_welford_reduction:  rI  r  c                 C  s   t | rdS dS )N   r$   )r  r  r.   r.   r/   reduction_num_outputs>  r  r!  c                   C  s   t  dkS )NLinux)platformsystemr.   r.   r.   r/   is_linuxB  r  r%  itrIterable[Any]c                 C  r  )Nc                 s  s$    | ]}t |tjo|j V  qd S r?   )r4   r5   r   	is_numberr   r.   r.   r/   rh   G  s   " z#has_free_symbols.<locals>.<genexpr>r  )r&  r.   r.   r/   r  F  r   r  c                  G  s   ddl m} | D ]V}t||jr(t|j s$t|jdr't|j r' dS qt||j	|j
|jfrOt|dr=t|ds?J t| sKt| rN dS qt||jsVqtdt| dS )Nr$   rs  r  Tget_sizezunexpected type for is_dynamic F)rv  rt  r4   r  r  r  r)  r   r  r  r  ComputedBufferr  	TypeErrorr   )r;   rt  tr.   r.   r/   
is_dynamicJ  s&   
r-  c                   @  s   e Zd ZdZdZdS )PlaceholderKERNEL_NAMEDESCRIPTIVE_NAMEN)rE   rF   rG   r/  r0  r.   r.   r.   r/   r.  `  s    r.  c                 C  s
  ddl m} tjddddm}t }t }t|t|dj|  t	d|j
 |d	 t	|j
|d	 t }| |j
 t | }	||j
 |j
  |  t	d
|j
 |d	 t	|j
|d	 | | k}
td||j|
|	 W d    d S 1 s~w   Y  d S )Nr$   )stable_topological_sortwzutf-8F)modeencodingdelete)r-  	fake_modezBefore:
)filezAfter:
zZ%s, save before/after graph to %s, graph before/after are the same = %s, time elapsed = %s)pattern_matcherr1  r  NamedTemporaryFileior   r   r   	propagaterA  r   r   nowlint	recompiler=  ru   r  r^   )r  r-  inpr  r1  r  	before_ioafter_io
start_timetime_elapsedr,  r.   r.   r/   pass_execution_and_savej  s:   


"rD  c                 C     ddl m} t| |jkS Nr$   rs  )rv  rt  r   _CollectiveKernelr   rt  r.   r.   r/   is_collective     rI  c                 C  rE  rF  )rv  rt  r   _WaitKernelrH  r.   r.   r/   is_wait  rJ  rL  dynamo_gm_num_inputsaot_fw_gm_num_inputsc                 C  s   t jjjrdnd}||  | S )zaComputes the number of inputs to the aot fw graph which have fixed addresses (params and buffers)r  r   )ri   
_functorchr%   functionalize_rng_ops)rM  rN  num_rng_seed_offset_inputsr.   r.   r/   num_fw_fixed_arguments  s   rR  fx_gc                 C  sb   dd }d}g }| j jD ]}|jdkr ||r|| |d7 }q|ttt|ks-J t|S )z>
    Infers which inputs are static for a backwards graph
    c                 S  s   d| j vod| j vod| j vS )Ntangentsbwd_seedbwd_base_offsetr  )r   r.   r.   r/   is_saved_tensor  s
   
z'count_tangents.<locals>.is_saved_tensorr   r#  r$   )r   r  r  r"  r   rn   rz   )rS  rW  	arg_countstatic_arg_idxsr,  r.   r.   r/   count_tangents  s   

rZ  c                   @  s*   e Zd ZU ded< dd Zedd ZdS )	BoxedBoolr   rC   c                 C  s   | j S r?   )rC   r`  r.   r.   r/   rH    s   zBoxedBool.__bool__c                 C  s   t | tr
d| _| S dS NF)r4   r[  rC   r
  r.   r.   r/   disable  s   
zBoxedBool.disableN)rE   rF   rG   r2  rH  ri  r]  r.   r.   r.   r/   r[    s
   
 r[  c                 #  s`    ddl m} |j fdd}tjj|d| d V  W d    d S 1 s)w   Y  d S )Nr$   )WrapperCodeGenc                   s&     | | |||g|R i |S r?   )r"  )ro  r^   kernel_coder  r;   r(  kernel_listorig_define_kernelr.   r/   new_define_kernel  s   
z2collect_defined_kernels.<locals>.new_define_kerneldefine_kernel)codegen.wrapperr^  rd  r  r   r  rr  )ra  r^  rc  r.   r`  r/   collect_defined_kernels  s   "rf  c                 C  s   | d S )N__original__r.   r  r.   r.   r/    get_cloned_parameter_buffer_name     rh  c                 C  s   | dv S )N)rR   xpur.   rT   r.   r.   r/   is_gpu  ri  rl  c                 C  s   t | tsJ t| S r?   )r4   r   rl  rk  r.   r.   r/   device_need_guard  s   rm  c                 C  s   | t jt jt jhv S r?   )ri   r   r   r   r-  r.   r.   r/   ,needs_fallback_due_to_atomic_add_limitations  s   rn  op_overloadc                 C  s   | j tjjjkr
dnd}|d |hvpJ|ot|ot|pJ| j tjjjko:|dko:|o:|dko:tj	j
o:tj	jp:t dkpJ||koF|tjtjhv pJt S )Nr  r~   r   r$   )overloadpacketri   r   atenscatter_rl  rn  scatter_reduce_r%   r  fallback_scatter_reduce_sumdynamic_threadsr  r   r   r  )ro  r  
self_dtype	src_dtypesrc_device_typesrc_is_tensor	reduce_tyr.   r.   r/   use_scatter_fallback  s.   	r{  c                 C  s  ddl m}m} ddlm} tdt|  d t| D ]m\}}td|dd ||u r2td	 q||u r;td
 qt||r|	 }t|rIdnd d |rb|j
dusXJ td|j
jj  td |jjD ]}t| qjtd |jjD ]}t| qyqtdt| dS )z
    An API that can be used in pdb to dump a node_schedule.
    Right mainly dump the read/write dependencies but can add more as needed.
    r   )DisableReductionEnableReduction)SchedulerNodezNode schedule with z nodesr  3:zenable reductionzdisable reductionredpwz scheduler nodeNzoriginal reduction hint zReadDep:z	WriteDep:zUnrecognized node type: )torch._inductor.codegen.simdr|  r}  r  r~  rA  rz   r|   r4   is_reductionr   r  reduction_hintread_writesreadswritesr{   r   )rz  r|  r}  r~  r  r   is_reddepr.   r.   r/   dump_node_schedule  s0   




r  r   torch.Tensorc                 C  s   |   t| j t dkS )Nr   )storage_offsetr/  rS   GPU_ALIGN_BYTES)r   r.   r.   r/   tensor_is_aligned&  s
   r  example_inputc                 C  s   t | jjsdS tjpt| S r\  )rl  rT   r   r%   assume_aligned_inputsr  )r  r.   r.   r/   should_assume_input_aligned2  s   r  c                  C  s4   t jj } | st S | jj}|st S | S r?   )	ri   _guardsTracingContexttry_getrX  nullcontextr6  r  suppress_guards)tracing_contextr  r.   r.   r/   #maybe_get_suppress_shape_guards_ctx;  s   r  	namespacec                 C  s   t t d |  | S )N
aoti_eager)r   r&   )r  rT   r.   r.   r/   aoti_eager_cache_dirL  r   r  op_func_name_with_overloadc                 C  sB   ddl m} ddlm}m} |  d}| }|tj|||dS )Nr   )FileLock)get_lock_dirLOCK_TIMEOUTr  )timeout)filelockr  torch._inductor.codecacher  r  r  r  r  )r  r  r  r  op_conf_lock_filelock_dirr.   r.   r/   aoti_eager_op_conf_lockP  s
   
r  nsr\   c           
   	   C  s2  t | |}|| d }| sg S t|y t|d}t|}|D ]K}||d  }| |d< | sGg   W  d    W  d    S |d D ]"}	|	d rUJ d|	d dkr_d|	d	< tt|	d
 	dd |	d
< qKq#|W  d    W  d    S 1 sw   Y  W d    d S 1 sw   Y  d S )N.jsonkernel_path	meta_infor-  !Only support static shape for nowr\   r   rY   device_indexrS   r   )
r  r  r  r  jsonloadas_posixr   ri   r   )
r  r  r\   device_kernel_cacheop_confr  	json_datar  kernel_lib_abs_pathr  r.   r.   r/   load_aoti_eager_cache[  s:   



"r  )dynamic_shapesoptionsremove_runtime_assertionsdisable_constraint_solverdynamicr  
Tuple[Any]r(  Dict[str, Any]r  Optional[Dict[str, Any]]r  r  r  c                  sv  |rJ dt tjttjttji}t|  tj|i |}t	 fdd|D s.t
dt| |}| s=|jdd |d }| sI|  tjtjd|  iX z;tjj||||||	|
d	d
}g }|D ]k}i }||d< t|tjr|jj |d< t|grd|d< n|jj|d< |j |d< t| |d< t|  |d< n*t| sJ ||d< |dkrdnd|d< |t|  |d< g |d< g |d< ||d< |!| qmi }||d< t"|#| |d< g }d}|| d }| rdnd}t$| t%||F}zt&'|}W n t(y$ } zg }W Y d}~nd}~ww t|ts-J |D ]}t|ts9J |d |krDd	} nq/W d   n	1 sQw   Y  |r||!| t%|d}t&j)||dd W d   n	1 sww   Y  W d   n	1 sw   Y  |W W  d   S  t(y } zW Y d}~W d   dS d}~ww 1 sw   Y  dS )zO
    Compile the given function with persistent cache for AOTI eager mode.
    r  c                 3  s     | ]}t | tjfV  qd S r?   r  )r`   inputsupported_scalar_typesr.   r/   rh     s
    
z5aoti_compile_with_persistent_cache.<locals>.<genexpr>z-Only support tensor, int, float, bool for nowT)parentslibr  F)r  r  r  r  same_signaturer-  r\   rY   r  rS   sizesstridesr   r   scalar_valuer  r  r  rr2  Nr4  )rZ  rv  )*r@   ri   r   rQ   r   r$  r  pytreearg_tree_leavesr8   r  r  r  mkdirr   r  r  r  r  absoluter  _exportaot_compiler4   r  rT   r   r  r   rS   r   rR  r  r"  r   relative_tor  r  r  r  r   dump)r  r  r\   r  r  r;   r(  r  r  r  r  type_to_torch_dtypeflattened_inputspersistent_cachepersistent_cache_libkernel_lib_pathkernel_metadata_itemsr  r  kernel_meta_infor  update_jsonr  r3  op_conf_filer   r  r.   r  r/   "aoti_compile_with_persistent_cachex  s   




M
Mr  c                 O  s   t jjtddJ tj  dd l}dd l	}|
 }||}ddlm} || |j}||j | |i |}	| }
|| || W d    |	|
fS 1 sVw   Y  |	|
fS )Nrv   Tr   )output_code_log)r  r   r  rr  r%   ri   r  r  r:  loggingr   StreamHandlertorch._inductor.graphr  
addHandlerlevelsetLevelDEBUGr=  removeHandler)rN   r;   r(  r:  r  log_capture_stringchr  
prev_levelr9  r  r.   r.   r/   run_and_get_cpp_code  s$   




r  )r2   r3   )rL   rM   )rN   rO   rP   rQ   )rP   r   )rT   r   rP   r   )r   r   rP   r   )r   r   r   r   rP   r   )r   r   rP   r   )r   r   rP   r   )r  r  )rR   )rT   r   )r$   rR   )r1  r2  r3  r@   rT   r   rP   rQ   )r.   r<  r<  r=  rR   )rG  r   rH  r   )rG  r   rM  rN  )rP   r@   )rN   rj  rP   rk  r?   )r  r  rP   r  )r   r3   rP   r   )r  r!   r  r@   rP   r  )r^   r   rP   r  )r   r3   r  r  rP   r3   )r   r   rP   r   )r;   r   rP   r   )r-  r   )rG  r   )rP   r&  )r  r  rP   r   )r  r   rP   r   )r  r3   rP   r  )r&  r'  )rM  r@   rN  r@   )rS  r   )r^   r   )ro  r  )r   r  )r  r  )r  r   rT   r   )r  r   )r  r   r  r   r\   r   )r  r   r  r   r\   r   r  r   r  r2  r;   r  r(  r  r  r  r  r  r  r   r  r   )
__future__r   r  rX  dataclassesenumr   r	  r:  r  r  r  ra  r   r  r#  r  ru  r  rc  r5  r  r   r   pathlibr   typingr   r   r   r	   r
   r   r   r   r   r   r   r   r   r   typing_extensionsr   r   r   r5   ri   torch._exporttorch.utils._pytreer  _pytreer  torch._dynamo.device_interfacer   torch._dynamo.utilsr   torch.autogradr   torch.autograd.profiler_utilr   torch.fx.passes.shape_propr   torch.utils._sympy.functionsr   r   r   r   torch.utils._sympy.symbolr    r!   torch.utils._sympy.value_rangesr"   r#   rv  r%   runtime.runtime_utilsr&   r'   r   	getLoggerrE   ru   r(   r   	VarRangesr  r,   r0   r:   Functionr<   r   	lru_cacher   r   r   r   r   r   r   r  r  r  r.  rj   r;  rF  rL  rO  rP  rS  rY  rZ  r[  r]  rr  r{  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r2  r  r  rY  r%  r,  r/  r0  r3  rn  rz  r:  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r!  r%  r  r-  Enumr.  rD  rI  rL  rR  rZ  	dataclassr[  rf  rh  rl  rm  rn  r{  r  r  r  r  r  r  r  r  r  r.   r.   r.   r/   <module>   s^  @
$R#		
!*$
 
 		 %		






! 	&t