o
    "i                    @   s/  U d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlmZ d dl	m
Z
mZmZmZmZmZmZmZ d dlmZ d dlZd dlZd dlZd dlZd dlm  mZ d dlmZ d dlmZm Z  d dl!m"Z"m#Z#m$Z$m%Z%m&Z&m'Z'm(Z(m)Z)m*Z*m+Z+m,Z, d dl-m.Z.m/Z/ d d	l0m1Z1m2Z2m3Z3m4Z4 d
dl5m6Z6 ddl7m8Z8m9Z9m:Z:m;Z; ddl<m=Z=m>Z> ddl:m?Z?m@Z@mAZAmBZBmCZCmDZDmEZEmFZFmGZGmHZHmIZI ddlmJZJmKZKmLZLmMZMmNZNmOZOmPZPmQZQmRZR ddlSmTZTmUZU eVeWZXi ZYeejZj[ede
f f e\d< i Z]eejZj[ede
f f e\d< e^ Z_eejZj[ e\d< ejTj`Z`ejTjaZaejTjbZbe^ ZceejZj[ e\d< e^ ZdeejZj[ e\d< e^ ZeeejZj[ e\d< ef ZgeejZj[ejZj[f e\d< ejTjhZhdd Zidd Zjdd Zkeje`jle`jme`jne`joe`jpe`jqe`jre`jse`jte`jue`jve`jwg ejxejyejzej{ej|ej}ej~ejejejejejejd Zd!efd"d#Zd$d% Zd&d' Zd(e'fd)d*Zd+d, Zd-d. Zd/d0 Zd1d2 Zd3e'jd3fd4d5Zd6d7 Zd^d8d9Z					3	d_d:d;Zd`d<d=Zd`d>eGd!ejfd?d@ZeebjddAd>eGd!ejfdBdCZd3dDd>eGd!ejfdEdFZee`jjddAd>eGd!ejfdGdHZd3dDd>eGdIejfdJdKZeebjddAd>eGdIejfdLdMZddNe'jd3ddd3d3df	dOdPZdQdR Ze  	3d`dSdTZee`jd3ddUdVdW Zee`jd3ddUdXdY Zee`je`je`je`jebjgdZd[ Zee`d\ree`je ee`jddAdad]d^Zee`jddAdad_d`Zee`jgdadadbZee`jdcdd Zee`jdedf Zee`jdgdh Zee`jdidj Zee`jjdkdl Zee`jdmdn Zee`jddAdodp ZeebjddAdqdr Zee`jddAdsdt Zee`jdudv Zee`jddAee`jddAee`jddAdwdx Zee`jddAdydz Zee`jddAdbd|d}Zee`jlddAdad~dZlee`jddAdaddZee`jddAdaddZdcddZeehjddAdeGdeGdeGdededed!ejdeGfddZeehjddAdeGdeGdeGdededed!ejdeGfddZeehjjddAdeGdedededed!ejdeGfddZeehjjddAdeGdedededed!ejdeGfddZeehjjddAdeGdeGdeGdeded!ejdeGfddZeehjjddAdeGdeGdeGdeded!ejdeGfddZee`j΃dcddZee`jddAdddededefddZee`jddAdddededefddZee`jddAdddededefddZee`jddAdd Zee`jddAdeddZee`jddAdcddZee`jddAdcddZee`jddAdd Zee`jddAdd Zee`jddAdd ZؐdcddZee`jڃdfddZڐdgddZe ddd ZݐdadejjfddZdadejjfddZdgdejjfddĄZdhddƄZddȄ ZeejTjjddAddʄ Zee`jddAdd̄ Zee`jddAdd΄ Zee`jjddAddЄ Zee`jdd҄ Ze dddԄ Zddք Zee`jjZee`jjZee`jjZee`jjZee`j ee`jdd؄ Zee`jddڄ Zee9jddAdd܄ Zee9jddAdIejfddބZee9jddAdd Zee9jddAdd Z ee9jddAd ddee deGdedefddZee9jddAd ddededee deGdef
ddZee`jddAd3d3ddeGdeGdedefddZdd Zdd Zdd Zd3dddZ	dhZ
dd Zee`j ee`j ee`j ee`j ee`jd3d ee`jjd3d ee`j ee`jd3d ee`j ee`j ee`j ee`j ee`j ee`j ee`jj ee`jj ee`j ee`jj ee`jj ee`j j ee`j! ee`j"d3d ee`j#d3d ee`jqe	 ee`j$e ee`j%e ee`j&e ee`j'e ee`j( ee`j) ee`j) ee`j* ee`j+ ee`j, ee`j- ee`j.e ee`j/ ee`j0 ee`j1 ee`j2 ee`j3 ee`j4 ee`j5 ee`j6 ee`j7e ee`j8 ee`j9e ee`j: ee`j; ee`j;j< ee`j= ee`j> ee`j? ee`j@ ee`jA ee`jB ee`jC ee`jD ee`jE ee`jF ee`jG ee`jH ee`jI ee`jJ ee`jK ee`jL ee`jM ee`jN ee`jO ee`jP ee`jQ ee`jR ee`jS ee`jT ee`jU ee`jVjW ee`jX ee`jY ee`jZ ee`j[ ee`j\ ee`j] ee`j^ ee`j_j ee`j`jd3d ee`jae eejbjcjd eejbjcje ee`jf ee`jg ee`jhe ee`ji ee`jj ee`jk ee`jl ee`jm ee`jnjo ee`jpjed3d ee`jqjed3d ee`jrjed3d ee`jsjed3d ee`jtjed3d ee`jujed3d ee`jvje ee`jwje ee`jxje ee`jyje ee`jzje	 ee`j{ ee`j|ddAd`d dZ|ee`j}ddddZ}dd Z~ee`d	rGee`je} eebjdd	 Zee`jddAd
edefddZee`jddAdiddZdd Zeeje`jgdddd3dddZeejd^ddZeejdd Zee`jdd Zee`jdd Zdd Zee`jddAdd  Zd!d" Zeeje`jgddddddd#d$d%Zd&d' Zd(d) Zee`jeeZeedZeed Zd*d+ Zee`jddddd,d-d.Zee`jddddd,d/d0Zee`jddddd,d1d2Zeebjjd3d4 Zeeje`jgd5d6 Zee`jddAd`d7d8Zee`jddAdjd9d:Zd;d< Zd=d> Zd?d@ Zee`jddAdAd Zee`jddAdBdC Zee`jd`dDdEZee`jd`dFdGZdHdI ZdJdK Zee`jddAd`dLdMZee9jddAd`dNdOZdPdQ Zee9jdd3dRdSdT Zee`jddAdadUdVZee`jddAd
efdWdXZddNdYdZejZj[d
ed[ee d\efd]d^Zee`jddAdd_d
ed[ee fd`daZee`jddAd
efdbdcZee`jddAd
efdddeZee`jddAd
efdfdgZee`jddAdNdhd
ed\efdidjZ	
	3dkdkeee df dledmefdndoZee`jjdadee fdpdqZee`jjdadee fdrdsZee`juj	d^dtee duee fdvdwZuee`jvj	d^dtee duee fdxdyZvee`jj			dldzee dtee duee fd{d|Zee`jj			dldzee dtee duee fd}d~Zdd Zee`jee`jee`jdd Zeebjjdd Zee`jddAdcddZdejdeejef fddZdejdejfddZdejdejdejfddZ	dmddZdd Zdd ZddddZeebjddA	3d`ddZeebjddAdd Zee`jr ee`jsjd3dZee`jsddAdd ZsdnddZǐdd Zee`jjd3dZee`jɃdd Zɐdd Zee`jjd3dZee`j̃dd Zee`jjd3dZϐdd Zee`j΃dd Zee`jj	doddZee`jmjd3dZee`jjd3dZee`jmddA		 	3	N	dpddZmee`jddA		 	3	N	dpddZӐdd Zee`jnjd3dZee`jnddA	daddZndd Zאdd ZؐdadefddZِdd Zee`jۃdqdddÐdĄZېdŐdƄ ZܐdǐdȄ Zݐdɐdʄ Zސdːd̄ Zee`jebjgdadd3d͜dΐdτZee`jdadd3d͜dАdфZdҐdӄ ZedԐdՄ Zee`jjd3dZee`jjd3dZee`jjd3dZee`jdNd֍dאd؄ Zd`dِdڄZee`jdېd܄ Zee`jddAd`dݐdބZedߐd Zedd Zee`jdNd֍daddZee`jgdNd֍dd Zd>e:jdee:j fddZeebjgdNd֍dd Zee`je`jjgdNe'jdUdd Zee`jebjgdNd֍dd Zee`jdd Zee`jebjgdqddddZee`jjZ ee`jjZee`jjZee`jjZee`jjZee`jd^ddZee`jd^ddZee`jdd Zee`jddAdaddZee`jddAdaddZee`j	dqddddZ	ee`j
dqdd Zee`jddAdqddZee`jddAdqddZeebjeِd ee`jeِdZee`jeِdZee`jeِdej|d	Zee`jeِd
ej|d	Zee`jdNddZd^ddZdd Zee`jZee`jZee`jZee`jZee`j Z ee`j!Z!ee`j"Z"ee`j#dNdZ#ee`j$ ee`j% ee`j&Z&ee`j'Z'ee`j(Z(ee`j)ddZ)ee`j*Z*ee`j+Z+ee`j,Z,ee`j- ee`j.Z.ee`j/e'jdAe. ee`j0 ee`j1 ee`j2 ee`jX ee`j3ddNejdZ3ee`j4ddNejdZ4ee`j5ddNejdZ5ee`j6ddNejdZ6ee`j7Z7ee`j8Z8ee`j9e7 ee`j:e8 ee`j;Z;ee`j&Z&ee`j<Z<ee`j= ee`j>ddZ>ee`j ee`j?ejd	 ee`j@e; ee`jAejd	 ee`jBejd	 ee`jCejd	 ee`jDejd	ZDee`jEejd	 ee`jFejd	 ee`jG ee`jH ee`jI ee`jJ ee`jK ee`jL ee`jM ee`jN ee`jO ee`jP ee`jQ ee`jR ee`jS ee`jT ee`jU ee`jV ddlWmXZX dd ZYeXD ]@ZZeYe`eZD ]\Z[Z\Z]ee[eZe\e]d q9eYebeZD ]\Z[Z\Z]ee[eZe\e]d qWq/ee`j^jedNdZ_ee`j^jedNdZ`ee`j^jedNd ee`jajeZbee`jajeZcee`jdje# ee`jdje# ee`jeje; ee`jfje& ee`jgje ee`jgjhe ee`jijeZjee`jijeZkee`jle! ee`jmje7 ee`jmje7 ee`jnje8 ee`jnje8 ee`joje7 ee`joje7 ee`jpje8 ee`jpje8 ee`jqe< ee`jre> ee`jse| dd Ztete`juje`j^je_ ete`juje`j^je` ete`jvje`jajeb ete`jvje`jajec ete`jwje`jijej ete`jwje`jijek dd Zxexe`jye exe`jze' exe`j{e( exe`j|e) exe`j}e* exe`j~e+ exe`je, exe`je exe`jje exe`jje exe`je3 exe`je4 exe`je5 exe`je6 exe`je# exe`je exe`je  ee`je' ee`je( ee`je* ee`je+ ee`je, exe`je`j exe`je`j exe`je`j exe`je`j exe`je`j ee`jd^ddZee`jjd d! Zee`jjd"d# Zee`jd$d% Ze. D ]\ZZee/ee qee`jd&d' ZeejTjjd(d) ZeejTjjd*d+ ZeejTj`jjd,d- ZeejTj`jddd.d/Zd d0lmZ ee ee d1d2 Zeed3d4 ZeejTjjd5d6 ZeejTjjd7d8 ZeeddAd9e:jd
efd:d;ZeejTjbjjd<d= ZeejTjjd>d? Zzd dlZejTjZeejd@dA ZeejdBdC ZeejdDdE ZeejdFdG ZeejdHdI ZeejdJdK ZeejdLdM ZeejdNdO ZeejdPdQ ZeejădRdS ZeejƃdTdU ZeejȃdVdW ZeejTjʐj˃dXdY ZW n e͐efy   eXϐdZ Y nw dd[l7mАZ e6eЃ dd\l7mѐZ eѐҡ  eѐӡ  dd]l7mԐZ eԐա  dS (r      N)defaultdict)AnyCallableDictListOptionalSetTupleUnion)patch)associative_scan_op) triton_kernel_wrapper_functionaltriton_kernel_wrapper_mutation)canonicalize_dimcanonicalize_dimscheckdtype_to_typeelementwise_dtypesELEMENTWISE_TYPE_PROMOTION_KINDget_computation_dtypeis_boolean_dtypeis_float_dtypeis_integer_dtypeNumber)magic_methodsmethod_to_operator)CeilDivFloorDiv
IntTrueDivModularIndexing   )import_submodule   )configinductor_primsirtest_operators)decompositionsget_decompositions)
ExpandViewIndexingConstant	is_tritonops_wrapperPermuteView	Pointwise	ReductionSqueezeView	TensorBoxvalidate_irView)	ceildivdecode_device
is_dynamicis_gpuis_pointwise_use,needs_fallback_due_to_atomic_add_limitationspad_listlikesympy_productuse_scatter_fallback)opsV.	loweringslayout_constraints	fallbacksneeds_realized_inputsforeach_opsinplace_foreach_opsinplaceable_foreach_opsc                 C   s   | s	t d| d S )Nzinductor does not support NotImplementedError)condmsg rJ   V/home/ubuntu/SoloSpeech/.venv/lib/python3.10/site-packages/torch/_inductor/lowering.py
assert_nyiS   s   rL   c                    sZ   t  tttfrdd  D S t  t  tjjr+t	 fdd 
 D  d S d S )Nc                 S      g | ]}t |qS rJ   )add_needs_realized_inputs.0xrJ   rJ   rK   
<listcomp>Z       z-add_needs_realized_inputs.<locals>.<listcomp>c                 3   s    | ]}t  |V  qd S N)getattr)rP   overloadfnrJ   rK   	<genexpr>]   s    

z,add_needs_realized_inputs.<locals>.<genexpr>)
isinstancelisttuplesetrB   addtorch_opsOpOverloadPacketupdate	overloadsrW   rJ   rW   rK   rN   X   s   
rN   c                 C   s:   t | tjjr|  D ]	}|tt| |< qd S |t| < d S rT   )rZ   r_   r`   ra   rc   r@   rU   )rX   
constraintrV   rJ   rJ   rK   add_layout_constraintb   s
   re   )r   r"   r                      	   
         dtypec                 C   s2   t | ts| S | tv sJ d|  dt|  } | S )Nzid z missing from DTYPE_ID_LOOKUP)rZ   intDTYPE_ID_LOOKUPrp   rJ   rJ   rK   decode_dtype   s
   
rt   c                 C   sB   t | trt|  pt|  S t | tjr| jdu S t | tS NT)	rZ   r1   r   	get_dtyper   sympyExpr
is_integerrq   rQ   rJ   rJ   rK   is_integer_type   s
   


r{   c                 C   s    t | trt|  S t | tS rT   )rZ   r1   r   rv   boolrz   rJ   rJ   rK   is_boolean_type   s   

r}   type_promotion_kindc                    s0   dd   fdd|D }t |d| i\}}|S )Nc                 S   sF   t | ttjfr
| S t| dsJ t|  }tjdg| | 	 dS )Nrv   r"   rs   )
rZ   r   rw   rx   hasattrlenget_sizer_   zerosrv   )inpdimrJ   rJ   rK   construct_input   s
   z+get_promoted_dtype.<locals>.construct_inputc                       g | ]} |qS rJ   rJ   rP   argr   rJ   rK   rR      rS   z&get_promoted_dtype.<locals>.<listcomp>r~   )r   )r~   argsinps_rp   rJ   r   rK   get_promoted_dtype   s   	r   c                 C   sh   t | ttfs| g} nt| } t| D ]}t |tjjr1| D ]}t||}|tvr0| 	| q q| S rT   )
rZ   r[   r\   r_   r`   ra   rc   rU   r?   append)aten_fnrX   rV   other_fnrJ   rJ   rK   get_overloads   s   

r   c                    s   dd t  D |s|r4r4|rtjndd  D }t|d|i fddfdd D  |rrrrtt fddD  D ]\}}| |< qFtt D ]}t | t	j
rqt | t d	    |< qU S )
Nc                 S      g | ]\}}t |tr|qS rJ   rZ   r1   rP   irQ   rJ   rJ   rK   rR          z"transform_args.<locals>.<listcomp>c                 S   s0   g | ]}t |ttjfst|d ddur|qS )rp   N)rZ   r   rw   rx   rU   rP   arJ   rJ   rK   rR      s    r~   c                    s@   t | tr
t| S t | tjrt| j d   S | S Nr   )rZ   r1   to_dtyper%   Constantvalue
get_device)r   )r   rp   indicesrJ   rK   promote   s
   

ztransform_args.<locals>.promotec                    r   rJ   rJ   r   )r   rJ   rK   rR      rS   c                       g | ]} | qS rJ   rJ   rP   r   r   rJ   rK   rR      rS   r   )	enumerater_   r|   r   zipbroadcast_tensorsranger   rZ   r%   r   r)   creater[   r   )r   	broadcastr~   convert_input_to_boolpromoting_argsr   rQ   rJ   )r   rp   r   r   rK   transform_args   s,   $
$r   c                    s>   t   fdd}t| }t| tt|| |S )a  
    Add a foreach lowering to lowerings dict.

    Arguments:
        aten_fn: torch.ops.aten.* fn we are lowering
        decomp_fn: alternate implementation on our IR
        broadcast: True to apply broadcasting to tensor inputs
        type_promotion_kind: kind of type promotion applied to tensor inputs, `None` means no type promotion
        convert_input_to_bool: some logical ops require inputs are converted to bool
    c                     s*   t | dksJ  | i |}t| |S )Nr    )r   r2   )r   kwargsout	decomp_fnrJ   rK   wrapped   s   z+_register_foreach_lowering.<locals>.wrapped)	functoolswrapsr   rC   rb   r?   dictfromkeys)r   r   r   aten_fnsrJ   r   rK   _register_foreach_lowering   s   
r   c                    s<   t  fdd}t  tt | |S )a  
    Add a lowering to lowerings dict

    Arguments:
        aten_fn: torch.ops.aten.* fn we are lowering
        decomp_fn: alternate implementation on our IR
        broadcast: True to apply broadcasting to tensor inputs
        type_promotion_kind: kind of type promotion applied to tensor inputs, `None` means no type promotion
        convert_input_to_bool: some logical ops require inputs are converted to bool
    c                     s   t | } d}t| dkrt| d t tfrd}| d } tdd | D r*J dtdd | D r@td	d  D s@J t| } |rL| g} | i |}t	| |S )
NFr"   r   Tc                 s       | ]}|d kV  qdS )r   NrJ   rO   rJ   rJ   rK   rY   !      
z6_register_lowering.<locals>.wrapped.<locals>.<genexpr>zout= ops aren't yet supportedc                 s   s    | ]}t |tV  qd S rT   r   rO   rJ   rJ   rK   rY   %      c                 s   s    | ]}|t v V  qd S rT   )rA   )rP   rX   rJ   rJ   rK   rY   %  r   )
r[   r   rZ   r\   anykeysvaluesallr   r2   )r   r   unpackedr   r   r   r   r   r~   rJ   rK   r     s*   
z#_register_lowering.<locals>.wrapped)r   r   r   r?   rb   r   r   )r   r   r   r~   r   r   rJ   r   rK   _register_lowering	  s
   r   Fc                 C   s   t jt| |||dS )z+
    Shim to support decorator syntax.
    r   r~   r   )r   partialr   )r   r   r~   r   rJ   rJ   rK   register_lowering;  s   	r   c                 C   s   g }t jt| t|tddD ];\}}|dkr|| q|dkr)|| qtjj	|| t
t|jt
t|jk rG|| q|| qtt|S )z
    Broadcasting logic based on symbolic shapes.

    We give the shapes 0 and 1 concrete values, while all other shapes
    are symbolic sympy formulas.
    r"   )	fillvalue)	itertoolszip_longestreversedrw   Integerr   r>   graphsizevarsguard_equalsr   expandfree_symbolsr\   )r   boutputrQ   yrJ   rJ   rK   broadcast_symbolic_shapesM  s    r   c                    s(  |d u s|d u sJ d|d u r|d u rt j}tdd | D s"| S tdd | D rC|p3t| d|ifdd  fdd	| D S td
d | D }g }| D ]A}t|ttfrp|	t
t|| | t|  qPt|tjr|	t
t|| | t|  qP|	| qP|S )NzEonly one of override_return_dtype or type_promotion_kind may be givenc                 s   s"    | ]}t |tjttfV  qd S rT   )rZ   rw   rx   rq   floatrO   rJ   rJ   rK   rY   m       z$promote_constants.<locals>.<genexpr>c                 s   s"    | ]}t |tttjfV  qd S rT   )rZ   rq   r   rw   rx   rO   rJ   rJ   rK   rY   o  r   r~   c                    s0   t | tjrt|  td S t|  td S rT   )rZ   rw   rx   r%   r*   r5   r   rz   rs   rJ   rK   
const_funct  s   z%promote_constants.<locals>.const_funcc                    r   rJ   rJ   rO   )r   rJ   rK   rR   z  rS   z%promote_constants.<locals>.<listcomp>c                 s   s&    | ]}t |tttjfr|V  qd S rT   )rZ   r1   r)   r%   r   rO   rJ   rJ   rK   rY   {  s   $ )r   DEFAULTr   r   r   nextrZ   rq   r   r   r)   r   r%   r   rv   r   r[   r   rw   rx   r*   )inputsoverride_return_dtyper~   exr   rQ   rJ   )r   rp   rK   promote_constantse  sB   
r   c              	      s,   d ddt t f fdd}|S )Nalphar   c              	      sj  
d urt tt|rrJ 
| S t|	}r0| d ur/| dkr/t|}t|d | |d< n| d u s6J dd |D |d  	pJ|d   t|d 	 j
dk|dd  D ]!}t|tjs}tt| ks}J d d d|  q\ fd	d
}sd }|D ]}t|	 j
r|	 } nq|s|d 	 }p|}tj| |dS )Nr"   c                 S      g | ]}|  qS rJ   make_loaderrO   rJ   rJ   rK   rR     rS   z1make_pointwise.<locals>.inner.<locals>.<listcomp>r   cudazndim mismatch  c                    s   t  t ksJ d  d tjkr&d ur& fddD  S r:r:tjkr: fddD  S  fddD  S )Nzwrong ndim r   c                       g | ]}| qS rJ   rJ   rP   loadindexrJ   rK   rR     rS   zCmake_pointwise.<locals>.inner.<locals>.inner_fn.<locals>.<listcomp>c                    r   rJ   rJ   r   r   rJ   rK   rR     rS   c                    r   rJ   rJ   r   r   rJ   rK   rR     rS   )r   r_   r|   float64r   )rp   rX   is_cudaloadersoverride_fn_when_cuda_float64override_fn_when_input_boolrangesr   rK   inner_fn  s   $z/make_pointwise.<locals>.inner.<locals>.inner_fndevicerp   r   r   )r   mapr+   r   r[   mulr   rv   r5   r   typerZ   r%   BaseConstantr   r7   r.   r   )r   r   otherr   r   r   allow_alpharX   override_devicer   r   r   triton_fallback)rp   r   r   r   rK   inner  sL   
	zmake_pointwise.<locals>.innerr   r1   )rX   r   r   r   r   r   r   r   rJ   r   rK   make_pointwise  s   (	1r   c                    s&   dddt t t  f fdd}|S )Nr"   r   r   c                    s|  dd }t tjjjdkptjjjtv }tjjjD ]}|jD ]}|jdkr*|jtv s,d}qqd }|D ]}t	|t
tfr?|} nq2|d usHJ dg }|D ]}t	|t
tfs`||gt |  qL|| qL|t| }	d gt | }
|	 D ]9\\}}}g }|D ]&\}} r|d| i}n| }||
|< t|jr|r|r||  q|rtj| qwtdd	 |
D sJ |
S )
Nc                 S   st   t t}t| D ]/\}}t|  }d }|D ]}t|tr#|j } nq|d us,J d|||f ||f q|S )Nz.foreach op should have at least one tensor arg)	r   r[   r   r6   rZ   r1   datar   r   )	arg_pairsr   r   r   use_foreachr   trJ   rJ   rK   
group_args  s   



z9make_foreach_pointwise.<locals>.inner.<locals>.group_argsr   call_functionTz1at least one input must be a list to a foreach opr   c                 s   s    | ]}|d uV  qd S rT   rJ   rO   rJ   rJ   rK   rY         z8make_foreach_pointwise.<locals>.inner.<locals>.<genexpr>)r   r>   r   current_nodeuserstargetrD   oprC   rZ   r[   r\   r   r   itemsr7   r   realizeregister_listr   )r   r   r  realize_outputsnodeusera_list_inputinputbroadcast_inputsgroupsoutputsr   r   groupbuffer_list
output_indr   r   r   pw_fnrJ   rK   r     sX   

z%make_foreach_pointwise.<locals>.innerr   )r  r   r   rJ   r  rK   make_foreach_pointwise  s   "Er  rQ   c                    s>   |    kr|rt| S | S  fdd}t| d| S )Nc                    s   t j|  dS )N)	src_dtype)r=   r   rz   rp   r  rJ   rK   	_to_dtype     zto_dtype.<locals>._to_dtyper   )rv   cloner   )rQ   rp   copyr  rJ   r  rK   r     s
   r   r~   c                 C   sZ   |j s|  j r&|  rt| |d}tj||  |S ttj	j
dd| |S t| |ddS )Nrs   Fadd_to_fallback_setTr  )
is_complexrv   r   
empty_liker%   InplaceCopyFallbackr   fallback_handlerprimsconvert_element_typedefaultr   )rQ   rp   dstrJ   rJ   rK   _convert_element_type"  s   r,  r#  c                   sp   |    kr|rt| S | S dd }|}| }||kr)ttjj|  S  fdd}t| d| S )Nc                 S   s   | j r	t| jS t| jS rT   )is_floating_pointr_   finfobitsiinfors   rJ   rJ   rK   _get_primitive_bitwidth7  s   z1to_dtype_bitcast.<locals>._get_primitive_bitwidthc                    s   t |  S rT   )r=   to_dtype_bitcastrz   rp   x_dtyperJ   rK   _to_dtype_bitcastC  s   z+to_dtype_bitcast.<locals>._to_dtype_bitcastr  )rv   r  r'  atenviewrp   r   )rQ   rp   r  r1  src_bitsdst_bitsr5  rJ   r3  rK   r2  2  s   r2  c                 C   s<   |j s|  j rttjtjjj	j
| |S t| |ddS NTr#  )r$  rv   r1   r   r%   ComplexViewr_   r=   r6  r7  rp   r2  rQ   rp   rJ   rJ   rK   _view_dtypeN  s
   r=  r   c                C   s8   t |}|  |kr|rt| S | S ttj| |S rT   )r5   r   r  r1   r   r%   
DeviceCopy)rQ   r   r  rJ   rJ   rK   	to_deviceW  s   r?  c                 C      t | |ddS r:  )r?  )rQ   r   rJ   rJ   rK   _device_put^     rA  Tc
                 C   s   |p| j }t|}
|rtd| }|durt|}t|
|||r!|nd||	d}
t| |||d|
}
tt|rBttt|d|d|
 |
S )z3A pointwise function that maps ops.{name} to inputs
libdevice_N)r   r   r   r   r   r   )r~   r   )__name__r,   r   r   r   r(  rU   )r   namer   r~   r   r   r   r   use_libdevice_for_f64r   rX   fn_libdevicerJ   rJ   rK   register_pointwisec  s>   


rH  c                     sx   d} t d  fdd} fdd}t|t|tjdgfdd}ttj|}tt| r:tt	t| d	d
| |S )z2A pointwise function that maps ops.frexp to inputsfrexpc                         | i |d S r   rJ   r   r   rI  rJ   rK   frexp0     zregister_frexp.<locals>.frexp0c                     rJ  Nr"   rJ   rK  rL  rJ   rK   frexp1  rN  zregister_frexp.<locals>.frexp1r  c                     s$    d | i | d | i |fS Nr   r"   rJ   rK  )pw_fnsrJ   rK   rX     s   $zregister_frexp.<locals>.fnNr   )
r,   r   r_   int32r   r6  rI  r   r(  rU   )rE  rM  rP  rX   rJ   )rI  rR  rK   register_frexp  s*   
rT  c                 C   s   t ||d}t| |}|S )Nr   )r  r   )r   pointwise_lowering_fnr   rX   rJ   rJ   rK   register_foreach_pointwise  s   
rW  )r   r~   c                    s  dd }t |ttfrt||}t |ttfrt||}| ||g t d  d tjd}dd t D }t|t	 fdd|D  D ]\}}| |< qFt
t D ]}t  | tjrqt | t |d	    |< qUt||d
 d	 t d |t d |S )Nc                  W   
   t j|  S rT   )r=   wherer   rJ   rJ   rK   rX        
zwhere.<locals>.fnr"   r    r   c                 S   r   rJ   r   r   rJ   rJ   rK   rR     r   zwhere.<locals>.<listcomp>c                    r   rJ   rJ   r   r   rJ   rK   rR     rS   r   r  )rZ   r   rq   constant_liker   r   r   r   r   r   r   r   r%   r   r)   r   r[   r   r   r   )rH   r   r   rX   rp   r   r   rQ   rJ   r   rK   rY    s&   
$
$
rY  c                  G   s   t | dkrt| d ttfrt| d  S ttdd | D g }g }| D ]$}| }t |t |ks?t	dd t
||D rDt||}|| q%|S )Nr"   r   c                 S   r   rJ   )r   rO   rJ   rJ   rK   rR     rS   z%broadcast_tensors.<locals>.<listcomp>c                 s   s4    | ]\}}|d kr|d kp|d ko|d kV  qdS r"   NrJ   rP   r   r   rJ   rJ   rK   rY     s    $
z$broadcast_tensors.<locals>.<genexpr>)r   rZ   r[   r\   r   r   reducer   r   r   r   r   r   )r   r  r  rQ   sizesrJ   rJ   rK   r     s   
r   c                 C   s   | S rT   rJ   rz   rJ   rJ   rK   nop     r`  
lift_freshc                 C   s   t | tsJ |d u rtt| jS tt|  |}tt |t	s%|fn|}g }t
|  D ]\}}||v rCtjjt|dsH|| q0||  krTt| |S | S rO  )rZ   r1   r0   r   r   r   r   r   r]   r\   r   r>   r   r   evaluate_exprrw   Eqr   r7  )rQ   r   dims	new_shapedsrJ   rJ   rK   squeeze  s   
ri  c                 C   s   t t| |S rT   )r  ri  )rQ   r   rJ   rJ   rK   squeeze_copy  rB  rj  c                 C   2   t | |}t| tsJ t|tsJ |j| _| S rT   )ri  rZ   r1   r   rQ   r   valrJ   rJ   rK   squeeze_  
   
rn  c                 C   2   t | rt| dtjdS td}t|tjd| S )NFrs   isinfr  r{   	full_liker_   r|   r,   r   rQ   rX   rJ   rJ   rK   rq       rq  c                 C   rp  )NFrs   isnanr  rr  rt  rJ   rJ   rK   rv    ru  rv  c                 C   $   t | rt| S td}t|| S )Nceilr{   r  r,   r   rt  rJ   rJ   rK   rx       rx  c                 C   rw  )Nfloorry  rt  rJ   rJ   rK   r{  '  rz  r{  c                 C   rw  )Nroundry  rt  rJ   rJ   rK   r|  /  s   r|  c                 C   rw  )Ntruncry  rt  rJ   rJ   rK   r}  8  rz  r}  c                 C   s   ddl m} t| g\} t| tjrt| t|S t| t	s!J t|t
tfs*J t|  t|kr6| S ||  s]tjjt|  }|dkr]||s]| tjjt||  t	t| jt|S )Nr   )free_unbacked_symbols)%torch.fx.experimental.symbolic_shapesr~  r   rZ   r%   r   r)   r   r\   r1   r[   r   r>   r   r   	size_hintr;   
mark_reuser   )rQ   r_  r~  x_size_productrJ   rJ   rK   r   @  s   r   c                 C   sL   t |}|D ]}d||< q| }t|D ]\}}|dkr t||}qt||S Nr   )r[   r   	unsqueezer   )r   shapebroadcast_dimensionsrh  broadcast_dimensionvidxrQ   rJ   rJ   rK   broadcast_in_dimZ  s   


r  c                 C   s   t | | S rT   )r   r   )rQ   r   rJ   rJ   rK   	expand_ash  rB  r  c                    sT  t |   tt kr%tdgtt      t| t  } tt|  ks1J t |  }d}ttD ]}| dkrId}|| |  ||< q?|rat||  | 	 dS t
dd t D rttt| |S  fdd	}tjjt }|dkr| tjjt||  |  tj| 	 |  |t |d
S )Nr"   Fr   Trp   r   c                 s   s$    | ]\}}|d kp|d kV  qdS r\  rJ   r]  rJ   rJ   rK   rY        " zrepeat.<locals>.<genexpr>c                    sv   t | t ks
J t| } tt D ]"}| dkr6 | dkr*td| |< qt| | d | | |< q| S )Nr"   r   )r   r[   r   rw   r   r   )r   r   old_sizerepeatsx_loaderrJ   rK   r     s   zrepeat.<locals>.inner_fnr   )r[   r   r   rw   r   r7  r   emptyrv   r   r   r   r  r   r>   r   r   r  r;   r  r   r.   r   )rQ   r  new_sizezero_tensorr   r   old_size_productrJ   r  rK   repeatm  s8    r  c                 C   s2   t | tsJ t |ttfsJ tt| j|S rT   )rZ   r1   r[   r\   r3   r   r   )rQ   r_  rJ   rJ   rK   r7    s   r7  c                 C   s6   t | tsJ t |ttfsJ tt| jt|S rT   )rZ   r1   r[   r\   r-   r   r   )rQ   re  rJ   rJ   rK   permute  s   r              c              	   C   s8   t | tsJ t| |d}ttjj| j|||||dS )Nr   clamp)rZ   r1   _validate_dimr%   	SliceViewr   r   )rQ   r   startendstepr  rJ   rJ   rK   slice_  s   r  c              	   C   s   t | trt | jtjr| j } |   t| s"td|  dt	| \}}t
|j|jdd |D dd |D t|p@d}tt||S )Nzunrealized as_strided(z, ...)c                 S      g | ]}t |qS rJ   rw   r   rP   rh  rJ   rJ   rK   rR         zas_strided.<locals>.<listcomp>c                 S   r  rJ   r  r  rJ   rJ   rK   rR     r  r   )rZ   r1   r   r%   BaseViewunwrap_viewr	  is_storage_and_layoutrG   as_storage_and_layoutFixedLayoutr   rp   rw   r   ReinterpretView)rQ   sizestridestorage_offsetstorage
old_layout
new_layoutrJ   rJ   rK   
as_strided  s   

r  c                 C   s$   t | tsJ t| |||j| _| S rT   )rZ   r1   r  r   )rQ   r  r  r  rJ   rJ   rK   as_strided_  s   r  c                 C   s   t | |||}t|S rT   )r  r  )rQ   r  r  r  resultrJ   rJ   rK   as_strided_copy  s   r  c                    s   g d}D ]} |||    f d d }qdd D  fdd}td  }d d | < tjd  d  ||dS )Nr   r   c                 S   r   rJ   r   rP   r   rJ   rJ   rK   rR     rS   z!pointwise_cat.<locals>.<listcomp>c           
   	      s<  t |  tj}g }g }ttD ]l  dkr t dtjn
t   d tj}t   d tj}t ||}t ||} dkrI|}n td krT|}nt 	||}|
| t|     d 8  < |
t | fddd q|d }	ttd ddD ] t |  |  |	}	q|	S )Nr   r"   c                      s     S rT   rJ   rJ   )r   idx_loadinputs_loadersrJ   rK   <lambda>   s    z1pointwise_cat.<locals>.inner_fn.<locals>.<lambda>        r   r    )r=   
index_exprr_   int64r   r   constantgeltand_r   r[   maskedrY  )
r  idx_dimmasksmasked_loadsr  r  
start_condend_condmasknext_valr   r   r  inputs_ranges)r   r  rK   r     sD   
zpointwise_cat.<locals>.inner_fnr   )r   r   r[   r.   r   r   rv   )r   r   prev_endr   r   r  rJ   r  rK   pointwise_cat  s   .

r  r  scaleszero_pointsaxis	quant_min	quant_maxreturnc              	      s   t  dksJ dt  dksJ d|  tjkr%t| tj} |  tjks5J d|    t |  k sHJ dt |   |     f	dd}tj	| 
 ||  dS )	Nr"   expect scales 1 dimexpect zero_points 1 dim<Expecting input to have dtype torch.float32, but got dtype: Expecting axis to be < c           
         s   |   f}| }|}|}t tjd\}}jtjkr(t|tj}jtjkr5t|tj}t|}t|| | }t	|t
||}	t|	S Nrs   )_create_constantsr_   float32rp   r=   r   rS  
reciprocalr|  maximumminimum)
r  channel_idxr  scale
zero_pointqminqmax	inv_scalerm  clamped	r  rp   input_loaderr  r  r  scales_loaderr  zero_points_loaderrJ   rK   r   3  s   

z;quantized_decomposed_quantize_per_channel.<locals>.inner_fnr   )r   r   rv   r_   bfloat16r   r  r   r.   r   r   r  r  r  r  r  r  rp   r   rJ   r  rK   )quantized_decomposed_quantize_per_channel  s,   
r  c                    s   t  dksJ dt  dksJ d|  |ks*J d| d|    t |  k s=J dt |   |     fdd}tj|  tj||  d	S )
Nr"   r  r  Expecting input to have dtype , but got dtype: r  c                    st   |   f}| }|}|}j tjkrt|tj}j tjkr+t|tj}tt|tj|| }|S rT   )rp   r_   r  r=   r   sub)r  r  r  r  r  rm  r  r  r  r  r  r  rJ   rK   r   e  s   
z=quantized_decomposed_dequantize_per_channel.<locals>.inner_fnr   	r   r   rv   r   r.   r   r   r_   r  r  rJ   r  rK   +quantized_decomposed_dequantize_per_channelL  s(   r  r  r  c                    s   |   tjkrt| tj} |   tjksJ d|    |   fdd}tj|   t	j
|t|t|d|  dS )Nr  c           	         sf   | }t d| |tjd\}}t|| | }t tjd\}}tt|||}t| S )N      ?rs   )r  r_   r  r=   r|  r  r  r   )	r  r  r  r  r  rm  r  r  r  rp   r  r  r  rJ   rK   r     s   
zBquantized_decomposed_quantize_per_tensor_default.<locals>.inner_fnr  r  r   )rv   r_   r  r   r  r   r.   r   r   r   r   r   rq   r   r  r  r  r  r  rp   r   rJ   r  rK   0quantized_decomposed_quantize_per_tensor_default{  s   
r  c                    sh   |   |ksJ d| d|    |    fdd}tj|  tjtj|t	|t
|d|  dS )Nr  r  c                    s:    | }t ||tjd\}}tt|tj|| }|S r  )r  r_   r  r=   r  r   )r  r  r  r  rm  r  rJ   rK   r     s   zDquantized_decomposed_dequantize_per_tensor_default.<locals>.inner_fnr  r   )rv   r   r.   r   r   r_   r  r   r   r   rq   r   r  rJ   r  rK   2quantized_decomposed_dequantize_per_tensor_default  s   r  c                    s   |   tjkrt| tj} |   tjksJ d|    t dks9t dkr5 d dks9J dt dksUt dkrQ d dksUJ d|     fdd}tj	| 
  ||  dS )	Nr  r   r"   expect scale as scalar tensor"expect zero_point as scalar tensorc                    s   | }t  dkrdnd}t  dkrdnd}jtjkr-t|tj}jtjkr:t|tj}t|t| | }t	tjd\}}t
t|||}t| S )Nr"   r   rJ   rs   )r   r   rp   r_   r  r=   r   r|  r  r  r  r  )r  r  _scale_zero_pointrm  r  r  r  rp   r  r  r  r  scale_loaderr  zero_point_loaderrJ   rK   r     s   zAquantized_decomposed_quantize_per_tensor_tensor.<locals>.inner_fnr   )rv   r_   r  r   r  r   r   r   r.   r   r   r  rJ   r  rK   /quantized_decomposed_quantize_per_tensor_tensor  s.   ""r  c                    s   t  dkst  dkr d dksJ dt  dks8t  dkr4 d dks8J d|  |ksJJ d| d|   |      fdd}tj|  tj||  d	S )
Nr   r"   r  r  r  r  c                    s    | }t  dkrdnd}t  dkrdnd}jtjkr-t|tj}jtjkr:t|tj}tt|tj|| }|S )Nr"   r  rJ   )r   r   rp   r_   r  r=   r   r  )r  r  r  r  rm  r  r  r  r  r  rJ   rK   r     s   zCquantized_decomposed_dequantize_per_tensor_tensor.<locals>.inner_fnr   r  r  rJ   r   rK   1quantized_decomposed_dequantize_per_tensor_tensor  s*   ""r  c           
         s  | d   jdk}|r:tdd | D r:| D ]}|  qtdd | D r1ttjg| R  \} }ttjj| |S t	| dkrFt
| d S t| d |d}t| dtjifdd	| D } d
tttjf dtjfdddd fddtfdd| D }dtffdd|rttj| |S fddd}d t	| |kst	| tjkrt fdd| D rtdd tjjD }tfdd| D o|}tfdd| D otfdd| D  }	|s|	r|st| |S ttj| |S )Nr   cpuc                 s   s$    | ]}|  tjtjfv V  qd S rT   )rv   r_   int8uint8rP   r  rJ   rJ   rK   rY   "  s    
zcat.<locals>.<genexpr>c                 s   s     | ]}t | d kV  qdS )rg   N)r   r   r  rJ   rJ   rK   rY   )      r"   r~   c                    s   g | ]}t | qS rJ   r   r  rs   rJ   rK   rR   4  r  zcat.<locals>.<listcomp>rQ   r  c                 S   s>   t | trt | jtjr| j S | jS t | tjr| jS | S rT   )rZ   r1   r   r%   r  r  
StorageBoxrz   rJ   rJ   rK   unwrap_tensor6  s   

zcat.<locals>.unwrap_tensorc                 S   s   t | tjot | jtjS rT   )rZ   r%   ComputedBufferr   r/   r   rJ   rJ   rK   is_reductionB     zcat.<locals>.is_reductionc                    sJ   t | ttjfr | S | p$t | tjo$t fdd|  D S )Nc                 3   s     | ]} t j|V  qd S rT   )r>   r   
get_buffer)rP   readcan_fuse_reductionrJ   rK   rY   K  s
    
z2cat.<locals>.can_fuse_reduction.<locals>.<genexpr>)rZ   r1   r%   r  r.   r   get_read_namesr  )r  r  r	  rJ   rK   r  E  s   zcat.<locals>.can_fuse_reductionc                 3       | ]} |V  qd S rT   rJ   rP   r   r  rJ   rK   rY   R  r  c                    sZ   t | rt j| dd\}}t j| S t| tt jfr# | S t| t jr+dS dS )NF)freezeT)	r%   r  r  ConcatKernelcan_realize_into_without_copyrZ   r1   r  r.   )rQ   r  r   )should_lower_cat_inputr	  rJ   rK   r  T  s   
z#cat.<locals>.should_lower_cat_inputc                    sZ   t | ttjfr | S t | tjsdS |  }|  D ]}| tj	|7 }q|S r   )
rZ   r1   r%   r  r.   inner_fn_opcountr  r>   r   r  )rQ   countr  )op_countr	  rJ   rK   r  h  s   zcat.<locals>.op_countrk   r    c                 3   s    | ]	}| kV  qd S rT   rJ   r  )MAX_SIMPLE_OP_COUNTr  rJ   rK   rY         c                 s   s    | ]}t |V  qd S rT   )r8   )rP   userJ   rJ   rK   rY     r  c                 3   r  rT   rJ   r  r  rJ   rK   rY     r  c                 3   r  rT   rJ   r  r  rJ   rK   rY     r   c                 3   r  rT   rJ   r  r  rJ   rK   rY     r  )r   r   r   r	  require_channels_lastr6  catr'  r*  r   r  r  r   r   r   r
   r1   r%   r  IRNoder   r|   r  r   r#   max_pointwise_cat_inputsr>   r  r  r  )
r   r   
cpu_devicer  r   fusable_reductionMAX_COMPLEX_POINTWISE_CATpointwise_usesfuse_pointwise_usehorizontal_fuse_catrJ   )r  r  rp   r  r  r  r	  rK   r!    sV   

r!  offsetdim1dim2c                    s  |   ttdtdtkfdd tjjt	|d}|rBtjj
tjj |  d}ntjj
tjj  | d}d |r`| df nd|f fddtD }||  fdd	}ttj| ||S )
N)r  rankc                      s   d  d S )Nz(diagonal dimensions cannot be identical z, rJ   rJ   r+  r,  rJ   rK   r        zdiagonal.<locals>.<lambda>r   )r   r   c                    s    g | ]\}}| fvr|qS rJ   rJ   )rP   r   rh  r.  rJ   rK   rR          zdiagonal.<locals>.<listcomp>c                    s   | d }dgt  }d}tD ]&}|kr | d  ||< q|kr-| d  ||< q| | ||< |d7 }q|t d ksBJ |S )Nr   r   r"   r    )r   r   )r  diag_idxoriginal_idxcur_dimrg  base_idxr+  r,  num_dimsoriginal_shaperJ   rK   	reindexer  s   
zdiagonal.<locals>.reindexer)r   r   r   r   r>   r   r   rc  rw   Ltevaluate_maxevaluate_minr   r   r1   r%   GenericViewr   )r  r*  r+  r,  offset_negative	diag_sizer_  r8  rJ   r4  rK   diagonal  s:   
r?  c                 C   s   t t| |||S rT   )r  r?  )r  r*  r+  r,  rJ   rJ   rK   diagonal_copy     r@  c                 C   $   t | }t||||}t|| |S rT   )r  r?  	mutate_to)r  srcr*  r+  r,  r   r  rJ   rJ   rK   diagonal_scatter     
rE  c                 C   s,   t ||  | }tt| |||d |S rO  )r3   handle_negative_indexr   ri  r  )rQ   r   r  rJ   rJ   rK   select  s   rH  c           	   
   C   s   t | |d}t|tjrtjj|}t|ttj	fr1tjj| 
 | }|g|| d |  }g }d}|D ]}|| }|t| ||||d |}q7|S )Nr   r"   r  )r  rZ   rw   rx   r>   r   r   evaluate_static_shaperq   r   r   r   r  )	rQ   r_  r   r  x_sizer  r  r  r  rJ   rJ   rK   split  s   rK  c                 C   s   t | ||ddS )NFr  )rK  )rQ   r_  r   rJ   rJ   rK   split_with_sizes     rL  c                 C   sJ   t | |d}tjj|  | }g }t|D ]}|t| || q|S r   )	r  r>   r   r   rI  r   r   r   rH  )rQ   r   rJ  r  r   rJ   rJ   rK   unbind  s   rN  c                    s   |   }t|}t|| |dkrtt| d|dS |  }tjj}||| |	d t
|| d }||dkrK| |t|| | g |d   || d d  |}	 fdd}
ttj| |	|
S )Nr   )r  r"   c                    s:   | d |     }g | d   ||  d d R S )Nr   r"   rJ   )r  dim_idxr   r  rJ   rK   r8    s   &zunfold.<locals>.reindexer)r   r   r   r  r  r>   r   r   	guard_leqguard_ltr   r  r  r   r1   r%   r<  r   )rQ   	dimensionr  r  r_  ndimdim_sizer   new_dim_sizeout_sizer8  rJ   rP  rK   unfold  s   
(rX  c                 C   s4   t | |d}t|  }||td t| |S rO  )r  r[   r   insertrw   r   r7  )rQ   r   rf  rJ   rJ   rK   r    s   
r  c                 C   rk  rT   )r  rZ   r1   r   rl  rJ   rJ   rK   
unsqueeze_"  ro  rZ  c                 C   sR   t |tsJ t|  }|dk r||| 7 }d|  kr$|| k s'J  J |S r   )rZ   rq   r   r   )rQ   r   r*  rT  rJ   rJ   rK   r  +  s    r  r   c                 C   sT   t | |d}tjj|  | d }t| |d|}t| |||d }t|t|S )Nr   r    )	r  r>   r   r   rI  r   r  r   sigmoid)rQ   r   new_lenr   r   rJ   rJ   rK   glu4  s
   r]  c                    s   |rt    fdd}|S )Nc                     s$   t tjtjj g| R i |S rT   )pytreetree_mapr1   r   r%   FallbackKernelrK  kernelrJ   rK   handlerB  s   z!fallback_handler.<locals>.handler)rA   r^   )rb  r"  rc  rJ   ra  rK   r'  >  s   
r'  c                   C      t d d S )NzjTorchinductor does not support code generation for complex operators. Performance may be worse than eager.)warningswarnrJ   rJ   rJ   rK   _warn_complex_not_supportedJ  s   rg  r   c                 C   s<   |   r|r|jtjjjjtjjjj	fv rdS t
  dS dS )z0Do not support reading or writing to this tensorFT)r$  r  r_   r=   r6  r7  rp   r(  r)  r*  rg  r   parentrJ   rJ   rK   unsupported_input_tensorS  s   

rj  c                 C   s   t | |rdS | jotjS )z2Do not support writing tensor but can read from itT)rj  is_cpur#   disable_cpp_codegenrh  rJ   rJ   rK   unsupported_output_tensora  s   
rm  r  c                 C   sh   | j tjju r	dS | j tjju rdS dd }tj| ji | jD ]}||| ddr, dS q || | ddS )NFc                 S   sp   t | tjjs	dS d| jvrdS t| jd D ]}t |tjjs"q|r-t	||r, dS qt
||r5 dS qdS )NFrm  T)rZ   r_   fxNodemetar^  tree_leaves_subclasses
FakeTensorrm  rj  )r  ri  	is_outputrp  rJ   rJ   rK   check_skip_conditionq  s   


zCfallback_node_due_to_unsupported_type.<locals>.check_skip_condition)rt  T)	r  r6  view_as_complexr*  lift_fresh_copyr^  arg_tree_leavesr   r   )r  allow_cpu_inputsru  r   rJ   rJ   rK   %fallback_node_due_to_unsupported_typeh  s   rz  c                    s   | t vsJ d|  |r:ttdr:t| gr:tjr#| tjj	j
v s:tjjjr2dtjj_td td|  d fdd}t| tjjrY|  D ]}t| |}|| qKd S t| tjjtjjfrj||  d S td	|  d
t|  )Nz*both a fallback and a decomp for same op: CIFznA make_fallback error occurred in suppress_errors config, and suppress_errors is being disabled to surface it.zmake_fallback(a.  ): a decomposition exists, we should switch to it. To fix this error, either add a decomposition to core_aten_decompositions (preferred) or inductor_decompositions, and delete the corresponding `make_fallback` line. Get help from the inductor team if unsure, don't pick arbitrarily to unblock yourself.c                    s.   t |   d urt|   t| d dt| S Nr   )rN   re   r   r'  )op_overloadlayout_constraintrJ   rK   register_fallback  s   

z(make_fallback.<locals>.register_fallbackzUnsupported fallback z with type )r'   r|   osgetenvr(   r#   fallback_randomr_   _decompdecompositions_for_rngextra_random_decomps_dynamosuppress_errorslogwarningAssertionErrorrZ   r`   ra   rc   rU   
OpOverloadHigherOrderOperatorRuntimeErrorr   )r  r  rf  r  olr}  rJ   r~  rK   make_fallback  s6   




r  c                 C   s$   d}| D ]}|| }qt |tjdS )z
    TorchInductor offset calculation differs from PyTorch eager offset
    calculation for random ops (tl.rand vs torch.rand). In future, we should
    strive for same impl for tl.rand and torch.rand.
    r"   rs   tensorr_   r  )r  numelrh  rJ   rJ   rK   philox_rand_offset  s   
r  c           	         sd   t | | t j|  | |  fdd}tj| |t| d}t	| }||fS )Nc                    sV   t g tj}t g tj}t t | tj|}t ||}t | S rT   )r=   r   r_   rS  r^   r  rand)r   seed_index_exproffset_index_exprrand_index_exprr  rp   offset_loader
random_posseed_loaderrJ   rK   r     s   zphilox_rand.<locals>.inner_fnr   )
r%   r  FlexibleLayoutcontiguous_stridesmake_indexerr   r.   r   r[   r  )	r  seedr*  r  r   rp   r   random_values_nodeoffset_noderJ   r  rK   philox_rand  s&   
r  c              	   C   s.   t jrttjtjtj	j
| ||S td)Nz&should be handled in replace_random.py)r#   r  r^  r_  r1   r   r%   r`  r6  native_dropoutr*  r  )rQ   ptrainrJ   rJ   rK   r    s   r  c                 G   sj   t js|  tdksJ d|   t|dks!t|d tr%t	j
jnt	j
j}tj|| g|R   | S )Nr  Tthis should be handled in decomps unless config.fallback_random or the device is CPUr   )r#   r  r   r_   r   r	  r   rZ   r   r6  
bernoulli_Tensorr%   InplaceBernoulliFallback)rQ   r   r}  rJ   rJ   rK   r    s   r  c                 G   s4   t js|  tdksJ dtt| g|R  S )Nr  r  )r#   r  r   r_   r   r  r  )rQ   r   rJ   rJ   rK   bernoulli_p	  s   r  c                 C   s   t rT   r  r   rJ   rJ   rK   _foobar  ra  r  c                 C   rd  )Nz1using triton random, expect difference from eager)r  info)saltrJ   rJ   rK   _warn_triton_random  rB  r  c                   C   s   t tjj d S rT   )r  r>   r   creation_timerJ   rJ   rJ   rK   warn_triton_random  rM  r  c                  O   F   | dd d urt| i |S tjr|dd  t| i |S tdN	generatorz-should have been handled in replace_random.py)getfallback_rand_generatorr#   r  popfallback_rand_defaultr  rK  rJ   rJ   rK   r  (     r  c                  O   r  r  )r  fallback_randn_generatorr#   r  r  fallback_randn_defaultr  rK  rJ   rJ   rK   randn2  r  r  c                 C   s   t |}t j| |S rT   )r%   get_stride_orderExternKernelrequire_stride_order)input_tensorr  stride_orderrJ   rJ   rK   inductor_force_stride_order<  s   
r  c                 C      t d)Nz.should be handled in fuse_seed_creation_pass()r  )r   rJ   rJ   rK   inductor_seedB     r  c                 C   s   t   tt| t|S rT   )r  r1   r   r%   RandomSeedsr5   )r  r   rJ   rJ   rK   inductor_seedsG  s   r  c                    s(    fdd}t j  |g dS )Nc                    s   t   S rT   )r=   	load_seedget_namer  r   seedsrJ   rK   r   O  r  z&inductor_lookup_seed.<locals>.inner_fnr   )r.   r   r   rv   )r  r   r   rJ   r  rK   inductor_lookup_seedM  s   r  r*  r  r  modec                   s   t jrJ  dv sJ g | } tj}| }tj||| tj| |d	 |
  fdd}tj|||g | d}|  |S )N)r  r  r  c                    s"   t t g t| tjS rT   )rU   r=   r  r_   rS  r   r  r  r  rJ   rK   r   f  s   z!inductor_random.<locals>.inner_fnr   )r#   r  r_   r  r   r%   r  r  r  r  r   r.   r   r	  )r  r  r  r*  rp   r   r   r  rJ   r  rK   inductor_randomZ  s(   
r  lowhighc                   sp   t jrJ g |}tj}| }tj|||tj||d	 |
  fdd}tj|||g |dS )Nr  c              	      s6   t g t | tjt tjt  tjS rT   )r=   	randint64r  r_   rS  r  r   r  r  r  r  rJ   rK   r     s   z"inductor_randint.<locals>.inner_fnr   )r#   r  r_   r  r   r%   r  r  r  r  r   r.   r   )r  r  r  r  r*  rp   r   r   rJ   r  rK   inductor_randintv  s"   
r  	out_int32right
boundariesr  r  c                   s   t   dks
J t| rt s ttjjdd|  |dS      d   }| 	 }|  |r;t
jnt
j fdd}tj|||  dS )	Nr"   Fr!  r  r   c                    s"   | }t |  }|S rT   )r=   	bucketizer  )r   rm  r   r  boundaries_sizeindex_dtyper  r  rJ   rK   r     s   zbucketize.<locals>.inner_fnr   )r   r   r+   r'  r6  r  r  r	  r   r   r_   rS  r  r.   r   )r  r  r  r  boundaries_loaderr   r   rJ   r  rK   r    s$   r  c                 O   $   t tjtjj||f\}}||fS rT   )r^  tree_map_onlyr%   r"  r  require_stride1r   r   r   rJ   rJ   rK   require_dense     r  c                 O   r  rT   )r^  r  r%   r"  r  require_contiguousr  rJ   rJ   rK   r    r  r  c                 O   r  rT   )r^  r  r%   r"  r  r   r  rJ   rJ   rK   r     r  r   )ignore_mutated_args_FIXMEc                   s  dd  |ret jtjjsJ jj} fdd}g }i }tt|jD ]\}\}	}
|j	| }|
|||	|
 q&dd |j	D }| D ]}|| }	j| }
|| }|||	|
||< qGt||fS t fddt|jD } fd	d| D }||fS )
Nc                 S   s2   t | tjrt|jd  }tj| |S | S Nrm  )rZ   r%   r"  r  rp  r  r  r  )r   fx_argr  rJ   rJ   rK   apply_constraint  s   z1constrain_to_fx_strides.<locals>.apply_constraintc                    s    | j d ur| j jr|S  ||S rT   )
alias_infois_write)
schema_argr   r  r  rJ   rK   maybe_apply_constraint  s   
z7constrain_to_fx_strides.<locals>.maybe_apply_constraintc                 S   s   i | ]}|j |qS rJ   )rE  r   rJ   rJ   rK   
<dictcomp>  rS   z+constrain_to_fx_strides.<locals>.<dictcomp>c                 3       | ]
\}} ||V  qd S rT   rJ   rP   r   r  r  rJ   rK   rY         
z*constrain_to_fx_strides.<locals>.<genexpr>c                    "   i | ]\}}| |j | qS rJ   r   rP   kr  r  fx_noderJ   rK   r        " )rZ   r  r_   r`   r  _schemar   r   r   	argumentsr   r   r   r\   r  )r  r  r   r   schemar  new_args
new_kwargsr  r   r  r  schema_kwargskeyrJ   r  rK   constrain_to_fx_strides  s,   


r  ztorchvision::roi_alignc                    sF   dd  t  fddt|jD } fdd| D }||fS )Nc                    s,  t | tjs| S |jd }| }t|}|r*|d dkr*tttt	| 
 }|js4tj| |S d t | ts=J t	| 
 dvrG| S  fdd}z|   || rbtjtj| |W S W n	 tyl   Y nw  fdd	}t | jtjr|| s||  rtjtj| |S tj| |S )
Nrm  r   r   rk   rf   rg   c                    sF   t  fddtt  d D }tjj  d dko"|S )Nc                 3   s.    | ]}t jj |   d kV  qdS r   N)r>   r   r   r  
get_strider   )	ALIGNMENTrQ   rJ   rK   rY   &  s
    
z`sdpa_constraint.<locals>.apply_constraint.<locals>.is_aligned_realized_tensor.<locals>.<genexpr>r"   r   )r   r   r   r  r>   r   r   r  )rQ   aligned_stridesr  rz   rK   is_aligned_realized_tensor%  s   zMsdpa_constraint.<locals>.apply_constraint.<locals>.is_aligned_realized_tensorc                    s   t jj|  d   dkS )Nr   r   )r>   r   r   r  r   rz   r	  rJ   rK   
is_aligned7  s   z=sdpa_constraint.<locals>.apply_constraint.<locals>.is_aligned)rZ   r%   r"  rp  r  r  r[   r   r   r   r   r   r  r  r1   r  r>   r   try_match_insignificant_stridesrealize_inputAttributeErrorr   r  r  )r   r  meta_valmeta_strider  r
  r  rJ   r	  rK   r    s@   

	z)sdpa_constraint.<locals>.apply_constraintc                 3   r  rT   rJ   r  r  rJ   rK   rY   C  r  z"sdpa_constraint.<locals>.<genexpr>c                    r  rJ   r  r  r  rJ   rK   r  F  r  z#sdpa_constraint.<locals>.<dictcomp>)r\   r   r   r  )r  r   r   rJ   r  rK   sdpa_constraint  s   5
r  )rf  c                 C   sn   |}|   |  krt||   }|  | kr t||  }|  | kr3t||  }t|S t|S rT   )r   r?  rv   r   r   r   r  )selfrD  non_blockingrQ   r   rJ   rJ   rK   r  	  s   r  )memory_formatc                C   s&   t j|  |  |  t|  dS )Nr   )r.   r   r   rv   r   r[   r   )rQ   r  rJ   rJ   rK   r  	  s   
r  c                 C   s   g }t | tr+t | jtjr+| j} t | tjr'||   | j} t | tjst| } t| } |rH| j} |d d d D ]}t| |} q;t| } | S r  )rZ   r1   r   r%   r  r   
get_layoutr  )rQ   reinterpret_view_layoutslayoutrJ   rJ   rK   clone_preserve_reinterpret_view!	  s   r  rw  c                   s(    fdd}t jt| || gdS )Nc                    s   t j| d    dS )Nr   rs   r=   r  r   rp   r  r  rJ   rK   rX   C	  r  ziota.<locals>.fnr   )r.   r   r5   )lengthr  r  rp   r   requires_gradrX   rJ   r  rK   iota9	  s   
r  r   r   c                    s   |   |  ks
J |  t|  d tjjtdr'| 	    tjj
d tjj| 	    tt| | 	 }|  fdd}tj|  |   |t| 	 dS )Nr   c              	      s6   t t t |   tjt tj| | S rT   )r=   rY  eqr  r_   rS  r  r   r   
src_loaderr  rJ   rK   r   Z	  s   z select_scatter.<locals>.inner_fnr   )rv   r   r  r>   r   r   rc  rw   r9  r   rQ  rR  r   r  r.   r   r   r[   )rQ   rD  r   r   r   rJ   r   rK   select_scatterN	  s    

r"  c                    s     |  ks
J  t d    tj \t }t d  | < t	||}|  fdd}t
j   |t dS )Nr   r"   c              
      s2  dkrkrdkr| S t |  tj}t|  t|    < g }dkr?|t |t t	tj krT|t 
|t t	tj dkrs|t t t|   dtjt dtj |swJ tt j|}t | fddtrdnd}t ||| S )Nr   r"   c                          S rT   rJ   rJ   )src_idxr!  rJ   rK   r  	      z1slice_scatter.<locals>.inner_fn.<locals>.<lambda>r  )r=   r  r_   r  r[   r   r   r  rw   r   r  r  r   r  r   r^  r  r  r{   rY  )r  r  r  src_valr   rU  r  r!  r  r  rQ   r  )r$  rK   r   z	  sR   zslice_scatter.<locals>.inner_fnr   )rv   r   r  r   r%   r  normalize_start_endr[   r   r   r.   r   r   )rQ   rD  r   r  r  r  src_sizer   rJ   r'  rK   slice_scatterl	  s    
.
r*  c                 C   s*   t | ttfrt| dkrt| d S | S r   )rZ   r[   r\   r   _unwraprz   rJ   rJ   rK   r+  	  s   r+  rp   r   r  
pin_memoryc                   s  t |d tjfv d|  t | d tt tr ptjnp%t g }t tj	r6 fdd}nBt t
tfrE fdd}n3t dksZt d t
tfrlt dkrl|tt   fdd}ntjtj |d	S tjt|||d
S )Nlayout=r-  c                       t  S rT   r  r   r   rp   rJ   rK   r   	     ztensor.<locals>.inner_fnc                    r/  rT   r=   r  r   r0  rJ   rK   r   	  r1  r   rk   c                    s8    fdd t dkrtdS  dt S )Nc              	      sr   | |k sJ ||  dkrt |  S ||  d |  }t t t d tjt |tj | | ||S )Nr"   r    r   )r=   r  rY  r  r  r_   r  )r  r  mid)binary_searchr   rp   r   rJ   rK   r4  	  s   z/tensor.<locals>.inner_fn.<locals>.binary_searchr   )r   r=   r  r   r0  )r4  r   rK   r   	  s   r  r   )rL   r_   stridedrZ   r+  rq   r  get_default_dtyperw   rx   r   r   r   r   r>   r   add_tensor_constantr  r.   r   r5   )r   rp   r   r  r-  r   r   rJ   r0  rK   r  	  s,   *r  c                 C   s@   t | tr|d urt| |} |d urt| |} | S t| ||dS )Nr  )rZ   r1   r   r?  r  )r   rp   r   rJ   rJ   rK   	as_tensor	  s   


r8  c                 C   s   t | tjdS r  r  r   rJ   rJ   rK   long_tensor	  rB  r:  c                 C   s   ddl m} |tjjjtjjjd }t|dksJ |t	t
| \}}t||| }tj||_tjjjd }t|tjtjtjfrK|jjS t|S )Nr   )resolve_unbacked_bindingsunbacked_bindingsr"   rm  )r  r;  r>   r   r   	shape_envr  rp  r   r   iterr  r%   DynamicScalarregister_bufferrE  rZ   r_   SymIntSymFloatSymBoolr  exprrw   sympify)r   r;  r<  binding_symkeypathbufferrm  rJ   rJ   rK   _local_scalar_dense	  s   
rI  c                 C      d S rT   rJ   )r   rI   rJ   rJ   rK   _assert_scalar,
  s   rK  c                    s   | t | ttfstdrjt ttfr  fdd}n"t tjr. fdd}nt dks8J 	 fdd}t
j| |t|dS )Nr   c                       t  S rT   r2  r   rp   r   rJ   rK   r   <
  r1  z_full.<locals>.inner_fnc                    rL  rT   r  r   rM  rJ   rK   r   A
  r1  r   c                    s    g S rT   rJ   r   )value_loaderrJ   rK   r   H
  s   r   )rZ   rq   r   r   r   rw   rx   r   r   r   r.   r   r[   )
fill_valuer   rp   r  r   rJ   )rp   r   rN  rK   _full5
  s    rP  c                 K   s   t t|| fi |S rT   create_tensor_liketensor_constructor)rQ   rO  r   rJ   rJ   rK   rs  S
  s   rs  c                    s    d d d d dd d fdd
}|S )NF)namesrp   r   r  r-  r  c                    s   t | d u d t |d tjfv d|  t | d t|}|p#t }t|dkr;t|d tttj	fr;t|d }|D ]
}t|tj
rGJ q=dd |D }t |||S )Nnamed tensorsr.  r-  r"   r   c                 S   r  rJ   r  r  rJ   rJ   rK   rR   n
  r  z5tensor_constructor.<locals>.inner.<locals>.<listcomp>)rL   r_   r5  r5   r6  r   rZ   r[   r\   SizerA  rP  )rT  rp   r   r  r-  r  r  rh  rO  rJ   rK   r   Z
  s   	"z!tensor_constructor.<locals>.innerrJ   )rO  r   rJ   rW  rK   rS  X
  s   rS  )rT  rp   r  r   r-  r  c                 G   sX   t | d u d t|}t|dkr"t|d tttjfr"t|d }t|d ||||dS )NrU  r"   r   rp   r  r   r-  )	rL   r5   r   rZ   r[   r\   r_   rV  empty_strided)rT  rp   r  r   r-  r  r  rJ   rJ   rK   r  t
  s   
"r  c                    s   dddddd fdd
}|S )zZ
    Shim to convert X_like(...) into X(...).  For example zeros_like() into zeros().
    NF)rp   r   r  r-  r  c                   sj   t | d t |d tjfv d|  |d u r|  }nt|}|p%|  }t|  } |||||dS )Nr-  r.  r,  )rL   r_   r5  rv   rt   r   r[   r   )rQ   rp   r   r  r-  r  r  creation_fnrJ   rK   _constant_like
  s   

z*create_tensor_like.<locals>._constant_likerJ   )r[  r\  rJ   rZ  rK   rR  
  s   
rR  c                 C   s   t t| S rT   rQ  rW  rJ   rJ   rK   r[  
  r1  r[  c                    s   d d d d d fdd
}|S )NrX  c                   sp   t |ttfs	J t| d t|d tjfv d|  t|p#|  }|p)|  }dd |D }t	 |||S )Nr-  r.  c                 S   r  rJ   )rw   r   r  rJ   rJ   rK   rR   
  r  z7new_constant.<locals>._new_constant.<locals>.<listcomp>)
rZ   r[   r\   rL   r_   r5  rt   rv   r   rP  rQ   r  rp   r  r   r-  rW  rJ   rK   _new_constant
  s   z#new_constant.<locals>._new_constantrJ   )rO  r^  rJ   rW  rK   new_constant
  s   r_  rX  c                C   s4   |d u r|   }|d u r|  }t|d ||||dS NrX  rv   r   rY  r]  rJ   rJ   rK   	new_empty
  s   rb  c                C   s   t | ttfs	J t |tttd fsJ t| d t|d tjfv d|  t|p/t }|p7t	dj
}td||| d}|  |jj}dgt|  |j_t |tjsYJ dd | D } |ridd |D ntj| }tj||| |d	|_|S )
Nr-  r.  r  r   )rO  r   rp   r  c                 S   r  rJ   r  r  rJ   rJ   rK   rR   
  r  z!empty_strided.<locals>.<listcomp>c                 S   r  rJ   r  r  rJ   rJ   rK   rR   
  r  )r   rp   r  r  )rZ   r[   r\   r   rL   r_   r5  rt   r6  r  r   rP  r	  r   r   r   r%   r
  r  r  r  r  )r  r  rp   r  r   r-  	pointwiserH  rJ   rJ   rK   rY  
  s.   
rY  c                C   s4   |d u r|   }|d u r|  }t||||||dS r`  ra  )rQ   r  r  rp   r  r   r-  rJ   rJ   rK   new_empty_strided
  s   rd  c                 C   s2   dd |D }t tt||jd}tj| |S )Nc                 S      g | ]	}t jj|qS rJ   )r>   r   r   r  r  rJ   rJ   rK   rR   
      z copy_strided.<locals>.<listcomp>)r  )sortedr   r   __getitem__r%   r  r  )rQ   r  r  rJ   rJ   rK   copy_strided
  s   ri  c                 K   s*   | dd usJ dt|| fi |S )Nrp   z(dtype should be handled by decomposition)r  rS  )r  rO  r   rJ   rJ   rK   full
  s   rj  c                    s   t | tsJ | tjksJ |  tdk}t|  | |  |  fdd}t	j
|  |  || dS )Nr   c                    s4   t | } t| dkrt|   |  < | S r   )r[   r   r=   indirect_indexingr  r   index_loaderr  r  rJ   rK   rX     s   zgather.<locals>.fnr   )rZ   r1   rv   r_   r  r   r   r  r   r.   r   r   )rQ   r   r   sparse_gradr*  rX   rJ   rl  rK   gather
  s   ro  c                    s   |rJ t | tsJ t |tsJ dt| v sJ |  |  t| |  g | dd   fdd}tj| 	 |  |dS )Nrq   r"   c                    s\   t | t ksJ |  d  | d  }t|d gg | d   }|S )Nz != r   )r   r=   rk  )r  	var_index
weight_idxindices_loaderindices_ndimr  weight_loaderweight_sizerJ   rK   rX   "  s   "
zembedding.<locals>.fnr   )
rZ   r1   strrv   r   r   r   r.   r   r   )weightr   padding_idxscale_grad_by_freqsparserX   rJ   rr  rK   	embedding  s    r|  c                    s   t dd  D sJ ddd  D  tdd  D r"tddd t D }t|d	ks5J d
d gt  }t|t fdd|D  D ]\}}| |krXtd|||< qJ||fS )Nc                 s   s4    | ]}|d ur|  tjtjtjtjfv V  qd S rT   )rv   r_   r  rS  r|   r  r   rJ   rJ   rK   rY   3  s    z.check_and_broadcast_indices.<locals>.<genexpr>z)indices must be int64, byte or bool. Got c                 S   s   g | ]
}|d ur|  qS rT   rv   r   rJ   rJ   rK   rR   7      z/check_and_broadcast_indices.<locals>.<listcomp>c                 s   s,    | ]}|d ur|  tjtjfv V  qd S rT   )rv   r_   r|   r  r   rJ   rJ   rK   rY   8  s    "zFallback for bool indicesc                 S   r   rJ   r   r   rJ   rJ   rK   rR   =  r   r   z"requires at least 1 non-None indexc                    r   rJ   rJ   r   r   rJ   rK   rR   @  rS   z.Fallback when indices is on a different device)r   r   rG   r   r   r   r   r   )r   r   
valid_idxsnew_indicesr   rQ   rJ   r  rK   check_and_broadcast_indices2  s"   
$
r  c              	      s   dt dd  D ]\}}	|	| dkrdq	fddtD g 	tt d  d }
r> nd |
  |
d    f	dd}|fS )	NFr"   Tc                    s    g | ]\}}|d u r | qS rT   rJ   )rP   r   rm  rJ  rJ   rK   rR   `  r0  z2index_output_size_and_inner_fn.<locals>.<listcomp>r   c           	   	      s  t | t ks
J t t ksJ t }g }d }r"dn|}d}td d D ]E}||kr8||7 }| d u rR|t | k sFJ || |  |d7 }q.| }|d us\J | }|tj|| |||  | d q.g || |d  }d u r|S |S )Nr   r   r"   r   )r   r   r   r=   rk  )	r  r-  	new_indexfirst_tensor_indexstart_offsetnext_idxr   loaderr  )	r   indexed_sizer   indices_loadersnon_consecutive_tensorsoutput_sizetensor_indicestensor_sizer  rJ   rK   rX   m  s<   

z*index_output_size_and_inner_fn.<locals>.fn)r   r   r   )rJ  r   r  r  r  r  r  r   previouscurrentr  rX   rJ   )
r   r  r   r  r  r  r  r  r  rJ  rK   index_output_size_and_inner_fnJ  s$    


!r  c           
   
      s   t  ttfs	J |  }t |  \ }t|dks J ddd  D }t |d   }|   fddtt D }d|v rOd|vrOt	dfddtt D }t
 ||||||d\}}	tj|  |  |	|d	S )
Nr   z Must have at least one valid idxc                 S       g | ]}|d ur|  nd qS rT   r   r   rJ   rJ   rK   rR     r0  zindex_impl.<locals>.<listcomp>c                    s    g | ]} | d ur| qS rT   rJ   r   r   rJ  rJ   rK   rR     r0  z0index is out of bounds for dimension with size 0c                    r   rJ   rJ   r   r  rJ   rK   rR     rS   r  r   )rZ   r[   r\   r   r  r   r   r   r   
IndexErrorr  r.   r   rv   )
rQ   r   r   r  r  r  r  r  r  r   rJ   r  rK   
index_impl  s6   
r  c                 C   sB   zt | |ddW S  ty    |   ttjjdd| | Y S w )NTr  Fr!  )r  rG   r	  r'  r6  r   r  rQ   r   rJ   rJ   rK   r     s   c                 C   r@  NFr  )r  r  rJ   rJ   rK   _unsafe_index  rB  r  c                 C      t t| |||S rT   )
index_put_r  rQ   r   r   
accumulaterJ   rJ   rK   	index_put  rA  r  c                 C   s   t t| |||ddS r  )index_put_impl_r  r  rJ   rJ   rK   _unsafe_index_put  s   r  c                 C   sB   |  |   krt||   }|rt| |}t| t|d || S r   )r   r?  r^   rC  rY  )r  r   r   r  rJ   rJ   rK   index_put_as_masked_fill  s
   
r  c                 C   sl   t  }t|r(|s|r(|sdnd}tjjjdd  }r$| d| }|tj_t	
tjjj| ||| | S )Nzindex put with accumulate.zdeterministic index put.stack_trace Found from : 
 )r_   $are_deterministic_algorithms_enabledr+   r>   r   r  rp  r  disable_cudagraphs_reasonr%   IndexPutFallbackr  )r  r   r   r  deterministicrI   r  rJ   rJ   rK   index_put_fallback  s   r  c                 C      t | |||ddS )NTr  r  r  r   r   r  rJ   rJ   rK   r    rA  r  c                 C   r  r  r  r  rJ   rJ   rK   _unsafe_index_put_  rA  r  c              
      sP  |  dkr9t|dkr9|d  tjtjhv r9|d }tt| t|  D ]}t|d}q)t	| |g||S t
 rDt| |||S |D ]}|d ur_| tjtjhv r_t| |||  S qF|   t }|rt|  r|dkrzt| dg} t| |||} |dkrt| g } | S t||  }zt||  \}}	W n ty   t| ||| Y S w dd |D }
t| tsJ |   |dkrt| dg} t||	d   } fddtt|D }t ||	||
|d |d\}}t||}tj|  |  | |||rdnd d	}td t| |}tj||_ |dkr&t| g } | S )
Nr"   r   r   c                 S   r  rT   r   r   rJ   rJ   rK   rR   '  r0  z#index_put_impl_.<locals>.<listcomp>c                    r   rJ   rJ   r   r  rJ   rK   rR   2  rS   r  
atomic_addr   rp   r   r   output_indexerscatter_mode)!	get_numelr   rv   r_   r|   r  r   r   r  r  r  r  r9   r7  r   r  r   rG   rZ   r1   r	  r[   r  r   r%   Scatterr   r
  MutationLayoutSHOULDREMOVEr>   r   r@  rE  )r  r   r   r  r   r  r   r   x_ndimr  r  r  r  expected_vals_sizer   scatterrH  rJ   r  rK   r    s   




r  )r~   r   c           	         s   dd | ||fD \}}}|   |  ksJ | tjtjhv s#J | |  | | |  fdd}tj| 	 |  ||  d}t
||   S )Nc                 s   s    | ]}t |d V  qdS )r   N)r7  rO   rJ   rJ   rK   rY   Z  r   z,masked_scatter_with_index.<locals>.<genexpr>c                    sH    }t  tj} fdd}t ||d}t |||S )Nc                     s    } t | }|gS rT   )r=   rk  )source_idx_valr   )r  source_idx_loadersource_loadersource_numelrJ   rK   load_source_vali  s   
zDmasked_scatter_with_index.<locals>.inner_fn.<locals>.load_source_valr   )r=   r   r_   r|   r  rY  )r  self_valmask_valr  
source_valmask_loaderself_loaderr  r  r  r  rK   r   e  s
   z+masked_scatter_with_index.<locals>.inner_fnr   )r   rv   r_   r|   r  r   r  r.   r   r   r7  )	r  r  
source_idxsource	self_flat	mask_flatsource_flatr   result_flatrJ   r  rK   masked_scatter_with_indexV  s    r  c                 C   rB  rT   )r  r  copy_)r  rD  r  r  r  r   output_viewrJ   rJ   rK   as_strided_scatterz  rF  r  c                 K   s   t t| |||fi |S rT   )scatter_r  )rQ   r   r   rD  r   rJ   rJ   rK   r    s   r  r^  include_selfr}  r^  r  c             	   C   s^   t |t}t| || |r| nt||r| jnd|r-tj| ||||||d |S d S )Nznot implr  )rZ   r1   r<   rv   r   r   r%   ScatterFallback)r}  r  r   r   rD  r^  r  src_is_tensorrJ   rJ   rK   scatter_fallback  s(   

	r  r^  c                C   sr   |dv sJ |d u r$t tjtjjjj}t|| ||||d}|d ur$|S |dkr+d}n|dkr1d}t	| ||||S )N>   Nr^   multiplyr  r^   sumr  prod)
rU   r6  r  r>   r   r  r  _overloadnamer  scatter_reduce_)r  r   r   rD  r^  r}  fallback_resultrJ   rJ   rK   r    s   r  c                 C   r  rT   )scatter_add_r  rQ   r   r   rD  rJ   rJ   rK   scatter_add  rA  r  c                 C   s   t | |||dS )Nr  )r  r  rJ   rJ   rK   r    rM  r  c                 K   s   t t| ||||fi |S rT   )r  r  )rQ   r   r   rD  reduction_typer   rJ   rJ   rK   scatter_reduce  s   r  )r  c             	      s  |dv sJ t tj dkrdtj v sJ dttjj |||d}|r+|S tts2J dt|	 v s<J t 
 }|dkrLtdgttr_t 
 dkr_tdgt|trrt |
 dkrrt|dg}t    | ttr nd  fdd	}fd
d}	dd }
|stj 	 fdd|
 |d d}td t|}tj||_tj 	 |	|
 ||
|d}td t|}tj||_|dkrtg S )N>   Nr  amaxaminmeanr  r"   twozKaten.scatter_reduce_.two is not the unique overload of aten.scatter_reduce_r  rq   r   c                    s@     }t|}t| }t| |dkrdn|  | < |S rQ  )r   r   r[   r=   rk  )r  r  rT  indirect_idx)r   rm  r  rJ   rK   r    s   z'scatter_reduce_.<locals>.output_indexerc                    s   r| S t   S rT   r=   r  rv   r  )r  rD  r!  rJ   rK   rX     s   zscatter_reduce_.<locals>.fnc                 S   s   | dkrdS | d u sJ d S )Nr  r  rJ   r  rJ   rJ   rK   backend_reduce_str  s   z+scatter_reduce_.<locals>.backend_reduce_strc                    s   t d  S r   r  r   )r  rJ   rK   r    r/  z!scatter_reduce_.<locals>.<lambda>r  )r   r6  r  rc   r  r  rZ   r1   rw  rv   r   r7  r  r	  r   r%   r  r   r
  r  r>   r   r@  rE  )r  r   r   rD  r^  r  r  rT  r  rX   r  zero_outrH  r  rJ   )r   rm  r  rD  r!  rK   r    s   




r  scales_xnexactc           
         s   |    |  |   d  |  d   }dd D t|ks)J |}dd t|D t|D ]\}}|d urGd| |< q9 fddfdd}	tj|  | 	 |	g ||d	S )
Nc                 S   re  rJ   )r>   r   r   rI  r   rJ   rJ   rK   rR   <  rf  z&upsample_nearestnd.<locals>.<listcomp>c                 S   s   g | ]\}}|| qS rJ   rJ   )rP   r   orJ   rJ   rK   rR   A      r  c                    s\   t | tj}  rt | t dtj} t | t |tj} t | tj} t j	| |ddS )N      ?Fr  )
r=   r  r_   r  r^   r  r   r   rS  rk  )rQ   r  r  )r  rJ   rK   scale_fnF  s   z$upsample_nearestnd.<locals>.scale_fnc                    sB   |  d  }| d   }g |fddt | D S )Nc                    s   g | ]\}}} |||qS rJ   rJ   )rP   r   rh  r  )r  rJ   rK   rR   U  r   z2upsample_nearestnd.<locals>.fn.<locals>.<listcomp>)r   )r  rQ   r   )i_sizes
inv_scalesr  r  r  rJ   rK   rX   Q  s
    zupsample_nearestnd.<locals>.fnr   )
realize_hintr   r   r   r   r   r.   r   r   rv   )
rQ   r  r  r  r  batcho_sizesr   r  rX   rJ   )r  r  r  r  r  r  rK   upsample_nearestnd1  s(   
r  c                 C   s   t | ||fddS )Nr"   r  r  rQ   r  r  rJ   rJ   rK   upsample_nearest1d`  rA  r  c                 C   s   t | ||fdddS )Nr"   Tr  r  r  r  rJ   rJ   rK   _upsample_nearest_exact1de  s   r  scales_hscales_wc                 C   s   t | |||fddS )Nr    r  r  rQ   r  r  r  rJ   rJ   rK   upsample_nearest2dj  s   r  c                 C   s   t | |||fdddS )Nr    Tr  r  r  rJ   rJ   rK   _upsample_nearest_exact2dq  s   r  scales_dc                 C   s   t | ||||fddS )Nrf   r  r  rQ   r  r  r  r  rJ   rJ   rK   upsample_nearest3dx  s   r  c                 C   s   t | ||||fdddS )Nrf   Tr  r  r   rJ   rJ   rK   _upsample_nearest_exact3d  s   r  c                    s   t  fdd|D S )Nc                 3   s    | ]	}t | V  qd S rT   r2  r   rs   rJ   rK   rY     r  z$_create_constants.<locals>.<genexpr>)r\   )rp   r   rJ   rs   rK   r    s   r  c                    s   t d dd |  d  D  |  fddtD fddtD  fdd}tj|  |  |t| dS )	Nr    c                 S   s   g | ]}|d  qS )r"   rJ   rP   hrJ   rJ   rK   rR     rS   z._reflection_padnd_backward.<locals>.<listcomp>c                    s    g | ]}d  d |   qS r    r"   rJ   r   r   paddingrJ   rK   rR     r0  c                    s$   g | ]}d  d |  d  qS r  rJ   r   r  rJ   rK   rR     s   $ c                    s  | d    |  d   fdddd 	fddt D 	fddt D }	fddt D }	
fd	dt D ttjfd
dt D }t|fddd}fdd}tjdd t D  D ]e}|tdg krq}g }g }t D ]J}	||	 dkr|	 }
|	 }n/||	 dkr||	 }
|	 d	|	 f}n||	 dkr||	 }
|	 |	 
|	  |	 d f}|	|
 |	| q||||}q}|S )Nc                    s   g  | S rT   rJ   rz   )r   grad_loaderrJ   rK   load_from_output  r  z@_reflection_padnd_backward.<locals>.fn.<locals>.load_from_outputc                 S   sP   | \}}}t |tj}t |tj}t |tj}t t ||t ||S rT   )r=   r  r_   rS  r  r  r  le)index_ranger   lbubrJ   rJ   rK   index_range_condition  s
   
zE_reflection_padnd_backward.<locals>.fn.<locals>.index_range_conditionc                    s   g | ]
}|  |  qS rJ   rJ   r   padding_leftxyzrJ   rK   rR     r~  z:_reflection_padnd_backward.<locals>.fn.<locals>.<listcomp>c                    s   g | ]
} | |  qS rJ   rJ   r   r  rJ   rK   rR     r~  c                    s(   g | ]}d  |  |  |  qS )r    rJ   r   )dhwr  r  rJ   rK   rR     s   ( c                    s.   g | ]} | d | |  |  fqS r  rJ   r   )centerr  r  padding_rightrJ   rK   rR     s     c                    s   g | ]} | qS rJ   rJ   r   )r  range_crJ   rK   rR     r  c                      r#  rT   rJ   rJ   )r  r	  rJ   rK   r    r%  z8_reflection_padnd_backward.<locals>.fn.<locals>.<lambda>r  c                    s|   t D ]}|| d || d k }t|tr|r|   S qttjfdd|D }t| fddd}t| |S )Nr    r"   c                    r   rJ   rJ   )rP   r  )r  rJ   rK   rR     rS   zN_reflection_padnd_backward.<locals>.fn.<locals>.accumulate.<locals>.<listcomp>c                      s    S rT   rJ   rJ   )r	  r   rJ   rK   r    r%  zL_reflection_padnd_backward.<locals>.fn.<locals>.accumulate.<locals>.<lambda>r  )	r   rZ   r|   r   r^  r=   r  r  r^   )gradr   index_rangesr   upper_less_than_lowerrH   g)r   r  r	  )r   rK   r    s   z:_reflection_padnd_backward.<locals>.fn.<locals>.accumulatec                 S   s   g | ]}g d qS ))r   r   r"   rJ   )rP   r   rJ   rJ   rK   rR     rS   r   r   r"   )
r   r   r^  r=   r  r  r   productr\   r   )r  left_reflectright_reflectrH   r  r  areaoutsr  r   r   r  )r  r   r  r  r  )r   r  r  r	  r  r  rK   rX     sD   
"
z&_reflection_padnd_backward.<locals>.fnr   )	r   r   r   r   r.   r   r   rv   r[   )grad_outputrQ   r  rX   rJ   )r  r   r  r  r  r  rK   _reflection_padnd_backward  s   O
r   c                    s:   |   |   fdd}tj|  |  |dS )Nc                    sF   t | } t| tksJ  D ]}| d | |  | |< q| S rO  )r[   r   )r  r   re  r_  r  rJ   rK   r    s
   zrev.<locals>.loaderr   )r   r   r.   r   r   rv   )rQ   re  r  rJ   r!  rK   rev  s   r"  c              	      sZ  t |d dks
J tdd |D rt| S |  }tttt|d d d |dd d  t |t   g  D ]\}}tj	j
||f q<t|d  }g t |d  D ]\\}}}	|	 |t|	| |  q`t |t |ksJ t|   fddfdd	}
|  tj|  |  |
|d
S )Nr    r   c                 s   r   r  rJ   )rP   r  rJ   rJ   rK   rY     r  z"constant_pad_nd.<locals>.<genexpr>r"   c                    s~   g }t  d  D ]\}\}}}|dkr|t|d |dkr+|t|| qttj|}t| fddS )Nr   c                      r#  rT   rJ   rJ   )r   r  rJ   rK   r  -  r%  z/constant_pad_nd.<locals>.mask.<locals>.<lambda>)	r   r   range_mask_lowrange_mask_highr   r^  r=   r  r  )r   r  r  r  r  r  )boundsrO  
mask_sizesr  r  r   rK   r  %  s   "zconstant_pad_nd.<locals>.maskc                    sZ   t | d  }t| d   D ]\}\}}|||  qt|t| ks)J |S rT   )r[   r   r   r   )r   r  r  r  r  )bounds_precompr  r  rJ   rK   	offset_fn/  s
   z"constant_pad_nd.<locals>.offset_fnr   )r   r   r  r   r[   r   r   r   r>   r   r   lookup_precomputed_sizerw   r   r   rv   r   r.   r   r   )rQ   r  rO  r_  lr  r  r  r  r  r(  rJ   )r%  r'  rO  r  r&  r  r  rK   constant_pad_nd  s2   *

r+  r   c                 C   s&   t t | tjt t|tjS rT   )r=   r  r  r_   r  rw   r   )r   r  rJ   rJ   rK   r#  ?  s   r#  c                 C   s    t t | tjt |tjS rT   )r=   r  r  r_   r  )r   r  rJ   rJ   rK   r$  F  s   r$  c                 C   s   t t| |t| |S rT   )r=   r  r#  r$  )r   r  r  rJ   rJ   rK   
range_maskM  s   r,  r  c                    sF       d   pdg   fdd}|S )Nr   c                    s|   | d   |  d   t tj fddtD }r1t| fddS t| 	fddS )Nc                    s.   g | ]}t |  | |  |  qS rJ   )r,  r   )r  ih	padding_hrJ   rK   rR   a  s   . z=constant_boundary_condition.<locals>.load.<locals>.<listcomp>c                      s   t  dg S )Nr   )constant_boundary_conditionrJ   )r   r-  pad_fill_valueprefixrQ   rJ   rK   r  f  s    
z;constant_boundary_condition.<locals>.load.<locals>.<lambda>c                      s   g  S rT   rJ   rJ   )r-  r2  r  rJ   rK   r  l  r/  )r   r^  r=   r  r   r  )r   r  r   rO  r  r1  r  r.  rQ   r  )r-  r2  rK   r   [  s   	z)constant_boundary_condition.<locals>.loadr   r   )rQ   rO  r  r1  r   r   rJ   r3  rK   r0  T  s
   r0  c                 C   s   t | d||   || d  || d  || }|r|t | d||   || d  d|| d   || }tjj|d ||  |  ||  dkra|d8 }tjjd|||  |  ||   tjj|| dkrztjj|| d}||fS |}||fS )Nr    r"   r   F)r   r>   r   r   r  rQ  r   )rQ   r   kernel_sizer  r  	ceil_modex_outx_altrJ   rJ   rK   pooling_sizer  s    ,0*$r9  c                 C   s4   t | d} | d | d  }|dkptdd |D S )Nr    r   r"      c                 s   s    | ]}|d kV  qdS r\  rJ   rP   rg  rJ   rJ   rK   rY     r  z:should_fallback_max_pool2d_with_indices.<locals>.<genexpr>)r:   r   )r5  dilationwindow_sizerJ   rJ   rK   'should_fallback_max_pool2d_with_indices  s   
r>  assert_fallbackc                C   s   |dkrddg}|dkrddg}|s|}t |d}t |d}t |d}t |d}t| ts/J t|dks7J t|dks?J t|dksGJ t|dksOJ t|  dv sYJ t||}|d urh||kshJ |||||fS )Nr   r"   r    r  )r:   rZ   r1   r   r   r>  )rQ   r5  r  r  r<  r@  use_fallbackrJ   rJ   rK   max_pool2d_checks  s(   




rB  c                    s  t |  |dd\ }}|   |  ^ }}}	t|d |\}
}t|	d |\}}t||
|g }d sEd sE|sE|rOt| tdddn|   fdd	}tj	| 
 |  tj|dd
|d}tj	| 
 tjtj|dd
|d}||fS )NFr?  r   r"   -infr    r/  c                    s   | ^ }}}d }d }t t d t d D ]T\}}|d  | d  }	|d  | d  }
g ||	|
}|r_t| d  | tj}|d u rT|}ntt||||}|d u rf|}qt	||}q|rq|S |S rQ  )
r   r  r   r=   r  r_   r  rY  gtr  )r  return_indexr2  bhbwmaxvalmaxindexh_incw_incr-  iwrm  r   r5  r  r  r  rJ   rK   rX     s(   z/_low_memory_max_pool2d_with_offsets.<locals>.fnrE  r   T)rB  r  r   r9  r[   r0  r   r   r.   r   r   rv   r   r   r_   r  )rQ   r5  r  r  r<  r6  r   r  r  wh_out
ceil_mode1w_out
ceil_mode2r  rX   r   offsetsrJ   rM  rK   #_low_memory_max_pool2d_with_offsets  s2   
rU  c                    sH   |   fdd  fdd}tj|  tj||  d}|S )Nc           	         sf   t  tj}t |d  d  tj}t |d  d  tj}||  }|| }|| | S rQ  )r=   r  r_   r  )	rJ  rK  rF  rG  w_inhbasewbaser-  rL  )input_widthr  r  rJ   rK   increments_to_index  s   zF_low_memory_max_pool2d_offsets_to_indices.<locals>.increments_to_indexc                    sP   | ^ }}}g |||}t tj}|| }|||  } ||||S rT   )r=   r  r_   rS  )r  r2  rF  rG  r*  kw_constrJ  rK  )rZ  kernel_widthoffsets_loaderrJ   rK   offsets_to_indices  s   zE_low_memory_max_pool2d_offsets_to_indices.<locals>.offsets_to_indicesr   )r   r.   r   r   r_   r  r   )rT  r\  rY  r  r  r^  r   rJ   )rZ  rY  r\  r]  r  r  rK   )_low_memory_max_pool2d_offsets_to_indices  s   r_  r!  c                    s  dkrddg|dkrddg}st |tsJ tdks#J tdks+J tdks3J t|dks;J t| dv sEJ |   z|  }W n tyZ   d }Y nw t |trt |jjtr|jj}	t	j
d t	j|	 |	 |	 d|	d}
|
  |
 }nz| }W n ty   d }Y nw |d ur|d dkp|d uo|d dk}tdd |D rt| ||||S | ^ }}
|  ^ }| |   t| }tfd	dtd d D tfd
dtd d D 		 }|dkrt| ||||S |  	
fdd}tj|  |  ||d}|rBt	j|S |S )Nr   r"   r    r  )r   rp   r  )rE  r  r   c                 s   s    | ]}|d kV  qdS r\  rJ   r;  rJ   rJ   rK   rY   J  r  z3max_pool2d_with_indices_backward.<locals>.<genexpr>c                 3   <    | ]}t |d   t d | d   d    dV  qdS r   r"   Nmaxr  r5  r  rJ   rK   rY   W  
    *
c                 3   <    | ]}t |d   t d| d   d    d V  qdS r"   r   Nrb  rP   rO  rd  rJ   rK   rY   [  re  r:  c                    sV  | ^ }}}t |
 | tj}|d  }|d  }t t|d  d  d tj}t t|d  d  d tj}t t|d d tj}t t|d d tj}t |t dtj}t |t dtj}t |t tj}t |t tj}d }	tD ]}
t	D ]}t 	|t |
tj}t 	|t |tj}g |t j
t |t |t dtjd ddt j
t |t |t dtjd dd}|} |}t ||}|	d u rt ||t dtj}	qt t t ||t |||}t |t 	|	||	}	qq|	d us)J |	S )Nr   r"   Fr  r   r  )r=   r  r_   rS  r   r  r  r  r   r^   rk  r  r  rY  r  r  r  )r  r2  r  rO  
index_testphstartpwstartphendpwendgradientph_pw_phpw
grad_indexindex_actual	grad_partr   r  r  h_window_sizers  indices_sizer5  r  pooled_heightpooled_widthr  w_window_sizewidthrJ   rK   rX   j  sl     


#z,max_pool2d_with_indices_backward.<locals>.fnr   )rZ   r1   r   r   r  r  r  r   r.   r%   r
  r  r   rv   decide_layoutr   )fallback_max_pool2d_with_indices_backwardr   r[   rc  r   r   r  r   )r  rQ   r5  r  r  r<  r6  r   	gO_strider   x_bufferx_strideis_channels_lastr  heightr   r  r=  rX   r   rJ   rw  rK    max_pool2d_with_indices_backward  s   	

 ;r  r  c                    s*   |   ^ }}}|   fdd}|S )Nc              
      s   |\|\ |\}}t t t   tjt |tjt t  tjt |tj}t | fddS )Nc                      s   g    S rT   rJ   rJ   )h_start_indexr-  rL  r2  w_start_indexr  rJ   rK   r    r~  z3pad_adaptive_loader.<locals>.load.<locals>.<lambda>)r=   r  r  r  r_   r  r  )r2  
incrementsstart_indicesend_indicesh_end_indexw_end_indexr  pad_valr  )r  r-  rL  r2  r  rK   r     s$   z!pad_adaptive_loader.<locals>.loadr4  )rQ   r  r   r  rO  r   rJ   r  rK   pad_adaptive_loader  s   r  c                    s(   |\|\  fdd}|S )Nc                    s   | ^ }}}|} |}|}|}d }	t td td D ]\}
}|||
|g||g||g}|	d u r>|}	q&t||	}	q&|	S rQ  r   r  r   r=   r^   )r  r  r2  rF  rG  r  r  r  r  totalr-  rL  rm  h_end_index_fnh_start_index_fnkernel_maxesw_end_index_fnw_start_index_fnrJ   rK   fn_sum  s"   $z)_adaptive_pooling_idx_sum.<locals>.fn_sumrJ   )r  start_index_fnsend_index_fnsr  rJ   r  rK   _adaptive_pooling_idx_sum  s   r  c                    s  t tsJ t|dksJ    ^ }}}tjj|}tjj|}|\}}||kr9||kr9t	S |dksA|dkrTg |||}t
|  dS || dkrm|| dkrm|| || g}t|S t|| d |}	t|| d |}
t|||g } }dd }dd }tj|||d	}tj|||d	}tj|||d	}tj|||d	}|	|
 }|d
krt|S t|	|
g||g||g tt fdd}tj |||d}|S )Nr    r   r  r"   c                 S      t | | |S rT   r   r   out_diminp_dimrJ   rJ   rK   start_index     z)_adaptive_avg_pool2d.<locals>.start_indexc                 S      t | d | | d |S rO  r  r  rJ   rJ   rK   	end_index  r  z'_adaptive_avg_pool2d.<locals>.end_indexr  r  r:  c                    s   t  | t | S rT   )r=   truedivr  r  r  ones_loaderrQ   rJ   rK   rX   (  s   z _adaptive_avg_pool2d.<locals>.fnr   )rZ   r1   r   r  r   r>   r   r   rI  r  r  rv   r   
avg_pool2dr4   r[   r   r   fallback_adaptive_avg_pool2dr  r  	ones_liker.   r   )rQ   r  r  h_inrV  rP  rR  o_sizer5  h_kernel_maxw_kernel_maxr  rp   r  r  r  r  r  r  r=  rX   rvrJ   r  rK   _adaptive_avg_pool2d  sT   

r  c                    s~   |\}|\}}dd }dd }	t j|||dt j|	||d t j||dt j|	|d fdd}
|
S )Nc                 S   r  rT   r  r  rJ   rJ   rK   r  =  r  z._adaptive_pooling_idx_max.<locals>.start_indexc                 S   r  rO  r  r  rJ   rJ   rK   r  @  r  z,_adaptive_pooling_idx_max.<locals>.end_indexr  c                    s   | ^ }}}|} |}|}|}d }d }	t td td D ]A\}
}||
|g||g||g}t||
  | | tj}r\|	d u rQ|}	ntt||||	}	|d u rc|}q(t	||}q(rn|	S |S rQ  )
r   r  r   r=   r  r_   r  rY  rD  r  )r  r2  rF  rG  r  r  r  r  rH  rI  r-  rL  rm  r   r  r  r  r  rE  r  rV  r  rJ   rK   fn_maxH  s6   $z)_adaptive_pooling_idx_max.<locals>.fn_max)r   r   )r  in_sizes	out_sizesrE  r  r  rP  rR  r  r  r  rJ   r  rK   _adaptive_pooling_idx_max7  s   "r  c              	   C   s  t | tsJ t|dksJ |   |  ^ }}}tjj|}tjj|}|\}}|dks5|dkrRg |||}t	|| 
 |  dt	|tj|  dfS || dkr|| dkr|| || g}t|ddgdrst| |S t| ||ddgddgdd\}	}
t|
|d ||ddgd}|	|fS t|| d |}t|| d |}t|||g }| 
 }|| }|d	krt| |S t||g||g||gdt| td
d}t||g||g||gdt| td
d}tj|  |||d}tj|  tj||d}||fS )Nr    r   r  r"   )r<  F)r  r  r<  r6  )r  r:  rC  )r  r  r  rE  r  Tr   )rZ   r1   r   r  r   r>   r   r   rI  r  rv   r   r_   r  r>  max_pool2d_with_indicesrU  r_  r4   r[   fallback_adaptive_max_pool2dr  r  r   r.   r   )rQ   r  r  r  rV  rP  rR  r  r5  r  rT  r   r  r  r  rp   r=  inner_func_max_valinner_func_max_idxr  rirJ   rJ   rK   adaptive_max_pool2dr  s|   


r  c                    sN      t  d    fdd}|S )Nr"   c                    s   g | }t | }t   }t || | t ||  }t |tj}t |t d tj}t ||t  tjS rO  )	r=   r  rv   r{  r   r_   r  r  rY  )r2  r   samplei_expr
alpha_exprseq_ir  r   r   in_sz	kernel_szout_szsamplessamples_loaderrJ   rK   r     s   z)_fractional_pooling_offsets.<locals>.load)r   r   )r  r  r  r  r   r   rJ   r  rK   _fractional_pooling_offsets  s   r  c                    s   |    |  ^ }\}}|\}}|| dkr!t| ||S tjt|g|d}	|	dd |	dd|   fdd}
t|||g }tj	| 
 |  tj|
dd	|d
}tj	| 
 tjtj|
dd	|d
}||fS )Nr:  )r  r  r  r  r   r/  r"   c              	      s   | ^ }}}t  ||}t ||}d }d }ttd td D ]I\}	}
g |||	 ||
 }|rft ||	  | |
 tj}|d u rT|}nt t 	t 
||t |||}|d u rm|}q*t ||}q*|rx|S |S rQ  )r=   rk  r   r  r   r  r_   r  rY  or_rD  rv  r  )r  rE  r2  rF  rG  r  r  rH  rI  r-  rL  rm  r   
h_index_fninp_hinp_wr5  
w_index_fnr  rJ   rK   rX     s,   $z!fractional_max_pool2d.<locals>.fnFrN  r   T)r  r   fallback_fractional_max_pool2dr   r   r  r   r[   r.   r   r   rv   r_   r  )rQ   r5  r  random_samplesr  kernel_hkernel_wrP  rR  gen_offsets_for_dimrX   r  r  r  rJ   r  rK   fractional_max_pool2d  sB   

r  c                    s"      ^ }}}tjj|}tjj|}|^ }}}	|| dkr9||	 dkr9t|| ||	 gddS t||}
t||	}dd fdd}tj	||d}tj	|||d}tj	|	|d}tj	||	|d}t
|
|g||g||g  fd	d
}tj  |t|d}|S )Nr   r"   )divisor_overridec                 S   s   t | | t|S rT   )r   rw   rE  r  rJ   rJ   rK   r  6     z0upsample_nearest2d_backward.<locals>.start_indexc                    s    | d ||S rO  rJ   r  )r  rJ   rK   r  9  r  z.upsample_nearest2d_backward.<locals>.end_indexr  c                    s    | t S rT   )r  r  )r  rQ   rJ   rK   rX   H  r  z'upsample_nearest2d_backward.<locals>.fnr   )r  r   r>   r   r   rI  r  r4   r   r   r  r.   r   r   rv   r[   )rQ   r  
input_sizer  r  r  r  r  out_hout_wr  r  r  r  r  r  r  rX   r  rJ   )r  r  rQ   rK   upsample_nearest2d_backward$  s8   

r  rJ   c              
   C      t | ||||||ddS )Nr    r/  _avg_poolndrQ   r5  r  r  r6  count_include_padr  rJ   rJ   rK   r  ]     
r  c              
   C   r  )Nrf   r/  r  r  rJ   rJ   rK   
avg_pool3ds  r  r  c                    s  	s	sdg t t 		t t| ts!J tks)J t	ks1J tks9J t|  d d fv sIJ |   |  d   }|   d  t 	fddtD  \}	}
tszt|
rt	| dd
d}n| 
 
d	}t|t|	 }|  ttj}|d
krdkrt}ndkrt}ntd || 	 |S 	fdd|r|r|rd| nd| 
fdd}n	
fdd}tj|  ||d}|S )Nr   r"   r    c              	      s"   g | ]}t | | qS rJ   )r9  r   )r6  r  r5  r  r  rJ   rK   rR     s    z_avg_poolnd.<locals>.<listcomp>r  r/  TFr:  rf   zUnknown dim: c                    s   | d   }|  d   d }t jfddtD  D ]% fddtD }|g ||}|d u r=|}qt||}q|S )Nc                    s   g | ]}t  | qS rJ   )r   r   )r5  rJ   rK   rR     r  z/_avg_poolnd.<locals>.fn_sum.<locals>.<listcomp>c                    s,   g | ]} | |  |  |  qS rJ   rJ   r   )r   r-  r  r  rJ   rK   rR     s   , r  )r  r  r2  r  r   rm  )r   r5  r  r  )r   r-  rK   r    s    z_avg_poolnd.<locals>.fn_sumr  c                    s   t | t  S rT   )r=   r   r  r  )rp   r  r  r  rJ   rK   rX     r  z_avg_poolnd.<locals>.fnc           	         s   | d   }|  d  }g }t D ]<}|| |  |  }t||  | |  } sBt|d}t|| }t|| tj}|| qt	
tj|}t| |S r   )r   rw   MinMaxr=   r  r_   rS  r   r   r^  r   r  )	r  r2  rF  divide_factorsr   hstarthendfactordivide_factor)r  r   r  r  r5  r  r  r  rJ   rK   rX     s    r   )r:   rZ   r1   r   r   r  r   r   r   r0  r   r[   rv   r   r^  operatorr   fallback_avg_pool2dfallback_avg_pool3d
ValueErrorr.   r   r   )rQ   r5  r  r  r6  r  r  r   r  rP  
ceil_modeshad_paddingr  r=  fallbackrX   r  rJ   )r6  r  r   rp   r  r  r5  r  r  r  r  rK   r    sr   




 

r  c                    s  d u sdksJ dssddgt | tsJ t |ts$J tdks,J tdks4J tdks<J t| dv sFJ |   | ^ }td|\}	}
td|\}}|  d pwd pw|
pw||  ^ }	
t| }| }t	fddt
d d D t	fddt
d d D  }|d	krt| ||S fd
d  	
fdd}tj|  |||d}|S )Nr   zdivisor must be not zeror    r  r"   c                 3   r`  ra  rb  r  rd  rJ   rK   rY   (  re  z&avg_pool2d_backward.<locals>.<genexpr>c                 3   rf  rg  rb  rh  rd  rJ   rK   rY   ,  re  r:  c              	      sX  t d tj}t d tj}t d tj}t d tj}t d tj}t d tj}t t | ||}t t |||}	t t ||t t  tj|}
t t |	|t t tj|}t 	|t dtj}t 	|	t dtj}	t |
t  tj}
t |t tj}t t |
|t ||	}|S )z{
        This computes the scaling factor that we will divide an element
        by when `count_include_pad=False`
        r   r"   )
r=   r  r_   rS  r  r   r  r^   r  r  )rr  rs  stride_hstride_wpad_hpad_wr  r  r  wstartr  wendr  )r  r5  r  r  r}  rJ   rK   !compute_pool_size_without_padding?  s,   

z>avg_pool2d_backward.<locals>.compute_pool_size_without_paddingc                    sR  | ^ }}}|d  }|d  }t t|d  
d  
d tj}t t|d  
d  
d tj}t t|
d d tj}t t|
d d tj}t |t dtj}t |t dtj}t |t tj}t |t 	tj}d }tD ]}	tD ]}
t 	|t |	tj}t 	|t |
tj}d ur}nssd d  }n ||}t 
g |t jt |t |t dtjddt jt |t |t dtj	dd|}t t ||t ||}|d u rt ||t dtj}qt |t 	|||}qq|d us'J |S )Nr   r"   Fr  r  )r=   r  r   r_   rS  r  r  r  r   r^   r  rk  r  r  r  rY  r  )r  r2  r  rO  rk  rl  rm  rn  ro  rp  rq  rr  rs  r  partr  )r  r  r  r  rx  r  r5  r  rz  r{  r  r|  rJ   rK   rX   [  sv     
	


*zavg_pool2d_backward.<locals>.fnr   )rZ   r1   r   r   r  r9  r   r[   rv   rc  r   fallback_avg_pool2d_backwardr.   r   r   )r  rQ   r5  r  r  r6  r  r  r  rP  rQ  rR  rS  r   r  rp   r=  rX   r  rJ   )r  r  r  r  rx  r  r  r5  r  rz  r{  r  r|  r}  rK   avg_pool2d_backward  s^   "Ar  c                 C   s   |   }t|tr|g}n|stt|}t|dkr*t|dv s(J d| g S t|}tt|D ]5}|| dk rL||  t|rHt|nd7  < d||   krZt|k sin t|dkrg|| dksiJ q4tt|t|ksxJ d|S )Nr   )rJ   r  r  zinvalid axis: r"   zreduction axis not unique)r   rZ   rq   r   r   r\   r[   r]   )rQ   r  r  r   rJ   rJ   rK   _validate_reduction_axis  s    
 :r  c          
         s   |d ur	t | |} |  tt| |}g }g g }g ttD ]}||v r5| ||  q"| ||  q" fdd}r_t}	D ]	}t	d|	|< qTn|}	| 
  t|  |pn|  |  ||	|dS )Nc                    s   t |t ks
J rt  t ksJ  fddD  t  t ks)J d gt  t |  }tt t|D ]\}}|||< q@|S )Nc                    r   rJ   rJ   r   r   rJ   rK   rR     rS   z9_make_reduction_inner.<locals>.loader.<locals>.<listcomp>)r   r   chainr   )r   reduction_indexr  r  varinner_loaderkeepdimskept_idxreduced_idxr  r   rK   r    s   
z%_make_reduction_inner.<locals>.loaderr"   )r   	dst_dtyper  r   r   reduction_ranges)r   r   r]   r  r   r   r   r[   rw   r   r   r   r   rv   )
rQ   r  r  rp   r   
kept_sizesreduced_sizesr   r  r  rJ   r  rK   _make_reduction_inner  s<   



r  r  c                    s   dd d fdd}|S )NFrs   c                   sB   t | ||| d}tjd| d|}t|jjtr|  |S )Nr  r  rp   r   )r  
input_noderJ   )r  r/   r   rZ   r   r	  )rQ   r  r  rp   r   r  r   r  rJ   rK   r     s   zmake_reduction.<locals>.innerNFrJ   )r  r   r   rJ   r	  rK   make_reduction  s   r  c                C   sJ   |d ur	t | |} |  }t| |}t|  |  f|  f|  |dS )N)r   dtypes	inner_fnsr  r  )r   r   r  r   r   rv   r   )rQ   r  rp   r  rJ   rJ   rK   _make_scan_inner  s   

r  rs   c                   s   |d ur	t | |} |   t| |}|  }|tjtjfv r$t | tj} t| ||}t	 fdd|D }t
||  |  }t|t| }t t|||S )Nc                 3       | ]} | V  qd S rT   rJ   r   r  rJ   rK   rY     r  zmean.<locals>.<genexpr>)r   r   r  rv   r_   float16r  r   sum_r;   r%   r*   r   r)   r   r[   div)rQ   r  keepdimrp   output_dtype
sum_resultdenomrJ   r  rK   r  
  s   

r  c           
         s   |d u rd}|    t| |}t| |dd}|r|  tt| |}t|||}t fdd|D }|r>t	|| d}t
||  |  }t|t|  }t||}	|s]|	fS |ra|nt||}|	|fS )Nr"   T)r  c                 3   r  rT   rJ   r   r  rJ   rK   rY   (  r  z var_mean_sum_.<locals>.<genexpr>r   )r   r  r  r	  squarer  r  r;   rw   r  r%   r*   rv   r   r)   r   r[   r  ri  )
rQ   r  
correctionr  return_meanx_meandiffsr  r  x_varrJ   r  rK   var_mean_sum_  s&   

r  c                 C   sV   t | |}t| ||d d d}|d }t|d }t|tjo*t|tjk o*t|dkS )Nr  r   r  r"   )	r  r  r;   rZ   rw   r   rq   r#   unroll_reductions_threshold)rQ   r  r  r   r   reduction_numelrJ   rJ   rK   use_two_step_variance5  s   


r!  c                   s    d u rd t | ||d d d}|d}|d |d tjjd|fd|  d|\}}}	|  |  |  t| |}t	fdd	|D d
d  fdd}
t
|
|}|rj|  ||fS |fS )Nr"   r  r   r  r  welford_reduce)r  r  rp   c                 3   r  rT   rJ   r   r  rJ   rK   rY   [  r  z$var_mean_welford_.<locals>.<genexpr>c                 S   s4   t | tjr| jstt| tj|S t	| |S rT   )
rZ   rw   rx   	is_numberr=   r   r  r_   r  r  r<  rJ   rJ   rK   get_constant_or_index_expr]  s   z5var_mean_welford_.<locals>.get_constant_or_index_exprc                    s4    }}t d}| t |||  S r   )r=   r  r  )r   cNzero)r  rp   r$  rnumelrJ   rK   r  b  s   

z#var_mean_welford_.<locals>.scale_fnrJ   )r  r  r%   WelfordReductionr   rv   r	  r   r  r;   r   )rQ   r  r  r  r  r   r  r  m2r   r  r  rJ   )r  rp   r$  r(  r  rK   var_mean_welford_E  s6   




r+  c                   s   |    t }t| |dd} t| ||||d}t| ||dr&tdi |ntdi |}t fdd|D }|s>|d S |S )	NFr#  )rQ   r  r  r  r  )r  r  c                 3   s    | ]
}t | d dV  qdS )Fr#  Nr  rO   	out_dtyperJ   rK   rY     s    z#var_mean_helper_.<locals>.<genexpr>r   rJ   )rv   r   r   r   r!  r  r+  r\   )rQ   r  r  r  r  compute_dtyper   r   rJ   r,  rK   var_mean_helper_p  s    	r/  )r  r  c                C   r  )NFr  r  r  r  r/  rQ   r  r  r  rJ   rJ   rK   var_     
r3  c                C   r  )NTr0  r1  r2  rJ   rJ   rK   var_mean  r4  r5  c                 C   st   |dk rt t| | |S |dkrtd|S |dkr| S t | |d |}t||}|d dkr8t|| }|S )Nr   r"   r    )pow_recursiver=   r  r  r   )rQ   r   rp   r  rJ   rJ   rK   r6    s   r6  c                 C      t | |S rT   )r=   powr   r   rJ   rJ   rK   
pow_native     r:  )r   c                    sV  t trtkrt tS t trdkrt S t tr,dkr,t S tdd  fD }t|}t toQd  k oIdk n  pQ|oQdk}|ro   fdd	}t	j
    |  d
S t  tr dkr}tdS  dkrt rtS |rt  trt S t trt S t S t S )Nr  r"   c                 s   s$    | ]}t |tjr| V  qd S rT   )rZ   r%   r1   rv   rO   rJ   rJ   rK   rY     r  zpow.<locals>.<genexpr>i    r   c                    s   t |   S rT   )r6  rv   r  r   r   r  rJ   rK   rX     r  zpow.<locals>.fnr   r    )rZ   r   rq   r8  sqrtr  r   r   r   r.   r   r   rv   r   r   rs  r   exp2fallback_pow_scalarfallback_pow_tensor_scalarfallback_pow_tensor_tensorr:  )r   r   rp   is_integer_powembed_exponentrX   rJ   r=  rK   r8    s@   
"







r8  c                 C   s   t | tr	| j}n| }t |tr|j}t |tjs3tj|  |  |	 | 
 dj}t |tjs3J t |tjrR| sR| sRt |jtjsR|  |j|_| S tjj|||d | S )Nr   unsafe_alias)rZ   r1   r   r%   r  r.   r   r   rv   r   r   is_input_bufferis_module_buffer	NopKernelr	  r  realize_into)changedrm  rF  changed_datarJ   rJ   rK   rC    s:   

rC  c                 C   s   t | t| |S rT   )rC  rs  )rQ   rO  rJ   rJ   rK   fill_  rM  rM  c                 C   s4   t ||  }t||  }t||  }t| |S rT   )r?  r   r   rv   r   r   rC  )r+  rD  r  rJ   rJ   rK   r    s   
r  c                 C   r7  rT   )r=   floordivr9  rJ   rJ   rK   rN    r;  rN  c                 C   r7  rT   )r=   truncdivr9  rJ   rJ   rK   rO    r;  rO  c                 C   s   t | ot |}t| ot|}|dkr(|rJ d|r!t| |S tt| |S |dkr@|r2J d|r9t| |S tt| |S t| |S )Nr{  z5floordiv operands can not be boolean at the same timer}  z5truncdiv operands can not be boolean at the same time)r{   r}   rN  r{  r  rO  r}  )r   r   rounding_modeboth_integerboth_booleanrJ   rJ   rK   div_mode  s   
rS  c                 C   s8   t | ot |}|rt| |S ttjj}t|| |S rT   )r}   logical_andr,   r6  r   rD  r   )r   r   	both_boolrX   rJ   rJ   rK   r   )  s
   
r   c              	   C   s   t | tjrt| jS t | tjrt|  S t | tjr| S t | tjs'dS t	j
j|  }t|' ttjdd | j|   }W d   n1 sPw   Y  W d   n1 s_w   Y  t |t	j
jjsnJ t |jtjrx|jS dS )z:Try convert an arbitrary IR node into an ir.Constant valueNallow_indexingT)rZ   r%   
MutableBoxget_constant_valuer   r  r  r   Loopsr_   	_inductorops_handlerExtractConstantsHandlerr   r>   set_ops_handlerr   objectr  r   inner_fn_argsvirtualizedOpsValuer   )rQ   rc  r   rJ   rJ   rK   rX  3  s(   
 rX  c                 C   s|   t dd | |fD }|rt| |S t| }d ur3|jdkr)ttd|j}nd|j }t| |S dd }t|| |S )Nc                 s   s     | ]}t |pt|V  qd S rT   )r}   r{   rO   rJ   rJ   rK   rY   T  r  zdiv_prim.<locals>.<genexpr>r   infr  c                  W   rX  rT   )r=   r  r   rJ   rJ   rK   rX   a  rZ  zdiv_prim.<locals>.fn)	r   rO  rX  r   mathcopysignr   r   r   )r   r   is_integraldivisorr  rX   rJ   rJ   rK   div_primR  s   



rg  c                 C   s    t | |ftjd\} }t| |S r|  )r   r   INT_TO_FLOATrg  r9  rJ   rJ   rK   r  g  s   


r  c                 C   s4   t | pt| }|rdd }ndd }t|| |S )Nc                 S   r7  rT   )r=   modr9  rJ   rJ   rK   rX   y  r1  zfmod.<locals>.fnc                 S   r7  rT   )r=   fmodr9  rJ   rJ   rK   rX   ~  r1  )r}   r{   r   )r   r   re  rX   rJ   rJ   rK   rj  s  s
   
rj  c                 C   s:   |   }t|st|rt| t } dd }t|| S )Nc                 S   s
   t | S rT   )r=   rsqrtrz   rJ   rJ   rK   _rsqrt  rZ  zrsqrt.<locals>._rsqrt)rv   r   r   r   r_   r6  r   )rQ   rp   rl  rJ   rJ   rK   rk    s
   rk  c                C   B   t |  st|  r|d u rtj}td|d}|| |||dS )Nr  r  rs   r   rv   r   r_   r  r  rQ   r  r  rp   rX   rJ   rJ   rK   r       

r  c                 C      t |  st|  r|d u rtj}t|  dkr.|dv s!J |p&|  }t| |ddS dd }t| ||d}t	j
jd
i |d|i\}|d u rRt| ||d	S |S )Nr   r   r   Tr#  c                 S      | \}|\}t ||fS rT   )r=   r^   a_tupleb_tupler   r   rJ   rJ   rK   
combine_fn     zcumsum.<locals>.combine_fnr  rp   rw  r   rp   rJ   )r   rv   r   r_   r  r   r   r   r  r%   Scanr   fallback_cumsumrQ   r  rp   rw  r   r  rJ   rJ   rK   cumsum      

r~  c                 C   rq  )Nr   rr  Tr#  c                 S   rs  rT   )r=   r   rt  rJ   rJ   rK   rw    rx  zcumprod.<locals>.combine_fnry  rw  rz  rJ   )r   rv   r   r_   r  r   r   r   r  r%   r{  r   fallback_cumprodr}  rJ   rJ   rK   cumprod  r  r  c                 C   sv   dd }|   }t|  dkr|dv sJ t| S t| ||d}tjjdi |d|i\}|d u r9t| |dS |S )	Nc              	   S   s\   | \}|\}t ||}t ||}||kt | B }t |t t || | |fS rT   )r=   r  r  rq  rY  log1pexp)ru  rv  r   r   min_vmax_vr  rJ   rJ   rK   log_add_exp_helper  s   $z(logcumsumexp.<locals>.log_add_exp_helperr   rr  ry  rw  r/  rJ   )	rv   r   r   r  r  r%   r{  r   fallback_logcumsumexp)rQ   r   r  rp   r   r  rJ   rJ   rK   logcumsumexp  s   r  c                 C   s   t |  dkr|dv sJ t| t| tjdfS |  }tjd|dd}|tj	u r,dn|j
r5t|jnt|j}t| ||d}|tjf|d< |  d	d
 f|d< tjjdi |d|i\}}|d u rlt| |dS ||fS )Nr   rr  rs   argmaxFrp   arg_break_ties_leftry  r  c                 S      dS NrindexrJ   r  rJ   rJ   rK   r        zcummax.<locals>.<lambda>r  rw  r/  rJ   )r   r   r  r%  r_   r  rv   r%   get_reduction_combine_fnr|   r-  r.  minr0  r  r   r{  r   fallback_cummax)rQ   r  rp   rw  	min_valuer   r   r   rJ   rJ   rK   cummax  (   


r  c                 C   s   t |  dkr|dv sJ t| t| tjdfS |  }tjd|dd}|tj	u r,dn|j
r5t|jnt|j}t| ||d}|tjf|d	< |  d
d f|d< tjjdi |d|i\}}|d u rlt| |dS ||fS )Nr   rr  rs   argminFr  Try  r  c                 S   r  r  rJ   r  rJ   rJ   rK   r    r  zcummin.<locals>.<lambda>r  rw  r/  rJ   )r   r   r  r%  r_   r  rv   r%   r  r|   r-  r.  rc  r0  r  r   r{  r   fallback_cummin)rQ   r  rp   rw  	max_valuer   r   r   rJ   rJ   rK   cummin  r  r  c                C   rm  )Nr  r  rs   rn  ro  rJ   rJ   rK   r  $  rp  r  c                 C   s   t | tj} td| ||dS )Nr   r  r  )r   r_   r|   r  rQ   r   r  rJ   rJ   rK   
reduce_any/  s   r  c                 C   2   |d urt | ||dt| ||dfS t | d |dS Nr  )reduce_amaxreduce_argmaxr  rJ   rJ   rK   
reduce_max5  
   r  c                 C   r  r  )reduce_aminreduce_argminr  rJ   rJ   rK   
reduce_min@  r  r  xor_sumrc  r  r  r  r  
logical_or)r   r   c                 C   s   t | |tj|dS )NrE  r~   r   rH  r   rh  )r  rE  r   rJ   rJ   rK   register_pointwise_numericZ  s   r  c                 C   s   t | tjddS )NT)r~   rF  r  r  rJ   rJ   rK    register_pointwise_numeric_ldf64c  s
   r  rU  logical_not)r   )r~   r   r   identity)pointwise_overrides_datac                 #   s    t |  t|  jd }|d u rd S  fdd}t|tjjr6| D ]}t||}| j||fV  q#d S | j||fV  d S )Nc                    s    j d u r	t| S d S rT   )tritonr'  r  r9  rJ   rK   make_triton_fallback  s   
z6_get_pointwise_overrides.<locals>.make_triton_fallback)	r  rU   rE  rZ   r_   r`   ra   rc   r~   )nsrE  r  r  olnamer  rJ   r9  rK   _get_pointwise_overrides  s   
r  r  c                    s,   | t |< t|   fdd}t| | d S )Nc                     sB    | i |}g }t | d |D ]\}}|t||dd q|S )Nr   TrE  )r   r   rC  )r   r   resultsmut_resultsr   r  outplace_oprJ   rK   rX     s
   z$register_foreach_inplace.<locals>.fn)rE   rD   r^   r   )aten_opoutplace_aten_opr  rX   rJ   r  rK   register_foreach_inplace  s   
r  c                    s   t | d d fdd}|S )Nr   c                     s.    | i |}t || d  }t| d |S r   )r   rv   rC  )r   r   r  r  rJ   rK   rX   7  s   zregister_inplace.<locals>.fn)r   )r  r  rX   rJ   r  rK   register_inplace6  s   
r  c                 C   rJ  rT   rJ   )r   r  rc  rJ   rJ   rK   sym_constrain_range`  ra  r  c                 C   &   t jjjd }t|tjsJ |jjS r  	r>   r   r  rp  rZ   r_   rA  r  rD  r   r   rm  rJ   rJ   rK   sym_sizee  s   r  c                 C   r  r  r  r  rJ   rJ   rK   
sym_stridev  s   r  c                 C   s   |   S rT   )r  )r   rJ   rJ   rK   	sym_numel~  r  r  c                 O   r  )NzHelpful for debuggingrF   )r  r   r   rJ   rJ   rK   foobar  r  r  c                 C   s   |    t| S rT   )r	  r  rz   rJ   rJ   rK   _realize  s   r  c                 C   s   |    t| | | S rT   )r	  r%   ResizeStorageBytes)variabler  rJ   rJ   rK   resize_storage_bytes_  s   r  c                 C   s"   |    |   tt| |S rT   )r	  r1   r   r%   SetSourceTensorKernel)r  source_tensorrJ   rJ   rK   set__source_tensor  s   r  c          	         sv  t | tsJ t |ttfsJ |d u rtj}|tjkr#td| |tjkr0t	|dks0J |tj
kr=t	|dks=J |  |  }|  }t | jtjrV| j | _t rvtjjjrvt|rhtdnt|rst|jndndtjjdrt|||dS t | gd	g}|!  tj"#||}t$||||%  fd
d}t&j'|||t|d}|S )Nzunsupported memory format: rg   rh   nanTr  r   r  r"   c                    sH   |  t  tj}t tj}t ||}t | fddS )Nc                      s
    gS rT   rJ   rJ   )
flat_indexflat_loaderrJ   rK   r    s   
 z*resize.<locals>.inner_fn.<locals>.<lambda>)r=   r  r_   r  r  r  )r  flat_index_exprlimitr  r  	old_numelout_indexeruninitalized_val)r  rK   r     s
   zresize.<locals>.inner_fnr   )(rZ   r1   r[   r\   r_   contiguous_formatpreserve_formatr  channels_lastr   channels_last_3dr  rv   r   r   r%   r  r  r  utilsr  fill_uninitialized_memoryr   r   r   r0  rc  r>   r   r   statically_known_equalsrj  r  r   r   stride_ordered_for_memory_formatr  r  r.   r   )	rQ   r  r  rp   r   x_flat
out_strider   r   rJ   r  rK   resize  sT   



	r  )auto_functionalizedc                 C   s@   ddl m} ||}tj| |i ||d dd | D S )Nr   )kernel_side_table)
kernel_idxgridkernel_argsc                 S   s    i | ]\}}t |tr||qS rJ   r   )rP   r  rm  rJ   rJ   rK   r    r0  z'triton_kernel_wrap_.<locals>.<dictcomp>)*torch._higher_order_ops.triton_kernel_wrapr  get_constant_argsr%   UserDefinedTritonKernelr  )r  constant_args_idxr  r   r  constant_argsrJ   rJ   rK   triton_kernel_wrap_  s   

r  c           
      C   s   i }|  D ]>\}}t|tjr@|j}d}	t|tjr-t|tjs$d}	n	|j}t|tjs|	r8ttj|}||v r@t	|}|||< qt
| |||dS )NFT)r  r  r  r   )r  rZ   r%   r1   r   r  r  r  r  r  r  )
r  r  r  r   tensors_to_cloner   rE  r   rQ   has_non_rv_viewsrJ   rJ   rK   triton_kernel_wrap  s,   
r  c                 C   sh   t | sttt |r#d}tjjjdd  }r| d| }|tj_t	j
| |||}tttj|S )Nz"control flow operator: torch.cond.r  r  )r+   r   r   r>   r   r  rp  r  r  r%   Conditionalr   r[   r1   )predtrue_fnfalse_fnoperandsrI   r  r  rJ   rJ   rK   rH     s   rH   c                 C   sd   t tt|| r!d}tjjjdd  }r| d| }|tj_t	j
| |||}tttj|S )Nz(control flow operator: torch.while_loop.r  r  )r   r   r+   r>   r   r  rp  r  r  r%   	WhileLoopr   r[   r1   )cond_fnbody_fncarried_inputsadditional_inputsrI   r  r  rJ   rJ   rK   
while_loop   s   r  rw  c                    s   ddl m m}  fddt||D }|| |fdd}t|d |d d}td	d
 |D |d< tdd
 |D |d< tjj	di |d|i}|d d u rVt
d|S )Nr"   )InputDescriptorlower_pointwise_subgraphc                    s    g | ]} |  | d qS )r  )rv   r   rO   )r  rJ   rK   rR   0  s    z$associative_scan.<locals>.<listcomp>c                    s    g t | t |R  S rT   )r^  rq  )lhsrhs)lowered_combine_fnrJ   rK   wrapped_combine_fn6  s
   z,associative_scan.<locals>.wrapped_combine_fnr   ry  c                 s       | ]}|  V  qd S rT   r}  rO   rJ   rJ   rK   rY   =  r  z#associative_scan.<locals>.<genexpr>r  c                 s   r  rT   r   rO   rJ   rJ   rK   rY   >  r  r  rw  z/Unable to generate code for associative_scan oprJ   )subgraph_loweringr  r  r   r  r  r\   r%   r{  r   r  )rw  r  r   r  subgraph_inputsr  r   r  rJ   )r  r  rK   associative_scan,  s   


r  c                 C   rJ  rT   rJ   )tokensrJ   rJ   rK   _sink_tokensE  ra  r  c                 O   s   t jj|g|R i |}ddlm} ||||}|d usJ tjj| }|d u r,|fS t	t j
tj|}t|ttfs@||fS |g|R S )Nr   )get_effect_key)r%   EffectfulKernelr   torch._higher_order_ops.effectsr  r>   r   effectful_opsr^  r  MultiOutputr1   rZ   r[   r\   )tokenr  r   r   r  r  effect_typeeffectful_kernelrJ   rJ   rK   with_effectsJ  s   r  c                 C   "   t | } tjtjj| || | S rT   )r  r%   _CollectiveKernelcreate_inplace_c10d_functionalall_reduce_r*  r   	reduce_op
group_namerJ   rJ   rK   _all_reducec  
   r  c                 C      t jtjj| || | S rT   )r%   r  r  r  r  r*  r  rJ   rJ   rK   _all_reduce_k     r  c                 C   s(   dd | D } t jtjj| || | S )Nc                 S   rM   rJ   )r  r  rJ   rJ   rK   rR   t  rS   z)_all_reduce_coalesced.<locals>.<listcomp>r%   r  r  r  all_reduce_coalesced_r*  r   r  r  rJ   rJ   rK   _all_reduce_coalescedr  s   r  c                 C   r  rT   r  r  rJ   rJ   rK   _all_reduce_coalesced_}  s   r  c                 C   s   t jt jtjj| ||S rT   )r%   r1   r   r  create_out_of_placer  all_gather_into_tensorr*  )r   
group_sizer  rJ   rJ   rK   _all_gather_into_tensor  s   r#  c              	   C   s"   t tjjtjtjj	| ||S rT   )
r^  r_  r%   r1   r   r  r   r   all_gather_into_tensor_coalescedr*  )r   r"  r  rJ   rJ   rK   !_all_gather_into_tensor_coalesced  s   r%  c              	   C       t jt jtjj| |||S rT   )r%   r1   r   r  r   r  reduce_scatter_tensorr*  )r   r  r"  r  rJ   rJ   rK   _reduce_scatter_tensor     r(  c              
   C   s$   t tjjtjtjj	| |||S rT   )
r^  r_  r%   r1   r   r  r   r  reduce_scatter_tensor_coalescedr*  )r   r  r"  r  rJ   rJ   rK    _reduce_scatter_tensor_coalesced  s   r+  c              	   C   r&  rT   )r%   r1   r   r  r   r  all_to_all_singler*  )r   output_split_sizesinput_split_sizesr  rJ   rJ   rK   _all_to_all_single  r)  r/  c                 C   r  rT   )r  r%   r  r  r  
broadcast_r*  r   rD  r  rJ   rJ   rK   
_broadcast  r  r2  c                 C   r  rT   )r%   r  r  r  r0  r*  r1  rJ   rJ   rK   _broadcast_  r  r3  c                 C   s   t jtjj|  | S rT   )r%   _WaitKernelcreate_waitr  wait_tensorr*  )r   rJ   rJ   rK   _wait_tensor  s   r7  c              	   C   s$   t jt jtjjjj	| |||S rT   )
r%   r1   r   r  r   r_   r=   _dtensorshard_dim_alltoallr*  )r   
gather_dim	shard_dimr  rJ   rJ   rK   _shard_dim_alltoall  s   
r<  zRInductor support for distributed collectives depends on building torch.distributedra  )quantized_lowerings)mkldnn_lowerings)NN)NNNNFN)FrT   )r   r   r  r"   Tr  )r   r   r"   )r   Tr  )Tru   )r   NNr"   )r   FF)r    F)NNN)Nr  N)r  )NNNN)rJ   r   FTNr
  (  r   r   loggingrc  r  r  re  collectionsr   typingr   r   r   r   r   r   r	   r
   unittest.mockr   rw   r_   $torch.ao.quantization.fx._decomposedtorch.fxtorch.utils._pytreer  _pytreer^  (torch._higher_order_ops.associative_scanr   r  r   r   torch._prims_commonr   r   r   r   r   r   r   r   r   r   r   torch.fx.experimental.sym_noder   r   torch.utils._sympy.functionsr   r   r   r   _dynamo.utilsr!    r#   r$   r%   r&   decompositionr'   r(   r)   r*   r+   r,   r-   r.   r/   r0   r1   r2   r3   r4   r5   r6   r7   r8   r9   r:   r;   r<   r`  r=   r>   	getLoggerrD  r  r?   r`   r  __annotations__r@   r]   rA   r6  tr_c10dr(  rB   rC   rD   r   rE   quantized_decomposedrL   rN   re   r  r  r  bmmconvolutionconvolution_backwardr  r  mmr  r  _int_mmr  r  int16rS  r  r  r  r   	complex32	complex64r|   r  rr   rq   rt   r{   r}   r   r   r   r   r   r   r   r   r   r   r  rp   r   r)  r,  r2  r7  r=  r   r?  
device_putrA  rH  rT  rW  rY  r   aliasdetachdetach_liftview_ofr`  r   rb  ri  rj  rn  rq  rv  rx  r{  r|  r*  r}  r   r  r  r  _unsafe_viewreshaper  slicer  r  r  r  quantize_per_channelr  dequantize_per_channelr  quantize_per_tensorr   r  dequantize_per_tensorr  r  r  r  r!  r?  r@  rE  rH  rK  rL  rN  rX  r  rZ  r  r]  r'  	lru_cacherg  rr  rs  rj  rm  rn  ro  rz  r  r  rngprimsr  r  r  	bernoullir  r  r  r  r  r  r  r  r  r  r  r  randintforce_stride_orderr  r  r  r  r  lookup_seedr  randomrw  r  r  r  r  r  r   r  FALLBACK_ALLOW_LISTr  _adaptive_avg_pool3dadaptive_max_pool3dfractional_max_pool3dmax_pool3d_with_indicesuniformexponential_pdist_forwardsoft_margin_loss_backwardsearchsorted_cdist_forward_cdist_backwardmax_unpool2dmax_unpool3d
_trilinearsegment_reduce_segment_reduce_backwardhistc	histogrambin_ct_histogramdd_bin_edges_histogramdd_from_bin_ctsaddbmmaddmv_addmm_activation
_cudnn_rnn_cudnn_rnn_backward_embedding_bag_embedding_bag_forward_only_embedding_bag_dense_backward*_embedding_bag_per_sample_weights_backward_fused_moving_avg_obs_fq_helper*_fused_moving_avg_obs_fq_helper_functionalavg_pool3d_backward max_pool3d_with_indices_backward_adaptive_avg_pool2d_backward_adaptive_avg_pool3d_backwardadaptive_max_pool2d_backwardadaptive_max_pool3d_backwardfractional_max_pool2d_backwardfractional_max_pool3d_backwardreplication_pad1d_backwardreplication_pad2d_backwardupsample_linear1d_backwardupsample_bicubic2d_backwardupsample_trilinear3d_backwardgrid_sampler_2d_backward_pdist_backwardsortstablekthvaluetopkr  median	nanmedianrandpermresize_
resize_as__linalg_detlinalg_householder_productlinalg_inv_exlinalg_ldl_factor_exlinalg_ldl_solve	linalg_lulinalg_lu_factor_exlinalg_lu_solvelinalg_matrix_exp	linalg_qr_linalg_slogdet_linalg_solve_exlinalg_solve_triangular_linalg_svd	lu_unpackormqr_linalg_check_errorslinalg_pinvatol_rtol_tensor_linalg_eightriangular_solvelinalg_cholesky_excholesky_inversecholesky_solvegeqrf_fft_r2cnonzerogcd_thnn_fused_lstm_cell_prims	rng_primsrun_and_save_rng_staterun_with_rng_statemasked_scattermasked_scatter_backwardrv  angle_efficientzerotensor(_sparse_coo_tensor_with_dims_and_tensors	to_sparse
_to_sparser   rT  '_scaled_dot_product_efficient_attention0_scaled_dot_product_efficient_attention_backward#_scaled_dot_product_flash_attention,_scaled_dot_product_flash_attention_backward+_scaled_dot_product_flash_attention_for_cpu4_scaled_dot_product_flash_attention_for_cpu_backward_flash_attention_forward_flash_attention_backward_efficient_attention_forward_efficient_attention_backward
_scaled_mmindex_reducer  r  r  rw  r  r"  r*  r+  scalar_tensorr8  
LongTensorr:  rI  rK  rP  rs  rS  r  rR  r[  r%  r  
zeros_liker_  rb  rY  rd  ri  rj  ro  r|  r  r  r  r   r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  reflection_pad1d_backwardreflection_pad2d_backwardreflection_pad3d_backwardr   r"  r+  rx   r#  r$  r,  r0  r9  r>  rB  rU  r_  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r!  r+  r/  r  r3  r5  r6  r:  r8  Tensor_TensorrB  Scalarr@  Tensor_ScalarrA  rC  rM  r  rN  rO  r  rS  r   r"  r   rX  rg  true_divider  rh  rj  rk  r  r  r~  r|  r  r  r  r  r  r  r  r  r  r   r  rc  r  r  r  r  r  r  r  r  r  r  r  r  r^   r  r  r  r?  expm1relur[  r>  r  r  cossinabsbitwise_andbitwise_left_shiftbitwise_not
bitwise_orbitwise_right_shiftbitwise_xorlgammaerfspecial_erfr  tantanhrT  r  r  logical_xorr  r  	clamp_min	clamp_maxnegr  	remaindersignsignbit	_neg_viewr
  r  r  rD  r  necoshsinhacosacoshasinasinhatan2atanatanhrd  erfcerfinvhypotlog10log2	nextaftercodegen.commonr  r  rE  r  r~   r   _foreach_addforeach_add_listforeach_add_scalar_foreach_mulforeach_mul_listforeach_mul_scalar_foreach_sub_foreach_neg_foreach_abs_foreach_powScalarAndTensor_foreach_divforeach_div_listforeach_div_scalar_foreach_sqrt_foreach_maximum_foreach_minimum_foreach_clamp_min_foreach_clamp_max_foreach_reciprocal_foreach_sign_foreach_copyr  _foreach_add__foreach_mul__foreach_div_r  add_bitwise_and_bitwise_left_shift_bitwise_not_bitwise_or_bitwise_right_shift_bitwise_xor_mul_div_Tensor_modelogical_and_logical_not_logical_or_logical_xor_sub_relu_sigmoid___and__
__lshift____or__
__rshift____xor____iand____ilshift____ior____irshift____ixor__r  r  r  r  r  methodfuncr  _inductor_testr	  r  inductorr  set_source_Tensorr  r  *torch._higher_order_ops.auto_functionalizer  r  r  higher_orderrH   r  Subgraphr  r  r  )torch.distributed._functional_collectivesr  
all_reducer  r  r  all_reduce_coalescedr  r  r  r!  r#  r$  r%  r'  r(  r*  r+  r,  r/  r   r2  r0  r3  r6  r7  r8  r9  r<  r  ImportErrorr  rb  r=  register_quantized_opsregister_woq_mm_opsr>  register_onednn_fusion_opsrJ   rJ   rJ   rK   <module>   s  
(44,
  
	%4
.=I
,















1
C2,$-)s6
	


		
%/
&







	
	

*
.

0
@


C
8



,



G&

Z
!
!,&k/





`*1"*A
!
 


E6


M


E0


s
	 $1
+


/
#$	












	











@
 
"

	

