o
    iO                    @  s3  U d dl mZ d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
Z
d dlZd dlmZ d dlmZmZ d dlmZmZmZmZmZmZmZ d dlmZ d dlmZ d dlZd dlZd dlZd dl Zd dl!m"  m#Z$ d dl%m&Z& d d	l'm(Z( d d
l)m*Z* d dl+m,Z, d dl-m.Z.m/Z/m0Z0m1Z1m2Z2m3Z3m4Z4m5Z5m6Z6m7Z7m8Z8 d dl9m:Z:m;Z; d dl<m=Z=m>Z>m?Z? d dl@mAZA d dlBmCZCmDZDmEZEmFZF ddlGmHZH ddlImJZJmKZKmLZLmMZM ddlNmOZOmPZP ddlLmQZQmRZRmSZSmTZTmUZUmVZVmWZWmXZXmYZYmZZZm[Z[m\Z\m]Z]m^Z^m_Z_m`Z`maZa ddl"mbZbmcZcmdZdmeZemfZfmgZgmhZhmiZimjZjmkZkmlZlmmZm ddlnmoZompZp er7ddlqmrZr edZsedZteAddgZuevewZxi Zydezd< i Z{d ezd!< eAej|j}  Z~ejojZejojZejojZeAej|j}  ZeAej|j} ejjgZeAej|j}  Zi Zd"ezd#< ejojZd$d% Zdd(d)Zdd.d/Zd0d1 Zd2d3 Zd4d5 Zd6d7 Zeejejejejejejejejejejejejejejg ejejejejejejejejejejejejejd8Zdd;d<Zd=d> Zd?d@ ZddCdDZdEdF ZdGdH ZddMdNZddXdYZdZd[ Zdd\d]Zd^e3jd^eyfdd`daZdbdc ZddddeZ				^	ddfdgZddhdiZ	^dddmdnZeejjddodpdq ZeejddoddrdsZd^dtddudvZeejjddoddwdxZd^d^dyddzd{Zeejddoddd|d}Zdd~e3jd^ddd^dfddZdd Zeƃ  	^dddZeejd^dddd Zeejd^dddd Zeejejejejejgdd Zeedreejуeσ eejddodddZeejddodddZeejgdddZeejՃdd Zeejփdd Zeej׃dd Zeej؃dd Zeejjڃdd Zeejۃdd Zeejddodd Zeejddodd Zeejddodd Zeej߃dd ZeejddoeejddoeejddodddZeejddodd ZeejddodddZeejddodddZeejddodddZeejddodddZdddZeejddodddZdd ZeejjddÄ Zeejjddń ZeejddoddƜdddʄZeejjddodddτZeejjddoddƜdddфZeejjddodddӄZeejjddoddƜdddՄZeejdddׄZeejddodddd܄ZeejddoddddބZeejddoddddZeejddodd ZeejddodddZeejddodddZeejddodddZeejddodd Zeej ddodd Z eejddodd ZdddZeejdddZdddZejdd ZddddZddddZddd dZ	dddZ
dd Zeejojjddodd Zeejddodd	 Zeejddod
d Zeejjddodd Zeejdd Zeddd Zdd ZeejjڃZeejjZeejjڃZeejjZe
ej eejdd Zeejdd ZeeKjddodd Z eeKj!ddodddZ"eeKj#ddodd Z$eeKj%ddodd Z&eeKj'ddod d dd&d'Z(eeKjddod d dd*d+Z)dd.d/Z*dd1d2Z+eej,j-ddod^d^ddd3dd<d=Z,eej.e3j/dod^d^d>dd@dAZ.dBdC Z0dDdE Z1dFdG Z2dHdI Z3dJdK Z4dLdM Z5dNdO Z6dPdQ Z7e
ej8 e
ej9 e
ej: e
ej;d^dR e
ej<jd^dR e
ej= e
ej>d^dR e
ej?d^dR ej@A re
ejBd^dR e
ejC e
ejD e
ejE e
ejFjڃ e
ejGjڃ e
ejH e
ejIjJ e
ejKjڃ e
ejLjڃ e
ejM e
ejNd^dR e
ejOe0 e
eje6 e
ejPe0 e
ejQe1 e
ejRe1 e
ejSe1 e
ejT e
ejU e
ejU e
ejV e
ejW e
ejX e
ejYe0 e
ejZ e
ej[ e
ej\ e
ej] e
ej^ e
ej_ e
ej` e
eja e
ejbe1 e
ejc e
ejde0 e
eje e
ejf e
ejfjg e
ejh e
eji e
ejj e
ejk e
ejl e
ejm e
ejn e
ejo e
ejp e
ejq e
ejr e
ejs e
ejt e
eju e
ejv e
ejw e
ejx e
ejy e
ejz e
ej{ e
ej| e
ej} e
ej~ e
ej e
ej e
ejj e
ej e
ej e
ej e
ej e
ej e
ej e
ej e
ejjڃ e
ejjd^dR e
eje0 e
ejjj e
ejjj e
ejjj e
ej e
ej e
eje1 e
ej e
ej e
ej e
ej e
ej e
ejj e
ejjڐe7d^dR e
ejjڐe7d^dR e
ejjڐe7d^dR e
ejjڐe7d^dR e
ejjڐe7d^dR e
ejjڐe7d^dR e
ejjڐe7d^dR e
ejjڐe7d^dR e
ejjڐe7d^dR e
ejjڐe7d^dR e
ejjڐe7 e
ejjڐe7 e
ejjڐe7 e
ejjڐe7 e
ej e
ejj-d~dS eejddoddTdlZeejddUdVdWZdXdY ZeedZ
reeje eejd[d\ Zeejddod d_d`ZeejddoddadbZdcdd Zeejejgdddd^dedfdgZeejddhdiZeejdjdk Zeejdldm Zeejdndo Zeej	ddddpdqdrZdsdt Zdudv Zdwdx Zeejejgdddddddydzd{Zd|d} Zd~d ZeejeeZeedZeed ZÐdd ZeejŃdddddddZeejƃdddddddZeejǃdddddddZeejjڃdd Zeejejgdd ZeejddodddZeejddodddZːdd Z	~dddZ͐dd ZΐdddZeejddodd^ Zeejddodd ZeejddodddZeejӃdddZӐdd ZԐdd ZeejddodddZeeKjddodddZאdddZؐeejjd^dZڐeejjd^dZeejddodd Zeejddodd Zedd ZeejddodddZeejddodddZdd~ddddZeejddodddddZeejddodddZeejddodddZeejddoddÐdĄZeejddod~dŜddƐdǄZ		^dd	d̐d̈́Zeejjڃdd
dϐdЄZeejjڃdd
dѐd҄Zeejjڃ	dddՐdքZeejjڃ	dddאd؄Zeejjڃ			dddڐdۄZeejjڃ			dddܐd݄Zdސd߄ Zeejjڃdd ZdddZeejddodddZdddZdddZdddZ	dddZddddZdd ZddddZdd Zeejddo	^ddd ZdddZeejddod	d
 Zdd Zeejddo		 		^dddZeejddo		 		^dddZeejjd^dZeejddodd ZdddZdd Zdd Zdd Zeej jd^dZeej dd Z eejjd^dZeejdd Zd d! Zeejd"d# Zeejd$d% Zd&d' Zeejjڃ	dd(d)Zeejjd^dZ	eej
jd^dZeejddo	*	 	^	~	dd+d,Zeej
ddo	*	 	^	~	dd-d.Z
d/d0 Zeejjd^dZeejddo	dd1d2Zeejjd^dZeejddo	dd3d4Zd5d6 Zd7d8 Zddd;d<Zd=d> Zeejddd?d@dAZdBdC ZdDdE ZdFdG ZdHdI Zeejejgddd^dJdKdLZeejddd^dJdMdNZdOdP ZedQdR Zeejjd^dZ eejj!d^dZ"eejj#d^dZ$eejd~dSdTdU ZddVdWZ%eej&dXdY Z&eej'ddoddZd[Z'ed\d] Z(ed^d_ Z)eej*d~dSdd`daZ+eej,gd~dSdbdc Z,ddfdgZ-eej*gd~dSdhdi Z.eej/ej*j-gd~e3j0ddjdk Z*eej1ej1gd~dSdldm Z1eej2ej2gddd?dndoZ3eej4jڃZ5eej6jڃZ7eej8jڃZ9eej:jڃZ;eej<jڃZ=eej4ddpdqZ4eej6ddrdsZ6eej8dtdu Z8eej:ddoddvdwZ:eej<ddoddxdyZ<eej>ddd?dzd{Z>eej?dd|d}Z@eejAddodd~dZBeejCddodddZDeejEed eejFedZGeejHedZIeejJedejdZKeejLedejdZMeejNd~ddZNeejfjgd^dZOeejfjgddoddd^dddZPeejfjddodddZfdddZQdddZReQejSZSeRejTZTeQejUZUeQejVZVeejWZWeRejXZXeRejYZYeejZZZeej[d~dZ[eRej\ eRej] eej^Z^eej_Z_eej`Z`eejaddZaeejbZbeejcZceejdZdeQeje eQejfZfeejge3j0doef eQejh eQeji eQejj eRejx eejkdd~ejdZkeejldd~ejdZleejmdd~ejdZmeejndd~ejdZneejoZoeejpZpeejqeo eejrep eejsZseej^Z^eQejtZteeju eejvddZveej׃ eejwejd eejxes eejyejd eejzejd eej{ejd eej|ejdZ|eej}ejd eej~ejd eQej eQej eQej eQej eQej eQej eQej eQej eQej eQej eQej eQej eQej eQej eQej eQej ddlmZmZ dd ZeD ]@ZeeeD ]\ZZZeŐeeeed q͐eeeD ]\ZZZeŐeeeed qqeejjeNd~dZeejj!eNd~dZeejj-eNd~d eejje,Zeejj-e, eejj!e,Zeejje[ eejj!e[ eejjڐes eejjڐe^ eejj!e eejje eejje eejje*Zeejj-e* eejj!e*ZeejeY eejeS eejjeo eejj!eo eejjep eejj!ep eejjeo eejj!eo eejjep eejj!ep eejet eejev eejeZdd Zeejjejje eejj!ejj!e eejjejje eejj!ejj!e eejjejje eejj!ejj!e eejjejjڐe dd ZeejeN eeje_ eeje` eejea eejeb eejec eejed eeje, eejj-e* eejje+ eejek eejel eejem eejÐen eejĐe[ eejŐeW eejƐeX eejǃe_ eejȃe` eejɃeb eejʃec eej˃ed eejejǃ eejejȃ eejejɃ eejejʃ eejej˃ eejуdddZeejҐjӃdd ZeejԐjӃdd ZeejՃdd Ze:֡ D ]\ZאZee;e׃e؃ qeejكdd Zeejdd Zeejojېj܃dd Zeejojސj߃dd Zeejojjjdd Zeejojd[rYeejojj'jڃdd ZeejojjddUddZd dlmZ e
e ee*dd Zeejojjddodd ZeejojjddodddZeejojjddoejed~d eejojjddoddÐdĄZeejjddoddŜddƐdǄZee(ddoddːd̄Zeejojjjڃd͐d΄ ZeejojjddodϐdЄ ZddѐlmZ e  eeKjddodҐdӄ ZddlImZ eHe ddlImZ e  e  ddlImZ e  ddlImZ e  ejddؐdلZ dS (       )annotationsN)defaultdict)IterableSequence)AnyCallablecastOptionalTYPE_CHECKINGTypeVarUnion)	ParamSpec)patch)counters)associative_scan_op)triton_kernel_wrapper_mutation)get_layout_constraint_tag)canonicalize_dimcanonicalize_dimscheckdtype_to_typeelementwise_dtypesELEMENTWISE_TYPE_PROMOTION_KINDget_computation_dtypeis_boolean_dtypeis_float_dtypeis_integer_dtypeNumber)magic_methodsmethod_to_operator)free_unbacked_symbolshas_free_unbacked_symbolsresolve_unbacked_bindings)
OrderedSet)CeilDivFloorDivIdentityModularIndexing   )import_submodule   )configinductor_primsirtest_operators)decompositionsget_decompositions)BaseView	DtypeView
ExpandViewIndexingConstantIRNode	is_triton
MutableBoxOnlineSoftmaxReductionops_wrapperPermuteView	Pointwise	ReductionShapeAsConstantBufferSqueezeView	TensorBoxvalidate_irView)ceildivdecode_device
is_dynamicis_gpuis_pointwise_useis_view,needs_fallback_due_to_atomic_add_limitationspad_listlike#register_op_dtype_propagation_rules#register_op_requires_libdevice_fp64sympy_productuse_scatter_fallback)opsV)ReductionType_T_Pztorchvision::roi_alignzaten::index_addz8dict[Union[Callable[..., Any], str], Callable[..., Any]]	loweringsz9dict[torch._ops.OpOverload, Optional[Callable[..., Any]]]_maybe_layout_constraintsz2dict[torch._ops.OpOverload, torch._ops.OpOverload]inplaceable_foreach_opsc                  C  s<   t jjjD ]} | jD ]}|jdkr|jtv s  dS q
qdS )Ncall_functionTF)rO   graphcurrent_nodeusersoptargetforeach_ops)nodeuser r_   N/home/ubuntu/vllm_env/lib/python3.10/site-packages/torch/_inductor/lowering.pycur_node_has_non_foreach_users   s   
ra   	arg_pairs%Iterable[Union[tuple[Any, Any], Any]]c                 C  s   t t}d}t| D ]A\}}t|tsd}|f}t|  ptj}d }|D ]}t|tr2|j	
 } nq$|d us;J d|r@|\}|||f ||f q
|S )NFTz.foreach op should have at least one tensor arg)r   list	enumerate
isinstancer   rD   r+   #combo_kernel_foreach_dynamic_shapesr?   data
get_deviceappend)rb   outunpack_argsiargsuse_foreachdevicetr_   r_   r`   group_foreach_args   s&   


rr   fnCallable[..., Any]returnOptional[Callable[..., Any]]c                 C  s>   t | tjjs	dS t| dd }rt|S | tv rt|  S dS )zHGet layout constraints. Returns None if there are no layout constraints.NF)with_default)rf   torch_ops
OpOverloadr   tag_to_layout_constraintrT   )rs   maybe_layout_tagr_   r_   r`   maybe_layout_constraints   s   r}   c                 C  sV   | t jjjkr	tS | t jjjkrtS | t jjjkrtS | t jjj	kr$d S t
d|  )NzUnknown layout constraint tag: )rx   _CTagneeds_exact_stridesconstrain_to_fake_tensorsneeds_contiguous_stridesrequire_contiguous_stridesneeds_fixed_stride_orderconstrain_to_fx_stridesflexible_layoutAssertionError)tagr_   r_   r`   r{      s   r{   c                 C  s   | s	t d| d S )Nzinductor does not support NotImplementedErrorcondmsgr_   r_   r`   
assert_nyi   s   r   c                   s\   t  ttttfrdd  D S t  t  tjj	r,t
 fdd  D  d S d S )Nc                 S  s   g | ]}t |qS r_   )add_needs_realized_inputs.0xr_   r_   r`   
<listcomp>       z-add_needs_realized_inputs.<locals>.<listcomp>c                 3  s    | ]}t  |V  qd S N)getattr)r   overloadrs   r_   r`   	<genexpr>   s    

z,add_needs_realized_inputs.<locals>.<genexpr>)rf   rd   settupler#   needs_realized_inputsaddrx   ry   OpOverloadPacketupdate	overloadsr   r_   r   r`   r      s   
r   c                 C  s:   t | tjjr|  D ]	}|tt| |< qd S |t| < d S r   )rf   rx   ry   r   r   rT   r   )rs   
constraintr   r_   r_   r`   add_layout_constraint   s
   r   )r   r*   r(                     	   
         dtypeintc                 C  s2   t | ts| S | tv sJ d|  dt|  } | S )Nzid z missing from DTYPE_ID_LOOKUP)rf   r   DTYPE_ID_LOOKUPr   r_   r_   r`   decode_dtype   s
   
r   c                 C  sB   t | trt|  pt|  S t | tjr| jdu S t | tS )NT)	rf   r?   r   	get_dtyper   sympyExpr
is_integerr   r   r_   r_   r`   is_integer_type  s
   


r   c                 C  s    t | trt|  S t | tS r   )rf   r?   r   r   boolr   r_   r_   r`   is_boolean_type  s   

r   type_promotion_kindr   c                   s0   dd   fdd|D }t |d| i\}}|S )Nc                 S  s8   t | ttjfr
| S t|  }tjdg| |  dS )Nr*   r   )	rf   r   r   Basiclenget_sizerx   zerosr   )inpdimr_   r_   r`   construct_input  s   z+get_promoted_dtype.<locals>.construct_inputc                      g | ]} |qS r_   r_   )r   argr   r_   r`   r      r   z&get_promoted_dtype.<locals>.<listcomp>r   )r   )r   rn   inps_r   r_   r   r`   get_promoted_dtype  s   r   c                 C  sh   t | ttfs| g} nt| } t| D ]}t |tjjr1| D ]}t||}|tvr0| 	| q q| S r   )
rf   rd   r   rx   ry   r   r   r   rS   rj   )aten_fnrs   r   other_fnr_   r_   r`   get_overloads%  s   

r   c                 C  s6   t | tjjr|| jv S t | tjjr||  v S dS NF)rf   rx   ry   r   _qualified_op_namerz   name)rZ   	namespacer_   r_   r`   in_namespace5  s
   
r   r   r?   rp   torch.devicec                 C  s   t | jtjrt|  r| S dd |  D }|  }|durH|jdkrH||krHt|dks;t|dkrH|d dkrHt	t
tj| |dS | S )zB
    Copy cpu scalar if doesn't not match with given `device`
    c                 S     g | ]	}t jj|qS r_   rO   rW   sizevarssize_hint_or_throwr   sr_   r_   r`   r   E      z)maybe_copy_cpu_scalar.<locals>.<listcomp>Ncpur   r*   F)rf   rh   r-   ReinterpretViewr!   r   ri   typer   r?   
StorageBox
DeviceCopycreate)r   rp   size
cur_devicer_   r_   r`   maybe_copy_cpu_scalar=  s   
$r   rn   	list[Any]kwargsdict[str, Any]	broadcastr   )Optional[ELEMENTWISE_TYPE_PROMOTION_KIND]convert_input_to_bool tuple[list[Any], dict[str, Any]]c                   s  dd t  D }dd  D }|s|s fS |s|r|r$tjndd  D }|dd  D  t|d|i|rF |d  n|d   |D ]}t |  |< qP|D ]}	t|	 |	< q^fd	d
fdd D  fdd D |rt	t
t fdd|D fdd|D  }
t
|
d  }t||
dt| D ]\}}| |< qt||
t|d D ]\}	}||	< qtt D ]}t | tjrt | | |< qֈD ]}	t|	 tjrt|	 ||	< q fS )zB
    Transforms arguments for broadcasting and type promotion
    c                 S     g | ]\}}t |tr|qS r_   rf   r?   r   rm   r   r_   r_   r`   r   \      z"transform_args.<locals>.<listcomp>c                 S  r   r_   r   r   kvr_   r_   r`   r   ]  r   c                 S  s*   g | ]}t |ttjfst|d r|qS r   )rf   r   r   r   hasattrr   ar_   r_   r`   r   g  s    c                 s  s    | ]
}t |d r|V  qdS )r   N)r   r   r_   r_   r`   r   m      z!transform_args.<locals>.<genexpr>r   r   c                   s6   t | tr
t| S t | tjrtj| j dS | S )Nvaluer   rp   )rf   r?   to_dtyper-   Constantr   )r   )rp   r   r_   r`   promote~  s
   

ztransform_args.<locals>.promotec                   r   r_   r_   r   r   r_   r`   r     r   c                   s   i | ]	\}}| |qS r_   r_   r   r   r_   r`   
<dictcomp>  r   z"transform_args.<locals>.<dictcomp>c                 3      | ]} | V  qd S r   r_   r   rm   rn   r_   r`   r         c                 3  r   r   r_   r   r   r   r_   r`   r     r   N)re   itemsrx   r   extendvaluesr   ri   r   broadcast_tensorsrd   	itertoolschainr   zipr   rangerf   r-   r   r3   r   )rn   r   r   r   r   args_indiceskwargs_indicespromoting_argsrm   r   broadcastedr   r   r_   )rn   rp   r   r   r   r`   transform_argsQ  sb   

r  c                   s>   t   fdd}t| }t| tt|| |S )a  
    Add a foreach lowering to lowerings dict.

    Arguments:
        aten_fn: torch.ops.aten.* fn we are lowering
        decomp_fn: alternate implementation on our IR
        broadcast: True to apply broadcasting to tensor inputs
        type_promotion_kind: kind of type promotion applied to tensor inputs, `None` means no type promotion
        convert_input_to_bool: some logical ops require inputs are converted to bool
    c                    s*   t | dksJ  | i |}t| |S )Nr(   )r   r@   )rn   r   rk   	decomp_fnr_   r`   wrapped  s   z+_register_foreach_lowering.<locals>.wrapped)	functoolswrapsr   r\   r   rS   dictfromkeys)r   r  r  aten_fnsr_   r  r`   _register_foreach_lowering  s   
r  c                   s<   t  fdd}t  |t | |S )a  
    Add a lowering to lowerings dict

    Arguments:
        aten_fn: torch.ops.aten.* fn we are lowering
        decomp_fn: alternate implementation on our IR
        broadcast: True to apply broadcasting to tensor inputs
        type_promotion_kind: kind of type promotion applied to tensor inputs, `None` means no type promotion
        convert_input_to_bool: some logical ops require inputs are converted to bool
    c                    s   t | } t|}d}t| dkr!t| d t tfr!d}t | d } tdd  D s9tdd | D r9J dt| |\} }|rH| g} | i |}t	| |S )	NFr*   r   Tc                 s  s"    | ]}|t v pt|d V  qdS )_c10d_functionalN)	fallbacksr   )r   rs   r_   r_   r`   r         
z6_register_lowering.<locals>.wrapped.<locals>.<genexpr>c                 s      | ]}|d kV  qdS )rk   Nr_   r   r_   r_   r`   r     r   zout= ops aren't yet supported)
rd   r  r   rf   r   allanykeysr  r@   )rn   r   unpackedrk   r   r   r   r  r   r_   r`   r    s(   
z#_register_lowering.<locals>.wrapped)r  r  r   r   r  r  )r   r  r   r   r   lowering_dictr  r_   r  r`   _register_lowering  s
   r   F.Callable[[Callable[_P, _T]], Callable[_P, _T]]c                 C  s   t jt| ||||dS )z+
    Shim to support decorator syntax.
    )r   r   r   r  )r  partialr   )r   r   r   r   r  r_   r_   r`   register_lowering  s   r#  c                 C  s   g }t jt| t|tjjdD ]A\}}tjj	|r!|
| qtjj	|r.|
| qtjj|| tt|jtt|jk rL|
| q|
| qtt|S )z
    Broadcasting logic based on symbolic shapes.

    We give the shapes 0 and 1 concrete values, while all other shapes
    are symbolic sympy formulas.
    )	fillvalue)r  zip_longestreversedr   SOnerO   rW   r   is_size_one_or_falserj   check_equalsr   expandfree_symbolsr   )r   boutputr   yr_   r_   r`   broadcast_symbolic_shapes	  s   $ r0  c              
     s,  |d u s|d u sJ d|d u r|d u rt j}tdd | D s"| S tdd | D rC|p3t| d|ifdd  fdd	| D S td
d | D }g }| D ]C}t|ttfrq|	t
tj|| | dt|  qPt|tjr|	t
t|| | dt|  qP|	| qP|S )NzEonly one of override_return_dtype or type_promotion_kind may be givenc                 s  s"    | ]}t |tjttfV  qd S r   )rf   r   r   r   floatr   r_   r_   r`   r   '       z$promote_constants.<locals>.<genexpr>c                 s  s"    | ]}t |tttjfV  qd S r   )rf   r   r1  r   r   r   r_   r_   r`   r   )  r2  r   c                   s4   t | tjrtj|  td dS tj|  td dS )Nindexr   rp   r   )rf   r   r   r-   r4   rC   r   r   r   r_   r`   
const_func.  s
   
z%promote_constants.<locals>.const_funcc                   r   r_   r_   r   )r5  r_   r`   r   6  r   z%promote_constants.<locals>.<listcomp>c                 s  s&    | ]}t |tttjfr|V  qd S r   )rf   r?   r3   r-   r   r   r_   r_   r`   r   7  s   $ r   r3  )r   DEFAULTr  r  r   nextrf   r   r1  rj   r3   r   r-   r   r   get_device_or_errorrd   r   r   r   r4   )inputsoverride_return_dtyper   exrk   r   r_   )r5  r   r`   promote_constants  sL   

	r<  c                   s"   d dd fdd}|S )Nalphar9  r?   c              	     s  d urt dd D rrJ  S t
r2| d ur1| dkr1ttd | d< n| d u s8J dd D d  
pLd   dd  D ]!}t|tjstt	t	| kstJ d d	 d	|  qSt
jt
jftjd uottjd
d d uotjjjd uotjjjddo v  	fdd}sd }D ]}t| jr| } nq|sȈd  }p|}tj| |dS )Nc                 s  "    | ]}t |tot|V  qd S r   rf   r5   r6   r   r   r_   r_   r`   r   [  r  z0make_pointwise.<locals>.inner.<locals>.<genexpr>r*   c                 S     g | ]}|  qS r_   make_loaderr   r_   r_   r`   r   h  r   z1make_pointwise.<locals>.inner.<locals>.<listcomp>r   zndim mismatch  rX   low_precision_pointwise_barrierFc                   s   t  t ksJ d  d tjkr&d ur& fddD  S g }tD ]'\}}| }|  }rN|v rNtj||dd}t||}|| q,| }rhtj|dd}t|S |S )Nzwrong ndim rF  c                   s   g | ]}| qS r_   r_   )r   loadr4  r_   r`   r     r   zCmake_pointwise.<locals>.inner.<locals>.inner_fn.<locals>.<listcomp>F)use_compute_types)r   rx   r   re   r   rN   r   rj   )r4  inputs_loaded	inp_indexrH  rk   	inp_dtypedowncast)r   emulate_precision_castsrs   r9  loaders	low_pr_fpoverride_fn_when_input_boolrangesrI  r`   inner_fn}  s    $z/make_pointwise.<locals>.inner.<locals>.inner_fnrp   r   rT  rS  )r  r<  rd   mulr   r   rf   r-   BaseConstantr   rx   bfloat16float16rO   rW   r   rX   metagetrE   ri   r   r;   r   )r>  r9  otherrT  rp   rm   allow_alphars   override_devicerR  r:  triton_fallback)r   rO  r9  rP  rQ  rS  r`   innerZ  sb   

zmake_pointwise.<locals>.inner)r9  r?   r_   )rs   r:  r_  rR  r^  r`  ra  r_   r]  r`   make_pointwiseR  s   Krb  c                   s   ddd fdd}|S )Nr*   r=  r9  list[list[TensorBox]]c                   sT  t tjjjdkptjjjtv pt }d }|D ]}t|t	t
fr$|} nq|d us-J dg }|D ]}t|t	t
fsE||gt |  q1|| q1tt| }d gt | }| D ]@\\}}	}
g }|
D ]-\}} rt|d| i}n| }|||< tj|tjr|	r|r|  ||  qf|rtj| q\tdd |D sJ |S )Nr   z1at least one input must be a list to a foreach opr>  c                 s      | ]}|d uV  qd S r   r_   r   r_   r_   r`   r     r   z8make_foreach_pointwise.<locals>.inner.<locals>.<genexpr>)r   rO   rW   rX   rY   r[   inplace_foreach_opsra   rf   rd   r   rj   rr   r  r   has_featureBackendFeatureFOREACHrealizeget_operation_nameregister_operation_listr  )r>  r9  realize_outputsa_list_inputinputbroadcast_inputsgroupsoutputsrp   ro   groupoperation_list
output_indrn   r.  r^  pw_fnr_   r`   ra    sZ   
z%make_foreach_pointwise.<locals>.inner)r9  rc  r_   )rv  r^  ra  r_   ru  r`   make_foreach_pointwise  s   6rw  'Union[TensorBox, ShapeAsConstantBuffer]torch.dtypecopyc                   s>   |    kr|rt| S | S  fdd}t| d| S )Nc                   s   t j|  dS )N)	src_dtype)rN   r   r   r   r{  r_   r`   	_to_dtype     zto_dtype.<locals>._to_dtyper:  )r   clonerb  )r   r   rz  r}  r_   r|  r`   r     s
   r   r   c                 O  s   ddl m} |}| j}||tjd}t| |j|  W d   n1 s'w   Y  |j}|s3J t|}dgt	| }	|
 D ]0\\}
}}g }|D ]\}}||	|< tj|
tjri|ri|  ||  qL|rrtj| qBtdd |	D s~J |	S )aI  
    This lowers an invocation of foreach_map
    The way this works is that an arbitrary N-arg func is provided by the user, looped over by the
    polyfill with the same semantics as a foreach op (a loop applying an n-ary function to n args)
    and then traced into a subgraph by dynamo.
    This code allows us to inline the subgraph into the main graph lowering using the PontwiseSubgraphLowering.
    The graph outputs represent the vertically fused sequence of ops, and then register_operation_list
    below registers the buffers as horizontally fuseable in the scheduler.
    r*   )PointwiseSubgraphLowering)root_graph_loweringNc                 s  rd  r   r_   r   r_   r_   r`   r     r   z_foreach_map.<locals>.<genexpr>)subgraph_loweringr  graph_modulerO   rW   set_graph_handlerrungraph_outputsrr   r   r   rf  rg  rh  ri  rj   rj  rk  r  )subgraphrn   r   r  r9  gmpw_subgraphsub_outputsrp  rq  rp   ro   rr  rs  rt  r.  r_   r_   r`   _foreach_map  s6   r  c                 C  sZ   |j s|  j r&|  rt| |d}tj||  |S ttj	j
dd| |S t| |ddS )Nr   Fadd_to_fallback_setTrz  )
is_complexr   r   
empty_liker-   InplaceCopyFallbackr   fallback_handlerprimsconvert_element_typedefaultr   )r   r   dstr_   r_   r`   _convert_element_type  s   r  r  c                C  sb   |   }||kr|rt| S | S dd }||}||}||kr)ttjj| |S tt| |S )Nc                 S  s   | j r	t| jS t| jS r   )is_floating_pointrx   finfobitsiinfor   r_   r_   r`   _get_primitive_bitwidth1  s   z1to_dtype_bitcast.<locals>._get_primitive_bitwidth)	r   r  r  atenviewr   r?   r2   r   )r   r   rz  x_dtyper  src_bitsdst_bitsr_   r_   r`   to_dtype_bitcast,  s   r  c                 C  s8   |j s|  j rttjtjjj	j
| |S t| |S r   )r  r   r?   r   r-   ComplexViewrx   rN   r  r  r   r  r   r   r_   r_   r`   _view_dtype@  s
   
r  rz  non_blockingc                C  s:   t |}|  |kr|rt| S | S ttj| ||S r   )rC   ri   r  r?   r   r-   r   )r   rp   rz  r  r_   r_   r`   	to_deviceI  s   r  c                 C  s   t | |d|dS )NTr  )r  )r   rp   r  r_   r_   r`   _device_putP     r  Tc	           
      C  sz   |p| j }t|}	t||| |durt|}t|	||||d}	t| |||d|	}	tt|r;ttt|d|d|	 |	S )z3A pointwise function that maps ops.{name} to inputsN)r:  rR  r^  r`  )r   r   r   )r   r   )__name__r9   rJ   rb  r#  r   r  r   )
r   r   r   r   r   r:  rR  r^  r`  rs   r_   r_   r`   register_pointwiseU  s>   

r  c                    sx   d} t d  fdd} fdd}t|t|tjdgfdd}ttj|}tt| r:tt	t| d	d
| |S )z2A pointwise function that maps ops.frexp to inputsfrexpc                        | i |d S Nr   r_   rn   r   r  r_   r`   frexp0     zregister_frexp.<locals>.frexp0c                    r  Nr*   r_   r  r  r_   r`   frexp1  r  zregister_frexp.<locals>.frexp1r  c                    s$    d | i | d | i |fS Nr   r*   r_   r  )pw_fnsr_   r`   rs     s   $zregister_frexp.<locals>.fnNr  )
r9   rb  rx   int32r#  r  r  r   r  r   )r   r  r  rs   r_   )r  r  r`   register_frexp  s*   
r  c                 C  s   t ||d}t| |}|S )Nr^  )rw  r  )r   pointwise_lowering_fnr^  rs   r_   r_   r`   register_foreach_pointwise  s   
r  )r   r   c                   s  dd }t |ttfrt||}t |ttfrt||}| ||g t d  d tjd}dd t D }t|t	 fdd|D  D ]\}}| |< qFt
t D ]}t  | tjrqt | t |d	    |< qUt||d
 d	 t d |t d |S )Nc                  W  
   t j|  S r   )rN   wherer   r_   r_   r`   rs        
zwhere.<locals>.fnr*   r(   r  c                 S  r   r_   r   r   r_   r_   r`   r     r   zwhere.<locals>.<listcomp>c                      g | ]} | qS r_   r_   r   r   r_   r`   r     r   r   r  )rf   r1  r   constant_liker   r   r6  re   r  r  r  r   r-   r   r3   r   rd   r   rb  r   )r   r   r-  rs   r   indicesrm   r   r_   r   r`   r    s&   
$
$
r  c                  G  s   t | dkrt| d ttfrt| d  S ttdd | D g }g }| D ]$}| }t |t |ks?t	dd t
||D rDt||}|| q%|S )Nr*   r   c                 S  rC  r_   )r   r   r_   r_   r`   r     r   z%broadcast_tensors.<locals>.<listcomp>c                 s  s0    | ]\}}t jj|t jj|kV  qd S r   )rO   rW   r   r)  r   r   r-  r_   r_   r`   r     s    
z$broadcast_tensors.<locals>.<genexpr>)r   rf   rd   r   r  r  reducer0  r   r  r  r+  rj   )r9  r[   rq  r   sizesr_   r_   r`   r    s   
r  c                 C  s   | S r   r_   r   r_   r_   r`   nop     r  
lift_freshc                 C  s   t | tsJ |d u rtt| jS t |ttjfr"tj	j
|ntdd |D }tt|  |}tt |ts=|fn|}g }t|  D ]\}}||v r[tj	j
t|ds`|| qH||  krlt| |S | S )Nc                 s  s    | ]
}t jj|V  qd S r   rO   rW   r   	guard_intr   dr_   r_   r`   r     r   zsqueeze.<locals>.<genexpr>r*   )rf   r?   r>   r   rh   r   r   r   rO   rW   r   r  r   r   r   r   r#   re   guard_or_falseEqrj   r  )r   r   dims	new_shaper  r   r_   r_   r`   squeeze  s   
r  c                 C  s   t t| |S r   )r  r  )r   r   r_   r_   r`   squeeze_copy     r  c                 C  2   t | |}t| tsJ t|tsJ |j| _| S r   )r  rf   r?   rh   r   r   valr_   r_   r`   squeeze_  
   
r  c                 C  2   t | rt| dtjdS td}t|tjd| S )NFr   isinfr  r   	full_likerx   r   r9   rb  r   rs   r_   r_   r`   r  
     r  c                 C  r  )NFr   isnanr  r  r  r_   r_   r`   r    r  r  c                 C  $   t | rt| S td}t|| S )Nceilr   r  r9   rb  r  r_   r_   r`   r       r  c                 C  r  )Nfloorr  r  r_   r_   r`   r  "  r  r  c                 C  r  )Nroundr  r  r_   r_   r`   r  *  s   r  c                 C  r  )Ntruncr  r  r_   r_   r`   r  3  r  r  c                 C  s   t | g\} t| tjrt| t|S t| tsJ t|ttfs$J t| 	 t|kr0| S t
| 	 sWtjjt| 	 }|dkrWt
|sW| tjjt||  tt| jt|S r  )r<  rf   r-   rW  r3   r   r   r?   rd   r   r    rO   rW   r   r   rL   
mark_reuserh   )r   r  x_size_productr_   r_   r`   r+  ;  s$   
r+  c                 C  sL   t |}|D ]}d||< q| }t|D ]\}}|dkr t||}qt||S NrB  )rd   re   	unsqueezer+  )r   shapebroadcast_dimensionsr   broadcast_dimensionr   idxr   r_   r_   r`   broadcast_in_dimV  s   


r  c                 C  s   t | | S r   )r+  r   )r   r/  r_   r_   r`   	expand_asd  r  r  c                   sb  t |   tt kr$tjjgtt      t| t  } tt|  ks0J t |  }d}ttD ]}| dkrHd}|| |  ||< q>|r`t|| 	 | 
 dS tdd t D rstt| |S  fdd}t st|stjjt }|dkr| tjjt||  |  tj| 
 | 	 |t |d	S )
NFr   Tr   rp   c                 s  s$    | ]\}}|d kp|d kV  qdS r*   Nr_   r  r_   r_   r`   r   {     " zrepeat.<locals>.<genexpr>c                   st   t | t ks
J t| } tt D ]!}| dkr5 | dkr)tjj| |< qt| | d | | |< q| S r  )r   rd   r  r   r'  Zeror'   )r4  rm   old_sizerepeatsx_loaderr_   r`   rT    s   zrepeat.<locals>.inner_fnrU  )rd   r   r   r   r'  r(  r  r  emptyr   ri   r  r  r  r+  r    rO   rW   r   r   rL   r  rE  r;   r   )r   r  new_sizezero_tensorrm   rT  old_size_productr_   r  r`   repeati  s>   r  r  Sequence[sympy.Expr]c                 C  s   t t| j|S r   )r?   rA   r   rh   )r   r  r_   r_   r`   r    s   r  c                 C  s6   t | tsJ t |ttfsJ tt| jt|S r   )rf   r?   rd   r   r:   r   rh   )r   r  r_   r_   r`   permute  s   r              c              	   C  s8   t | tsJ t| |d}ttjj| j|||||dS )Nr   clamp)rf   r?   _validate_dimr-   	SliceViewr   rh   )r   r   startendstepr  r_   r_   r`   slice_  s   r
  c              	   C  s   t | trt | jtjr| j } |   t| s"td|  dt	| \}}t
|j|jdd |D dd |D t|p@d}ttj||dS )Nzunrealized as_strided(z, ...)c                 S     g | ]}t |qS r_   r   r+  r   r_   r_   r`   r         zas_strided.<locals>.<listcomp>c                 S  r  r_   r  r   r_   r_   r`   r     r  r   rh   layout)rf   r?   rh   r-   r1   unwrap_viewri  is_storage_and_layoutr   as_storage_and_layoutFixedLayoutrp   r   r   r+  r   )r   r   stridestorage_offsetstorage
old_layout
new_layoutr_   r_   r`   
as_strided  s   

r  c                 C  s$   t | tsJ t| |||j| _| S r   )rf   r?   r  rh   )r   r   r  r  r_   r_   r`   as_strided_  s   r  c                 C  s   t | |||}t|S r   )r  r  )r   r   r  r  resultr_   r_   r`   as_strided_copy  s   r  c                   s   g d}D ]} |||    f d d }qdd D  fdd}td  }d d | < tjd  d  ||dS )Nr   rB  c                 S  rC  r_   rD  rA  r_   r_   r`   r     r   z!pointwise_cat.<locals>.<listcomp>c           
   	     s@  t |  tj}g }g }ttD ]n  dkr t dtjn
t   d tj}t   d tj}t ||}t ||} dkrI|}n td krT|}nt 	||}|
| t| t   d  < |
t | fddd q|d }	ttd ddD ] t |  |  |	}	q|	S )Nr   r*   c                     s     S r   r_   r_   )rm   idx_loadinputs_loadersr_   r`   <lambda>      z1pointwise_cat.<locals>.inner_fn.<locals>.<lambda>        rB  r(   )rN   
index_exprrx   int64r  r   constantgeltand_rj   rd   r&   maskedr  )
r  idx_dimmasksmasked_loadsr  r  
start_condend_condmasknext_valr   r9  r  inputs_ranges)rm   r  r`   rT    sD   
zpointwise_cat.<locals>.inner_fnrU  )rj   r   rd   r;   r   ri   r   )r9  r   prev_endr   rT  r  r_   r0  r`   pointwise_cat  s   0

r3  rn  scaleszero_pointsaxis	quant_min	quant_maxc              	     s   t  dksJ dt  dksJ d|  tjkr%t| tj} |  tjks5J d|    t |  k sHJ dt |   |     f	dd}tj	| 
 ||  dS )	Nr*   expect scales 1 dimexpect zero_points 1 dim<Expecting input to have dtype torch.float32, but got dtype: Expecting axis to be < c           
        s   |   f}| }|}|}t tjd\}}jtjkr(t|tj}jtjkr5t|tj}t|}t|| | }t	|t
||}	t|	S Nr   )_create_constantsrx   float32r   rN   r   r  
reciprocalr  maximumminimum)
r  channel_idxrn  scale
zero_pointqminqmax	inv_scaler  clamped	r6  r   input_loaderr8  r7  r4  scales_loaderr5  zero_points_loaderr_   r`   rT  2  s   

z;quantized_decomposed_quantize_per_channel.<locals>.inner_fnrU  )r   r   r   rx   rX  r   r?  rE  r;   r   ri   )rn  r4  r5  r6  r7  r8  r   rT  r_   rJ  r`   )quantized_decomposed_quantize_per_channel  s(   
rN  c                   sP       t tj  fdd}tj    |t 	 d}|   |S )Nc                   sB   t j  t  | W  d    S 1 sw   Y  d S r   )r-   ComputedBufferforce_realizerN   device_assert_asyncrE  rI  r   r_   r`   rT  O  s   $z_assert_async.<locals>.inner_fnrU  )
ri  r   rx   r   r;   r   ri   r   rd   r   )r   r   rT  assertion_opr_   r   r`   _assert_asyncK  s   
rS  c                 C  
   t | |S r   rS  r   r_   r_   r`   lower_assert_async]     
rV  c                 C  rT  r   rU  r   r_   r_   r`   lower_assert_functional_asyncb  rW  rX  	out_dtyperZ  Optional[torch.dtype]c          	        s   t  dksJ dt  dksJ d|  |ks*J d| d|    t |  k s=J dt |   d u rDtj|     fdd}tj|  ||  d	S )
Nr*   r9  r:  Expecting input to have dtype , but got dtype: r<  c                   s   |   f}| }|}|}j tjkrt|tj}j tjkr+t|tj}tt|tj|| }t|}|S r   )r   rx   r?  rN   r   sub)r  rC  rn  rD  rE  r  r6  rK  rZ  r4  rL  r5  rM  r_   r`   rT    s   
z=quantized_decomposed_dequantize_per_channel.<locals>.inner_fnrU  	r   r   r   rx   r?  rE  r;   r   ri   )	rn  r4  r5  r6  r7  r8  r   rZ  rT  r_   r_  r`   +quantized_decomposed_dequantize_per_channelg  s(   ra  rD  r1  rE  c                   s   |   tjkrt| tj} |   tjksJ d|    |   fdd}tj|   t	j
|t|t|d|  dS )Nr;  c           	        sf   | }t d| |tjd\}}t|| | }t tjd\}}tt|||}t| S )N      ?r   )r>  rx   r?  rN   r  rB  rA  r   )	r  rD  rE  rn  rH  r  rF  rG  rI  r   rK  r8  r7  r_   r`   rT    s   
zBquantized_decomposed_quantize_per_tensor_default.<locals>.inner_fnrD  rE  rU  )r   rx   rX  r   r?  rE  r;   r   ri   r  r"  r1  r   r   rn  rD  rE  r7  r8  r   rT  r_   rc  r`   0quantized_decomposed_quantize_per_tensor_default  s   
rf  c                  sv   |   |ksJ d| d|    d u rtj|    fdd}tj|  tj|t	|t
|d|  dS )Nr\  r]  c                   sF    | }t ||tjd\}}tt|tj|| }t|}|S r=  )r>  rx   r?  rN   r^  r   )r  rD  rE  rn  r  rK  rZ  r_   r`   rT    s
   zDquantized_decomposed_dequantize_per_tensor_default.<locals>.inner_fnrd  rU  )r   rx   r?  rE  r;   r   ri   r  r"  r1  r   r   rn  rD  rE  r7  r8  r   rZ  rT  r_   rg  r`   2quantized_decomposed_dequantize_per_tensor_default  s   ri  c                   s   |   tjkrt| tj} |   tjksJ d|    t dks9t dkr5 d dks9J dt dksUt dkrQ d dksUJ d|     fdd}tj	| 
  ||  dS )	Nr;  r   r*   expect scale as scalar tensor"expect zero_point as scalar tensorc                   s   | }t  dkrdnd}t  dkrdnd}jtjkr-t|tj}jtjkr:t|tj}t|t| | }t	tjd\}}t
t|||}t| S )Nr*   r   r_   r   )r   r   r   rx   r?  rN   r   r  r@  r>  rB  rA  )r  rn  _scale_zero_pointr  rF  rG  rI  r   rK  r8  r7  rD  scale_loaderrE  zero_point_loaderr_   r`   rT    s   zAquantized_decomposed_quantize_per_tensor_tensor.<locals>.inner_fnrU  )r   rx   rX  r   r?  r   r   rE  r;   r   ri   re  r_   ro  r`   /quantized_decomposed_quantize_per_tensor_tensor  s.   ""rr  c                  s   t  dkst  dkr d dksJ dt  dks8t  dkr4 d dks8J d|  |ksJJ d| d|   d u rQtj|      fdd}tj|  ||  d	S )
Nr   r*   rj  rk  r\  r]  c                   s    | }t  dkrdnd}t  dkrdnd}jtjkr-t|tj}jtjkr:t|tj}tt|tj|| }t|}|S )Nr*   rl  r_   )r   r   r   rx   r?  rN   r   r^  )r  rn  rm  rn  r  rK  rZ  rD  rp  rE  rq  r_   r`   rT  8  s   zCquantized_decomposed_dequantize_per_tensor_tensor.<locals>.inner_fnrU  r`  rh  r_   rs  r`   1quantized_decomposed_dequantize_per_tensor_tensor  s.   ""rt  c           
        s  | d   jdk}|r:tdd | D r:| D ]}|  qtdd | D r1ttjg| R  \} }ttjj| |S t	| dkrFt
| d S t| d |d}t| dtjifdd	| D } d%dddd fddtfdd| D }d&fddtjrt| |S |rttj| |S fddd}d d'ddt	| |kst	| tjkrt fd d| D rtfd!dtjjD }tfd"d| D o|}tfd#d| D otfd$d| D  }	|s|	r|st| |S ttj| |S )(Nr   r   c                 s  s$    | ]}|  tjtjfv V  qd S r   )r   rx   int8uint8r   rn  r_   r_   r`   r   O  s    
zcat.<locals>.<genexpr>c                 s  s     | ]}t | d kV  qdS )r   N)r   r   rw  r_   r_   r`   r   V      r*   r   c                   s   g | ]}t | qS r_   r   rA  r   r_   r`   r   a  r  zcat.<locals>.<listcomp>r   Union[TensorBox, ir.StorageBox]ru   	ir.IRNodec                 S  s>   t | trt | jtjr| j S | jS t | tjr| jS | S r   )rf   r?   rh   r-   r1   r  r   r   r_   r_   r`   unwrap_tensorc  s   

zcat.<locals>.unwrap_tensorc                 S  s   t | tjot | jtjS r   )rf   r-   rO  rh   r<   rq   r_   r_   r`   is_reductiono     zcat.<locals>.is_reductionc                   sJ   t | ttjfr | S | p$t | tjo$t fdd|  D S )Nc                 3  s     | ]} t j|V  qd S r   )rO   rW   
get_buffer)r   readcan_fuse_reductionr_   r`   r   x  s
    
z2cat.<locals>.can_fuse_reduction.<locals>.<genexpr>)rf   r?   r-   r   r;   r  get_read_namesr}  )r  r~  r|  r_   r`   r  r  s   zcat.<locals>.can_fuse_reductionc                 3      | ]} |V  qd S r   r_   r   rq   r  r_   r`   r     r   r   c                   sZ   t | rt j| dd\}}t j| S t| tt jfr# | S t| t jr+dS dS )NF)freezeT)	r-   r  r  ConcatKernelcan_realize_into_without_copyrf   r?   r   r;   )r   r  r   )should_lower_cat_inputr|  r_   r`   r    s   
z#cat.<locals>.should_lower_cat_inputc                   s\   t | ttjfr | S t | tjsdS |  j}|  D ]}| tj	
|7 }q|S r  )rf   r?   r-   r   r;   inner_fn_opcountnum_opsr  rO   rW   r  )r   countr  )op_countr|  r_   r`   r    s   
zcat.<locals>.op_countr   r(   rZ   torch._ops.OpOverloadc                 S  s   | t jjt jjfv S r   )r  catr  constant_pad_ndrZ   r_   r_   r`   additional_pointwise_ops     z%cat.<locals>.additional_pointwise_opsc                 3  s    | ]	}| kV  qd S r   r_   r  )MAX_SIMPLE_OP_COUNTr  r_   r`   r         c                 3  s    | ]}t | V  qd S r   )rF   )r   use)r  r_   r`   r     s
    
c                 3  r  r   r_   rA  r  r_   r`   r     r   c                 3  r  r   r_   rA  r  r_   r`   r         
c                 3  r  r   r_   r  r  r_   r`   r     r   )r   rz  ru   r{  )ru   r   rZ   r  )ri   r   r  ri  require_channels_lastr  r  r  r  r   r  r  r   r   r6  r  r+   force_pointwise_catr3  r?   r-   r  r   max_pointwise_cat_inputsrO   rX   rY   )
r9  r   
cpu_devicern  r   fusable_reductionMAX_COMPLEX_POINTWISE_CATpointwise_usesfuse_pointwise_usehorizontal_fuse_catr_   )r  r  r  r   r~  r  r  r|  r`   r  L  s`   




r  offsetdim1dim2c                   s  |   ttdtdtkfdd tjjt	|d}|rBtjj
tjj |  d}ntjj
tjj  | d}d |r`| df nd|f fddtD }||  fdd	}ttj| ||S )
N)r  rankc                     s   d  d S )Nz(diagonal dimensions cannot be identical z, r_   r_   r  r  r_   r`   r        zdiagonal.<locals>.<lambda>r   )r   r   c                   s    g | ]\}}| fvr|qS r_   r_   )r   rm   r   r  r_   r`   r          zdiagonal.<locals>.<listcomp>c                   s   | d }dgt  }d}tD ]&}|kr | d  ||< q|kr-| d  ||< q| | ||< |d7 }q|t d ksBJ |S )NrB  r   r*   r(   )r   r  )r  diag_idxoriginal_idxcur_dimr  base_idxr  r  num_dimsoriginal_shaper_   r`   	reindexer  s   
zdiagonal.<locals>.reindexer)r   r   r   r   rO   rW   r   evaluate_exprr   Ltevaluate_maxevaluate_minre   rj   r?   r-   GenericViewr   )rn  r  r  r  offset_negative	diag_sizer  r  r_   r  r`   diagonal  s:   
r  c                 C  s   t t| |||S r   )r  r  )rn  r  r  r  r_   r_   r`   diagonal_copy      r  c                 C  $   t | }t||||}t|| |S r   )r  r  	mutate_to)rn  srcr  r  r  r.  r[   r_   r_   r`   diagonal_scatter     
r  c                 C  s  t |}t |  | }d }tjjt |dr || }ntjjt |dr-|}|d urct	|rU|  }| 
 }|  j|| |  }||= ||= t| |||S t| |||d }t||S ttjjjtjjjd }	|	d usvJ t|	dksJ |	tt|	 \}
}|  }| 
 }|
}t|
||  j|| |  | }tj||_tj| ||= ||= t| |||S )Nr   r*   unbacked_bindings)r   r+  r   rO   rW   r   r  r  Ger!   
get_stride
get_layoutr  r  r
  r  r"   	shape_envrX   rZ  r   r7  iterr   r-   DynamicSelectStorageOffsetregister_bufferr   register_operation)r   r   r  r   actual_indexr  
new_stridenew_storage_offsetslice_resultr  unbacked_offset_symr   bufferr_   r_   r`   select  sL   



r  c           
   
   C  s   t | |d}|}t|ttfs2|  | }tjjt	|| d |}|g| }||d |  |d< g }d}|D ]}|| }	|
t| |||	dd |	}q8|S )Nr   r*   rB  Fr  )r  rf   rd   r   r   rO   rW   r   r  r%   rj   r
  )
r   r  r   sizes_x_sizechunksr  r  r   r  r_   r_   r`   splitG  s   
r  c                 C  s   t | ||S r   )r  )r   r  r   r_   r_   r`   split_with_sizesb     r  c                   s>   t  d tjj   } fddt|D }|S )Nr   c                   s   g | ]}t  |qS r_   )r  r   r   r   r_   r`   r   k      zunbind.<locals>.<listcomp>)r  rO   rW   r   r  r   r  )r   r   r  r  r_   r  r`   unbindg  s   r  c                   s   |   }t|}t|| |dkrtt| d|dS |  }tjj}||| |	d t
|| d }||dkrK| |t|| | g |d   || d d  |}	 fdd}
ttj| |	|
S )Nr   )r  r*   c                   s:   | d |     }g | d   ||  d d R S )NrB  r*   r_   )r  dim_idxr   r	  r_   r`   r    s   &zunfold.<locals>.reindexer)r   r   r   r
  r  rO   rW   r   	check_leqcheck_ltr%   r   r  r$   r?   r-   r  r   )r   	dimensionr   r	  r  ndimdim_sizer   new_dim_sizeout_sizer  r_   r  r`   unfoldo  s"   
(r  c                 C  s2   t | |d}t|  }||tjj t| |S r  )r  rd   r   insertr   r'  r(  r  )r   r   r  r_   r_   r`   r    s   
r  c                 C  r  r   )r  rf   r?   rh   r  r_   r_   r`   
unsqueeze_  r  r  c                 C  sZ   t jjjt|}t|  }|dk r||| 7 }d|  kr(|| k s+J  J |S r  )	rO   rW   r   r  r  r   sympifyr   r   )r   r   r  r  r_   r_   r`   r    s    r  rB  c                 C  sT   t | |d}tjj|  | d }t| |d|}t| |||d }t|t|S )Nr   r(   )	r  rO   rW   r   r  r   r
  rV  sigmoid)r   r   new_lenr   r-  r_   r_   r`   glu  s
   r  c                   s$   |rt    fdd}d|_|S )Nc                    s*   dd }t |tjj g| R i |S )Nc                 S  s   t | tjrt| S | S r   )rf   r-   r5   r?   r   r   r_   r_   r`   wrap_tensors  r  z7fallback_handler.<locals>.handler.<locals>.wrap_tensors)pytreetree_mapr-   FallbackKernelr   )rn   r   r  kernelr_   r`   handler  s   z!fallback_handler.<locals>.handlerT)r  r   _is_fallback_handler)r  r  r  r_   r  r`   r    s
   
	r  c                   C     t d d S )NzjTorchinductor does not support code generation for complex operators. Performance may be worse than eager.)warningswarnr_   r_   r_   r`   _warn_complex_not_supported  s   r  rq   torch.Tensorc                 C  s   |   r	t  dS | jrdS | jtjkr?|sdS t|jtjj	r1|jt
jjt
jjt
jjt
jjfv p=t|jtjj	o=t|j S dS )z0Do not support reading or writing to this tensorTF)r  r  is_metar   rx   float8_e8m0fnurf   r[   ry   rz   r  r  r  r  r  
_scaled_mmrG   )rq   r]   r_   r_   r`   unsupported_input_tensor  s(   r  c                 C  sL   t jjtjjjjf}|dur|j|v r| 	 rdS t
| |r dS | jo%tjS )z2Do not support writing tensor but can read from itNFT)r  r  r   rx   rN   r  r  r  r[   r  r  is_cpur+   disable_cpp_codegen)rq   r]   supported_complex_viewsr_   r_   r`   unsupported_output_tensor  s   

r  r]   torch.fx.Nodec                   sv    j tjju r	dS  jdkrdS  j tjju rdS  fdd}tj ji  j	D ]}||ddr4 dS q)| ddS )NFplaceholderc                   sp   t | tjjs	dS d| jvrdS t| jd D ]}t |tjjs"q|r-t	| r, dS qt
| r5 dS qdS )NFr  T)rf   rx   fxNoderZ  r  tree_leaves_subclasses
FakeTensorr  r  )inp_out_node	is_outputrZ  r]   r_   r`   check_skip_condition  s   


zCfallback_node_due_to_unsupported_type.<locals>.check_skip_condition)r  T)
r[   r  view_as_complexr  rZ   lift_fresh_copyr  arg_tree_leavesrn   r   )r]   allow_cpu_inputsr
  r   r_   r	  r`   %fallback_node_due_to_unsupported_type  s   
r  c                   s   | t vs|sJ d|  |r>ttdr>t| gr>tjr%| tjj	j
v s>|s>tjjjr6dtjj_td td|  d fdd}t| tjjr]|  D ]}t| |}|| qOd S t| tjjtjjfrn||  d S td	|  d
t|  )Nz*both a fallback and a decomp for same op: CIFznA make_fallback error occurred in suppress_errors config, and suppress_errors is being disabled to surface it.zmake_fallback(a.  ): a decomposition exists, we should switch to it. To fix this error, either add a decomposition to core_aten_decompositions (preferred) or inductor_decompositions, and delete the corresponding `make_fallback` line. Get help from the inductor team if unsure, don't pick arbitrarily to unblock yourself.c                   s.   t |   d urt|   t| d dt| S Nr  )r   r   r#  r  )op_overloadlayout_constraintr_   r`   register_fallback?  s   

z(make_fallback.<locals>.register_fallbackzUnsupported fallback z with type )r/   r   osgetenvr0   r+   fallback_randomrx   _decompdecompositions_for_rngextra_random_decomps_dynamosuppress_errorslogwarningr   rf   ry   r   r   r   rz   HigherOrderOperatorRuntimeErrorr   )rZ   r  r  override_decompr  olr  r_   r  r`   make_fallback  s>   




r$  c                 C  s$   d}| D ]}|| }qt |tjdS )z
    TorchInductor offset calculation differs from PyTorch eager offset
    calculation for random ops (tl.rand vs torch.rand). In future, we should
    strive for same impl for tl.rand and torch.rand.
    r*   r   tensorrx   r#  )r  numelr   r_   r_   r`   philox_rand_offsetQ  s   
r(  c           	        sd   t | | t j|  | |  fdd}tj| |t| d}t	| }||fS )Nc                   sV   t g tj}t g tj}t t | tj|}t ||}t | S r   )rN   r   rx   r  r   r"  rand)r4  seed_index_exproffset_index_exprrand_index_exprr  r   offset_loader
random_posseed_loaderr_   r`   rT  j  s   zphilox_rand.<locals>.inner_fnrU  )
r-   r  FlexibleLayoutcontiguous_stridesmake_indexerrE  r;   r   rd   r(  )	r   seedr  r  rp   r   rT  random_values_nodeoffset_noder_   r-  r`   philox_rand]  s&   
r7  c              	   C  s.   t jrttjtjtj	j
| ||S td)Nz&should be handled in replace_random.py)r+   r  r  r  r?   r   r-   r  r  native_dropoutr  r   )r   ptrainr_   r_   r`   r8    s   r8  c                 G  sj   t js|  tdksJ d|   t|dks!t|d tr%t	j
jnt	j
j}tj|| g|R   | S )Nr   Tthis should be handled in decomps unless config.fallback_random or the device is CPUr   )r+   r  ri   rx   rp   ri  r   rf   r1  r  
bernoulli_Tensorr-   InplaceBernoulliFallback)r   rn   r  r_   r_   r`   r<    s   r<  c                 G  s4   t js|  tdksJ dtt| g|R  S )Nr   r;  )r+   r  ri   rx   rp   r<  r  )r   rn   r_   r_   r`   bernoulli_p  s   r?  c                 C  s   t r   r   r   r_   r_   r`   _foobar  r  rB  c                 C  r  )Nz1using triton random, expect difference from eager)r  info)saltr_   r_   r`   _warn_triton_random  r  rE  c                   C  s   t tjj d S r   )rE  rO   rW   creation_timer_   r_   r_   r`   warn_triton_random  r  rG  c                  O  F   | dd d urt| i |S tjr|dd  t| i |S tdN	generatorz-should have been handled in replace_random.py)r[  fallback_rand_generatorr+   r  popfallback_rand_defaultr   r  r_   r_   r`   r)       r)  c                  O  rH  rI  )r[  fallback_randn_generatorr+   r  rL  fallback_randn_defaultr   r  r_   r_   r`   randn  rN  rQ  c                 C  s   t |}t j| |S r   )r-   get_stride_orderExternKernelrequire_stride_order)input_tensorr  stride_orderr_   r_   r`   inductor_force_stride_order  s   
rW  c                 C     t d)Nz.should be handled in fuse_seed_creation_pass()r@  )rp   r_   r_   r`   inductor_seed     rY  c                 C  s   t   tt| t|S r   )rG  r?   r   r-   RandomSeedsrC   )r  rp   r_   r_   r`   inductor_seeds  s   r\  c                   s(    fdd}t j  |g dS )Nc                   s   t   S r   )rN   	load_seedget_namerA  r4  seedsr_   r`   rT    r~  z&inductor_lookup_seed.<locals>.inner_fnrU  )r;   r   ri   r   )r`  r4  rT  r_   r_  r`   inductor_lookup_seed  s   ra  r  r   	list[int]r4  modestrc                  s   t jrJ  dv sJ g | } tj}| }tj||| tj| |d	 |
  fdd}tj|||g | d}|  |S )N)r)  rQ  rb  c                   s"   t t g t| tjS r   )r   rN   r"  rx   r  rI  rd  r/  r0  r_   r`   rT    s   z!inductor_random.<locals>.inner_fnrU  )r+   r  rx   r?  r8  r-   r  r1  r2  r3  rE  r;   r   ri  )r   r4  rd  r  r   rp   rT  r  r_   rf  r`   inductor_random  s(   
rg  lowhighc                  sp   t jrJ g |}tj}| }tj|||tj||d	 |
  fdd}tj|||g |dS )Nrb  c              	     s6   t g t | tjt tjt  tjS r   )rN   	randint64r"  rx   r  r#  rI  ri  rh  r/  r0  r_   r`   rT  	  s   z"inductor_randint.<locals>.inner_fnrU  )r+   r  rx   r#  r8  r-   r  r1  r2  r3  rE  r;   r   )rh  ri  r   r4  r  r   rp   rT  r_   rk  r`   inductor_randint	  s"   
rl  tb.tuple[str, sympy.Expr, sympy.Expr, sympy.Expr]c                 C  s4   |   |  d |  d |  d  |  d fS NrB  r   )r^  r   r  rm  r_   r_   r`   _boundaries_helper(	  s
   

rq  tuple[str, sympy.Expr]c                 C  s   |   |  d fS r  )r^  r  rp  r_   r_   r`   _sorter_helper1	  r  rs  	out_int32rightsidesortersorted_sequenceselfru  rv  rw  Optional[str]rx  Optional[TensorBox]c          
        s   dd }|r||rd ur$|s$t tjjdd|||dS |d ur.|dkr.d|r3tjntj |   d urF  t	
 dkrY fd	d
}n
 fdd
}| }tj| ||jd}	|	  |	S )Nc                 S  s   t j| tjS r   )rO   rW   rf  rg  	BUCKETIZErp  r_   r_   r`   r  ?	  s    zsearchsorted.<locals>.<lambda>Fr  rt  rv  Tr*   c              	     sD   | }t j|td d u rd ntd u rd dS ddS )Nr   rx  sorter_indicesrN   	bucketizerq  rs  )r  r  index_dtyperv  ry  rx  values_loaderr_   r`   rT  d	  s   
zsearchsorted.<locals>.inner_fnc              	     s\    }d fdd}t j|t|d u rd ntd u r(d dS |dS )Nrm  r?   c                   s>   |   }tttjdd t|d d  d d D S )Nc                 s  s    | ]	\}}|| V  qd S r   r_   )r   r   rm   r_   r_   r`   r   {	  r  zNsearchsorted.<locals>.inner_fn.<locals>.get_flattened_index.<locals>.<genexpr>rB  )r  rN   r"  r  r  operatorr   r  )rm  strides)r  r  r_   r`   get_flattened_indexw	  s   &z;searchsorted.<locals>.inner_fn.<locals>.get_flattened_indexr~  )rm  r?   r  )r  r  r  r  r  r`   rT  r	  s   	
rU  )r  r  searchsortedr=  rx   r  r#  rE  ri  r   r   ri   r;   r   r  )
ry  rz  ru  rv  rw  rx  validate_bucketizerT  rp   r  r_   r  r`   r  5	  sB   
r  ru  rv  
boundariesc                  s   t   dks
J tj| tjrtj tjs(ttj	j
dd|  |dS    |  }|  |r9tjntj fdd}tj|||  d}|  |S )Nr*   Fr  r  c                   s"   | }t |t d}|S r  )rN   r  rq  )r4  r  r  r  r  rK  rv  r_   r`   rT  	  s   zbucketize.<locals>.inner_fnrU  )r   r   rO   rW   rf  rg  r}  r  r  r  r=  ri  ri   rE  rx   r  r#  r;   r   )rn  r  ru  rv  rp   rT  r  r_   r  r`   r  	  s*   
r  c                 O  $   t tjtjj||f\}}||fS r   )r  tree_map_onlyr-   r5   rS  require_stride1r   rn   r   r_   r_   r`   require_dense	     r  c                 O  r  r   )r  r  r-   r5   rS  require_contiguousr  r_   r_   r`   r  	  r  r  c                 O  r  r   )r  r  r-   r5   rS  r   r  r_   r_   r`   r   	  s   r   c                 O  r  r   )r  r  r-   r5   rS  r  r  r_   r_   r`   r  	  r  r  c                   s|   t  tjrdd  D }tj |S t  tr' fdd  D S t  tt	fr<t
 dd t D S  S )Nc                 S  $   g | ]}t |tjr|jjn|qS r_   rf   rx   SymIntr]   exprr   r_   r_   r`   r   	      z,constrain_to_fake_tensor.<locals>.<listcomp>c                   s    i | ]}|t  | | qS r_   constrain_to_fake_tensorr   keyr   fake_argr_   r`   r   	  s    z,constrain_to_fake_tensor.<locals>.<dictcomp>c                 s      | ]
\}}t ||V  qd S r   r  )r   r   f_ar_   r_   r`   r   	      
z+constrain_to_fake_tensor.<locals>.<genexpr>)rf   r-   r5   r  rS  require_exact_stridesr  r  r   rd   r   r  )r   r  meta_stride_exprr_   r  r`   r  	  s   
r  c                   s6   t dd t| |D }  fdd| D }| |fS )Nc                 s  r  r   r  )r   r   r  r_   r_   r`   r   
  s
    
z,constrain_to_fake_tensors.<locals>.<genexpr>c                   s    i | ]\}}|t | | qS r_   r  r   fake_kwargsr_   r`   r   
  r  z-constrain_to_fake_tensors.<locals>.<dictcomp>)r   r  r   )rn   r   	fake_argsr  r_   r  r`   r   
  s
   r   c                   sJ    fdd t  fddt|jD } fdd| D }||fS )Nc                   s^   t  tjrtjd  tjjj	}tj
 |S t  tr- fdd  D S  S )Nr  c                   s    i | ]}| | | qS r_   r_   r  )apply_constraintr   fx_argr_   r`   r   
  r  zEconstrain_to_fx_strides.<locals>.apply_constraint.<locals>.<dictcomp>)rf   r-   r5   rR  rZ  r  rO   rW   r   r  rS  rT  r  r  )r   r  rV  r  )r   r  r`   r  
  s   
z1constrain_to_fx_strides.<locals>.apply_constraintc                 3  s    | ]
\}} ||V  qd S r   r_   )r   r   r  r  r_   r`   r   
  r  z*constrain_to_fx_strides.<locals>.<genexpr>c                   s"   i | ]\}}| |j | qS r_   r   r   r  fx_noder_   r`   r   
  s   " z+constrain_to_fx_strides.<locals>.<dictcomp>)r   r  rn   r   r  rn   r   r_   r  r`   r   
  s   

r   c                   sN   fdd t  fddtt|jD } fdd| D }||fS )Nc                   s  t |tjs|S |jd }dd | D }t| }|r1|d dkr1tttt	|
 }jtjjkrF| dv rFt	|dksDJ d}|jsPtj||S d	 jtjjjjko^| d
k}t |tsfJ t	|
 dvrp|S t| }|rttj||S t |tr| d ur|rttj||S |r2t|
 }g }	| }
tt	|
 d D ]}tjj|| ds|
d urtjj|
| dr|	| qdgt	| }d|d< d}tt	|d ddD ]A}||d  dkr|||d   }||	v rtjj||d    drd||< qtjj|  ds&t|   }|||< qtj ||S |r?ttj||S t |trY| d urY|rYttj||S  fdd}t |j!tj"r}||s}||# r}ttj||S tj||S )Nr  c                 S  r  r_   r  r   r_   r_   r`   r   &
  r  z=sdpa_constraint.<locals>.apply_constraint.<locals>.<listcomp>rB  r   )r   r   r   )r   r*   r(   r   r   r   r   r   r*   r(   c                   s   t jj|  d   dkS ro  )rO   rW   r   	size_hintr   r   	ALIGNMENTr_   r`   
is_aligned
  s   z=sdpa_constraint.<locals>.apply_constraint.<locals>.is_aligned)$rf   r-   r5   rZ  r  rR  rd   r&  r  r   r   r[   r  0_scaled_dot_product_efficient_attention_backwardr  is_cudarS  rT  rx   rN   '_scaled_dot_product_efficient_attentionr?   is_aligned_realized_tensor_hinttry_match_insignificant_stridesrealize_inputmaybe_get_striderO   rW   r   statically_known_equalsrj   rB   r  rh   r1   r  )r  r   r  meta_valr  rV  effn_attn_fwd_biasis_aligned_tensorr  expanded_dimsmaybe_striderm   out_stridesr  r  )r  r  r`   r  !
  s   





z)sdpa_constraint.<locals>.apply_constraintc                 3  s$    | ]\}\}} |||V  qd S r   r_   )r   r  r   r  r  r_   r`   r   
  s
    


z"sdpa_constraint.<locals>.<genexpr>c                   s$   i | ]\}}| d |j | qS rB  r   r   r  r_   r`   r   
  s   $ z#sdpa_constraint.<locals>.<dictcomp>)r   re   r  rn   r   r  r_   r  r`   sdpa_constraint
  s   |r  )r  )r"  c                 C  sn   |}|   |  krt||   }|  | kr t||  }|  | kr3t||  }t|S t|S r   )ri   r  r   r   r   r+  r  )rz  r  r  r   rk   r_   r_   r`   rz  z  s   )memory_formatc                C  s&   t j|  |  |  t|  dS NrU  )r;   r   ri   r   rE  rd   r   )r   r  r_   r_   r`   r    s   
r  c                 C  s   g }t | tr+t | jtjr+| j} t | tjr'||   | j} t | tjst| } t| } |rI| j} |d d d D ]	}tj| |d} q;t| } | S )NrB  r  )rf   r?   rh   r-   r   rj   r  r  )r   reinterpret_view_layoutsr  r_   r_   r`   clone_preserve_reinterpret_view  s   r  r  c                  s(    fdd}t jt| || gdS )Nc                   s   t j| d    dS )Nr   r   rN   r"  rI  r   r  r	  r_   r`   rs     r  ziota.<locals>.fnrU  )r;   r   rC   )lengthr  r	  r   rp   requires_gradrs   r_   r  r`   iota  s   
r  r   r4  c                   s   |   |  ks
J |  t|  d tjjtdr'| 	    tjj
d tjj| 	    tt| | 	 }|  fdd}tj|  |   |t| 	 dS )Nr   c              	     s6   t t t |   tjt tj| | S r   )rN   r  eqr"  rx   r  r  r   r4  
src_loaderr  r_   r`   rT    s   z select_scatter.<locals>.inner_fnrU  )r   rE  r  rO   rW   r   r  r   r  r   r  r  r+  r  r;   r   ri   rd   )r   r  r   r4  rT  r_   r  r`   select_scatter  s    

r  c                   s   t | } t d    tj \t }t	 d  | < t
||}|  fdd}tj  |t dS )Nr   r*   c              
     s2  dkrkrdkr| S t |  tj}t|  t|    < g }dkr?|t |t t	tj krT|t 
|t t	tj dkrs|t t t|   dtjt dtj |swJ tt j|}t | fddtrdnd}t ||| S )Nr   r*   c                         S r   r_   r_   )src_idxr  r_   r`   r        z1slice_scatter.<locals>.inner_fn.<locals>.<lambda>r!  )rN   r"  rx   r#  rd   r%   rj   r%  r   r+  r&  r  r'   r$  r  r  r'  r(  r   r  )r  r)  r.  src_valr   r  r  r  r  r	  r   r  )r  r`   rT    sR   zslice_scatter.<locals>.inner_fnrU  )r   r   rE  r  r   r-   r  normalize_start_endrd   r%   r+  r;   r   ri   )r   r  r   r  r  r	  src_sizerT  r_   r  r`   slice_scatter  s    
.
r  c                 C  s*   t | ttfrt| dkrt| d S | S r  )rf   rd   r   r   _unwrapr   r_   r_   r`   r  "  s   r  r   rp   r  
pin_memoryc                  s  t |d tjfv d|  t | d tt tr ptjnp%t g }t tj	r6 fdd}nBt t
tfrE fdd}n3t dksZt d t
tfrlt dkrl|tt   fdd}ntjtj |d	S tjt|||d
S )Nlayout=r  c                      t  S r   r  rI  rh   r   r_   r`   rT  5     ztensor.<locals>.inner_fnc                   r  r   rN   r$  rI  r  r_   r`   rT  :  r  r   r   c                   s8    fdd t dkrtdS  dt S )Nc              	     sr   | |k sJ ||  dkrt |  S ||  d |  }t t t d tjt |tj | | ||S )Nr*   r(   r   )rN   r$  r  r&  r"  rx   r#  )r  r  mid)binary_searchrh   r   r4  r_   r`   r  B  s   z/tensor.<locals>.inner_fn.<locals>.binary_searchr   )r   rN   r$  rI  r  )r  r4  r`   rT  A  s   r  rU  )r   rx   stridedrf   r  r   r#  get_default_dtyper   r   r1  r   rj   IntegerrO   rW   add_tensor_constantr&  r;   r   rC   )rh   r   rp   r  r  rS  rT  r_   r  r`   r&  (  s,   *r&  c                 C  s@   t | tr|d urt| |} |d urt| |} | S t| ||dS )Nr  )rf   r?   r   r  r&  )rh   r   rp   r_   r_   r`   	as_tensora  s   


r  c                 C  s   t | tjdS r=  r%  rh   r_   r_   r`   long_tensorl  r  r  c                 C  s   t tjjjtjjjd }|d usJ t|dksJ |tt	|
 \}}t||| }tj||_tj| tjjjd }t|tjtjtjfrQ|jjS t|S )Nr  r*   r  )r"   rO   rW   r   r  rX   rZ  r   r7  r  r   r-   DynamicScalarr  r   r  rf   rx   r  SymFloatSymBoolr]   r  r   r  )rh   r  binding_symkeypathr  r  r_   r_   r`   _local_scalar_denseq  s   
r  c                 C     d S r   r_   )rh   r   r_   r_   r`   _assert_scalar  s   r  )rp   r  c                C  r  r   r_   )r   r   r  r   rp   r  r_   r_   r`   _assert_tensor_metadata  s   r  c                   s   | t | ttfstdrjt ttfr  fdd}n"t tjr. fdd}nt dks8J 	 fdd}t
j| |t|dS )Nr   c                      t  S r   r  rI  r   r   r_   r`   rT    r  z_full.<locals>.inner_fnc                   r  r   r  rI  r  r_   r`   rT    r  r   c                   s    g S r   r_   rI  )value_loaderr_   r`   rT    s   rU  )rf   r   r1  r   r   r   r   r   r   rE  r;   r   rd   )
fill_valuerp   r   r   rT  r_   )r   r   r  r`   _full  s    r  c                 K  s   t t|| fi |S r   create_tensor_liketensor_constructor)r   r  r   r_   r_   r`   r    s   r  c                   s    d d d d dd d fdd
}|S )NF)namesr   rp   r  r  r  c                   s   t | d u d t |d tjfv d|  t | d t|}|p#t }t|dkr;t|d tttj	fr;t|d }|D ]
}t|tj
rGJ q=dd |D }t |||S )Nnamed tensorsr  r  r*   r   c                 S  r  r_   r  r   r_   r_   r`   r     r  z5tensor_constructor.<locals>.inner.<locals>.<listcomp>)r   rx   r  rC   r  r   rf   rd   r   Sizer  r  )r  r   rp   r  r  r  r   r   r  r_   r`   ra    s   	"z!tensor_constructor.<locals>.innerr_   )r  ra  r_   r  r`   r    s   r  )r  r   r  rp   r  r  c                 G  sX   t | d u d t|}t|dkr"t|d tttjfr"t|d }t|d ||||dS )Nr  r*   r   r   r  rp   r  )	r   rC   r   rf   rd   r   rx   r  empty_strided)r  r   r  rp   r  r  r   r_   r_   r`   r    s   
"r  c                   s   dddddd fdd
}|S )zZ
    Shim to convert X_like(...) into X(...).  For example zeros_like() into zeros().
    NF)r   rp   r  r  r  c                  sj   t | d t |d tjfv d|  |d u r|  }nt|}|p%|  }t|  } |||||dS )Nr  r  r  )r   rx   r  r   r   ri   rd   r   )r   r   rp   r  r  r  r   creation_fnr_   r`   _constant_like  s   

z*create_tensor_like.<locals>._constant_liker_   )r  r  r_   r   r`   r     s   
r  c                 C  s   t t| S r   r  r  r_   r_   r`   r    r  r  c                   s   d d d d d fdd
}|S )Nr  c                  st   t |ttfs	J t| d t|d tjfv d|  t|p#|  }|p)|  }dd |D }t	 t
|||S )Nr  r  c                 S  r  r_   )r   r  r   r_   r_   r`   r   )  r  z7new_constant.<locals>._new_constant.<locals>.<listcomp>)rf   rd   r   r   rx   r  r   r   ri   r  rC   r   r   r   r  rp   r  r  r_   r`   _new_constant!  s   z#new_constant.<locals>._new_constantr_   )r  r  r_   r  r`   new_constant   s   r  r  c                C  s8   |d u r|   }|d u r|  }t|d ||t||dS Nr  r   ri   r  rC   r  r_   r_   r`   	new_empty/  s   r  c                C  s  t | ttfs	J t |tttd fsJ t| d t|d tjfv d|  t|p/t }|p7t	dj
}t|}td||| d}|  |jj}tj|jdgt|  d|_t |tjsbJ dd | D } |rrd	d |D ntj| }tj||| |d
|_|S )Nr  r  r!  r   )r  rp   r   r   )rS  c                 S  r  r_   r  r   r_   r_   r`   r   P  r  z!empty_strided.<locals>.<listcomp>c                 S  r  r_   r  r   r_   r_   r`   r   R  r  )rp   r   r   r  )rf   rd   r   r   r   rx   r  r   r  r&  rp   rC   r  ri  rh   dataclassesreplacer   r-   rO  r1  r2  r  r  )r   r  r   r  rp   r  	pointwiser  r_   r_   r`   r  ?  s0   
r  c                C  s8   |d u r|   }|d u r|  }t||||t||dS r  r  )r   r   r  r   r  rp   r  r_   r_   r`   new_empty_strided_  s   r  c                 C  s2   dd |D }t tt||jd}tj| |S )Nc                 S  r   r_   r   r   r_   r_   r`   r   s  r   z copy_strided.<locals>.<listcomp>)r  )sortedr  r   __getitem__r-   rS  rT  )r   r  rV  r_   r_   r`   copy_stridedq  s   r  c                 K  s*   | dd usJ dt|| fi |S )Nr   z(dtype should be handled by decomposition)r[  r  )r   r  r   r_   r_   r`   fullx  s   r  c                   s   t | tsJ | dkrt| | S |  tdk}t|  | |r/t| dg} dg|  |  fdd}t	j
|  |  || dS )Nr   r*   c                   sF   t | } t|   }t| dkr|g} | S ||  < | S r  )rd   rN   indirect_indexingr   )r  
gather_idxr   index_loaderr   r  r_   r`   rs     s   zgather.<locals>.fnrU  )rf   r?   	get_numelr  r   r   r  r+  rE  r;   r   ri   r   )r   r   r4  sparse_gradr  rs   r_   r  r`   gather~  s$   	r  c                   s   |rt tjj| ||||S |rJ t| tsJ t|ts J dt| v s*J |  |  t	|
 | 
 g |
 dd   fdd}tj|  |  |dS )Nr   r*   c                   s\   t | t ksJ |  d  | d  }t|d gg | d   }|S )Nz != r   )r   rN   r  )r  	var_index
weight_idxindices_loaderindices_ndimr  weight_loaderweight_sizer_   r`   rs     s   "
zembedding.<locals>.fnrU  )r  r  	embeddingr  rf   r?   re  r   rE  r   r   r;   r   ri   )weightr  padding_idxscale_grad_by_freqsparsers   r_   r  r`   r    s(   

r  c                   s   t dd  D sJ ddd  D  tdd  D r"tddd t D }t|d	ks5J d
d gt  }t|t fdd|D  D ]\}}| |krXtd|||< qJ||fS )Nc                 s  s4    | ]}|d ur|  tjtjtjtjfv V  qd S r   )r   rx   r#  r  r   rv  r   r_   r_   r`   r     s    z.check_and_broadcast_indices.<locals>.<genexpr>z)indices must be int64, byte or bool. Got c                 S  s   g | ]
}|d ur|  qS r   r   r   r_   r_   r`   r         z/check_and_broadcast_indices.<locals>.<listcomp>c                 s  s,    | ]}|d ur|  tjtjfv V  qd S r   )r   rx   r   rv  r   r_   r_   r`   r     s    "zFallback for bool indicesc                 S  r   r_   r   r   r_   r_   r`   r     r   r   z"requires at least 1 non-None indexc                   r  r_   r_   r   r  r_   r`   r     r   z.Fallback when indices is on a different device)r  r  r   re   r   r  r  ri   )r  rp   
valid_idxsnew_indicesrm   r   r_   r&  r`   check_and_broadcast_indices  s"   
$
r)  c	              
     s   dt dd  D ]\}	}
|
|	 dkrdq
fddtD g 
tt d  d }r> nd |  |d    	f
dd}|fS )	NFr*   Tc                   s    g | ]\}}|d u r | qS r   r_   )r   rm   r  r  r_   r`   r     r  z2index_output_size_and_inner_fn.<locals>.<listcomp>r   c           	   	     s  t | t ks
J t t ksJ t }g }d }r"dn|}d}td d D ]F}||kr8||7 }| d u rR|t | k sFJ || |  |d7 }q.| }|d us\J | }|tj|| |||  | d q.g || |d  }	d u r|S 	|S )Nr   rB  r*   r   wrap_neg)r   r  rj   rN   r  )	r  r  	new_indexfirst_tensor_indexstart_offsetnext_idxrm   loaderr   )
r   indexed_sizer  indices_loadersnon_consecutive_tensorsoutput_sizetensor_indicestensor_sizer,  r  r_   r`   rs     s>   

z*index_output_size_and_inner_fn.<locals>.fn)r  re   r   )r  r  r6  r7  r3  r2  r  r   r,  previouscurrentr.  rs   r_   )r   r2  r  r3  r4  r5  r6  r7  r,  r  r  r`   index_output_size_and_inner_fn  s$    


"r:  c                 C  s,   t | ||\}}}tj|  |  ||dS r  )index_impl_helperr;   r   ri   r   )r   r  r   r5  rT  r   r_   r_   r`   
index_impl(  s   r<  c           
        s   t ttfs	J |  t|  \}t|dks J ddd D }t|d   }|  fddttD }|rQd|v rQd|vrQt	dfddttD }t
||||d ||d	\}  fd	d
}	||	 fS )Nr   z Must have at least one valid idxc                 S      g | ]}|d ur|  nd qS r   rD  r   r_   r_   r`   r   9  r  z%index_impl_helper.<locals>.<listcomp>c                   s    g | ]} | d ur| qS r   r_   r   )r  r  r_   r`   r   A  r  z0index is out of bounds for dimension with size 0c                   r  r_   r_   r   r*  r_   r`   r   E  r   r+  c                   s    | S r   r_   r  )index_inner_fnr  r_   r`   rT  R  r  z#index_impl_helper.<locals>.inner_fn)rf   rd   r   rE  r)  ri   r   r   r  
IndexErrorr:  )
r   r  r   r,  r6  r3  r7  r2  r5  rT  r_   )r>  r  r  r  r`   r;  3  s0   

r;  c                 C  sB   zt | |ddW S  ty    |   ttjjdd| | Y S w )NTr   Fr  )r<  r   ri  r  r  r4  r=  r   r  r_   r_   r`   r4  X  s   c                 C  s   t | |ddS )NFr@  )r<  rA  r_   r_   r`   _unsafe_indexd  r  rB  c                 C  s   t t| |||dddS )NTFr   may_realizeindex_put_impl_r  r   r  r  
accumulater_   r_   r`   	index_putq     rI  c                 C  s   t t| |||dddS )NFrC  rE  rG  r_   r_   r`   _unsafe_index_putx  rJ  rK  c                 C  sB   |  |   krt||   }|rt| |}t| t|d || S r  )ri   r  r   r  r  )rz  r  r   rH  r_   r_   r`   index_put_as_masked_fill  s
   
rL  c                 C  s4   t tjjjtjjsJ t	tjjj| ||| | S r   )
rf   rO   rW   rX   r[   rx   ry   rz   r-   IndexPutFallbackrz  r  r  rH  r_   r_   r`   index_put_fallback  s   rO  c                 C  s   t | |||dddS )NTrC  rF  rN  r_   r_   r`   
index_put_     rQ  c                 C  s   t | |||dddS )NFTrC  rP  rN  r_   r_   r`   _unsafe_index_put_  rR  rS  c              
     s  |r!dd }dd  || |  v r!t fdd|D s!|  | dkrZt|dkrZ|d  tjtjfv rZ|d }t	t|
 t| 
 D ]}t|d	}qJt| |g||S t ret| |||S |D ]}	|	d ur|	 tjtjfv rt| |||  S qg| 
 t}
|rt|  r|
dkrt| dg} t| |||} |
dkrt| g } | S t||  }zt||  \}}W n ty   t| ||| Y S w d
d |D }t| tsJ |   |
dkrt| dg} t||d  
 }fddt	t|D }t|||||d |d\}}t||}|  }|d us!J tj||  | |||r1dnd d}tjd t| |d}tj !||_"tj #| |
dkrWt| g } | S )Nc                 S  sP   t | tjr	| j} t | tjr|  } t | tjr| j} t | tjr&|  S d S r   )	rf   r-   r?   rh   r1   r  r   Bufferr^  r   r_   r_   r`   try_get_name  s   z%index_put_impl_.<locals>.try_get_namec                 S  sd   t | tr0t | jtjr0| j } t | tjo/t | jtjo/t| jdd o/| jj	j
tjjjjkS dS )Nr  F)rf   r?   rh   r-   r1   r  r   rS  r   r  r[   rx   rN   r  randpermr  )indicer_   r_   r`   indice_slice_from_randperm  s   
z3index_put_impl_.<locals>.indice_slice_from_randpermc                 3  r  r   r_   )r   rW  )rX  r_   r`   r     r  z"index_put_impl_.<locals>.<genexpr>r*   r   rB  c                 S  r=  r   rD  r   r_   r_   r`   r     r  z#index_put_impl_.<locals>.<listcomp>c                   r  r_   r_   r   r*  r_   r`   r     r   r@  
atomic_addrp   r   rT  rS  output_indexerscatter_moder   r  rh   )$r  r  ri  r  r   r   rx   r   rv  r  r   r  rL  $are_deterministic_algorithms_enabledrO  rH   r  r   r)  ri   r   rf   r?   rd   r:  r+  r-   ScatterrE  rO  MutationLayoutSHOULDREMOVErO   rW   r  r   r  )rz  r  r  rH  r   rD  rU  r.  r   r4  x_ndimr6  r3  r7  r2  expected_vals_sizerT  rp   scatterr  r_   )rX  r  r`   rF    s   	




rF  r  c                   sT   t | |ddd\}}  |   fdd}tj|  |  ||dS )NFr+  c                   sB   j tjkrt tj}n }t| fddS )Nc                     s    S r   r_   r_   )_unsafe_index_fnr  self_loaderr_   r`   r  1  r   z8_unsafe_masked_index.<locals>.inner_fn.<locals>.<lambda>)r   rx   r   rN   r   r(  )r  mask_valrd  fillr.  mask_loaderre  r  r`   rT  ,  s   z&_unsafe_masked_index.<locals>.inner_fnrU  )r;  rE  r;   r   ri   r   )rz  r.  r  rh  rS  r   rT  r_   rg  r`   _unsafe_masked_index$  s   rj  c                   s@   t ||d}|   fddtt D }t| ||ddS )Nr   c                   s6   g | ]} | rt  | |  | d  ndqS r  r  r   r  r  r_   r`   r   ?  s    (z7_unsafe_masked_index_put_accumulate.<locals>.<listcomp>T)rH  )r  r   r  r   rK  )r   r.  r  r  masked_valueclamped_indicesr_   rk  r`   #_unsafe_masked_index_put_accumulate;  s   
rn  c                 C  s   t |t || S r   )rN   rA  rB  r   minmaxr_   r_   r`   r  H     r  c                 C  r  r   )r  r  copy_)rz  r  r   r  r  r.  output_viewr_   r_   r`   as_strided_scatterM  r  ru  c                 K  s   t t| |||fi |S r   )scatter_r  )r   r   r4  r  r   r_   r_   r`   rc  U     rc  r  include_selfr  r  r  ry  c             	   C  sf   t |t}t| || ttj|r| nt||r| jnd|r1t	j
| ||||||d |S d S )Nznot implrx  )rf   r?   rM   r   r   rx   r   r   ri   r-   ScatterFallback)r  rz  r   r4  r  r  ry  src_is_tensorr_   r_   r`   scatter_fallbackZ  s(   

	r|  r  c                C  sr   |dv sJ |d u r$t tjtjjjj}t|| ||||d}|d ur$|S |dkr+d}n|dkr1d}t	| ||||S )N)Nr   multiplyr}  r   sumr~  prod)
r   r  rv  rO   rW   rX   r[   _overloadnamer|  scatter_reduce_)rz  r   r4  r  r  r  fallback_resultr_   r_   r`   rv  {  s   rv  c                 C  s   t t| |||S r   )scatter_add_r  r   r   r4  r  r_   r_   r`   scatter_add  r  r  c                 C  s   t | |||dS )Nr  )r  r  r_   r_   r`   r    r  r  c                 K  s   t t| ||||fi |S r   )r  r  )r   r   r4  r  reduction_typer   r_   r_   r`   scatter_reduce  s   r  )ry  c             	     s8  |dv sJ t tj dkrdtj v sJ dttr$tttjj |||d}|r5|S tt	s<J dt
| v sFJ t  }|dkrVtdgtt	rit  dkritdgt|t	r|t | dkr|t|dg}| dkrS t    | tt	r nd  fdd	}fd
d}	dd }
 }|d usJ |stj| fdd| |d d}tjd t|d}tj||_tj| tj| |	| ||
|d}tjd t|d}tj||_tj| |dkrtg S )N)Nr  r  meanamaxaminr*   twozKaten.scatter_reduce_.two is not the unique overload of aten.scatter_reduce_rx  r   r   c                   sD     }t|}t| }tj| |dkrdn|  dd| < |S )Nr   r*   F)r,  )r   r   rd   rN   r  )r  r  r  indirect_idx)r   r  rz  r_   r`   r[    s   
z'scatter_reduce_.<locals>.output_indexerc                   s   r| S t   S r   rN   r$  r   r  )rz  r  r  r_   r`   rs     s   zscatter_reduce_.<locals>.fnc                 S  s   | dkrdS | d u sJ d S )Nr  rY  r_   r}  r_   r_   r`   backend_reduce_str  s   z+scatter_reduce_.<locals>.backend_reduce_strc                   s   t d  S r  r  rI  )rz  r_   r`   r    r  z!scatter_reduce_.<locals>.<lambda>rZ  r]  )r   r  r  r   rf   r   r  r|  r  r?   re  r   r   r  r  r  ri  rE  ri   r-   r_  rO  r`  rO   rW   r  r   r  )rz  r   r4  r  r  ry  r  r  r[  rs   r  rp   zero_outr  rc  r_   )r   r  rz  r  r  r`   r    s   







r  scales_xtuple[Optional[float], ...]nexactc           
        s   |    |  |   d  |  d   }dd D t|ks)J |}dd t|D t|D ]\}}|d urGd| |< q9 fddfdd}	tj|  | 	 |	g ||d	S )
Nc                 S  r   r_   r  r   r_   r_   r`   r     r   z&upsample_nearestnd.<locals>.<listcomp>c                 S  s   g | ]\}}|| qS r_   r_   )r   rm   or_   r_   r`   r      r  rb  c                   s\   t | tj}  rt | t dtj} t | t |tj} t | tj} t j	| |ddS )N      ?Fr@  )
rN   r"  rx   r?  r   r$  rV  r   r  r  )r   rD  r   )r  r_   r`   scale_fn%  s   z$upsample_nearestnd.<locals>.scale_fnc                   sB   |  d  }| d   }g |fddt | D S )Nc                   s   g | ]\}}} |||qS r_   r_   )r   rm   r   r   )r  r_   r`   r   4  r   z2upsample_nearestnd.<locals>.fn.<locals>.<listcomp>)r  )r  r   r-  )i_sizes
inv_scalesr  r  r  r_   r`   rs   0  s
    zupsample_nearestnd.<locals>.fnrU  )
realize_hintrE  r   r   r  re   r;   r   ri   r   )
r   r5  r  r  r  batcho_sizesrm   rD  rs   r_   )r  r  r  r  r  r  r`   upsample_nearestnd  s(   
r  Optional[float]c                 C  s   t | ||fddS )Nr*   r  r  r   r5  r4  r_   r_   r`   upsample_nearest1d?  r  r  c                 C  s   t | ||fdddS )Nr*   Tr  r  r  r  r_   r_   r`   _upsample_nearest_exact1dD  rr  r  scales_hscales_wc                 C  s   t | |||fddS )Nr(   r  r  r   r5  r  r  r_   r_   r`   upsample_nearest2dI  s   r  c                 C  s   t | |||fdddS )Nr(   Tr  r  r  r_   r_   r`   _upsample_nearest_exact2dP  s   r  scales_dc                 C  s   t | ||||fddS )Nr   r  r  r   r5  r  r  r  r_   r_   r`   upsample_nearest3dW  s   r  c                 C  s   t | ||||fdddS )Nr   Tr  r  r  r_   r_   r`   _upsample_nearest_exact3db  s   r  c                   s   t  fdd|D S )Nc                 3  s    | ]	}t | V  qd S r   r  r   r   r_   r`   r   p  r  z$_create_constants.<locals>.<genexpr>)r   )r   rn   r_   r   r`   r>  o  s   r>  c                   s:   |   |   fdd}tj|  |  |dS )Nc                   sF   t | } t| tksJ  D ]}| d | |  | |< q| S r  )rd   r   )r  r   r  r  r  r_   r`   r1  y  s
   zrev.<locals>.loaderrU  )rE  r   r;   r   ri   r   )r   r  r1  r_   r  r`   revs  s   r  paddingSequence[int]r  c                 C  s  dd }| r	dS t |dkst |  dkrdS |   t| tjrBt| jtjrBt| jjtjs=t	j
rBt| jjtjrB| jjjsDdS |   t| \}}|j}|d dkrZdS |d dksl|d dksl|d dkrndS |d }|dkrxdS |d }|jd }	||	| k rdS | jjj}
|jd |jd | g}|tjj|
< t| ||j|j}t|d|	|	| d	}t|| td
 d  d7  < |S )z
    This optimization changes the semantics of padding from 'clone'
    style to 'view' style.

    Thanks to functionalization, this change can still maintain numerical
    correctness.
    c                  S  sL   t jj} | du r
dS t| j}t|dkr$|d jtjj	tj
j	fv r$dS dS )a  
        Conservatively check if padding can be fused with downstream op.
        1. if the downstream op is a sum, then there is little benefit to
           do inplace padding
        2. if the downstream op is a matmul, doing inplace padding can
           save membw.
        NTr*   r   F)rO   rW   rX   r   rY   r   r[   r  mmr  addmm)rX   rY   r_   r_   r`   _padding_can_be_fused  s   
z6inplace_constant_pad_nd.<locals>._padding_can_be_fusedNr   r(   r*   r   r   )r   r  r  inductorinplace_padding)r   r   ri  rf   r-   r?   rh   r   rO  r+   can_inplace_pad_graph_inputInputBufferr   freeze_layoutr  r  r   rO   rW   buffer_to_padded_sizer  r  r
  fill_r   )r   r  r  r  r   r  r  npadstride0rowsizebufnamepadded_size	resized_xsliced_xr_   r_   r`   inplace_constant_pad_nd  s\   

$


r  c              	     st  t |d dks
J tdd |D rt| S tjr$t| |}|r$|S |  }tttt	|d d d |dd d  t |t   g  D ]\}}
tjj||f qIt|d  }g t	 |d  D ]\\}}	}

|
 |
t|
| |	  qmt |t |ksJ t|   fddfdd	}|  tj|  |  ||d
S )Nr(   r   c                 s  r  r   Nr_   r   r9  r_   r_   r`   r     r   z"constant_pad_nd.<locals>.<genexpr>r*   c                   s~   g }t  d  D ]\}\}}}|dkr|t|d |dkr+|t|| qttj|}t| fddS )Nr   c                     r  r   r_   r_   )r4  r  r_   r`   r    r  z/constant_pad_nd.<locals>.mask.<locals>.<lambda>)	r  rj   range_mask_lowrange_mask_highr  r  rN   r'  r(  )r4  r.  r  rh  ri  r  )boundsr  
mask_sizesr  r  rI  r`   r.    s   "zconstant_pad_nd.<locals>.maskc                   sZ   t | d  }t| d   D ]\}\}}|||  qt|t| ks)J |S r   )rd   r  rj   r   )r4  r-  r  rh  _high)bounds_precompr.  r  r_   r`   	offset_fn  s
   z"constant_pad_nd.<locals>.offset_fnrU  )r   r  r  r+   r  r  r   rd   r&  r  rj   rO   rW   r   lookup_precomputed_sizer   r+  r   r   rE  r;   r   ri   )r   r  r  rk   r  lhr5  rh  ri  r   r  r_   )r  r  r  r.  r  r  r  r`   r    s:   *

r  rm   
sympy.ExprUnion[sympy.Expr, int]c                 C  s&   t t | tjt t|tjS r   )rN   r%  r"  rx   r#  r   r  )rm   rh  r_   r_   r`   r     s   r  c                 C  s    t t | tjt |tjS r   )rN   r&  r"  rx   r#  )rm   ri  r_   r_   r`   r  '  s   r  c                 C  s   t t| |t| |S r   )rN   r'  r  r  )rm   ri  rh  r_   r_   r`   
range_mask.  s   r  rb  c                   sF       d   pdg   fdd}|S )Nr   c                   s|   | d   |  d   t tj fddtD }r1t| fddS t| 	fddS )Nc                   s.   g | ]}t |  | |  |  qS r_   )r  r   )r  ih	padding_hr_   r`   r   B  s   . z=constant_boundary_condition.<locals>.load.<locals>.<listcomp>c                     s   t  dg S )Nr   )constant_boundary_conditionr_   )r   r  pad_fill_valueprefixr   r_   r`   r  G  s    
z;constant_boundary_condition.<locals>.load.<locals>.<lambda>c                     s   g  S r   r_   r_   )r  r  r  r_   r`   r  M  r  )r  r  rN   r'  r  r(  )r4  r.  r   r  r  r  r  r  r   r  )r  r  r`   rH  <  s   	z)constant_boundary_condition.<locals>.load)r   rE  )r   r  r  r  r   rH  r_   r  r`   r  5  s
   r  dilationc          	      C  s&  |d u rdgt | }t| d||   || || d   || d  || }|rt| d||   || || d   d|| d   || }tjj|d ||  |  ||  dkrt|d8 }tjjd|||  |  ||   tjj|| dkrtjj|| d}||fS |}||fS )Nr*   r(   r   F)r   r%   rO   rW   r   r  r  r*  )	r   rm   kernel_sizer  r  	ceil_moder  x_outx_altr_   r_   r`   pooling_sizeS  s4   .
*$r  c                C  s    t | |} ttj| }|dkS )N   )rI   r  r  r  rV  )r  n_dimwindow_sizer_   r_   r`   %should_fallback_max_pool_with_indicesq  s   
r  assert_fallbackc                C  s   |dkr	dg| }|dkrdg| }|s|}t ||}t ||}t ||}t ||}t| ts1J t||ks9J t||ksAJ t||ksIJ t||ksQJ t|  |d |d fv saJ t||d}|d urq||ksqJ |||||fS )Nr   r*   r(   r  )rI   rf   r?   r   r   r  )r   r  r  r  r  r  r  use_fallbackr_   r_   r`   max_pool_checksw  s(   





 r  c             
     sN  |    | jd   }| j d  t fddtD  \} | j}	|	tju r2dn|	jr9tdnt	|	j
}
t|t| }tsXt sXtdd D r`t| |
dn|  fdd	}tjd
| |  |	|	||d}tjd| |  tj|	||d}t|jjtr|  t|jjtr|  ||fS )Nc                   s&   g | ]}t | | d qS )r  r  r  )r  dhwr  r  r  r  r_   r`   r     s    z*_max_pool_with_offsets.<locals>.<listcomp>F-infc                 s  s    | ]}|d kV  qdS r  r_   r  r_   r_   r`   r     r   z)_max_pool_with_offsets.<locals>.<genexpr>r  c                   sJ   | d   }|  d    fddt D }g ||S )Nc                   4   g | ]} | |  | |   |  qS r_   r_   r   bhr  r  reduction_idxr  r_   r`   r         &z<_max_pool_with_offsets.<locals>.fn_inner.<locals>.<listcomp>r  )r  r  r  r  )r  r  r  r  r  r  r  r`   fn_inner  s   z(_max_pool_with_offsets.<locals>.fn_innerrq  r  
input_noderp   	dst_dtyper{  rT  rS  reduction_rangesargmax)r  r  r  r  r   rx   r   r  r1  r  rp  rd   r  r  rE  r<   r   ri   r#  rf   rh   ri  )r   r  r  r  r  r  r  r  dhw_outr   	min_valuer  r  r  offsetsr_   )r  r  r  r  r  r  r  r  r`   _max_pool_with_offsets  sV   
	
"	

r  c           
   
   C  s   t |}t| |||||dd\}}}}}tjdd t| ||||||d\}}	|t|	tjfW  d    S 1 s:w   Y  d S )NFr  r  unroll_reductions_thresholdr  )r   r  r+   r   r  r   rx   ru  )
r   r  r  r  r  r  r  r   r  r  r_   r_   r`   !_low_memory_max_pool_with_offsets  s,   	

	$r   r  r  "Sequence[Union[int, torch.SymInt]]
input_sizeincrements_to_indexxCallable[[Sequence[Union[int, torch.SymInt]], Sequence[Union[int, torch.SymInt]]], torch._inductor.virtualized.OpsValue]c                   sZ   t |  tttj fdd}tj	| 
 tj||  d}|S )Nc                   sJ   | }t |}t|} | |}t t| d  tjS r   )rN   r  r,   _flattened_index_to_ndr"  _flatten_indexrx   r#  )r  r  offset_sympyr  idhwr  r  r  r  offsets_loaderr  r_   r`   offsets_to_indices  s   
z4_pool_offsets_to_indices.<locals>.offsets_to_indicesrU  )r   rE  r   r  r  r  r  rV  r;   r   ri   rx   r#  r   )r  r  r  r  r  r  r_   r	  r`   _pool_offsets_to_indices  s   		r  c                   s(   t | fdd}t| |||S )Nc                   s,   |  d    fddt D S )Nc                   r  r_   r_   r   r  r_   r`   r   ,  r  zX_low_memory_max_pool_offsets_to_indices.<locals>.increments_to_index.<locals>.<listcomp>r  r  r  r  r  r  r  r  r`   r  *  s   zD_low_memory_max_pool_offsets_to_indices.<locals>.increments_to_index)r   r  )r  r  r  r  r  r  r  r_   r  r`   '_low_memory_max_pool_offsets_to_indices!  s
   r  c              	   C  s^   t | |||||d\}}}}}t| ||||||d\}}	t|	|| j| d  |||}
||
fS )Nr  )r  r  r  r  )r   r  r  r  r  r  r  r   rk   r  r  r_   r_   r`   _max_pool_with_indices6  s   	
	r  c              	   C     t | |||||ddS Nr(   r  r  r   r  r  r  r  r  r_   r_   r`   max_pool2d_with_indicesT     	r  c              	   C  r  Nr   r  r  r  r_   r_   r`   max_pool3d_with_indicesc  r  r  c                   s`  dkrddg|dkrddg}st |tsJ tdks#J tdks+J tdks3J t|dks;J t| dv sEJ |   |  }t |trt |jjtr|jj}	|	 }
|
d usgJ t	j
d t	j|
|	 |	 d|	d}|  | }n| }|d ur|d dkp|d uo|d dk}tdd |D rt| ||||S | ^ }}
|  ^ }| |   t| }tfd	dtd d D tfd
dtd d D 		 }|dkrt| ||||S |  	
fdd}tj|  |  ||d}|r.t	j|S |S )Nr   r*   r(   r  )rp   r   r   r]  c                 s  s    | ]}|d kV  qdS r  r_   r  r_   r_   r`   r     r   z3max_pool2d_with_indices_backward.<locals>.<genexpr>c                 3  <    | ]}t |d   t d | d   d    dV  qdS r   r*   Nrq  r   r  r  r  r_   r`   r     
    *
c                 3  <    | ]}t |d   t d| d   d    d V  qdS r*   r   Nr  r   wr  r_   r`   r     r  r  c                   sV  | ^ }}}t |
 | tj}|d  }|d  }t t|d  d  d tj}t t|d  d  d tj}t t|d d tj}t t|d d tj}t |t dtj}t |t dtj}t |t tj}t |t tj}d }	tD ]}
t	D ]}t 	|t |
tj}t 	|t |tj}g |t j
t |t |t dtjd ddt j
t |t |t dtjd dd}|} |}t ||}|	d u rt ||t dtj}	qt t t ||t |||}t |t 	|	||	}	qq|	d us)J |	S )Nr   r*   Fr@  rB  r!  )rN   r"  rx   r  r%   rA  r$  rB  r  r   r  r^  r  r  r?  r'  r&  )r  r  r  r"  
index_testphstartpwstartphendpwendgradientph_pw_phpw
grad_indexindex_actual	grad_partr   r.  grad_loaderh_window_sizer  indices_sizer  r  pooled_heightpooled_widthr  w_window_sizewidthr_   r`   rs     sl     


#z,max_pool2d_with_indices_backward.<locals>.fnrU  )rf   r?   r   r   r  r  rh   r;   ri   r-   rO  r1  r   decide_layoutr  r  )fallback_max_pool2d_with_indices_backwardrE  rd   rq  r  r   rS  r  )grad_outputr   r  r  r  r  r  r  	gO_striderh   rp   x_bufferx_strideis_channels_last_batch_heightr   r  r  rs   rk   r_   r1  r`    max_pool2d_with_indices_backwardw  s   	

 ;rB  r!  c                   s   |    fdd}|S )Nc              
     s   |\|\ |\}}t t t   tjt |tjt t  tjt |tj}t | fddS )Nc                     s   g    S r   r_   r_   )h_start_indexr  iwr  w_start_indexr  r_   r`   r    r%  z3pad_adaptive_loader.<locals>.load.<locals>.<lambda>)rN   r'  r&  r"  rx   r#  r(  )r  
incrementsstart_indicesend_indicesh_end_indexw_end_indexr.  pad_valr  )rC  r  rD  r  rE  r`   rH    s$   z!pad_adaptive_loader.<locals>.loadrD  )r   rL  rH  r_   rK  r`   pad_adaptive_loader	  s   rM  c           
      C  sL   t j| ||d}t j|||d}t j| ||d}t j|||d}	||||	fS )N)out_diminp_dim)r  r"  )
start_index	end_indexh_inw_inh_outw_outrC  rI  rE  rJ  r_   r_   r`    compute_indices_adaptive_pooling%  s
   rV  c                   sD   |\}}|\}}	t | |||||	\  fdd}
|
S )Nc                   s   | ^ }}}|} |}|}|}d }	t td td D ]\}
}|||
|g||g||g}|	d u r>|}	q&||	}	q&|	S r  )r  productr  )r  r1  r  r  bwrC  rI  rE  rJ  r  r  rD  r  h_end_index_fnh_start_index_fnkernel_maxes
pooling_fnw_end_index_fnw_start_index_fnr_   r`   rs   >  s"   $z _adaptive_pooling_fn.<locals>.fnrV  )rP  rQ  r\  in_sizes	out_sizesr]  rR  rS  rT  rU  rs   r_   rY  r`   _adaptive_pooling_fn/  s   rc  c           
        sF   |\}|\}}t | ||||\  fdd}	|	S )Nc                   s   | ^ }}}|} |}|}|}d }	d }
t td td D ]>\}}||||g||g||g}t||  | | tj}|
d u rO|}
ntt||	||
}
|	d u ra|}	q(||	}	q(|
S r  )	r  rW  r  rN   r"  rx   r#  r  gt)r  r1  r  r  rX  rC  rI  rE  rJ  maxvalmaxindexr  rD  r  r4  rZ  r[  r\  r]  r^  rS  r_  r_   r`   rs   g  s0   $z)_adaptive_pooling_fn_with_idx.<locals>.fnr`  )
rP  rQ  r\  ra  rb  r]  rR  rT  rU  rs   r_   rg  r`   _adaptive_pooling_fn_with_idxX  s   #rh  c                   s    tjkrtdttsJ t|dksJ    ^ }}}t	j
j|}t	j
j|}|\}}||krD||krDtS |dksL|dkr_g |||}t|   dS || dkrx|| dkrx|| || g}t|S t|| d |}	t|| d |}
t|||g }  }|	|
 }|dkrt|S dd }d	d
 }t|||	|
g||g||gtjd tt fdd}tj |||d}|S )Nz0'adaptive_avg_pool2d' not implemented for 'Long'r(   r   r  r*   r  c                 S     t | | |S r   r%   r4  rN  rO  r_   r_   r`   rP       z)_adaptive_avg_pool2d.<locals>.start_indexc                 S     t | d | | d |S r  rj  rk  r_   r_   r`   rQ    r  z'_adaptive_avg_pool2d.<locals>.end_indexrP  rQ  r\  ra  rb  r]  c                   s   t  | t | S r   )rN   truedivrM  r  fn_sumones_loaderr   r_   r`   rs     s   z _adaptive_avg_pool2d.<locals>.fnrU  )r   rx   r#  r!  rf   r?   r   r  r   rO   rW   r   r  r  r  ri   
avg_pool2drB   rd   fallback_adaptive_avg_pool2drc  rN   r   rM  	ones_liker;   r   )r   r5  r  rR  rS  rT  rU  o_sizer  h_kernel_maxw_kernel_maxr  r   r  rP  rQ  rs   rvr_   rp  r`   _adaptive_avg_pool2d  sV   

	rz  c                   s    tjkrtdttsJ t|dksJ    ^ }}}t	j
j|}t	j
j|}|\}}|dks@|dkr]g |||}t|   dt|tj dfS || dkrk|| dkrktt|| d |}t|| d |}	t|||g }
  }||	 }|dkrt|S dd }d	d
 }t||||	g||g||gtjdt||||	g||g||gtjd fdd} fdd}tj |||
d}tj tj||
d}||fS )Nz,adaptive_max_pool2d not implemented for Longr(   r   r  r*   r  c                 S  ri  r   rj  rk  r_   r_   r`   rP    rl  z(adaptive_max_pool2d.<locals>.start_indexc                 S  rm  r  rj  rk  r_   r_   r`   rQ    r  z&adaptive_max_pool2d.<locals>.end_indexrn  c                       | t tdS Nr  rM  r1  r  )inner_func_max_valr   r_   r`   inner_fn_max_val  r  z-adaptive_max_pool2d.<locals>.inner_fn_max_valc                   r{  r|  r}  r  )inner_func_max_idxr   r_   r`   inner_fn_max_idx  r  z-adaptive_max_pool2d.<locals>.inner_fn_max_idxrU  )r   rx   r#  r!  rf   r?   r   r  r   rO   rW   r   r  r  ri   
ValueErrorrB   rd   fallback_adaptive_max_pool2drc  rN   rA  rh  r;   r   )r   r5  r  rR  rS  rT  rU  rv  rw  rx  r  r   r  rP  rQ  r  r  ry  rir_   )r  r~  r   r`   adaptive_max_pool2d  sp   
		r  c                   s<            fdd}|S )Nc           
        sX    }t|dkr@t| dkrd| d d   g}n.t| dkr4| d | d d   g}nddd   g}ng | d   }t| }t tj}td tj}tt|tj	t|tj	}t
t|dd|}t|| | t||  }t|tj}t||}	tt
|	||tS )Nr   r*   r   r(   )r   r   rN   r"  r   rx   r#  ro  r   float64r  r  r  r&  r  r   r  )
r  rm   samples_shapesamplei_exprdiffout_sz_exprr>  seq_ir.  r   in_sz	kernel_szndimsout_szsamplessamples_loaderr_   r`   rH  5  s&     z)_fractional_pooling_offsets.<locals>.loadrD  )r  r  r  r  r   r  rH  r_   r  r`   _fractional_pooling_offsets/  s   #r  c                 C     t | |||ddS r  _fractional_max_poolr   r  r5  random_samplesr_   r_   r`   fractional_max_pool2d[  r  r  c                 C  r  r  r  r  r_   r_   r`   fractional_max_pool3d`  r  r  c                   s\  |    | jd   | j d  }tjdd fddtD  |  fdd} fddt|t }|  }tj	d	| | 
 ||||d
}	tj	d| | 
 tj|||d
}
t|	tsrJ |	t|	jjtr}|	  t|
tsJ |
t|
jjtr|
  t|
| j}|	|fW  d    S 1 sw   Y  d S )Nr  r  c              
     s    g | ]}t  |d qS ))r  r  r  r  r  r   )r  r  )inp_dhwr  r  r5  r  r_   r`   r   j  s    	z(_fractional_max_pool.<locals>.<listcomp>c                   s$   | d   }g | | |S r   r_   )r  r  r  )r  r  r  r_   r`   r  x  s   z&_fractional_max_pool.<locals>.fn_innerc                   s8   | d   |  d    fddt D S )Nc                   s&   g | ]}|  | |  qS r_   r_   r  )bdhwdhw_index_fnr  r  r_   r`   r     s    zE_fractional_max_pool.<locals>.increments_to_index.<locals>.<listcomp>r  r  )r  r  )r  r  r  r`   r  |  s
   z1_fractional_max_pool.<locals>.increments_to_indexrq  r  r  )r  r  r+   r   r  rE  rd   r   r<   r   ri   rx   r#  rf   r?   rh   ri  r  )r   r  r5  r  r  r  r  r  r   r  r  r  r_   )r  r  r  r  r  r5  r  r  r`   r  e  sT   "	


$r  c                   s       ^ }}}tjj|}tjj|}|^ }}}	|| dkr9||	 dkr9t|| ||	 gddS t||}
t||	}dd fdd}t||
|g||g||	gt	j
d  fd	d
}tj  |t|d}|S )Nr   r*   )divisor_overridec                 S  s   t | | t|S r   )r$   r   r  rk  r_   r_   r`   rP    r  z0upsample_nearest2d_backward.<locals>.start_indexc                   s    | d ||S r  r_   rk  )rP  r_   r`   rQ    r~  z.upsample_nearest2d_backward.<locals>.end_indexrn  c                   s    | t S r   )rM  r  )rq  r   r_   r`   rs     rl  z'upsample_nearest2d_backward.<locals>.fnrU  )r  r   rO   rW   r   r  rs  rB   rc  rN   r   r;   r   ri   r   rd   )r   r5  r  r  r  r@  inp_hinp_wout_hout_wrw  rx  rQ  rs   ry  r_   )rq  rP  r   r`   upsample_nearest2d_backward  s6   

	r  r_   c              
   C     t | ||||||ddS )Nr(   r  _avg_poolndr   r  r  r  r  count_include_padr  r_   r_   r`   rs       
rs  c              
   C  r  )Nr   r  r  r  r_   r_   r`   
avg_pool3d  r  r  c              	     s  
s
sdg t t 

t t| ts!J tks)J t
ks1J tks9J t|  d d fv sIJ |   |  d   }|   d  t 
fddtD  \}	}
tszt|
rt	| ddd}n| 
 d	}t|t|	 }|  ttj}|d
krdkrt}ndkrt}ntd || 
 |S 
fdd|r|r|r|n|jrd 		fdd}nfdd}n
f	dd}tj|  ||d}|S )Nr   r*   r(   c              	     s"   g | ]}t | | qS r_   r  r   )r  r  r  r  r  r_   r`   r   (  s    z_avg_poolnd.<locals>.<listcomp>r!  r  TFr  r   zUnknown dim: c                   s   | d   }|  d   d }t jfddtD  D ]% fddtD }|g ||}|d u r=|}qt||}q|S )Nc                   s   g | ]}t  | qS r_   r  r   )r  r_   r`   r   P  r  z/_avg_poolnd.<locals>.fn_sum.<locals>.<listcomp>c                   s,   g | ]} | |  |  |  qS r_   r_   r   )r-  r  r  r  r_   r`   r   Q  s   , )r  rW  r  rN   r   )r  r1  r  totalr   r  )r   r  r  r  )r-  r  r`   rq  L  s    z_avg_poolnd.<locals>.fn_sumc                   s   t | t  S r   )rN   rV  r$  r  )r   rq  rD  r  r_   r`   rs   ^  r  z_avg_poolnd.<locals>.fnc                   s   t | t  S r   )rN   truncdivr$  r  )divisorr   rq  r  r_   r`   rs   c  rw  c                   s   |  d  }g }t D ]<}|| |  |  }t||  | |  } s;t|d}t|| }t|| tj}|| qt	
tj|}jr]t| |S t| |S r  )r  r   MinMaxrN   r"  rx   r  rj   r  r  rV  r  ro  r  )r  r  divide_factorsrm   hstarthendfactordivide_factor)	r  r   r   rq  r  r  r  r  r  r_   r`   rs   i  s    rU  )rI   rf   r?   r   r   r  r  r  r  r  rE  rd   r   r  r  r  rV  fallback_avg_pool2dfallback_avg_pool3dr  r  r;   r   ri   )r   r  r  r  r  r  r  r   r  rT  
ceil_modeshad_paddingr  r  fallbackrs   ry  r_   )r  r  r   r  r   rq  r  r  r  rD  r  r  r`   r    st   




 
r  c                   s  d u sdksJ dssddgt | tsJ t |ts$J tdks,J tdks4J tdks<J t| dv sFJ |   | ^ }td|\}	}
td|\}}|  d pwd pw|
pw||  ^ }	
t| }| }t	fddt
d d D t	fddt
d d D  }|d	krt| ||S fd
d  	
fdd}tj|  |||d}|S )Nr   divisor must be not zeror(   r  r*   c                 3  r  r  r  r  r  r_   r`   r     r  z&avg_pool2d_backward.<locals>.<genexpr>c                 3  r  r   r  r!  r  r_   r`   r     r  r  c              	     sX  t d tj}t d tj}t d tj}t d tj}t d tj}t d tj}t t | ||}t t |||}	t t ||t t  tj|}
t t |	|t t tj|}t 	|t dtj}t 	|	t dtj}	t |
t  tj}
t |t tj}t t |
|t ||	}|S )z{
        This computes the scaling factor that we will divide an element
        by when `count_include_pad=False`
        r   r*   )
rN   r$  rx   r  r^  rV  rB  r   r"  rA  )r,  r-  stride_hstride_wpad_hpad_wkernel_hkernel_wr  wstartr  wendr  )heightr  r  r  r8  r_   r`   !compute_pool_size_without_padding  s,   

z>avg_pool2d_backward.<locals>.compute_pool_size_without_paddingc                   sR  | ^ }}}|d  }|d  }t t|d  
d  
d tj}t t|d  
d  
d tj}t t|
d d tj}t t|
d d tj}t |t dtj}t |t dtj}t |t tj}t |t 	tj}d }tD ]}	tD ]}
t 	|t |	tj}t 	|t |
tj}d ur}nssd d  }n ||}t 
g |t jt |t |t dtjddt jt |t |t dtj	dd|}t t ||t ||}|d u rt ||t dtj}qt |t 	|||}qq|d us'J |S )Nr   r*   Fr@  r!  )rN   r"  r%   rx   r  rA  r$  rB  r  r   ro  r  r^  r'  r&  r  r?  )r  r  r  r"  r%  r&  r'  r(  r)  r*  r+  r,  r-  rD  partr.  )r  r  r  r2  r3  r  r  r  r5  r6  r  r7  r_   r`   rs     sv     
	


*zavg_pool2d_backward.<locals>.fnrU  )rf   r?   r   r   r  r  rE  rd   r   rq  r  fallback_avg_pool2d_backwardr;   r   ri   )r;  r   r  r  r  r  r  r  r   _h_out
ceil_mode1_w_out
ceil_mode2r  r   r  rs   ry  r_   )r  r  r  r2  r3  r  r  r  r  r5  r6  r  r7  r8  r`   avg_pool2d_backward  sb   "Ar  c                   s  d u sdksJ ds	
sg d
t | tsJ t |ts$J t	dks,J tdks4J t
dks<J t| dv sFJ |   | ^ }td	
|\}	}
td	
|\}}td	
|\}}|  t
p|
p|p||  ^ }t| }|	 }	fdd	t
dD \  }|d
krt| |	
|S 	
fdd  	
fdd}tj|  |||d}|S )Nr   r  )r   r   r   r   )r   r   r*   r(   c                 3  s6    | ] t  fd dt  d D V  qdS )c                 3  s<    | ]}t |   t d |       dV  qdS r  r  r  )rm   r  r  r_   r`   r   c  r  z0avg_pool3d_backward.<locals>.<genexpr>.<genexpr>r(   N)rq  r  )r   r  )rm   r`   r   b  s    
z&avg_pool3d_backward.<locals>.<genexpr>}   c              	     s  dd D \}}}dd D \}}}dd D \}	}
}dd t | ||g|||g|||gD \}}}dd t |||g|	|
|g g|||gD \}}}dd |||fD \}}}dd t |||g gD \}}}ttt||t||t||}|S )	Nc                 s      | ]
}t |tjV  qd S r   rN   r$  rx   r  r   r_   r_   r`   r   y  r   zQavg_pool3d_backward.<locals>.compute_pool_size_without_padding.<locals>.<genexpr>c                 s  r  r   r  r  r_   r_   r`   r   z  r   c                 s  r  r   r  r   r_   r_   r`   r   {  r  c                 s  s*    | ]\}}}t t |||V  qd S r   )rN   r^  rV  )r   r9  r   padr_   r_   r`   r     s
    
c              
   s  s>    | ]\}}}}t t ||t t |tj|V  qd S r   )rN   rB  r   r"  rx   r  )r   r  r   r   r  r_   r_   r`   r     s    

c                 s  &    | ]}t |t d tjV  qdS r  rN   rA  r$  rx   r  )r   r  r_   r_   r`   r     
    
c                 s  *    | ]\}}t |t |tjV  qd S r   rN   rB  r"  rx   r  )r   r  r   r_   r_   r`   r     
    
)r  rN   rV  r^  )pdr,  r-  stride_dr  r  pad_dr  r  kernel_dr  r  dstartr  r  dendr  r  r  )depthr  r  r  r  r8  r_   r`   r  x  s8   $z>avg_pool3d_backward.<locals>.compute_pool_size_without_paddingc                   sJ  | ^ }}}}dd t |||gD \}}}dd t |||gD \}}}dd t |||gD \}}	}
dd |||fD \}}}dd t ||	|
g	
gD \}}	}
d }tD ]}tD ]}tD ]}dd t |||g|||gD \}}}d ur}nssd d	  d
  }n |||}tg |tjt|t|td	tj		ddtjt|t|	td	tj	
ddtjt|t|
td	tj	dd|}t
t
t||t||	t||
}|d u rt||tdtj}qjt|t|||}qjqdq^|d us#J |S )Nc                 s  s    | ]	\}}|| V  qd S r   r_   )r   r   r  r_   r_   r`   r     r  z2avg_pool3d_backward.<locals>.fn.<locals>.<genexpr>c                 s  s2    | ]\}}}t t|| | |tjV  qd S r   rN   r"  r%   rx   r  )r   r   r   r   r_   r_   r`   r     s
    
c                 s  s,    | ]\}}t t||d  tjV  qdS r  r  )r   r   r   r_   r_   r`   r     s
    
c                 s  r  r  r  )r   pstartr_   r_   r`   r     r  c                 s  r  r   r  )r   pend
pooled_dimr_   r_   r`   r     r  c                 s  r  r   )rN   r   r$  rx   r  )r   r  p_r_   r_   r`   r     r  r   r*   r(   Fr@  r!  )r  r  rN   ro  r  rB  r^  r$  rx   r  r'  r&  r  r?  r   )r  r  r  r  r"  pdstartr%  r&  pdendr'  r(  r)  pd_r*  r+  r  r,  r-  rD  r  r.  )r  r  d_window_sizer  r2  r3  r  r  r  pooled_depthr5  r6  r  r7  r_   r`   rs     s    	

8zavg_pool3d_backward.<locals>.fnrU  )rf   r?   r   r   r  r  rE  r  rd   r   r  fallback_avg_pool3d_backwardr;   r   ri   )r;  r   r  r  r  r  r  r  r@  _d_outceil_mode_dr  ceil_mode_hr  ceil_mode_wr   r  r   r  rs   ry  r_   )r  r  r  r  r  r2  r3  r  r  r  r  r  r5  r6  r  r7  r8  r`   avg_pool3d_backward5  sf   &%Wr  c                 C  s   |   }t|tr|g}n|stt|}t|dkr*t|dv s(J d| g S t|}tt|D ]5}|| dk rL||  t|rHt|nd7  < d||   krZt|k sin t|dkrg|| dksiJ q4tt|t|ksxJ d|S )Nr   )r_   rl  r  zinvalid axis: r*   zreduction axis not unique)r   rf   r   r  r   r   rd   r#   )r   r6  r   rm   r_   r_   r`   _validate_reduction_axis  s    
 :r  c          
        s   |d ur	t | |} |  tt t| |}g }g g }g ttD ]}||v r7| ||  q$| ||  q$ fdd}r`t}	D ]}t	j
j|	|< qVn|}	|   t|  |po|  |  ||	|dS )Nc                   s   t |t ks
J rt  t ksJ  fddD  t  t ks)J d gt  t |  }tt t|D ]\}}|||< q@|S )Nc                   r  r_   r_   r   rI  r_   r`   r   %  r   z9_make_reduction_inner.<locals>.loader.<locals>.<listcomp>)r   r  r  r  )r4  reduction_indexr-  r  varinner_loaderkeepdimskept_idxreduced_idxr   rI  r`   r1  !  s   
z%_make_reduction_inner.<locals>.loader)rp   r  r{  rT  rS  r  )r   r   r#   r   r  r  r   rj   rd   r   r'  r(  rE  r  ri   r   )
r   r6  r  r   r:  
kept_sizesreduced_sizesrm   r1  r  r_   r  r`   _make_reduction_inner  s<   



r  r  rP   c                   s   dd d fdd}|S )NFr   c                  sB   t | ||| d}tjd| d|}t|jjtr|  |S )Nr6  r  r   r:  )r  r  r_   )r  r<   r   rf   rh   ri  )r   r6  r  r   r   r  r:  r  r_   r`   ra  A  s   zmake_reduction.<locals>.innerr   r_   )r  r:  ra  r_   r  r`   make_reduction@  s   r  c                C  sB   |d ur	t | |} t| |}t|  |  f|  f|  |dS )N)rp   dtypes	inner_fnsr   r6  )r   r  r  ri   r   rE  r   )r   r6  r   r_   r_   r`   _make_scan_innerT  s   

r  r   c                  s   |d ur	t | |} |   t| |}|  }|tjtjfv r$t | tj} t| ||}t	 fdd|D }t
j||  |  d}t|t| }t t|||S )Nc                 3  r   r   r_   r   r   r_   r`   r   m  r   zmean.<locals>.<genexpr>r3  )r   r   r  r   rx   rY  rX  r1  sum_rL   r-   r4   ri   r3   r   rd   div)r   r6  keepdimr   output_dtype
sum_resultdenomr_   r  r`   r  b  s   

r  c           
        s   |d u rd}|    t| |}t| |dd}|r|  tt| |}t|||}t fdd|D }|r>t	|| d}t
j||  |  d}t|t|  }t||}	|s^|	fS |rb|nt||}|	|fS )Nr*   T)r  c                 3  r   r   r_   r   r  r_   r`   r     r   z var_mean_sum_.<locals>.<genexpr>r   r3  )r   r  r  ri  squarer^  r   rL   r   r  r-   r4   r   ri   r3   r   rd   r  r  )
r   r6  
correctionr  return_meanx_meandiffsr  r  x_varr_   r  r`   var_mean_sum_s  s&   

r  c                 C  sV   t | |}t| ||d d d}|d }t|d }t|tjo*t|tjk o*t|dkS )Nr  rS  r  r*   )	r  r  rL   rf   r   r  r   r+   r  )r   r6  r  r   rS  reduction_numelr_   r_   r`   use_two_step_variance  s   


r  c                  s    d u rd t | ||d d d}|d}|d |d tjjd|fd|  d|\}}}	|  |  |  t| |}t	fdd	|D d
d  fdd}
t
|
|}|rj|  ||fS |fS )Nr*   r  rT  r  r{  welford_reduce)r  r  r   c                 3  r   r   r_   r   r  r_   r`   r     r   z$var_mean_welford_.<locals>.<genexpr>c                 S  s4   t | tjr| jstt| tj|S t	| |S r   )
rf   r   r   	is_numberrN   r   r"  rx   r#  r$  r  r_   r_   r`   get_constant_or_index_expr  s   z5var_mean_welford_.<locals>.get_constant_or_index_exprc                   s4    }}t d}| t |||  S r  )rN   r$  rA  )rh   cNzero)r  r   r  rnumelr_   r`   r    s   

z#var_mean_welford_.<locals>.scale_fnr_   )r  rL  r-   WelfordReductionr   r   ri  r   r  rL   rb  )r   r6  r  r  r  r   r1  r  m2r   r  r  r_   )r  r   r  r  r   r`   var_mean_welford_  s6   




r  c                  s   |    t }t| |dd} t| ||||d}t| ||dr&tdi |ntdi |}t fdd|D }|s>|d S |S )	NFr  )r   r6  r  r  r  )r6  r  c                 3  s    | ]
}t | d dV  qdS )Fr  Nry  r   rY  r_   r`   r     r   z#var_mean_helper_.<locals>.<genexpr>r   r_   )r   r   r   r  r  r  r  r   )r   r6  r  r  r  compute_dtyper   r.  r_   rY  r`   var_mean_helper_  s    	r  )r  r  c                C  r  )NFr6  r  r  r  r  r   r6  r  r  r_   r_   r`   var_     
r  c                C  r  )NTr  r  r  r_   r_   r`   var_mean  r  r   c                 C  st   |dk rt t| | |S |dkrtd|S |dkr| S t | |d |}t||}|d dkr8t|| }|S )Nr   r*   r(   )pow_recursiverN   r@  r$  rV  )r   r/  r   r  r_   r_   r`   r!    s   r!  c                 C     t | |S r   )rN   powr   r-  r_   r_   r`   
pow_native  r  r%  )r   c                   sV  t trtkrt tS t trdkrt S t tr,dkr,t S tdd  fD }t|}t toQd  k oIdk n  pQ|oQdk}|ro   fdd	}t	j
    |  d
S t  tr dkr}tdS  dkrt rtS |rt  trt S t trt S t S t S )Nr  r*   c                 s  s$    | ]}t |tjr| V  qd S r   )rf   r-   r?   r   r   r_   r_   r`   r     r  zpow.<locals>.<genexpr>i    r   c                   s   t |   S r   )r!  r   r  r   r-  r1  r_   r`   rs     r  zpow.<locals>.fnrU  r(   )rf   r1  r   r#  sqrtr  r7  r   rE  r;   r   ri   r   r   r   r  r   exp2fallback_pow_scalarfallback_pow_tensor_scalarfallback_pow_tensor_tensorr%  )r   r-  r   is_integer_powembed_exponentrs   r_   r'  r`   r#    s@   
"







r#  c                 C  s   t | tr	| j}n| }t |tr|j}t |tjs>tj|  |  |	 | 
 d}t |ttfs3J |j}t |tjs>J t |tjr]| s]| s]t |jtjs]|  |j|_| S tjj|||d | S )NrU  unsafe_alias)rf   r?   rh   r-   r   r;   r   ri   r   rE  r   r1   r7   is_input_bufferis_module_buffer	NopKernelri  r`  realize_into)changedr  r0  changed_datar]   r_   r_   r`   r  7  s:   

r  c                 C  s   t | t| |S r   )r  r  )r   r  r_   r_   r`   r  \  r  r  c                 C  @   | |u r| S t ||  }t||  }t||  }t| |S r   r  ri   r   r   r+  r   r  )r  r  r  r_   r_   r`   rs  a     
rs  c                 C  r"  r   )rN   floordivr$  r_   r_   r`   r:  l  r  r:  c                 C  r"  r   )rN   r  r$  r_   r_   r`   r  q  r  r  c                 C  s   t | ot |}t| ot|}|dkr(|rJ d|r!t| |S tt| |S |dkr@|r2J d|r9t| |S tt| |S t| |S )Nr  z5floordiv operands can not be boolean at the same timer  z5truncdiv operands can not be boolean at the same time)r   r   r:  r  r  r  r  )r   r-  rounding_modeboth_integerboth_booleanr_   r_   r`   div_modev  s   
r>  c                 C  s8   t | ot |}|rt| |S ttjj}t|| |S r   )r   logical_andr9   r  rV  r  rb  )r   r-  	both_boolrs   r_   r_   r`   rV    s
   
rV  r{  Optional[ir.Constant]c              	   C  s   t | tjrt| jS t | tjrt|  S t | tjr| S t | tjs'dS t	j
j|  }t|' ttjdd | j|   }W d   n1 sPw   Y  W d   n1 s_w   Y  t |t	j
jjsnJ t |jtjrx|jS dS )z:Try convert an arbitrary IR node into an ir.Constant valueNallow_indexingT)rf   r-   r7   get_constant_valuerh   r1   r  r   Loopsrx   	_inductorops_handlerExtractConstantsHandlerri   rO   set_ops_handlerr   objectr1  rT  inner_fn_argsvirtualizedOpsValuer   )r   r  rk   r_   r_   r`   rC    s*   
 rC  c                 C  s   t dd | |fD }|rt| |S t| }d ur:|  jdkr:|jdkr0ttd|j}nd|j }t	| |S dd }t
|| |S )	Nc                 s  s     | ]}t |pt|V  qd S r   )r   r   r   r_   r_   r`   r     rx  zdiv_prim.<locals>.<genexpr>r   r   infrb  c                  W  r  r   )rN   ro  r   r_   r_   r`   rs     r  zdiv_prim.<locals>.fn)r  r  rC  ri   r   r   mathcopysignr1  rV  rb  )r   r-  is_integralr  r@  rs   r_   r_   r`   div_prim  s   



rQ  c                 C  s    t | |ftjd\} }t| |S r  )r<  r   INT_TO_FLOATrQ  r$  r_   r_   r`   r    s   


r  c                 C  s4   t | pt| }|rdd }ndd }t|| |S )Nc                 S  r"  r   )rN   modr$  r_   r_   r`   rs     r  zfmod.<locals>.fnc                 S  r"  r   )rN   fmodr$  r_   r_   r`   rs     r  )r   r   rb  )r   r-  rP  rs   r_   r_   r`   rT    s
   
rT  c                C  B   t |  st|  r|d u rtj}td|d}|| |||dS )Nr  r  r   r   r   r   rx   r#  r  r   r6  r  r   rs   r_   r_   r`   r        

r   c                 C     t |  st|  r|d u rtj}t|  dkr.|dv s!J |p&|  }t| |ddS dd }t| ||d}t	j
jd
i |d|i\}|d u rRt| ||d	S |S )Nr   r   rB  Tr  c                 S     | \}|\}t ||fS r   )rN   r   a_tupleb_tupler   r-  r_   r_   r`   
combine_fn     zcumsum.<locals>.combine_fnr6  r   r_  r   r   r_   )r   r   r   rx   r#  r   r   r   r  r-   Scanr   fallback_cumsumr   r6  r   r_  r   r  r_   r_   r`   cumsum      

rf  c                 C  rY  )Nr   rZ  Tr  c                 S  r[  r   )rN   rV  r\  r_   r_   r`   r_    r`  zcumprod.<locals>.combine_fnra  r_  rb  r_   )r   r   r   rx   r#  r   r   r   r  r-   rc  r   fallback_cumprodre  r_   r_   r`   cumprod  rg  ri  c                 C  sv   dd }|   }t|  dkr|dv sJ t| S t| ||d}tjjdi |d|i\}|d u r9t| |dS |S )	Nc              	   S  s\   | \}|\}t ||}t ||}||kt | B }t |t t || | |fS r   )rN   rB  rA  r  r  log1pexp)r]  r^  r   r-  min_vmax_vr.  r_   r_   r`   log_add_exp_helper(  s   $z(logcumsumexp.<locals>.log_add_exp_helperr   rZ  ra  r_  r  r_   )	r   r   r   r  r  r-   rc  r   fallback_logcumsumexp)r   r   rn  r   r   r  r_   r_   r`   logcumsumexp&  s   rp  c                      t |  dkr dv sJ t| t| tjdfS |  }tjd|dd}t	|  |d}|tjf|d< | 
  fd	d
f|d< tjjdi |d|i\}}|d u rXt|  dS ||fS )Nr   rZ  r   r  Fr   arg_break_ties_leftra  r  c                      t |   tjS r   rN   r"  rx   r#  r  r6  r_   r`   r  K      zcummax.<locals>.<lambda>r  r_  r  r_   )r   r   r  r  rx   r#  r   r-   get_reduction_combine_fnr  rE  rc  r   fallback_cummaxr   r6  r   r_  r   r  r  r_   rv  r`   cummax<      
r{  c                   rq  )Nr   rZ  r   argminFrr  ra  r  c                   rt  r   ru  r  rv  r_   r`   r  b  rw  zcummin.<locals>.<lambda>r  r_  r  r_   )r   r   r  r  rx   r#  r   r-   rx  r  rE  rc  r   fallback_cumminrz  r_   rv  r`   cumminS  r|  r  c                C  rU  )Nr  r  r   rV  rW  r_   r_   r`   r  j  rX  r  c                 C  s   t | tj} td| ||dS )Nr  r6  r  )r   rx   r   r  r   r   r  r_   r_   r`   
reduce_anyu  s   r  c                 C  2   |d urt | ||dt| ||dfS t | d |dS Nr  )reduce_amaxreduce_argmaxr  r_   r_   r`   
reduce_max{  
   r  c                 C  r  r  )reduce_aminreduce_argminr  r_   r_   r`   
reduce_min  r  r  xor_sumrq  rp  r  r  r}  
logical_or)r^  rR  stabler   
descendingc          
   	   C  s:  |d u rd}|   }|  }tt||}t|dkr't| td|tj|fS t|r/|| nd}tj	j
|ttjjsFt| |||dS t|ddtj|dd}dgt| }t|r`|||< t||}t||}tjj|| j|jf|  | f||||d\}	}|	d u rt| |||dS |d usJ |	t|tjfS )NFr   r*   r  )r  r	  r   rp   r  )rp   r  r  r   r6  r  r  )r   ri   r   r   r  r  rx   r#  rO   rW   r   statically_known_ltr  int16rq  sort_fallbackr  r  r+  r-   Sortr   r   rE  r   )
r   r  r   r  r  rp   r  r  
view_shaper  r_   r_   r`   sort_stable  s>   



	r  c                 C  s   t | d||dS )NFr  )r  )r   r   r  r_   r_   r`   sort  r  r  c                 C  s   t | |tj|dS )Nr   r   r`  )r  r   rR  )rZ   r   r`  r_   r_   r`   register_pointwise_numeric  s   r  rZ   torch._ops.OpOverloadPacketc                 C  s   t | j t| tjdS r  )rK   r  r  r   rR  r  r_   r_   r`    register_pointwise_numeric_ldf64  s
   
r  r  logical_not)rR  )r   r   r:  identity)rg  pointwise_overrides_datac                 #  s    t |  t|  jd }|d u rd S  fdd}t|tjjr6| D ]}t||}| j||fV  q#d S | j||fV  d S )Nc                   s    j d u r	t| S d S r   )tritonr  r  r  r_   r`   make_triton_fallbackE  s   
z6_get_pointwise_overrides.<locals>.make_triton_fallback)	r  r   r   rf   rx   ry   r   r   r   )nsr   rZ   r  olnamer#  r_   r  r`   _get_pointwise_overrides?  s   
r  r  c                   s,   | t |< t|   fdd}t| | d S )Nc                    sB    | i |}g }t | d |D ]\}}|t||dd q|S )Nr   Tr/  )r  rj   r  )rn   r   resultsmut_resultsr   r  outplace_opr_   r`   rs     s
   z$register_foreach_inplace.<locals>.fn)rU   re  r   r  )aten_opoutplace_aten_opr  rs   r_   r  r`   register_foreach_inplace  s   
r  c                   s   t | d d fdd}|S )Nr  c                    s.    | i |}t || d  }t| d |S r  )r   r   r  )rn   r   r  r  r_   r`   rs     s   zregister_inplace.<locals>.fn)r#  )r  r  rs   r_   r  r`   register_inplace  s   
r  c                 C  r  r   r_   ro  r_   r_   r`   sym_constrain_range  r  r  c                 C  0   t jjjd }t|tjsJ d| |jjS Nr  z*Expect val to be torch.SymInt but got val=	rO   rW   rX   rZ  rf   rx   r  r]   r  r   r   r  r_   r_   r`   sym_size  s
   r  c                 C  r  r  r  r  r_   r_   r`   
sym_stride  s
   r  c                 C  s   |   S r   )r  )r   r_   r_   r`   	sym_numel  rZ  r  c                 C  r  r   )r   Addr   r_   r_   r`   sym_sum  rW  r  c                 O  rX  )NzHelpful for debuggingr   )rz  rn   r   r_   r_   r`   foobar  rZ  r  c                 C  s   |    t| S r   )ri  r  r   r_   r_   r`   _realize  s   r  c                 C  s   |    t| | | S r   )ri  r-   ResizeStorageBytes)variabler  r_   r_   r`   resize_storage_bytes_  s   r  c                 C  s"   |    |   tt| |S r   )ri  r?   r   r-   SetSourceTensorKernel)rz  source_tensorr_   r_   r`   set__source_tensor  s   r  c                 C  r7  r   r8  )r  r  r_   r_   r`   
fsdp_copy_(  r9  r  c          	        sv  t | tsJ t |ttfsJ |d u rtj}|tjkr#td| |tjkr0t	|dks0J |tj
kr=t	|dks=J |  |  }|  }t | jtjrV| j | _t rvtjjjrvt|rhtdnt|rst|jndndtjjdrt|||dS t | gd	g}|!  tj"#||}t$||||%  fd
d}t&j'|||t|d}|S )Nzunsupported memory format: r   r   nanTr!  r   r  r*   c                   sH   |  t  tj}t tj}t ||}t | fddS )Nc                     s
    gS r   r_   r_   )
flat_indexflat_loaderr_   r`   r  l  s   
 z*resize.<locals>.inner_fn.<locals>.<lambda>)rN   r"  rx   r#  r&  r(  )r  flat_index_exprlimitr.  r  	old_numelout_indexeruninitialized_val)r  r`   rT  g  s
   zresize.<locals>.inner_fnrU  )(rf   r?   rd   r   rx   contiguous_formatpreserve_formatr!  channels_lastr   channels_last_3dr  r   r8  rh   r-   r1   r  r^  utilsdeterministicfill_uninitialized_memoryr   r1  r   r  rq  rO   rW   r   r  r  r  rE  r1   stride_ordered_for_memory_formatr  r3  r;   r   )	r   r   r  r   rp   x_flat
out_striderT  rk   r_   r  r`   resize3  sT   



	r  )auto_functionalizedc                 C  sB   ddl m} ||}tj| ||i ||d dd | D S )Nr   )kernel_side_table)
kernel_idxgridtma_descriptor_metadatakernel_argsc                 S  s    i | ]\}}t |tr||qS r_   r   )r   r  r  r_   r_   r`   r     r  z'triton_kernel_wrap_.<locals>.<dictcomp>)*torch._higher_order_ops.triton_kernel_wrapr  get_constant_argsr-   UserDefinedTritonKernelr   )r  constant_args_idxr  r  r   r  constant_argsr_   r_   r`   triton_kernel_wrap_z  s   	

r  c                 C  sj   t dd | g|D r$d}tjjjdd  }r | d| }|tj_tj	| |||}t
ttj	|S )Nc                 s  r?  r   r@  r   r_   r_   r`   r     r2  zcond.<locals>.<genexpr>z"control flow operator: torch.cond.stack_trace Found from : 
 )r  rO   rW   rX   rZ  r[  disable_cudagraphs_reasonr-   Conditionalr   rd   mapr?   )predtrue_fnfalse_fnoperandsr   r  r  r_   r_   r`   r     s   r   c           	      C  s   t dd || D r#d}tjjjdd  }r| d| }|tj_d
dd	}tj	| ||||}t
|ts9J tt||S )Nc                 s  r?  r   r@  r   r_   r_   r`   r     s
    
zwhile_loop.<locals>.<genexpr>z(control flow operator: torch.while_loop.r  r  rk   r   c                 S  sJ   t | tr| S t | tjrt| S t | tjrt| S tdt|  )NzNYI unsupported output type: )rf   r?   r-   r   MultiOutputr   r!  r   )rk   r_   r_   r`   _map_output  s   

zwhile_loop.<locals>._map_output)rk   r   )r  rO   rW   rX   rZ  r[  r  r-   	WhileLoopr   rf   r   rd   r  )	cond_fnbody_fncarried_inputsadditional_inputsstack_outputr   r  r  r  r_   r_   r`   
while_loop  s   


r  )r  subgraph_fnir.Subgraph
identifierc                 G  s$   t jj| g|R  }tttj|S r   )r-   InvokeSubgraphr   rd   r  r?   )r  r  r  r  r_   r_   r`   invoke_subgraph  s   r  )schemec          
      G  s   d }t jjjdd }|d usJ t| jjjD ]Y\}}|jdkr*|| t jj	|< q|jdkrgt j
|\}}t|| D ]}	|	  |jrQt jj|	  t jj|	  q?tjjt j|||}qt j|t jj	|< q|S )Nquant_optionsr  r.  )rO   rW   rX   rZ  r[  re   r  nodesrZ   envfetch_args_kwargs_from_envr  r  r  ri  codegen_low_precisionlow_precision_codegen_opsr   rj  invoke_quant_opsrx   r  Interpreterr.  run_node)
r  r  r  r.  r  rm   r]   rn   r   r   r_   r_   r`   invoke_quant_tracer  s"   

r   r_  r  tuple[torch.Tensor]c                   s   ddl m m} t|dkrtd fddt||D }|| |fdd}t|d dd d	}td
d |D |d< tdd |D |d< t	j
jd|dd|}|d d u r_td|S )Nr*   )InputDescriptorlower_pointwise_subgraphr   zSUnable to generate code for associative_scan op, because there are lifted argumentsc                   s    g | ]} |  | d qS )r  )r   ri   r   )r  r_   r`   r     s    z$associative_scan.<locals>.<listcomp>c                   s    g t | t |R  S r   )r  r  )lhsrhs)lowered_combine_fnr_   r`   wrapped_combine_fn  s
   z,associative_scan.<locals>.wrapped_combine_fnra  c                 s      | ]}|  V  qd S r   r$  r   r_   r_   r`   r     r   z#associative_scan.<locals>.<genexpr>r  c                 s  r  r   rD  r   r_   r_   r`   r     r   r  F)r_  can_fallback_to_atenz/Unable to generate code for associative_scan opr_   )r  r  r  r   r!  r  r  r  r   r-   rc  r   )r_  xsr  r  subgraph_inputsr  r   r  r_   )r  r  r`   associative_scan  s,   


r  c                 C  r  r   r_   )tokensr_   r_   r`   _sink_tokens  r  r  c                 O  s   t jj|g|R i |}ddlm} ||||}|d usJ tjj| }|d u r,|fS t	t j
tj|}t|ts>||fS |g|R S )Nr   )get_effect_key)r-   EffectfulKernelr   torch._higher_order_ops.effectsr  rO   rW   effectful_opsr  r  r  r?   rf   r   )tokenrZ   rn   r   r  r  effect_typeeffectful_kernelr_   r_   r`   with_effects  s   
r  )register_comm_loweringsc                 C  s   t | |dddd}|d }tjjt|}tjjdi |d|d\}}|dkrFtjj	|t
jkrFtjd| d|d	|\}}||fS ttd
 t| |dd}	ttj t| |	}
t|
|dd}|	|fS )zn
    Lowering inductor_prims.prepare_softmax_online to compute max/sum in one pass if no split is needed.
    TNr  r  online_softmax_reduce)r  r  r*   r(   )r  
num_outputreduction_hintz
            Online softmax is disabled on the fly since Inductor decides to
            split the reduction. Cut an issue to PyTorch if this is an
            important use case and you want to speed it up with online
            softmax.
            )r  r_   )r  rO   rW   r   simplifyrL   r-   r<   
num_splitsr  r+   r  r8   r   r  r  textwrapdedentr  rS   r  rk  r^  r   )r   r   r   r  r  hint	num_split
max_tensor
sum_tensorr  rk  xsumr_   r_   r`   prepare_softmax_online$  s8   



r$  r  )quantized_lowerings)mkldnn_lowerings)jagged_loweringsc              	   c  st    t | tjjsJ dt| }zt| t|  dV  W |r&|t| < dS t|  dS |r4|t| < w t|  w )z^
    A context manager to force fallback an op. Used in unit test
    for FallbackKernel.
    z+Only OpOverload to make the clean up easierN)	rf   rx   ry   rz   rS   r[  r#  r  rL  )rZ   old_handlerr_   r_   r`   force_fallbackq  s   

r)  )rb   rc   )rs   rt   ru   rv   )r   r   )r   r   )r   r?   rp   r   ru   r?   )rn   r   r   r   r   r   r   r   r   r   ru   r   )r   r   )r   r   ru   r!  )NN)NNNFN)F)r   rx  r   ry  rz  r   )r   r?   r   ry  )r   r?   rp   r   r   )r   r?   r  r   ru   r?   )r   r   r  r*   Trl  )rn  r?   r4  r?   r5  r?   r6  r   r7  r   r8  r   r   ry  ru   rx  )rn  r?   r4  r?   r5  r?   r6  r   r7  r   r8  r   r   ry  rZ  r[  ru   rx  )rn  r?   rD  r1  rE  r   r7  r   r8  r   r   ry  ru   rx  )rn  r?   rD  r1  rE  r   r7  r   r8  r   r   ry  rZ  r[  ru   rx  )rn  r?   rD  r?   rE  r?   r7  r   r8  r   r   ry  ru   rx  )rn  r?   rD  r?   rE  r?   r7  r   r8  r   r   ry  rZ  r[  ru   rx  )r   r   r*   )r  r   r  r   r  r   r  )T)rq   r  )r]   r   )NTF)rp   r   )r   rc  r4  r?   rd  re  r  r   )
rh  r   ri  r   r   rc  r4  r?   r  r   )rm  r?   ru   rn  )rm  r?   ru   rr  )ry  r?   rz  r?   ru  r   rv  r   rw  r{  rx  r|  ru   rx  )rn  r?   r  r?   ru  r   rv  r   )r   r   r4  r   )r   NNr*   )NNN)rB  FF)r   r   )r  r  r   r   r  r{  ry  r   )r   r   r  r{  )r   r   ry  r   )r(   F)r  r  r  r   r  r   )r4  r  )r  r  r  r  )r  r  r  r  r  r  )r   r?   r  r  r  r1  ru   r|  )rm   r  rh  r  )rm   r  ri  r  )rm   r  ri  r  rh  r  )Nrb  N)
r  r?   r  r  r  r  r  r  ru   rx  )Nr   r*   F)r!  )NNNN)r_   r   FTN)r  rP   r   )r   r{  ru   rA  )rB  F)rZ   r  )r  r  r  re  )r  r  )r_  r  r  r  r  (  
__future__r   
contextlibr	  r  r  loggingrN  r  r  r  r  collectionsr   collections.abcr   r   typingr   r   r   r	   r
   r   r   typing_extensionsr   unittest.mockr   r   rx   $torch.ao.quantization.fx._decomposedtorch.fxtorch.utils._pytreer  _pytreer  torch._dynamo.utilsr   (torch._higher_order_ops.associative_scanr   r  r   torch._library.utilsr   torch._prims_commonr   r   r   r   r   r   r   r   r   r   r   torch.fx.experimental.sym_noder   r   %torch.fx.experimental.symbolic_shapesr    r!   r"   torch.utils._ordered_setr#   torch.utils._sympy.functionsr$   r%   r&   r'   _dynamo.utilsr)    r+   r,   r-   r.   decompositionr/   r0   r1   r2   r3   r4   r5   r6   r7   r8   r9   r:   r;   r<   r=   r>   r?   r@   rA   rB   rC   rD   rE   rF   rG   rH   rI   rJ   rK   rL   rM   rK  rN   rO   rF  rP   rQ   rR   FALLBACK_ALLOW_LIST	getLoggerr  r  rS   __annotations__rT   ry   rz   r  r  tr_c10dr  r   _higher_order_opsr  r\   re  rU   quantized_decomposedra   rr   r}   r{   r   r   r   r  r  rs  r  bmmconvolutionconvolution_backwardr  r  rB  r  r  r  _int_mmrv  ru  r  r  r#  rY  r?  r  	complex32	complex64r   rX  r   r   r   r   r   r   r   r   r  r  r   r6  r#  r0  r<  rb  rw  r   r  r  r  r  r   r  r  
device_putr  r  r  r  r  r  aliasdetachdetach_liftview_ofr  r   r  r  r  r  r  r  r  r  r  r  r  r+  r  r  r  _unsafe_viewreshaper  slicer
  r  r3  quantize_per_channelrN  rS  r   rV  _functional_assert_asyncrX  dequantize_per_channelra  quantize_per_tensorrf  dequantize_per_tensorri  r&  rr  rt  r  r  r  r  r  r  r  r  r  r  r  r  r  r  cacher  r  r  r  r$  r(  rngprimsr7  r8  r<  	bernoullir9  r?  rB  	lru_cacherE  rG  r)  rM  rJ  rK  rQ  rP  rO  randintforce_stride_orderrW  r4  rY  r`  r\  lookup_seedra  randomrg  rl  rq  rs  r  r=  r  	NO_OPMATHr  r  r   r  r  r   r   r  _adaptive_avg_pool3dadaptive_max_pool3d*_scaled_dot_product_attention_math_for_mpsuniformexponential_pdist_forwardsoft_margin_loss_backward_fused_rms_normxpuis_availableembedding_dense_backward_cdist_forward_cdist_backward
_trilinearsegment_reduce_segment_reduce_backwardhistc	histogrambin_ct_histogramdd_bin_edges_histogramdd_from_bin_ctsaddbmm_addmm_activation_grouped_mm
_cudnn_rnn_cudnn_rnn_backward_embedding_bag_embedding_bag_forward_only_embedding_bag_backward*_embedding_bag_per_sample_weights_backward_fused_moving_avg_obs_fq_helper*_fused_moving_avg_obs_fq_helper_functional max_pool3d_with_indices_backward_adaptive_avg_pool2d_backward_adaptive_avg_pool3d_backwardadaptive_max_pool2d_backwardadaptive_max_pool3d_backwardfractional_max_pool2d_backwardfractional_max_pool3d_backwardreplication_pad1d_backwardreplication_pad2d_backwardupsample_linear1d_backwardupsample_bicubic2d_backwardupsample_trilinear3d_backwardgrid_sampler_2d_backward_pdist_backwardr  r  kthvaluetopkrd  median	nanmedianrV  resize_
resize_as__linalg_detlinalg_householder_productlinalg_inv_exlinalg_ldl_factor_exlinalg_ldl_solve	linalg_lulinalg_lu_factor_exlinalg_lu_solvelinalg_matrix_exp	linalg_qr_linalg_slogdet_linalg_solve_exlinalg_solve_triangular_linalg_svd	lu_unpackormqr_linalg_check_errorslinalg_pinvatol_rtol_tensor_linalg_eightriangular_solvelinalg_cholesky_excholesky_inversecholesky_solvegeqrf_fft_r2cnonzerogcd_thnn_fused_lstm_cell_prims	rng_primsrun_and_save_rng_staterun_with_rng_stategraphsafe_run_with_rng_statemasked_scattermasked_scatter_backwardr  angle_efficientzerotensor(_sparse_coo_tensor_with_dims_and_tensors	to_sparse
_to_sparser   r  r  r  #_scaled_dot_product_flash_attention,_scaled_dot_product_flash_attention_backward#_scaled_dot_product_cudnn_attention,_scaled_dot_product_cudnn_attention_backward+_scaled_dot_product_flash_attention_for_cpu4_scaled_dot_product_flash_attention_for_cpu_backward0_scaled_dot_product_fused_attention_overrideable9_scaled_dot_product_fused_attention_overrideable_backward_flash_attention_forward_flash_attention_backward_efficient_attention_forward_efficient_attention_backwardindex_reducerepeat_interleaverz  r  r  r  r  r  r  r  scalar_tensorr  
LongTensorr  r  r  r  r  r  r  r  r  r  r  ru  
zeros_liker  r  r  r  r  r  r  r  r)  r:  r<  r;  r4  rB  rI  rK  rL  rO  rQ  rS  rF  rj  fallback__unsafe_masked_indexrn  ,fallback__unsafe_masked_index_put_accumulater  ru  rc  r|  rv  r  r  r  r  r  r  r  r  r  r>  r  r  r  r  r  r  r  r  r  r  r  r   r  r  r  r:  rM  rV  rc  rh  rz  rt  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r   r!  r%  r#  Tensor_Tensorr,  Scalarr*  Tensor_Scalarr+  r  r  rs  r:  r  r  r>  rV  rC  rQ  true_dividerR  rT  r  r   rf  rd  ri  rh  rp  ro  r{  ry  r  r~  r  r  r  rq  r  rp  r  r  r  r  r  r  r  r  r}  r  r   r  r  r  r  rsqrtrk  r)  expm1relur  r(  r  r^  cossinabsbitwise_andbitwise_left_shiftbitwise_not
bitwise_orbitwise_right_shiftbitwise_xorlgammaerfspecial_erfrj  tantanhr?  r  r  logical_xorrA  rB  	clamp_min	clamp_maxnegr@  	remaindersignsignbit	_neg_viewler&  r%  rd  r  necoshsinhacosacoshasinasinhatan2atanatanhrO  erfcerfinvhypotlog10log2	nextaftercodegen.commonrg  r  r  r   rZ   r   r`  _foreach_addListforeach_add_listforeach_add_scalar_foreach_mulforeach_mul_listforeach_mul_scalar_foreach_sub_foreach_neg_foreach_abs_foreach_powScalarAndTensor_foreach_divforeach_div_listforeach_div_scalar_foreach_sqrt_foreach_rsqrt_foreach_maximum_foreach_minimum_foreach_clamp_min_foreach_clamp_max_foreach_reciprocal_foreach_sign_foreach_copyforeach_copyr  _foreach_add__foreach_mul__foreach_div__foreach_copy_r  add_bitwise_and_bitwise_left_shift_bitwise_not_bitwise_or_bitwise_right_shift_bitwise_xor_mul_div_Tensor_modelogical_and_logical_not_logical_or_logical_xor_sub_relu_sigmoid___and__
__lshift____or__
__rshift____xor____iand____ilshift____ior____irshift____ixor__r  r  r   r  r  r   methodfuncr  r  _inductor_testri  r  r  r  set_source_Tensorr  fsdpr  r  *torch._higher_order_ops.auto_functionalizer  r  higher_orderr   r  while_loop_stack_outputr"  r  invoke_quantr   r  r  r  comm_loweringr  r$  r  r%  register_quantized_opsregister_woq_mm_opsr&  register_onednn_fusion_opsr'  register_jagged_opscontextmanagerr)  r_   r_   r_   r`   <module>   sF  $4L8



	R95V;
,
-















4
E2



2$$-/|6
9

		(2&




			a:		 	


C
8



,
	


$!#I%
 



!w/


_7K"
 
)5


E


Q
,
D-


z
	 &

	 H1
+


/
%
	 











'	











	


@
#6
