o
    ߗi;                    @   s1  U d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlm	Z	 d dl
mZmZmZmZmZmZmZmZmZ d dlmZ d dlZd dlZd dlZd dlZd dlm  mZ d dlmZ d dl m!Z! d dl"m#Z#m$Z$m%Z%m&Z&m'Z'm(Z(m)Z)m*Z*m+Z+m,Z,m-Z- d dl.m/Z/m0Z0 d d	l1m2Z2m3Z3m4Z4m5Z5m6Z6 d
dl7m8Z8 ddl9m:Z:m;Z;m<Z<m=Z= ddl>m?Z?m@Z@ ddl<mAZAmBZBmCZCmDZDmEZEmFZFmGZGmHZHmIZImJZJmKZKmLZLmMZM ddlmNZNmOZOmPZPmQZQmRZRmSZSmTZTmUZUmVZVmWZW ddlXmYZYmZZZ ddhZ[e\e]Z^i Z_eeedef e`f edef f ead< i Zbeejcjdeedef  f ead< ee Zfeejcjd ead< ejYjgZgejYjhZhejYjiZiee Zjeejcjd ead< ee Zkeejcjd ead< ee Zleejcjd ead< i Zmeejcjdejcjdf ead< ejYjnZndedef deedef  fddZod d! Zpd"d# Zqd$d% Zrd&d' Zseregjtegjuegjvegjwegjxegjyegjzegj{egj|egj}egj~egjegjg ejejejejejejejejejejejejejd(Zd)efd*d+Zd,d- Zd.d/ Zd0e(fd1d2Zd3d4 Zd5d6 Zd7ee d8ee`ef d9ed0ee( d:edeee ee`ef f fd;d<Zd=d> Zd0ee( fd?d@ZdAe(jdAfd0ee( fdBdCZdDdE ZdmdFdGZ					A	dndHdIZdodJdKZdodLeKd)ejfdMdNZeeijddOdLeKd)ejfdPdQZdAdRdLeKd)ejfdSdTZeegjjddOdLeKd)ejfdUdVZdAdAdWdLeKdXejfdYdZZeeijddOdodLeKdXejfd[d\Zdd]e(jdAdddAdAdf	d^d_Zd`da Ze  	AdodbdcZeegjdAddddedf ZeegjdAddddgdh Zeegjegjegjegjeijgdidj Zeegdkreegje eegjddOdpdldmZeegjddOdpdndoZeegjgdpdpdqZeegjdrds Zeegjdtdu Zeegjdvdw Zeegjdxdy Zeegjjdzd{ Zeegjd|d} ZeegjddOd~d ZeeijddOdd ZeegjddOdd ZeegjÃdd ZeegjddOeegjddOeegjddOdd ZeegjddOdd ZeegjddOdqddZeegjtddOdpddZteegjddOdpddZeegjuddOdpddZudrddZeenjddOdeKdeKdeKdededed)ejdeKfddZeenjddOdeKdeKdeKdededed)ejdeKfddZeenjjddOdeKdedededed)ejdeKfddZeenjjddOdeKdedededed)ejdeKfddZeenjjddOdeKdeKdeKdeded)ejdeKfddZeenjjddOdeKdeKdeKdeded)ejdeKfddZeegj׃drddZeegjddOdsdededefddZeegjddOdsdededefddZeegjddOdsdededefddZeegjddOdd ZeegjddOdrddZeegjddOdrddZeegjddOdrddZeegjddOdd ZeegjddOdd ZeegjddOdd ZdrddÄZeegjdtddƄZduddȄZedddʄ Zdpdejfdd̈́ZdpdejfddτZdudejjfdd҄ZdvddԄZddք ZeejYjjddOdd؄ ZeegjddOddڄ ZeegjddOdd܄ ZeegjjddOddބ Zeegjdd Zeddd Zdd ZeegjjZeegjjZeegjjZeegjjZeegj  eegjdd Zeegjdd Zee;jddOdd Zee;jddOdXejfddZee;jddOdd Zee;jddOdd Zee;j	ddOd ddee deKde`defddZ
ee;j ddOd ddededee deKdef
ddZdeKdee`ejejejf fddZdeKdee`ejf fddZeegjjddOdAdAddd deKdeKdededee` deeK deKfddZeegjddOdAdAd	deKd
eKdedefddZdd Zdd Zdd Zdd Zdd Zeegj eegj eegj eegj eegjdAd eegjjdAd eegj eegjdAd eegj eegj eegj  eegj!j eegj"j eegj# eegj$j% eegj&j eegj'j eegj( eegj)dAd eegjze eegj*e eegj+e eegj,e eegj-e eegj. eegj/ eegj/ eegj0 eegj1 eegj2 eegj3e eegj4 eegj5 eegj6 eegj7 eegj8 eegj9 eegj: eegj; eegj<e eegj= eegj>e eegj? eegj@ eegj@jA eegjB eegjC eegjD eegjE eegjF eegjG eegjH eegjI eegjJ eegjK eegjL eegjM eegjN eegjO eegjP eegjQ eegjR eegjS eegjT eegjU eegjV eegjW eegjX eegjY eegjZ eegj[j\ eegj] eegj^ eegj_ eegj` eegja eegjb eegjc eegjdj eegjejdAd eegjfe eejgjhji eejgjhjj eegjk eegjl eegjme eegjn eegjo eegjp eegjq eegjr eegjsjt eegjujedAd eegjvjedAd eegjwjedAd eegjxjedAd eegjyjedAd eegjzjedAd eegj{jedAd eegj|jedAd eegj}je eegj~je eegjje eegjje eegj eegjddOdoddZeegjddddZdd Zeegd
reegje eeijd d! ZeegjddOd"ed#efd$d%ZeegjddOdwd&d'Zd(d) ZeejegjgddddAd*d+d,Zeejdmd-d.Zeejd/d0 Zeegjd1d2 Zeegjd3d4 Zd5d6 ZeegjddOd7d8 Zd9d: Zeejegjgddddddd;d<d=Zd>d? Zd@dA ZeegjeeZeedZeed ZdBdC ZeegjdddddDdEdFZeegjdddddDdGdHZeegjdddddDdIdJZeeijjdKdL ZeejegjgdMdN ZeegjddOdodOdPZeegjddOdxdQdRZdSdT Z	]dudUdVZdWdX ZdudYdZZeegjddOd[d# ZeegjddOd\d] Zeegjdod^d_Zeegjdod`daZdbdc Zddde ZeegjddOdodfdgZee;jddOdodhdiZdjdk ZeegjjdAdlZeegjjdAdlZeegjddOdmdn ZeegjddOdodp Zedqdr ZeegjddOdpdsdtZeegjddOd"efdudvZdd]dwdxejcjdd"edyee` dzefd{d|ZeegjddOdd}d"edyee` fd~dZeegjddOd"efddZeegjddOd"efddZeegjddOd"efddZeegjddOd]dd"edzefddZ	
	Adydeee df dedefddZeegjjdpdee fddZeegjjdpdee fddZeegj~j	dmdee dee fddZ~eegjj	dmdee dee fddZeegjj			dzdee dee dee fddZeegjj			dzdee dee dee fddZdd Zeeijjdd ZeegjddOdrddZdejdeejef fddZÐdejdejfddZĐdejdejdejfddZ	d{ddZƐdd Zǐdd ZddddZeeijddO	AdoddZeeijddOdd Zeegj{ eegj|jdAdlZeegj|ddOdd Z|d|ddZ͐dd Zΐdd Zϐdd ZeegjjdAdlZeegjуdÐdĄ ZeegjjdAdlZeegjӃdŐdƄ ZeegjjdAdlZ֐dǐdȄ ZeegjՃdɐdʄ Zeegjj	d}dːd̄ZeegjvjdAdlZeegjjdAdlZeegjvddO		 	A	]	d~dΐdτZveegjddO		 	A	]	d~dАdфZڐdҐdӄ ZeegjwjdAdlZeegjwddO	dpdԐdՄZweegjjdAdlZeegjddO	dpd֐dׄZސdؐdل Zdڐdۄ Zdpde`fdݐdބZdߐd ZeegjdddddZdd Zdd Zdd Zdd ZeegjeijgdpddAdddZeegjdpddAdddZdd Zedd ZeegjjdAdlZeegjjdAdlZeegjjdAdlZeegjd]ddd ZdoddZeegjdd ZeegjddOdoddZedd Zed d Zeegjd]ddpddZeegjgd]ddd ZdLe<jDdee<j fddZeeijgd]ddd	 Zeegj egjjgd]e(jddd
d Zeegjeijgd]ddd ZeegjeijgdddddZeegjjZeegjjZeegj	jZ
eegjjZeegjjZeegjdmddZeegjdmddZeegj	dd Z	eegjddOdpddZeegjddOdpddZeegjdddddZeegjdddZeegjddOdddZeegjddOdd d!Zeeijed" eegjed#Zeegjed$Zeegjed%ejd&Zeegjed'ejd&Zeegjd]d(d)Zeegj@jAdAdlZ eegj@jAddOdddAd*d+d,Z!eegj@jddOdd-d.Z@dmd/d0Z"d1d2 Z#e"egj$Z$e#egj%Z%e"egj&Z&e"egj'Z'eegj(Z(e#egj)Z)e#egj*Z*eegj+Z+eegj,d]d3Z,e#egj- e#egj. eegj/Z/eegj0Z0eegj1Z1eegj2d4d5Z2eegj3Z3eegj4Z4eegj5Z5e"egj6 e"egj7Z7eegj8e(jdOe7 e"egj9 e"egj: e"egj; e#egj^ eegj<dd]ejd6Z<eegj=dd]ejd6Z=eegj>dd]ejd6Z>eegj?dd]ejd6Z?eegj@Z@eegjAZAeegjBe@ eegjCeA eegjDZDeegj/Z/e"egjEZEeegjF eegjGd7d5ZGeegj eegjHejd& eegjIeD eegjJejd& eegjKejd& eegjLejd& eegjMejd&ZMeegjNejd& eegjOejd& e"egjP e"egjQ e"egjR e"egjS e"egjT e"egjU e"egjV e"egjW e"egjX e"egjY e"egjZ e"egj[ e"egj\ e"egj] e"egj^ e"egj_ dd8l`maZambZb d9d: ZcebD ]@ZdecegedD ]\ZeZfZgeeeedefegd; qjeceiedD ]\ZeZfZgeeeedefegd; qq`eegjhjed]d3Zieegjhjed]d3Zjeegjhjed]d3 eegjkjeZleegjkje eegjkjeZmeegjnje, eegjnje, eegjojeD eegjpje/ eegjqje eegjqje eegjqjre eegjsjeZteegjsje eegjsjeZueegjve* eegjwe$ eegjxje@ eegjxje@ eegjyjeA eegjyjeA eegjzje@ eegjzje@ eegj{jeA eegj{jeA eegj|eE eegj}eG eegj~e d<d= Zeegjjegjhjei eegjjegjhjej eegjjegjkjel eegjjegjkjem eegjjegjsjet eegjjegjsjeu d>d? Zeegje eegje0 eegje1 eegje2 eegje3 eegje4 eegje5 eegje eegjje eegjje eegje< eegje= eegje> eegje? eegje, eegje( eegje) eegje0 eegje1 eegje3 eegje4 eegje5 eegjegj eegjegj eegjegj eegjegj eegjegj eegjdmd@dAZeegjjdBdC ZeegjjdDdE ZeegjdFdG Ze/ D ]\ZZee0ee qZeejdHdI ZeegjdJdK ZeejYjjdLdM ZeejYjjdNdO ZeejYjgjjdPdQ ZeejYjdreejYjjjdRdS ZeejYjgjdddTdUZd dVlmZ ee ee!dWdX ZeejYjjdYdZ ZeejYjjd[d\ ZeejYjjddOd]e<jd^e`fd_d`ZeeddOdae<jd"efdbdcZeejYjijjddde ZeejYjjddOdfdg ZddhlmZ e  ddil9mZ e8e ddjl9mZ e¡  eá  ddkl9mĐZ eĐš  ddll9mƐZ eƐǡ  dS (      N)defaultdict)	AnyCallableDictListOptionalSequenceSetTupleUnion)patch)associative_scan_op)triton_kernel_wrapper_mutation)canonicalize_dimcanonicalize_dimscheckdtype_to_typeelementwise_dtypesELEMENTWISE_TYPE_PROMOTION_KINDget_computation_dtypeis_boolean_dtypeis_float_dtypeis_integer_dtypeNumber)magic_methodsmethod_to_operator)CeilDivFloorDivIdentity
IntTrueDivModularIndexing   )import_submodule   )configinductor_primsirtest_operators)decompositionsget_decompositions)	DtypeView
ExpandViewIndexingConstantIRNode	is_tritonops_wrapperPermuteView	Pointwise	ReductionSqueezeView	TensorBoxvalidate_irView)
ceildivdecode_device
is_dynamicis_gpuis_pointwise_use,needs_fallback_due_to_atomic_add_limitationspad_listlike#register_op_dtype_propagation_rulessympy_productuse_scatter_fallback)opsVztorchvision::roi_alignzaten::index_add.	lowerings_maybe_layout_constraints	fallbacksneeds_realized_inputsforeach_opsinplace_foreach_opsinplaceable_foreach_opsfnreturnc                    sR   t  tjjs	dS  tv rt  S  tv rdt < dS  fdd}t }||S )zHGet layout constraints. Returns None if there are no layout constraints.Nc                    sF   | t jjju rtt < t  S | t jjju rd t < d S td|  )NzUnknown layout constraint tag: )torch_CTagneeds_fixed_stride_orderconstrain_to_fx_stridesrD   flexible_layoutAssertionError)tagrJ    V/home/ubuntu/transcripts/venv/lib/python3.10/site-packages/torch/_inductor/lowering.pyhandle_layout_constraint_tags   s   z>maybe_layout_constraints.<locals>.handle_layout_constraint_tag)
isinstancerL   _ops
OpOverloadrD   rC   get_layout_constraint_tag)rJ   rW   rS   rU   rT   rV   maybe_layout_constraintsf   s   
r\   c                 C   sX   t jjjt jjjg}|D ]}|| jv r|  S qt jj| r$t jjjS t	t jjt
jS N)rL   rM   rN   rO   rQ   tags_libraryutils
is_builtingetattrr$   #custom_op_default_layout_constraint)rJ   tags_by_priorityrS   rU   rU   rV   r[      s   

r[   c                 C   s   | s	t d| d S )Nzinductor does not support NotImplementedError)condmsgrU   rU   rV   
assert_nyi   s   ri   c                    sZ   t  tttfrdd  D S t  t  tjjr+t	 fdd 
 D  d S d S )Nc                 S   s   g | ]}t |qS rU   )add_needs_realized_inputs.0xrU   rU   rV   
<listcomp>       z-add_needs_realized_inputs.<locals>.<listcomp>c                 3   s    | ]}t  |V  qd S r]   )rb   )rl   overloadrT   rU   rV   	<genexpr>   s    

z,add_needs_realized_inputs.<locals>.<genexpr>)rX   listtuplesetrF   addrL   rY   OpOverloadPacketupdate	overloadsrT   rU   rT   rV   rj      s   
rj   c                 C   s:   t | tjjr|  D ]	}|tt| |< qd S |t| < d S r]   )rX   rL   rY   rv   rx   rD   rb   )rJ   
constraintrp   rU   rU   rV   add_layout_constraint   s
   rz   )r   r#   r!                     	   
         dtypec                 C   s2   t | ts| S | tv sJ d|  dt|  } | S )Nzid z missing from DTYPE_ID_LOOKUP)rX   intDTYPE_ID_LOOKUPr   rU   rU   rV   decode_dtype   s
   
r   c                 C   sB   t | trt|  pt|  S t | tjr| jdu S t | tS )NT)	rX   r4   r   	get_dtyper   sympyExpr
is_integerr   rm   rU   rU   rV   is_integer_type   s
   


r   c                 C   s    t | trt|  S t | tS r]   )rX   r4   r   r   boolr   rU   rU   rV   is_boolean_type   s   

r   type_promotion_kindc                    s0   dd   fdd|D }t |d| i\}}|S )Nc                 S   s8   t | ttjfr
| S t|  }tjdg| |  dS )Nr#   r   )	rX   r   r   Basiclenget_sizerL   zerosr   )inpdimrU   rU   rV   construct_input   s   z+get_promoted_dtype.<locals>.construct_inputc                       g | ]} |qS rU   rU   )rl   argr   rU   rV   rn      ro   z&get_promoted_dtype.<locals>.<listcomp>r   )r   )r   argsinps_r   rU   r   rV   get_promoted_dtype   s   r   c                 C   sh   t | ttfs| g} nt| } t| D ]}t |tjjr1| D ]}t||}|tvr0| 	| q q| S r]   )
rX   rr   rs   rL   rY   rv   rx   rb   rC   append)aten_fnrJ   rp   other_fnrU   rU   rV   get_overloads   s   

r   c                 C   s6   t | tjjr|| jv S t | tjjr||  v S dS NF)rX   rL   rY   rv   _qualified_op_namerZ   name)op	namespacerU   rU   rV   in_namespace  s
   
r   r   kwargs	broadcastconvert_input_to_boolc                    s  dd t  D }dd  D }|s|s fS |s|ri|r$tjndd  D }|dd  D  t|d|i|rF |d  n|d   fd	d
fdd D  fdd D |rtt	t
 fdd|D fdd|D  }t	|d  }	t||d t| D ]\}
}| |
< qt||t|d  D ]\}}||< qtt D ]}
t |
 tjrt |
 |	 |
< qD ]}t| tjrt| |	|< qЈ fS )Nc                 S      g | ]\}}t |tr|qS rU   rX   r4   rl   irm   rU   rU   rV   rn         z"transform_args.<locals>.<listcomp>c                 S   r   rU   r   rl   kvrU   rU   rV   rn     r   c                 S   s*   g | ]}t |ttjfst|d r|qS r   )rX   r   r   r   hasattrrl   arU   rU   rV   rn      s    c                 s   s    | ]
}t |d r|V  qdS )r   N)r   r   rU   rU   rV   rq   &      z!transform_args.<locals>.<genexpr>r   r   c                    s6   t | tr
t| S t | tjrtj| j dS | S )Nvaluer   device)rX   r4   to_dtyper&   Constantr   )r   )r   r   rU   rV   promote0  s
   

ztransform_args.<locals>.promotec                    r   rU   rU   r   r   rU   rV   rn   8  ro   c                    s   i | ]	\}}| |qS rU   rU   r   r   rU   rV   
<dictcomp>9      z"transform_args.<locals>.<dictcomp>c                 3       | ]} | V  qd S r]   rU   rl   r   r   rU   rV   rq   ?      c                 3   r   r]   rU   rl   r   r   rU   rV   rq   @  r   )	enumerateitemsrL   r   extendvaluesr   
get_devicebroadcast_tensorsrr   	itertoolschainr   zipr   rangerX   r&   r   r+   create)r   r   r   r   r   args_indiceskwargs_indicespromoting_argsbroadcastedsizer   rm   r   rU   )r   r   r   r   r   rV   transform_args  sZ   

r   c                    s>   t   fdd}t| }t| tt|| |S )a  
    Add a foreach lowering to lowerings dict.

    Arguments:
        aten_fn: torch.ops.aten.* fn we are lowering
        decomp_fn: alternate implementation on our IR
        broadcast: True to apply broadcasting to tensor inputs
        type_promotion_kind: kind of type promotion applied to tensor inputs, `None` means no type promotion
        convert_input_to_bool: some logical ops require inputs are converted to bool
    c                     s*   t | dksJ  | i |}t| |S )Nr!   )r   r5   )r   r   out	decomp_fnrU   rV   wrappeda  s   z+_register_foreach_lowering.<locals>.wrapped)	functoolswrapsr   rG   rw   rC   dictfromkeys)r   r   r   aten_fnsrU   r   rV   _register_foreach_loweringU  s   
r   c                    s<   t  fdd}t  tt | |S )a  
    Add a lowering to lowerings dict

    Arguments:
        aten_fn: torch.ops.aten.* fn we are lowering
        decomp_fn: alternate implementation on our IR
        broadcast: True to apply broadcasting to tensor inputs
        type_promotion_kind: kind of type promotion applied to tensor inputs, `None` means no type promotion
        convert_input_to_bool: some logical ops require inputs are converted to bool
    c                     s   t | } t|}d}t| dkr!t| d t tfr!d}t | d } tdd  D s9tdd | D r9J dt| |\} }|rH| g} | i |}t	| |S )	NFr#   r   Tc                 s   s"    | ]}|t v pt|d V  qdS )_c10d_functionalN)rE   r   )rl   rJ   rU   rU   rV   rq         
z6_register_lowering.<locals>.wrapped.<locals>.<genexpr>c                 s       | ]}|d kV  qdS )r   NrU   rk   rU   rU   rV   rq         
zout= ops aren't yet supported)
rr   r   r   rX   rs   allanykeysr   r5   )r   r   unpackedr   r   r   r   r   r   rU   rV   r     s,   

z#_register_lowering.<locals>.wrapped)r   r   r   rC   rw   r   r   )r   r   r   r   r   r   rU   r   rV   _register_loweringn  s
   r   Fc                 C   s   t jt| |||dS )z+
    Shim to support decorator syntax.
    r   r   r   )r   partialr   )r   r   r   r   rU   rU   rV   register_lowering  s   r   c                 C   s   g }t jt| t|tjjdD ]O\}}tjjj	j
t|dddr(|| qtjjj	j
t|dddr<|| qtjj|| tt|jtt|jk rZ|| q|| qtt|S )z
    Broadcasting logic based on symbolic shapes.

    We give the shapes 0 and 1 concrete values, while all other shapes
    are symbolic sympy formulas.
    )	fillvaluer#   Tsize_oblivious)r   zip_longestreversedr   SOnerB   graphsizevars	shape_envevaluate_exprEqr   guard_equalsr   expandfree_symbolsrs   )r   boutputrm   yrU   rU   rV   broadcast_symbolic_shapes  s   $

 r  c              
      s,  |d u s|d u sJ d|d u r|d u rt j}tdd | D s"| S tdd | D rC|p3t| d|ifdd  fdd	| D S td
d | D }g }| D ]C}t|ttfrq|	t
tj|| | dt|  qPt|tjr|	t
t|| | dt|  qP|	| qP|S )NzEonly one of override_return_dtype or type_promotion_kind may be givenc                 s   s"    | ]}t |tjttfV  qd S r]   )rX   r   r   r   floatrk   rU   rU   rV   rq          z$promote_constants.<locals>.<genexpr>c                 s   s"    | ]}t |tttjfV  qd S r]   )rX   r   r  r   r   rk   rU   rU   rV   rq     r  r   c                    s4   t | tjrtj|  td dS tj|  td dS )Nindexr   r   r   )rX   r   r   r&   r,   r8   r   r   r   rU   rV   
const_func  s
   
z%promote_constants.<locals>.const_funcc                    r   rU   rU   rk   )r  rU   rV   rn     ro   z%promote_constants.<locals>.<listcomp>c                 s   s&    | ]}t |tttjfr|V  qd S r]   )rX   r4   r+   r&   r   rk   rU   rU   rV   rq     s   $ r   r  )r   DEFAULTr   r   r   nextrX   r   r  r   r+   r   r&   r   r   get_device_or_errorrr   r   r   r   r,   )inputsoverride_return_dtyper   exr   rm   rU   )r  r   rV   promote_constants  sL   

	r  c              	      s(   d ddt f fdd}|S )Nalphar  c              	      s  d urt dd |D rrJ | S t|
}r2| d ur1| dkr1t|}t|d | |d< n| d u s8J dd |D |d  
pL|d   tt|d  j	|dd  D ]!}t
|tjstt| ksJ d d	 d	|  q^tjd uottjd
d d uotjjjd uotjjjddo tjtjfv  	fdd}sd }|D ]}t| j	r| } nq|s|d  }p|}tj| |dS )Nc                 s   "    | ]}t |tot|V  qd S r]   rX   r-   r.   rl   r   rU   rU   rV   rq     r   z0make_pointwise.<locals>.inner.<locals>.<genexpr>r#   c                 S      g | ]}|  qS rU   make_loaderrk   rU   rU   rV   rn     ro   z1make_pointwise.<locals>.inner.<locals>.<listcomp>r   zndim mismatch  current_nodelow_precision_pointwise_barrierFc                    s   t  t ksJ d  d tjkr&d ur& fddD  S r:r:tjkr: fddD  S g }D ]}| }rTtj|dd}t|}|| q>| }rntj|dd}t|S |S )Nzwrong ndim r  c                       g | ]}| qS rU   rU   rl   loadr  rU   rV   rn   4  ro   zCmake_pointwise.<locals>.inner.<locals>.inner_fn.<locals>.<listcomp>c                    r  rU   rU   r  r  rU   rV   rn   :  ro   F)use_compute_types)r   rL   r   float64rA   r   r   )r  inputs_loadedr  r   downcast)r   emulate_precision_castsrJ   is_gpu_deviceloadersoverride_fn_when_gpu_float64override_fn_when_input_boolrangesr  rV   inner_fn1  s*   $
z/make_pointwise.<locals>.inner.<locals>.inner_fnr   r   r*  r)  )r   r  rr   mulr   r   r:   r8   r   typerX   r&   BaseConstantr   rB   r   rb   r  metagetrL   bfloat16float16r1   r   )r  r  otherr*  r   r   allow_alpharJ   override_devicer'  r(  r  triton_fallback)r   r$  r%  r&  r)  rV   inner  sb   

zmake_pointwise.<locals>.inner)r4   )rJ   r  r6  r(  r'  r5  r7  r8  rU   r4  rV   make_pointwise  s   $	Pr9  c                    s&   dddt t t  f fdd}|S )Nr#   r  r  c                    s  dd }t tjjjdkptjjjtv }tjjjD ]}|jD ]}|jdkr*|jtv s,d}qqd }|D ]}t	|t
tfr?|} nq2|d usHJ dg }|D ]}t	|t
tfs`||gt |  qL|| qL|t| }	d gt | }
|	 D ]@\\}}}g }|D ]-\}} r|d| i}n| }||
|< tj|tjr|r|r|  ||  q|rtj| qwtdd	 |
D sJ |
S )
Nc                 S   sz   t t}t| D ]2\}}t|  ptj}d }|D ]}t|tr&|j	 } nq|d us/J d|||f 
||f q|S )Nz.foreach op should have at least one tensor arg)r   rr   r   r9   r$   #combo_kernel_foreach_dynamic_shapesrX   r4   datar   r   )	arg_pairsr   r   r   use_foreachr   trU   rU   rV   
group_argse  s   


z9make_foreach_pointwise.<locals>.inner.<locals>.group_argsr   call_functionTz1at least one input must be a list to a foreach opr  c                 s   s    | ]}|d uV  qd S r]   rU   rk   rU   rU   rV   rq     r   z8make_foreach_pointwise.<locals>.inner.<locals>.<genexpr>)r   rB   r   r  userstargetrH   r   rG   rX   rr   rs   r   r   r   has_featureBackendFeatureFOREACHrealizeget_operation_nameregister_operation_listr   )r  r  r?  realize_outputsnodeusera_list_inputinputbroadcast_inputsgroupsoutputsr   r=  groupoperation_list
output_indr   r  r5  pw_fnrU   rV   r8  b  sd   

z%make_foreach_pointwise.<locals>.inner)r   r4   )rU  r5  r8  rU   rT  rV   make_foreach_pointwisea  s   "LrV  rm   c                    s>   |    kr|rt| S | S  fdd}t| d| S )Nc                    s   t j|  dS )N)	src_dtype)rA   r   r   r   rW  rU   rV   	_to_dtype     zto_dtype.<locals>._to_dtyper  )r   cloner9  )rm   r   copyrY  rU   rX  rV   r     s
   r   r   c                 C   sZ   |j s|  j r&|  rt| |d}tj||  |S ttj	j
dd| |S t| |ddS )Nr   Fadd_to_fallback_setTr]  )
is_complexr   r   
empty_liker&   InplaceCopyFallbackr   fallback_handlerprimsconvert_element_typedefaultr   )rm   r   dstrU   rU   rV   _convert_element_type  s   rj  ra  c                C   sb   |   }||kr|rt| S | S dd }||}||}||kr)ttjj| |S tt| |S )Nc                 S   s   | j r	t| jS t| jS r]   )is_floating_pointrL   finfobitsiinfor   rU   rU   rV   _get_primitive_bitwidth  s   z1to_dtype_bitcast.<locals>._get_primitive_bitwidth)	r   r\  re  atenviewr   r4   r*   r   )rm   r   r]  x_dtypero  src_bitsdst_bitsrU   rU   rV   to_dtype_bitcast  s   ru  c                 C   s8   |j s|  j rttjtjjj	j
| |S t| |S r]   )rb  r   r4   r   r&   ComplexViewrL   rA   rp  rq  r   ru  rm   r   rU   rU   rV   _view_dtype  s
   
rx  r]  non_blockingr   c                C   s:   t |}|  |kr|rt| S | S ttj| ||S r]   )r8   r   r\  r4   r   r&   
DeviceCopy)rm   r   r]  rz  rU   rU   rV   	to_device  s   r|  c                 C   s   t | |d|dS )NTry  )r|  )rm   r   rz  rU   rU   rV   _device_put     r}  Tc
                 C   s   |p| j }t|}
|rtd| }td| || t||| |dur't|}t|
|||r/|nd||	d}
t| |||d|
}
tt|rPttt|d|d|
 |
S )z3A pointwise function that maps ops.{name} to inputs
libdevice_N)r  r(  r'  r5  r7  r   )r   r   )__name__r/   r>   r9  r   r   rf  rb   )r   r   r   r   r   r  r(  r5  use_libdevice_for_f64r7  rJ   fn_libdevicerU   rU   rV   register_pointwise  sJ   



r  c                     sx   d} t d  fdd} fdd}t|t|tjdgfdd}ttj|}tt| r:tt	t| d	d
| |S )z2A pointwise function that maps ops.frexp to inputsfrexpc                         | i |d S Nr   rU   r   r   r  rU   rV   frexp0.     zregister_frexp.<locals>.frexp0c                     r  Nr#   rU   r  r  rU   rV   frexp11  r  zregister_frexp.<locals>.frexp1r[  c                     s$    d | i | d | i |fS Nr   r#   rU   r  )pw_fnsrU   rV   rJ   9  s   $zregister_frexp.<locals>.fnNr^  )
r/   r9  rL   int32r   rp  r  r   rf  rb   )r   r  r  rJ   rU   )r  r  rV   register_frexp)  s*   
r  c                 C   s   t ||d}t| |}|S )Nr5  )rV  r   )r   pointwise_lowering_fnr5  rJ   rU   rU   rV   register_foreach_pointwiseK  s   
r  )r   r   c                    s  dd }t |ttfrt||}t |ttfrt||}| ||g t d  d tjd}dd t D }t|t	 fdd|D  D ]\}}| |< qFt
t D ]}t  | tjrqt | t |d	    |< qUt||d
 d	 t d |t d |S )Nc                  W   
   t j|  S r]   )rA   wherer   rU   rU   rV   rJ   W     
zwhere.<locals>.fnr#   r!   r^  c                 S   r   rU   r   r   rU   rU   rV   rn   c  r   zwhere.<locals>.<listcomp>c                       g | ]} | qS rU   rU   r   r   rU   rV   rn   d  ro   r   r[  )rX   r  r   constant_liker   r   r	  r   r   r   r   r   r&   r   r+   r   rr   r   r9  r   )rg   r   r   rJ   r   indicesr   rm   rU   r   rV   r  U  s&   
$
$
r  c                  G   s   t | dkrt| d ttfrt| d  S ttdd | D g }g }| D ]$}| }t |t |ks?t	dd t
||D rDt||}|| q%|S )Nr#   r   c                 S   r  rU   )r   rk   rU   rU   rV   rn   s  ro   z%broadcast_tensors.<locals>.<listcomp>c                 s   s    | ]?\}}t jjjjt|d ddo"t jjjjt|d dd p?t jjjjt|d dd o?t jjjjt|d ddV  qdS )r#   Tr   N)rB   r   r   r   r   r   r   rl   r   r   rU   rU   rV   rq   x  s"    


	

z$broadcast_tensors.<locals>.<genexpr>)r   rX   rr   rs   r   r   reducer  r   r   r   r   r   )r  rB  rP  rm   sizesrU   rU   rV   r   n  s   
r   c                 C   s   | S r]   rU   r   rU   rU   rV   nop     r  
lift_freshc                 C   s   t | tsJ |d u rtt| jS t |ttjfr"tj	j
|ntdd |D }tt|  |}tt |ts=|fn|}g }t|  D ]\}}||v r]tj	j
tj|dddsb|| qH||  krnt| |S | S )Nc                 s   s    | ]
}t jj|V  qd S r]   rB   r   r   evaluate_static_shaperl   drU   rU   rV   rq     r   zsqueeze.<locals>.<genexpr>r#   Tr   )rX   r4   r3   r   r;  r   r   r   rB   r   r   r  rs   r   r   r   rt   r   r   r   r   rq  )rm   r   dims	new_shaper  srU   rU   rV   squeeze  s"   
r  c                 C   s   t t| |S r]   )r\  r  )rm   r   rU   rU   rV   squeeze_copy     r  c                 C   2   t | |}t| tsJ t|tsJ |j| _| S r]   )r  rX   r4   r;  rm   r   valrU   rU   rV   squeeze_  
   
r  c                 C   2   t | rt| dtjdS td}t|tjd| S )NFr   isinfr[  r   	full_likerL   r   r/   r9  rm   rJ   rU   rU   rV   r       r  c                 C   r  )NFr   isnanr[  r  r  rU   rU   rV   r    r  r  c                 C   $   t | rt| S td}t|| S )Nceilr   r\  r/   r9  r  rU   rU   rV   r       r  c                 C   r  )Nfloorr  r  rU   rU   rV   r    r  r  c                 C   r  )Nroundr  r  rU   rU   rV   r    s   r  c                 C   r  )Ntruncr  r  rU   rU   rV   r    r  r  c                 C   s   ddl m} t| g\} t| tjrt| t|S t| t	s!J t|t
tfs*J t|  t|kr6| S ||  s]tjjt|  }|dkr]||s]| tjjt||  t	t| jt|S )Nr   )free_unbacked_symbols)%torch.fx.experimental.symbolic_shapesr  r  rX   r&   r.  r+   r   rs   r4   rr   r   rB   r   r   	size_hintr?   
mark_reuser;  )rm   r  r  x_size_productrU   rU   rV   r     s   r   c                 C   sL   t |}|D ]}d||< q| }t|D ]\}}|dkr t||}qt||S Nr  )rr   r   	unsqueezer   )r   shapebroadcast_dimensionsr  broadcast_dimensionr   idxrm   rU   rU   rV   broadcast_in_dim  s   


r  c                 C   s   t | | S r]   )r   r   )rm   r  rU   rU   rV   	expand_as  r  r  c                    sR  t |   tt kr$tjjgtt      t| t  } tt|  ks0J t |  }d}ttD ]}| dkrHd}|| |  ||< q>|r`t|| 	 | 
 dS tdd t D rstt| |S  fdd}tjjt }|dkr| tjjt||  |  tj| 
 | 	 |t |d	S )
NFr   Tr   r   c                 s   s$    | ]\}}|d kp|d kV  qdS r#   NrU   r  rU   rU   rV   rq   3     " zrepeat.<locals>.<genexpr>c                    st   t | t ks
J t| } tt D ]!}| dkr5 | dkr)tjj| |< qt| | d | | |< q| S r  )r   rr   r   r   r   Zeror    )r  r   old_sizerepeatsx_loaderrU   rV   r*  8  s   zrepeat.<locals>.inner_fnr+  )rr   r   r   r   r   r   rq  r   emptyr   r   r   r   r\  r   rB   r   r   r  r?   r  r  r1   r   )rm   r  new_sizezero_tensorr   r*  old_size_productrU   r  rV   repeat!  s8   r  c                 C   s2   t | tsJ t |ttfsJ tt| j|S r]   )rX   r4   rr   rs   r6   r   r;  )rm   r  rU   rU   rV   rq  S  s   rq  c                 C   s6   t | tsJ t |ttfsJ tt| jt|S r]   )rX   r4   rr   rs   r0   r   r;  )rm   r  rU   rU   rV   permute\  s   r              c              	   C   s8   t | tsJ t| |d}ttjj| j|||||dS )Nr   clamp)rX   r4   _validate_dimr&   	SliceViewr   r;  )rm   r   startendstepr  rU   rU   rV   slice_c  s   r  c              	   C   s   t | trt | jtjr| j } |   t| s"td|  dt	| \}}t
|j|jdd |D dd |D t|p@d}ttj||dS )Nzunrealized as_strided(z, ...)c                 S      g | ]}t |qS rU   r   r   rl   r  rU   rU   rV   rn   v      zas_strided.<locals>.<listcomp>c                 S   r  rU   r  r  rU   rU   rV   rn   w  r  r   r;  layout)rX   r4   r;  r&   BaseViewunwrap_viewrF  is_storage_and_layoutrf   as_storage_and_layoutFixedLayoutr   r   r   r   ReinterpretView)rm   r   stridestorage_offsetstorage
old_layout
new_layoutrU   rU   rV   
as_stridedj  s   

r  c                 C   s$   t | tsJ t| |||j| _| S r]   )rX   r4   r  r;  )rm   r   r  r  rU   rU   rV   as_strided_}  s   r  c                 C   s   t | |||}t|S r]   )r  r\  )rm   r   r  r  resultrU   rU   rV   as_strided_copy  s   r  c                    s   g d}D ]} |||    f d d }qdd D  fdd}td  }d d | < tjd  d  ||dS )Nr   r  c                 S   r  rU   r  r  rU   rU   rV   rn     ro   z!pointwise_cat.<locals>.<listcomp>c           
   	      s@  t |  tj}g }g }ttD ]n  dkr t dtjn
t   d tj}t   d tj}t ||}t ||} dkrI|}n td krT|}nt 	||}|
| t| t   d  < |
t | fddd q|d }	ttd ddD ] t |  |  |	}	q|	S )Nr   r#   c                      s     S r]   rU   rU   )r   idx_loadinputs_loadersrU   rV   <lambda>      z1pointwise_cat.<locals>.inner_fn.<locals>.<lambda>        r  r!   )rA   
index_exprrL   int64r   r   constantgeltand_r   rr   r   maskedr  )
r  idx_dimmasksmasked_loadsr  r  
start_condend_condmasknext_valr   r  r  inputs_ranges)r   r  rV   r*    sD   
zpointwise_cat.<locals>.inner_fnr+  )r   r   rr   r1   r   r   r   )r  r   prev_endr   r*  r  rU   r	  rV   pointwise_cat  s   0

r  rM  scaleszero_pointsaxis	quant_min	quant_maxc              	      s   t  dksJ dt  dksJ d|  tjkr%t| tj} |  tjks5J d|    t |  k sHJ dt |   |     f	dd}tj	| 
 ||  dS )	Nr#   expect scales 1 dimexpect zero_points 1 dim<Expecting input to have dtype torch.float32, but got dtype: Expecting axis to be < c           
         s   |   f}| }|}|}t tjd\}}jtjkr(t|tj}jtjkr5t|tj}t|}t|| | }t	|t
||}	t|	S Nr   )_create_constantsrL   float32r   rA   r   r  
reciprocalr  maximumminimum)
r  channel_idxrM  scale
zero_pointqminqmax	inv_scaler  clamped	r  r   input_loaderr  r  r  scales_loaderr  zero_points_loaderrU   rV   r*    s   

z;quantized_decomposed_quantize_per_channel.<locals>.inner_fnr+  )r   r   r   rL   r1  r   r  r  r1   r   r   rM  r  r  r  r  r  r   r*  rU   r#  rV   )quantized_decomposed_quantize_per_channel  s,   
r(  c                    s   t  dksJ dt  dksJ d|  |ks*J d| d|    t |  k s=J dt |   |     fdd}tj|  tj||  d	S )
Nr#   r  r  Expecting input to have dtype , but got dtype: r  c                    st   |   f}| }|}|}j tjkrt|tj}j tjkr+t|tj}tt|tj|| }|S r]   )r   rL   r  rA   r   sub)r  r  rM  r  r  r  r  r$  r  r%  r  r&  rU   rV   r*    s   
z=quantized_decomposed_dequantize_per_channel.<locals>.inner_fnr+  	r   r   r   r  r1   r   r   rL   r  r'  rU   r,  rV   +quantized_decomposed_dequantize_per_channel  s(   r.  r  r  c                    s   |   tjkrt| tj} |   tjksJ d|    |   fdd}tj|   t	j
|t|t|d|  dS )Nr  c           	         sf   | }t d| |tjd\}}t|| | }t tjd\}}tt|||}t| S )N      ?r   )r  rL   r  rA   r  r  r  r   )	r  r  r  rM  r!  r  r  r   r"  r   r$  r  r  rU   rV   r*  D  s   
zBquantized_decomposed_quantize_per_tensor_default.<locals>.inner_fnr  r  r+  )r   rL   r1  r   r  r  r1   r   r   r   r   r  r   r   rM  r  r  r  r  r   r*  rU   r0  rV   0quantized_decomposed_quantize_per_tensor_default1  s   
r3  c                    sh   |   |ksJ d| d|    |    fdd}tj|  tjtj|t	|t
|d|  dS )Nr)  r*  c                    s:    | }t ||tjd\}}tt|tj|| }|S r  )r  rL   r  rA   r+  r   )r  r  r  rM  r  r$  rU   rV   r*  i  s   zDquantized_decomposed_dequantize_per_tensor_default.<locals>.inner_fnr1  r+  )r   r  r1   r   r   rL   r  r   r   r  r   r   r2  rU   r4  rV   2quantized_decomposed_dequantize_per_tensor_defaultX  s   r5  c                    s   |   tjkrt| tj} |   tjksJ d|    t dks9t dkr5 d dks9J dt dksUt dkrQ d dksUJ d|     fdd}tj	| 
  ||  dS )	Nr  r   r#   expect scale as scalar tensor"expect zero_point as scalar tensorc                    s   | }t  dkrdnd}t  dkrdnd}jtjkr-t|tj}jtjkr:t|tj}t|t| | }t	tjd\}}t
t|||}t| S )Nr#   r   rU   r   )r   r   r   rL   r  rA   r   r  r  r  r  r  )r  rM  _scale_zero_pointr  r  r   r"  r   r$  r  r  r  scale_loaderr  zero_point_loaderrU   rV   r*    s   zAquantized_decomposed_quantize_per_tensor_tensor.<locals>.inner_fnr+  )r   rL   r1  r   r  r   r   r  r1   r   r   r2  rU   r;  rV   /quantized_decomposed_quantize_per_tensor_tensory  s.   ""r>  c                    s   t  dkst  dkr d dksJ dt  dks8t  dkr4 d dks8J d|  |ksJJ d| d|   |      fdd}tj|  tj||  d	S )
Nr   r#   r6  r7  r)  r*  c                    s    | }t  dkrdnd}t  dkrdnd}jtjkr-t|tj}jtjkr:t|tj}tt|tj|| }|S )Nr#   r8  rU   )r   r   r   rL   r  rA   r   r+  )r  rM  r9  r:  r  r$  r  r<  r  r=  rU   rV   r*    s   zCquantized_decomposed_dequantize_per_tensor_tensor.<locals>.inner_fnr+  r-  r2  rU   r?  rV   1quantized_decomposed_dequantize_per_tensor_tensor  s*   ""r@  c           
         s4  | d   jdk}|r:tdd | D r:| D ]}|  qtdd | D r1ttjg| R  \} }ttjj| |S t	| dkrFt
| d S t| d |d}t| dtjifdd	| D } d
tttjf dtjfdddd fddtfdd| D }dtffddtjrt| |S |rttj| |S fddd}d dtjjfddt	| |kst	| tjkrt fdd| D rtfddtj j!D }tfdd| D o|}tfdd| D otfd d| D  }	|s|	r|st| |S ttj| |S )!Nr   cpuc                 s   s$    | ]}|  tjtjfv V  qd S r]   )r   rL   int8uint8rl   rM  rU   rU   rV   rq     s    
zcat.<locals>.<genexpr>c                 s   s     | ]}t | d kV  qdS )r|   N)r   r   rD  rU   rU   rV   rq         r#   r   c                    s   g | ]}t | qS rU   r   r  r   rU   rV   rn     r  zcat.<locals>.<listcomp>rm   rK   c                 S   s>   t | trt | jtjr| j S | jS t | tjr| jS | S r]   )rX   r4   r;  r&   r  r  
StorageBoxr   rU   rU   rV   unwrap_tensor  s   

zcat.<locals>.unwrap_tensorc                 S   s   t | tjot | jtjS r]   )rX   r&   ComputedBufferr;  r2   r>  rU   rU   rV   is_reduction     zcat.<locals>.is_reductionc                    sJ   t | ttjfr | S | p$t | tjo$t fdd|  D S )Nc                 3   s     | ]} t j|V  qd S r]   )rB   r   
get_buffer)rl   readcan_fuse_reductionrU   rV   rq     s
    
z2cat.<locals>.can_fuse_reduction.<locals>.<genexpr>)rX   r4   r&   rG  r1   r   get_read_namesrJ  )rP  rK  rH  rU   rV   rP    s   zcat.<locals>.can_fuse_reductionc                 3       | ]} |V  qd S r]   rU   rl   r>  rO  rU   rV   rq     r   c                    sZ   t | rt j| dd\}}t j| S t| tt jfr# | S t| t jr+dS dS )NF)freezeT)	r&   r  r  ConcatKernelcan_realize_into_without_copyrX   r4   rG  r1   )rm   r  r   )should_lower_cat_inputrH  rU   rV   rW  
  s   
z#cat.<locals>.should_lower_cat_inputc                    s\   t | ttjfr | S t | tjsdS |  j}|  D ]}| tj	
|7 }q|S r  )rX   r4   r&   rG  r1   inner_fn_opcountnum_opsrQ  rB   r   rM  )rm   countrN  )op_countrH  rU   rV   r[  !  s   
zcat.<locals>.op_countr   r!   r   c                 S   s   | t jjt jjfv S r]   )rp  catrh  constant_pad_ndr   rU   rU   rV   additional_pointwise_ops6     z%cat.<locals>.additional_pointwise_opsc                 3   s    | ]	}| kV  qd S r]   rU   rS  )MAX_SIMPLE_OP_COUNTr[  rU   rV   rq   ;      c                 3   s    | ]}t | V  qd S r]   )r;   )rl   use)r_  rU   rV   rq   =  s
    
c                 3   rR  r]   rU   r  rW  rU   rV   rq   D  r   c                 3   rR  r]   rU   r  rd  rU   rV   rq   I  r   c                 3   rR  r]   rU   rS  rO  rU   rV   rq   K  r   )"r   r-  r   rF  require_channels_lastrp  r\  re  rh  r   r\  r  r   r   r	  r   r4   r&   rG  r-   r   r   r$   force_pointwise_catr  rU  r   rL   rY   rZ   max_pointwise_cat_inputsrB   r  rA  )
r  r   
cpu_devicerM  r   fusable_reductionMAX_COMPLEX_POINTWISE_CATpointwise_usesfuse_pointwise_usehorizontal_fuse_catrU   )ra  r_  rP  r   rK  r[  rW  rH  rV   r\    s`   



r\  offsetdim1dim2c                    s  |   ttdtdtkfdd tjjt	|d}|rBtjj
tjj |  d}ntjj
tjj  | d}d |r`| df nd|f fddtD }||  fdd	}ttj| ||S )
N)r  rankc                      s   d  d S )Nz(diagonal dimensions cannot be identical z, rU   rU   ro  rp  rU   rV   r  Z      zdiagonal.<locals>.<lambda>r   )r   r   c                    s    g | ]\}}| fvr|qS rU   rU   )rl   r   r  rr  rU   rV   rn   s       zdiagonal.<locals>.<listcomp>c                    s   | d }dgt  }d}tD ]&}|kr | d  ||< q|kr-| d  ||< q| | ||< |d7 }q|t d ksBJ |S )Nr  r   r#   r!   )r   r   )r  diag_idxoriginal_idxcur_dimr  base_idxro  rp  num_dimsoriginal_shaperU   rV   	reindexerv  s   
zdiagonal.<locals>.reindexer)r   r   r   r   rB   r   r   r   r   Ltevaluate_maxevaluate_minr   r   r4   r&   GenericViewr   )rM  rn  ro  rp  offset_negative	diag_sizer  r|  rU   rx  rV   diagonalR  s:   
r  c                 C   s   t t| |||S r]   )r\  r  )rM  rn  ro  rp  rU   rU   rV   diagonal_copy     r  c                 C   $   t | }t||||}t|| |S r]   )r\  r  	mutate_to)rM  srcrn  ro  rp  r  rB  rU   rU   rV   diagonal_scatter     
r  c                 C   s,   t ||  | }tt| |||d |S r  )r6   handle_negative_indexr   r  r  )rm   r   r  rU   rU   rV   select  s   r  c           
   
   C   s   t | |d}|}t|ttfs2|  | }tjjt	|| d |}|g| }||d |  |d< g }d}|D ]}|| }	|
t| |||	dd |	}q8|S )Nr   r#   r  Fr  )r  rX   rr   rs   r   rB   r   r   r  r   r   r  )
rm   r  r   sizes_x_sizechunksr  r  r   r  rU   rU   rV   split  s    
r  c                 C   s   t | ||S r]   )r  )rm   r  r   rU   rU   rV   split_with_sizes     r  c                    s>   t  d tjj   } fddt|D }|S )Nr   c                    s   g | ]}t  |qS rU   )r  r   r   rm   rU   rV   rn         zunbind.<locals>.<listcomp>)r  rB   r   r   r  r   r   )rm   r   r  r  rU   r  rV   unbind  s   r  c                    s   |   }t|}t|| |dkrtt| d|dS |  }tjj}||| |	d t
|| d }||dkrK| |t|| | g |d   || d d  |}	 fdd}
ttj| |	|
S )Nr   )r  r#   c                    s:   | d |     }g | d   ||  d d R S )Nr  r#   rU   )r  dim_idxr   r  rU   rV   r|    s   &zunfold.<locals>.reindexer)r   r   r   r  r  rB   r   r   	guard_leqguard_ltr   r  r  r   r4   r&   r  r   )rm   	dimensionr   r  r  ndimdim_sizer   new_dim_sizeout_sizer|  rU   r  rV   unfold  s   
(r  c                 C   s2   t | |d}t|  }||tjj t| |S r  )r  rr   r   insertr   r   r   rq  )rm   r   r  rU   rU   rV   r    s   
r  c                 C   r  r]   )r  rX   r4   r;  r  rU   rU   rV   
unsqueeze_  r  r  c                 C   sZ   t jjjt|}t|  }|dk r||| 7 }d|  kr(|| k s+J  J |S r  )	rB   r   r   r   r   r   sympifyr   r   )rm   r   rn  r  rU   rU   rV   r    s    r  r  c                 C   sT   t | |d}tjj|  | d }t| |d|}t| |||d }t|t|S )Nr   r!   )	r  rB   r   r   r  r   r  r,  sigmoid)rm   r   new_lenr   r   rU   rU   rV   glu  s
   r  c                    s   |rt    fdd}|S )Nc                     s*   dd }t |tjj g| R i |S )Nc                 S   s   t | tjrt| S | S r]   )rX   r&   r-   r4   r   r   rU   rU   rV   wrap_tensors
  rL  z7fallback_handler.<locals>.handler.<locals>.wrap_tensors)pytreetree_mapr&   FallbackKernelr   )r   r   r  kernelrU   rV   handler	  s   z!fallback_handler.<locals>.handler)rE   ru   )r  r`  r  rU   r  rV   re    s   
re  c                   C      t d d S )NzjTorchinductor does not support code generation for complex operators. Performance may be worse than eager.)warningswarnrU   rU   rU   rV   _warn_complex_not_supported  s   r  r>  c                 C   s<   |   r|r|jtjjjjtjjjj	fv rdS t
  dS dS )z0Do not support reading or writing to this tensorFT)rb  rB  rL   rA   rp  rq  r   rf  rg  rh  r  r>  parentrU   rU   rV   unsupported_input_tensor  s   

r  c                 C   s   t | |rdS | jotjS )z2Do not support writing tensor but can read from itT)r  is_cpur$   disable_cpp_codegenr  rU   rU   rV   unsupported_output_tensor+  s   
r  rJ  c                 C   sh   | j tjju r	dS | j tjju rdS dd }tj| ji | jD ]}||| ddr, dS q || | ddS )NFc                 S   sp   t | tjjs	dS d| jvrdS t| jd D ]}t |tjjs"q|r-t	||r, dS qt
||r5 dS qdS )NFr  T)rX   rL   fxNoder/  r  tree_leaves_subclasses
FakeTensorr  r  )rJ  r  	is_outputr/  rU   rU   rV   check_skip_condition;  s   


zCfallback_node_due_to_unsupported_type.<locals>.check_skip_condition)r  T)	rB  rp  view_as_complexrh  lift_fresh_copyr  arg_tree_leavesr   r   )rJ  allow_cpu_inputsr  r   rU   rU   rV   %fallback_node_due_to_unsupported_type2  s   r  c                    s   | t vs|sJ d|  |r>ttdr>t| gr>tjr%| tjj	j
v s>|s>tjjjr6dtjj_td td|  d fdd}t| tjjr]|  D ]}t| |}|| qOd S t| tjjtjjfrn||  d S td	|  d
t|  )Nz*both a fallback and a decomp for same op: CIFznA make_fallback error occurred in suppress_errors config, and suppress_errors is being disabled to surface it.zmake_fallback(a.  ): a decomposition exists, we should switch to it. To fix this error, either add a decomposition to core_aten_decompositions (preferred) or inductor_decompositions, and delete the corresponding `make_fallback` line. Get help from the inductor team if unsure, don't pick arbitrarily to unblock yourself.c                    s.   t |   d urt|   t| d dt| S Nr^  )rj   rz   r   re  )op_overloadlayout_constraintrU   rV   register_fallbackw  s   

z(make_fallback.<locals>.register_fallbackzUnsupported fallback z with type )r(   r   osgetenvr)   r$   fallback_randomrL   _decompdecompositions_for_rngextra_random_decomps_dynamosuppress_errorslogwarningrR   rX   rY   rv   rx   rb   rZ   HigherOrderOperatorRuntimeErrorr-  )r   r  r  override_decompr  olr  rU   r  rV   make_fallbackW  s>   




r  c                 C   s$   d}| D ]}|| }qt |tjdS )z
    TorchInductor offset calculation differs from PyTorch eager offset
    calculation for random ops (tl.rand vs torch.rand). In future, we should
    strive for same impl for tl.rand and torch.rand.
    r#   r   tensorrL   r  )r  numelr  rU   rU   rV   philox_rand_offset  s   
r  c           	         sd   t | | t j|  | |  fdd}tj| |t| d}t	| }||fS )Nc                    sV   t g tj}t g tj}t t | tj|}t ||}t | S r]   )rA   r   rL   r  ru   r  rand)r  seed_index_exproffset_index_exprrand_index_exprr  r   offset_loader
random_posseed_loaderrU   rV   r*    s   zphilox_rand.<locals>.inner_fnr+  )
r&   r  FlexibleLayoutcontiguous_stridesmake_indexerr  r1   r   rr   r  )	r   seedrn  r  r   r   r*  random_values_nodeoffset_noderU   r  rV   philox_rand  s&   
r  c              	   C   s.   t jrttjtjtj	j
| ||S td)Nz&should be handled in replace_random.py)r$   r  r  r  r4   r   r&   r  rp  native_dropoutrh  rR   )rm   ptrainrU   rU   rV   r    s   r  c                 G   sj   t js|  tdksJ d|   t|dks!t|d tr%t	j
jnt	j
j}tj|| g|R   | S )NrA  Tthis should be handled in decomps unless config.fallback_random or the device is CPUr   )r$   r  r   rL   r   rF  r   rX   r  rp  
bernoulli_Tensorr&   InplaceBernoulliFallback)rm   r   r  rU   rU   rV   r    s   r  c                 G   s4   t js|  tdksJ dtt| g|R  S )NrA  r  )r$   r  r   rL   r   r  r\  )rm   r   rU   rU   rV   bernoulli_p  s   r  c                 C   s   t r]   rR   r   rU   rU   rV   _foobar  r  r  c                 C   r  )Nz1using triton random, expect difference from eager)r  info)saltrU   rU   rV   _warn_triton_random  r  r  c                   C   s   t tjj d S r]   )r  rB   r   creation_timerU   rU   rU   rV   warn_triton_random  r~  r  c                  O   F   | dd d urt| i |S tjr|dd  t| i |S tdN	generatorz-should have been handled in replace_random.py)r0  fallback_rand_generatorr$   r  popfallback_rand_defaultrR   r  rU   rU   rV   r       r  c                  O   r  r  )r0  fallback_randn_generatorr$   r  r  fallback_randn_defaultrR   r  rU   rU   rV   randn  r  r  c                 C   s   t |}t j| |S r]   )r&   get_stride_orderExternKernelrequire_stride_order)input_tensorr  stride_orderrU   rU   rV   inductor_force_stride_order	  s   
r
  c                 C      t d)Nz.should be handled in fuse_seed_creation_pass()r  )r   rU   rU   rV   inductor_seed     r  c                 C   s   t   tt| t|S r]   )r  r4   r   r&   RandomSeedsr8   )rZ  r   rU   rU   rV   inductor_seeds  s   r  c                    s(    fdd}t j  |g dS )Nc                    s   t   S r]   )rA   	load_seedget_namer  r  seedsrU   rV   r*    rZ  z&inductor_lookup_seed.<locals>.inner_fnr+  )r1   r   r   r   )r  r  r*  rU   r  rV   inductor_lookup_seed  s   r  rn  r   r  modec                   s   t jrJ  dv sJ g | } tj}| }tj||| tj| |d	 |
  fdd}tj|||g | d}|  |S )N)r  r  r  c                    s"   t t g t| tjS r]   )rb   rA   r  rL   r  r  r  r  r  rU   rV   r*  3  s   z!inductor_random.<locals>.inner_fnr+  )r$   r  rL   r  r  r&   r  r  r  r  r  r1   r   rF  )r   r  r  rn  r   r   r*  r  rU   r  rV   inductor_random'  s(   
r  lowhighc                   sp   t jrJ g |}tj}| }tj|||tj||d	 |
  fdd}tj|||g |dS )Nr  c              	      s6   t g t | tjt tjt  tjS r]   )rA   	randint64r  rL   r  r  r  r  r  r  r  rU   rV   r*  P  s   z"inductor_randint.<locals>.inner_fnr+  )r$   r  rL   r  r  r&   r  r  r  r  r  r1   r   )r  r  r   r  rn  r   r   r*  rU   r  rV   inductor_randintC  s"   
r  tbc                 C   s4   |   |  d |  d |  d  |  d fS Nr  r   )r  r   
get_strider  rU   rU   rV   _boundaries_helper`  s
   

r"  c                 C   s   |   |  d fS r  )r  r   r!  rU   rU   rV   _sorter_helperi  r`  r#  	out_int32rightsidesortersorted_sequenceselfr%  r&  r'  r(  c          	         s   dd }|r||rd ur$|s$t tjjdd|||dS |d ur.|dkr.d|r3tjntj |   d urF  t	
 dkrY fd	d
}n
 fdd
}| }tj| ||jdS )Nc                 S   s   t j| tjS r]   )rB   r   rC  rD  	BUCKETIZEr!  rU   rU   rV   r  w  s    zsearchsorted.<locals>.<lambda>Fr_  r$  r&  Tr#   c              	      sD   | }t j|td d u rd ntd u rd dS ddS )Nr   r(  sorter_indices)rA   	bucketizer"  r#  )r  r  index_dtyper&  r)  r(  values_loaderrU   rV   r*    s   
zsearchsorted.<locals>.inner_fnc              	      s`    }dt f fdd}tj|t|d u rd ntd u r*d dS |dS )Nr  c                    s>   |   }tttjdd t|d d  d d D S )Nc                 s   s    | ]	\}}|| V  qd S r]   rU   )rl   r  r   rU   rU   rV   rq     rb  zNsearchsorted.<locals>.inner_fn.<locals>.get_flattened_index.<locals>.<genexpr>r  )r   rA   r  r   r  operatorru   r   )r  strides)r  r0  rU   rV   get_flattened_index  s   &z;searchsorted.<locals>.inner_fn.<locals>.get_flattened_indexr,  )r4   rA   r.  r"  r#  )r  r  r4  r/  r  rV   r*    s   	
r+  )re  rp  searchsortedr  rL   r  r  r  rF  r   r   r   r1   r   r  )	r)  r*  r%  r&  r'  r(  validate_bucketizer*  r   rU   r/  rV   r6  m  s>   
r6  r%  r&  
boundariesc                   s   t   dks
J tj| tjrtj tjs(ttj	j
dd|  |dS    |  }|  |r9tjntj fdd}tj|||  dS )Nr#   Fr_  r8  c                    s"   | }t |t d}|S r  )rA   r.  r"  )r  r  r  r9  r0  r$  r&  rU   rV   r*    s   zbucketize.<locals>.inner_fnr+  )r   r   rB   r   rC  rD  r+  re  rp  r.  r  rF  r   r  rL   r  r  r1   r   )rM  r9  r%  r&  r   r*  rU   r:  rV   r.    s&   r.  c                 O   $   t tjtjj||f\}}||fS r]   )r  tree_map_onlyr&   r-   r  require_stride1r   r   r   rU   rU   rV   require_dense     r?  c                 O   r;  r]   )r  r<  r&   r-   r  require_contiguousr>  rU   rU   rV   rA  	  r@  rA  c                 O   r;  r]   )r  r<  r&   r-   r  re  r>  rU   rU   rV   re  		  r@  re  c                    sJ    fdd t  fddt|jD } fdd| D }||fS )Nc                    s^   t  tjrtjd  tjjj	}tj
 |S t  tr- fdd  D S  S )Nr  c                    s    i | ]}| | | qS rU   rU   )rl   key)apply_constraintr   fx_argrU   rV   r   	  rt  zEconstrain_to_fx_strides.<locals>.apply_constraint.<locals>.<dictcomp>)rX   r&   r-   r  r/  r  rB   r   r   r   r  r  r   r   )r   rD  r	  rC  )r   rD  rV   rC  	  s   
z1constrain_to_fx_strides.<locals>.apply_constraintc                 3   s    | ]
\}} ||V  qd S r]   rU   )rl   r   rD  rE  rU   rV   rq   	      
z*constrain_to_fx_strides.<locals>.<genexpr>c                    s"   i | ]\}}| |j | qS rU   r   r   rC  fx_noderU   rV   r   	  s   " z+constrain_to_fx_strides.<locals>.<dictcomp>)rs   r   r   r   rH  r   r   rU   rG  rV   rP   	  s   

rP   c                    sN   fdd t  fddtt|jD } fdd| D }||fS )Nc                    sJ  t |tjs|S |jd }| }t|}|r*|d dkr*tttt	|
 }jtjjkr?| dv r?t	|dks=J d}|jsItj||S d t |tsRJ t	|
 dvr\|S  fd	d
}t |tr|| d ur|||r|tjtj||S  fdd}t |jtjr||s|| rtjtj||S tj||S )Nr  r  r   )r   r}   r|   )r{   r#   r!   r   r   r{   r|   c                    sd   t  fddtt  d D }tjj  d dkp-tjj  d dk}|o1|S )Nc                 3   s.    | ]}t jj |   d kV  qdS r   N)rB   r   r   r  r   r   )	ALIGNMENTrm   rU   rV   rq   J	  s
    
z`sdpa_constraint.<locals>.apply_constraint.<locals>.is_aligned_realized_tensor.<locals>.<genexpr>r#   r  )	r   r   r   r   rB   r   r   r  r   )rm   aligned_stridesaligned_last_dimrL  r   rV   is_aligned_realized_tensorI	  s   zMsdpa_constraint.<locals>.apply_constraint.<locals>.is_aligned_realized_tensorc                    s   t jj|  d   dkS r  )rB   r   r   r  r   r   rO  rU   rV   
is_aligned^	  s   z=sdpa_constraint.<locals>.apply_constraint.<locals>.is_aligned)rX   r&   r-   r/  r  r  rr   r   r   r   r   rB  rp  0_scaled_dot_product_efficient_attention_backwardrh  is_cudar  r  r4   maybe_get_striderB   r   try_match_insignificant_stridesrealize_inputr;  r  r  )r  r   rD  meta_valmeta_strider	  rP  rQ  )rH  rO  rV   rC  %	  sH   

z)sdpa_constraint.<locals>.apply_constraintc                 3   s$    | ]\}\}} |||V  qd S r]   rU   )rl   r  r   rD  rE  rU   rV   rq   j	  s
    


z"sdpa_constraint.<locals>.<genexpr>c                    s$   i | ]\}}| d |j | qS r  r   r   rG  rU   rV   r   n	  s   $ z#sdpa_constraint.<locals>.<dictcomp>)rs   r   r   r   r   rI  rU   rG  rV   sdpa_constraint"	  s   ErZ  )r  c                 C   sn   |}|   |  krt||   }|  | kr t||  }|  | kr3t||  }t|S t|S r]   )r   r|  r   r   r   r   r\  )r*  r  rz  rm   r   rU   rU   rV   r]  4
  s   r]  )memory_formatc                C   s&   t j|  |  |  t|  dS Nr+  )r1   r   r   r   r  rr   r   )rm   r[  rU   rU   rV   r\  B
  s   
r\  c                 C   s   g }t | tr+t | jtjr+| j} t | tjr'||   | j} t | tjst| } t| } |rI| j} |d d d D ]	}tj| |d} q;t| } | S )Nr  r  )rX   r4   r;  r&   r  r   
get_layoutr\  )rm   reinterpret_view_layoutsr  rU   rU   rV   clone_preserve_reinterpret_viewM
  s   r_  r  c                   s(    fdd}t jt| || gdS )Nc                    s   t j| d    dS )Nr   r   rA   r  r  r   r  r  rU   rV   rJ   o
  rL  ziota.<locals>.fnr+  )r1   r   r8   )lengthr  r  r   r   requires_gradrJ   rU   ra  rV   iotae
  s   
rd  r   r  c                    s   |   |  ks
J |  t|  d tjjtdr'| 	    tjj
d tjj| 	    tt| | 	 }|  fdd}tj|  |   |t| 	 dS )Nr   c              	      s6   t t t |   tjt tj| | S r]   )rA   r  eqr  rL   r  r5  r   r  
src_loaderr  rU   rV   r*  
  s   z select_scatter.<locals>.inner_fnr+  )r   r  r  rB   r   r   r   r   r}  r   r  r  r   r  r1   r   r   rr   )rm   r  r   r  r*  rU   rf  rV   select_scatterz
  s    

rh  c                    s     |  ks
J  t d    tj \t }t d  | < t	||}|  fdd}t
j   |t dS )Nr   r#   c              
      s2  dkrkrdkr| S t |  tj}t|  t|    < g }dkr?|t |t t	tj krT|t 
|t t	tj dkrs|t t t|   dtjt dtj |swJ tt j|}t | fddtrdnd}t ||| S )Nr   r#   c                          S r]   rU   rU   )src_idxrg  rU   rV   r  
      z1slice_scatter.<locals>.inner_fn.<locals>.<lambda>r  )rA   r  rL   r  rr   r   r   r  r   r   r  re  r    r  r   r  r   r  r   r  )r  r  r  src_valr   r  r  rg  r  r  rm   r  )rj  rV   r*  
  sR   zslice_scatter.<locals>.inner_fnr+  )r   r  r  r   r&   r  normalize_start_endrr   r   r   r1   r   r   )rm   r  r   r  r  r  src_sizer*  rU   rm  rV   slice_scatter
  s    
.
rp  c                 C   s*   t | ttfrt| dkrt| d S | S r  )rX   rr   rs   r   _unwrapr   rU   rU   rV   rq  
  s   rq  r   r   r  
pin_memoryc                   s  t |d tjfv d|  t | d tt tr ptjnp%t g }t tj	r6 fdd}nBt t
tfrE fdd}n3t dksZt d t
tfrlt dkrl|tt   fdd}ntjtj |d	S tjt|||d
S )Nlayout=rs  c                       t  S r]   r`  r  r;  r   rU   rV   r*  
     ztensor.<locals>.inner_fnc                    ru  r]   rA   r  r  rv  rU   rV   r*  
  rw  r   r   c                    s8    fdd t dkrtdS  dt S )Nc              	      sr   | |k sJ ||  dkrt |  S ||  d |  }t t t d tjt |tj | | ||S )Nr#   r!   r   )rA   r  r  r  r  rL   r  )r  r  mid)binary_searchr;  r   r  rU   rV   rz  
  s   z/tensor.<locals>.inner_fn.<locals>.binary_searchr   )r   rA   r  r  rv  )rz  r  rV   r*  
  s   r  r+  )ri   rL   stridedrX   rq  r   r  get_default_dtyper   r   r  r   r   IntegerrB   r   add_tensor_constantr  r1   r   r8   )r;  r   r   r  rs  r)  r*  rU   rv  rV   r  
  s,   *r  c                 C   s@   t | tr|d urt| |} |d urt| |} | S t| ||dS )Nr  )rX   r4   r   r|  r  )r;  r   r   rU   rU   rV   	as_tensor  s   


r  c                 C   s   t | tjdS r  r  r;  rU   rU   rV   long_tensor&  r  r  c                 C   s   ddl m} |tjjjtjjjd }|d usJ t|dks#J |t	t
| \}}t||| }tj||_tj| tjjjd }t|tjtjtjfrW|jjS t|S )Nr   )resolve_unbacked_bindingsunbacked_bindingsr#   r  )r  r  rB   r   r   r   r  r/  r   r
  iterr   r&   DynamicScalarregister_bufferr   register_operationrX   rL   SymIntSymFloatSymBoolrJ  exprr   r  )r;  r  r  binding_symkeypathbufferr  rU   rU   rV   _local_scalar_dense+  s   
r  c                 C      d S r]   rU   )r;  rh   rU   rU   rV   _assert_scalarZ  s   r  c                    s   | t | ttfstdrjt ttfr  fdd}n"t tjr. fdd}nt dks8J 	 fdd}t
j| |t|dS )Nr   c                       t  S r]   rx  r  r   r   rU   rV   r*  j  rw  z_full.<locals>.inner_fnc                    r  r]   r`  r  r  rU   rV   r*  o  rw  r   c                    s    g S r]   rU   r  )value_loaderrU   rV   r*  v  s   r+  )rX   r   r  r   r   r   r   r   r   r  r1   r   rr   )
fill_valuer   r   r   r*  rU   )r   r   r  rV   _fullc  s    r  c                 K   s   t t|| fi |S r]   create_tensor_liketensor_constructor)rm   r  r   rU   rU   rV   r    s   r  c                    s    d d d d dd d fdd
}|S )NF)namesr   r   r  rs  r[  c                    s   t | d u d t |d tjfv d|  t | d t|}|p#t }t|dkr;t|d tttj	fr;t|d }|D ]
}t|tj
rGJ q=dd |D }t |||S )Nnamed tensorsrt  rs  r#   r   c                 S   r  rU   r  r  rU   rU   rV   rn     r  z5tensor_constructor.<locals>.inner.<locals>.<listcomp>)ri   rL   r{  r8   r|  r   rX   rr   rs   Sizer  r  )r  r   r   r  rs  r[  r   r  r  rU   rV   r8    s   	"z!tensor_constructor.<locals>.innerrU   )r  r8  rU   r  rV   r    s   r  )r  r   r  r   rs  r[  c                 G   sX   t | d u d t|}t|dkr"t|d tttjfr"t|d }t|d ||||dS )Nr  r#   r   r   r  r   rs  )	ri   r8   r   rX   rr   rs   rL   r  empty_strided)r  r   r  r   rs  r[  r   rU   rU   rV   r    s   
"r  c                    s   dddddd fdd
}|S )zZ
    Shim to convert X_like(...) into X(...).  For example zeros_like() into zeros().
    NF)r   r   r  rs  r[  c                   sj   t | d t |d tjfv d|  |d u r|  }nt|}|p%|  }t|  } |||||dS )Nrs  rt  rr  )ri   rL   r{  r   r   r   rr   r   )rm   r   r   r  rs  r[  r   creation_fnrU   rV   _constant_like  s   

z*create_tensor_like.<locals>._constant_likerU   )r  r  rU   r  rV   r    s   
r  c                 C   s   t t| S r]   r  r  rU   rU   rV   r    rw  r  c                    s   d d d d d fdd
}|S )Nr  c                   st   t |ttfs	J t| d t|d tjfv d|  t|p#|  }|p)|  }dd |D }t	 t
|||S )Nrs  rt  c                 S   r  rU   )r   r}  r  rU   rU   rV   rn     r  z7new_constant.<locals>._new_constant.<locals>.<listcomp>)rX   rr   rs   ri   rL   r{  r   r   r   r  r8   rm   r   r   r  r   rs  r  rU   rV   _new_constant  s   z#new_constant.<locals>._new_constantrU   )r  r  rU   r  rV   new_constant  s   r  r  c                C   s8   |d u r|   }|d u r|  }t|d ||t||dS Nr  r   r   r  r8   r  rU   rU   rV   	new_empty  s   r  c                C   s  t | ttfs	J t |tttd fsJ t| d t|d tjfv d|  t|p/t }|p7t	dj
}t|}td||| d}|  |jj}tj|jdgt|  d|_t |tjsbJ dd | D } |rrd	d |D ntj| }tj||| |d
|_|S )Nrs  rt  r  r   )r  r   r   r   )r)  c                 S   r  rU   r  r  rU   rU   rV   rn     r  z!empty_strided.<locals>.<listcomp>c                 S   r  rU   r  r  rU   rU   rV   rn     r  )r   r   r   r  )rX   rr   rs   r-  ri   rL   r{  r   r|  r  r   r8   r  rF  r;  dataclassesreplacer   r&   rI  r  r  r  r  )r   r  r   r  r   rs  	pointwiser  rU   rU   rV   r    s0   
r  c                C   s8   |d u r|   }|d u r|  }t||||t||dS r  r  )rm   r   r  r   r  r   rs  rU   rU   rV   new_empty_strided  s   r  c                 C   s2   dd |D }t tt||jd}tj| |S )Nc                 S      g | ]	}t jj|qS rU   )rB   r   r   r  r  rU   rU   rV   rn   (  r   z copy_strided.<locals>.<listcomp>)rB  )sortedr   r   __getitem__r&   r  r  )rm   r  r	  rU   rU   rV   copy_strided&  s   r  c                 K   s*   | dd usJ dt|| fi |S )Nr   z(dtype should be handled by decomposition)r0  r  )r   r  r   rU   rU   rV   full-  s   r  c                    s   t | tsJ | dkrt| | S | tjksJ |  tdk}t	|  | |r8t
| dg} dg|  |  fdd}tj|  |  || dS )Nr   r#   c                    sF   t | } t|   }t| dkr|g} | S ||  < | S r  )rr   rA   indirect_indexingr   )r  
gather_idxr   index_loaderr   r  rU   rV   rJ   H  s   zgather.<locals>.fnr+  )rX   r4   	get_numelr  r   r   rL   r  r   r  r   r  r1   r   r   )rm   r   r  sparse_gradrn  rJ   rU   r  rV   gather3  s&   	r  c                    s   |rJ t | tsJ t |tsJ dt| v sJ |  |  t| |  g | dd   fdd}tj| 	 |  |dS )Nr   r#   c                    s\   t | t ksJ |  d  | d  }t|d gg | d   }|S )Nz != r   )r   rA   r  )r  	var_index
weight_idxindices_loaderindices_ndimr  weight_loaderweight_sizerU   rV   rJ   f  s   "
zembedding.<locals>.fnr+  )
rX   r4   strr   r  r   r   r1   r   r   )weightr  padding_idxscale_grad_by_freqsparserJ   rU   r  rV   	embeddingY  s    r  c                    s   t dd  D sJ ddd  D  tdd  D r"tddd t D }t|d	ks5J d
d gt  }t|t fdd|D  D ]\}}| |krXtd|||< qJ||fS )Nc                 s   s4    | ]}|d ur|  tjtjtjtjfv V  qd S r]   )r   rL   r  r  r   rC  r   rU   rU   rV   rq   w  s    z.check_and_broadcast_indices.<locals>.<genexpr>z)indices must be int64, byte or bool. Got c                 S   s   g | ]
}|d ur|  qS r]   r   r   rU   rU   rV   rn   {      z/check_and_broadcast_indices.<locals>.<listcomp>c                 s   s,    | ]}|d ur|  tjtjfv V  qd S r]   )r   rL   r   rC  r   rU   rU   rV   rq   |  s    "zFallback for bool indicesc                 S   r   rU   r   r   rU   rU   rV   rn     r   r   z"requires at least 1 non-None indexc                    r  rU   rU   r   r  rU   rV   rn     ro   z.Fallback when indices is on a different device)r   r   rf   r   r   r   r   r   )r  r   
valid_idxsnew_indicesr   rm   rU   r  rV   check_and_broadcast_indicesv  s"   
$
r  c	              
      s   dt dd  D ]\}	}
|
|	 dkrdq
fddtD g 
tt d  d }r> nd |  |d    	f
dd}|fS )	NFr#   Tc                    s    g | ]\}}|d u r | qS r]   rU   )rl   r   r  r  rU   rV   rn     rt  z2index_output_size_and_inner_fn.<locals>.<listcomp>r   c           	   	      s  t | t ks
J t t ksJ t }g }d }r"dn|}d}td d D ]F}||kr8||7 }| d u rR|t | k sFJ || |  |d7 }q.| }|d us\J | }|tj|| |||  | d q.g || |d  }	d u r|S 	|S )Nr   r  r#   r   wrap_neg)r   r   r   rA   r  )	r  rq  	new_indexfirst_tensor_indexstart_offsetnext_idxr   loaderr   )
r   indexed_sizer  indices_loadersnon_consecutive_tensorsoutput_sizetensor_indicestensor_sizer  r  rU   rV   rJ     s>   

z*index_output_size_and_inner_fn.<locals>.fn)r   r   r   )r  r  r  r  r  r  r  r   r  previouscurrentr  rJ   rU   )r   r  r  r  r  r  r  r  r  r  r  rV   index_output_size_and_inner_fn  s$    


"r  c                 C   s,   t | ||\}}}tj|  |  ||dS r\  )index_impl_helperr1   r   r   r   )rm   r  r   r  r*  r   rU   rU   rV   
index_impl  s   r  c           
         s   t ttfs	J |  t|  \}t|dks J ddd D }t|d   }|  fddttD }|rQd|v rQd|vrQt	dfddttD }t
||||d ||d	\}  fd	d
}	||	 fS )Nr   z Must have at least one valid idxc                 S       g | ]}|d ur|  nd qS r]   r  r   rU   rU   rV   rn     rt  z%index_impl_helper.<locals>.<listcomp>c                    s    g | ]} | d ur| qS r]   rU   r   )r  r  rU   rV   rn     rt  z0index is out of bounds for dimension with size 0c                    r  rU   rU   r   r  rU   rV   rn     ro   r  c                    s    | S r]   rU   r5  )index_inner_fnr  rU   rV   r*    rw  z#index_impl_helper.<locals>.inner_fn)rX   rr   rs   r  r  r   r   r   r   
IndexErrorr  )
rm   r  r   r  r  r  r  r  r  r*  rU   )r  r  r  r  rV   r    s0   

r  c                 C   sB   zt | |ddW S  ty    |   ttjjdd| | Y S w )NTr   Fr_  )r  rf   rF  re  rp  r  r  rm   r  rU   rU   rV   r    s   c                 C   s   t | |ddS NFr  )r  r  rU   rU   rV   _unsafe_index  r  r  c                 C      t t| |||S r]   )
index_put_r\  rm   r  r   
accumulaterU   rU   rV   	index_put   r  r  c                 C   s   t t| |||ddS r  )index_put_impl_r\  r  rU   rU   rV   _unsafe_index_put%  s   r  c                 C   sB   |  |   krt||   }|rt| |}t| t|d || S r  )r   r|  ru   r  r  )r*  r  r   r  rU   rU   rV   index_put_as_masked_fill*  s
   
r  c                 C   sl   t  }t|r(|s|r(|sdnd}tjjjdd  }r$| d| }|tj_t	
tjjj| ||| | S )Nzindex put with accumulate.zdeterministic index put.stack_trace Found from : 
 )rL   $are_deterministic_algorithms_enabledr.   rB   r   r  r/  r0  disable_cudagraphs_reasonr&   IndexPutFallbackrB  )r*  r  r   r  deterministicrh   r  rU   rU   rV   index_put_fallback2  s   r  c                 C      t | |||ddS )NTr  r  r*  r  r   r  rU   rU   rV   r  B  r  r  c                 C   r  r  r  r  rU   rU   rV   _unsafe_index_put_G  r  r  c              
      s^  |  dkr9t|dkr9|d  tjtjhv r9|d }tt| t|  D ]}t|d}q)t	| |g||S t
 rDt| |||S |D ]}|d ur_| tjtjhv r_t| |||  S qF|   t }|rt|  r|dkrzt| dg} t| |||} |dkrt| g } | S t||  }zt||  \}}	W n ty   t| ||| Y S w dd |D }
t| tsJ |   |dkrt| dg} t||	d   } fddtt|D }t ||	||
|d |d\}}t||}tj|  |  | |||rdnd d	}tjd t| |d
}tj||_ tj!| |dkr-t| g } | S )Nr#   r   r  c                 S   r  r]   r  r   rU   rU   rV   rn   w  rt  z#index_put_impl_.<locals>.<listcomp>c                    r  rU   rU   r   r  rU   rV   rn     ro   r  
atomic_addr   r   r*  r)  output_indexerscatter_moder   r  r;  )"r  r   r   rL   r   rC  r   r   r  r  r  r  r<   rq  r   r  r   rf   rX   r4   rF  rr   r  r   r&   Scatterr  rI  MutationLayoutSHOULDREMOVErB   r   r  r   r  )r*  r  r   r  r   r  r   r  x_ndimr  r  r  r  expected_vals_sizer*  scatterr  rU   r  rV   r  L  s   




r  r_  c                    sT   t | |ddd\}}  |   fdd}tj|  |  ||dS )NFr  c                    sB   j tjkrt tj}n }t| fddS )Nc                      s    S r]   rU   rU   )_unsafe_index_fnr  self_loaderrU   rV   r    r  z8_unsafe_masked_index.<locals>.inner_fn.<locals>.<lambda>)r   rL   r   rA   r   r  )r  mask_valr  fillr  mask_loaderr  r5  rV   r*    s   z&_unsafe_masked_index.<locals>.inner_fnr+  )r  r  r1   r   r   r   )r*  r  r  r  r)  r   r*  rU   r  rV   _unsafe_masked_index  s   r	  c                    s@   t ||d}|   fddtt D }t| ||ddS )Nr   c                    s6   g | ]} | rt  | |  | d  ndqS r  r  r   r  r  rU   rV   rn     s    (z7_unsafe_masked_index_put_accumulate.<locals>.<listcomp>T)r  )r  r   r   r   r  )rm   r  r  r   masked_valueclamped_indicesrU   r
  rV   #_unsafe_masked_index_put_accumulate  s   
r  c                 C   s   t |t || S r]   )rA   r  r  r   minmaxrU   rU   rV   r       r  c                 C   r  r]   )r\  r  copy_)r*  r  r   r  r  r  output_viewrU   rU   rV   as_strided_scatter  r  r  c                 K   s   t t| |||fi |S r]   )scatter_r\  )rm   r   r  r  r   rU   rU   rV   r    s   r  r  include_selfr  r  r  c             	   C   s^   t |t}t| || |r| nt||r| jnd|r-tj| ||||||d |S d S )Nznot implr  )rX   r4   r@   r   r-  r   r&   ScatterFallback)r  r*  r   r  r  r  r  src_is_tensorrU   rU   rV   scatter_fallback  s(   

	r  r  c                C   sr   |dv sJ |d u r$t tjtjjjj}t|| ||||d}|d ur$|S |dkr+d}n|dkr1d}t	| ||||S )N>   Nru   multiplyr  ru   sumr  prod)
rb   rp  r  rB   r   r  rB  _overloadnamer  scatter_reduce_)r*  r   r  r  r  r  fallback_resultrU   rU   rV   r    s   r  c                 C   r  r]   )scatter_add_r\  rm   r   r  r  rU   rU   rV   scatter_add  r  r$  c                 C   s   t | |||dS )Nr  )r   r#  rU   rU   rV   r"    r~  r"  c                 K   s   t t| ||||fi |S r]   )r   r\  )rm   r   r  r  reduction_typer   rU   rU   rV   scatter_reduce#  s   r&  )r  c             	      s,  |dv sJ t tj dkrdtj v sJ dttr$tttjj |||d}|r5|S tt	s<J dt
| v sFJ t  }|dkrVtdgtt	rit  dkritdgt|t	r|t | dkr|t|dg}| dkrS t    | tt	r nd  fdd	}fd
d}	dd }
|stj  fdd| |d d}tjd t|d}tj||_tj| tj  |	| ||
|d}tjd t|d}tj||_tj| |dkrtg S )N>   Nr  amaxaminmeanr  r#   twozKaten.scatter_reduce_.two is not the unique overload of aten.scatter_reduce_r  r   r   c                    sD     }t|}t| }tj| |dkrdn|  dd| < |S )Nr   r#   F)r  )r   r   rr   rA   r  )r  r  r  indirect_idx)r   r  r*  rU   rV   r  V  s   
z'scatter_reduce_.<locals>.output_indexerc                    s   r| S t   S r]   rA   r  r   r5  )r*  r  rg  rU   rV   rJ   `  s   zscatter_reduce_.<locals>.fnc                 S   s   | dkrdS | d u sJ d S )Nr  r  rU   r  rU   rU   rV   backend_reduce_strg  s   z+scatter_reduce_.<locals>.backend_reduce_strc                    s   t d  S r  r,  r  )r*  rU   rV   r  t  rs  z!scatter_reduce_.<locals>.<lambda>r  r  )r   rp  r   rx   rX   r   r  r  r*  r4   r  r   r   rq  r  r  rF  r  r&   r  r   rI  r  rB   r   r  r   r  )r*  r   r  r  r  r  r!  r  r  rJ   r-  zero_outr  r  rU   )r   r  r*  r  rg  rV   r   (  s   







r   scales_xnexactc           
         s   |    |  |   d  |  d   }dd D t|ks)J |}dd t|D t|D ]\}}|d urGd| |< q9 fddfdd}	tj|  | 	 |	g ||d	S )
Nc                 S   r  rU   r  r   rU   rU   rV   rn     r   z&upsample_nearestnd.<locals>.<listcomp>c                 S   s   g | ]\}}|| qS rU   rU   )rl   r   orU   rU   rV   rn     r  r/  c                    s\   t | tj}  rt | t dtj} t | t |tj} t | tj} t j	| |ddS )N      ?Fr  )
rA   r  rL   r  ru   r  r,  r   r  r  )rm   r  r   )r1  rU   rV   scale_fn  s   z$upsample_nearestnd.<locals>.scale_fnc                    sB   |  d  }| d   }g |fddt | D S )Nc                    s   g | ]\}}} |||qS rU   rU   )rl   r   r  r   )r4  rU   rV   rn     r   z2upsample_nearestnd.<locals>.fn.<locals>.<listcomp>)r   )r  rm   r   )i_sizes
inv_scalesr0  r4  r  rU   rV   rJ     s
    zupsample_nearestnd.<locals>.fnr+  )
realize_hintr  r   r   r   r   r1   r   r   r   )
rm   r  r/  r0  r1  batcho_sizesr   r  rJ   rU   )r1  r5  r6  r0  r4  r  rV   upsample_nearestnd  s(   
r:  c                 C   s   t | ||fddS )Nr#   r0  r:  rm   r  r  rU   rU   rV   upsample_nearest1d  r  r>  c                 C   s   t | ||fdddS )Nr#   Tr0  r1  r<  r=  rU   rU   rV   _upsample_nearest_exact1d  r  r@  scales_hscales_wc                 C   s   t | |||fddS )Nr!   r;  r<  rm   r  rA  rB  rU   rU   rV   upsample_nearest2d  s   rD  c                 C   s   t | |||fdddS )Nr!   Tr?  r<  rC  rU   rU   rV   _upsample_nearest_exact2d  s   rE  scales_dc                 C   s   t | ||||fddS )Nr{   r;  r<  rm   r  rF  rA  rB  rU   rU   rV   upsample_nearest3d  s   rH  c                 C   s   t | ||||fdddS )Nr{   Tr?  r<  rG  rU   rU   rV   _upsample_nearest_exact3d  s   rI  c                    s   t  fdd|D S )Nc                 3   s    | ]	}t | V  qd S r]   rx  r   r   rU   rV   rq     rb  z$_create_constants.<locals>.<genexpr>)rs   )r   r   rU   r   rV   r    s   r  c                    s:   |   |   fdd}tj|  |  |dS )Nc                    sF   t | } t| tksJ  D ]}| d | |  | |< q| S r  )rr   r   )r  r   r  r  r  rU   rV   r    s
   zrev.<locals>.loaderr+  )r  r   r1   r   r   r   )rm   r  r  rU   rJ  rV   rev  s   rK  c              	      sZ  t |d dks
J tdd |D rt| S |  }tttt|d d d |dd d  t |t   g  D ]\}}tj	j
||f q<t|d  }g t |d  D ]\\}}}	|	 |t|	| |  q`t |t |ksJ t|   fddfdd	}
|  tj|  |  |
|d
S )Nr!   r   c                 s   r   rK  rU   rl   r  rU   rU   rV   rq     r   z"constant_pad_nd.<locals>.<genexpr>r#   c                    s~   g }t  d  D ]\}\}}}|dkr|t|d |dkr+|t|| qttj|}t| fddS )Nr   c                      ri  r]   rU   rU   )r  r  rU   rV   r  2  rk  z/constant_pad_nd.<locals>.mask.<locals>.<lambda>)	r   r   range_mask_lowrange_mask_highr   r  rA   r   r  )r  r  r  r  r  rb  )boundsr  
mask_sizesr0  r  r  rV   r  *  s   "zconstant_pad_nd.<locals>.maskc                    sZ   t | d  }t| d   D ]\}\}}|||  qt|t| ks)J |S r]   )rr   r   r   r   )r  r  r  r  r  )bounds_precompr  r0  rU   rV   	offset_fn4  s
   z"constant_pad_nd.<locals>.offset_fnr+  )r   r   r\  r   rr   r   r   r   rB   r   r   lookup_precomputed_sizer   r   r   r   r  r1   r   r   )rm   paddingr  r  lhr  r  r  r   rR  rU   )rO  rQ  r  r  rP  r0  r  rV   r]    s2   *

r]  r   c                 C   s&   t t | tjt t|tjS r]   )rA   r  r  rL   r  r   r}  )r   r  rU   rU   rV   rM  D  s   rM  c                 C   s    t t | tjt |tjS r]   )rA   r  r  rL   r  )r   r  rU   rU   rV   rN  K  s   rN  c                 C   s   t t| |t| |S r]   )rA   r   rM  rN  )r   r  r  rU   rU   rV   
range_maskR  s   rW  r/  c                    sF       d   pdg   fdd}|S )Nr   c                    s|   | d   |  d   t tj fddtD }r1t| fddS t| 	fddS )Nc                    s.   g | ]}t |  | |  |  qS rU   )rW  r   )rV  ih	padding_hrU   rV   rn   f  s   . z=constant_boundary_condition.<locals>.load.<locals>.<listcomp>c                      s   t  dg S )Nr   )constant_boundary_conditionrU   )r   rX  pad_fill_valueprefixrm   rU   rV   r  k  s    
z;constant_boundary_condition.<locals>.load.<locals>.<lambda>c                      s   g  S r]   rU   rU   )rX  r]  r  rU   rV   r  q  rs  )r   r  rA   r   r   r  )r  r  r   r  rV  r\  rT  rY  rm   r  )rX  r]  rV   r  `  s   	z)constant_boundary_condition.<locals>.loadr   r  )rm   r  rT  r\  r   r  rU   r^  rV   r[  Y  s
   r[  c                 C   s   t | d||   || d  || d  || }|r|t | d||   || d  d|| d   || }tjj|d ||  |  ||  dkra|d8 }tjjd|||  |  ||   tjj|| dkrztjj|| d}||fS |}||fS )Nr!   r#   r   F)r   rB   r   r   r  r  r   )rm   r   kernel_sizer  rT  	ceil_modex_outx_altrU   rU   rV   pooling_sizew  s    ,0*$rd  c                 C   s4   t | d} | d | d  }|dkptdd |D S )Nr!   r   r#      c                 s   s    | ]}|d kV  qdS r  rU   r  rU   rU   rV   rq     r   z:should_fallback_max_pool2d_with_indices.<locals>.<genexpr>)r=   r   )r`  dilationwindow_sizerU   rU   rV   'should_fallback_max_pool2d_with_indices  s   
rh  assert_fallbackc                C   s   |dkrddg}|dkrddg}|s|}t |d}t |d}t |d}t |d}t| ts/J t|dks7J t|dks?J t|dksGJ t|dksOJ t|  dv sYJ t||}|d urh||kshJ |||||fS )Nr   r#   r!   rJ  )r=   rX   r4   r   r   rh  )rm   r`  r  rT  rf  rj  use_fallbackrU   rU   rV   max_pool2d_checks  s(   




rl  c                    s0  t |  |dd\ }}|   |  ^ }}}	t|d |\}
}t|	d |\}}| j}|tju r;dn|jrBtdnt	|j
}t||
|g }d s\d s\|s\|rdt| |ddn|   fdd	}tj|  |  tj|dd
|d}tj|  tjtj|dd
|d}||fS )NFri  r   r#   -infr!   rZ  c                    s   | ^ }}}d }d }t t d t d D ]T\}}|d  | d  }	|d  | d  }
g ||	|
}|r_t| d  | tj}|d u rT|}ntt||||}|d u rf|}qt	||}q|rq|S |S r  )
r   productr   rA   r  rL   rB  r  gtr  )r  return_indexr]  bhbwmaxvalmaxindexh_incw_incrX  iwr  r  r`  rT  r  r  rU   rV   rJ     s(   z/_low_memory_max_pool2d_with_offsets.<locals>.fnrp  r+  T)rl  r7  r   rd  r   rL   r   rk  r  rn  r  rr   r[  r  r1   r   r   r   r   r   rB  )rm   r`  r  rT  rf  ra  r   r8  rV  wh_out
ceil_mode1w_out
ceil_mode2r   	min_valuer  rJ   r   offsetsrU   rx  rV   #_low_memory_max_pool2d_with_offsets  s<   

r  c                    sH   |   fdd  fdd}tj|  tj||  d}|S )Nc           	         sf   t  tj}t |d  d  tj}t |d  d  tj}||  }|| }|| | S r  )rA   r  rL   r  )	ru  rv  rq  rr  w_inhbasewbaserX  rw  )input_widthrT  r  rU   rV   increments_to_index  s   zF_low_memory_max_pool2d_offsets_to_indices.<locals>.increments_to_indexc                    sP   | ^ }}}g |||}t tj}|| }|||  } ||||S r]   rA   r  rL   r  )r  r]  rq  rr  rn  kw_constru  rv  )r  kernel_widthoffsets_loaderrU   rV   offsets_to_indices  s   zE_low_memory_max_pool2d_offsets_to_indices.<locals>.offsets_to_indicesr+  )r  r1   r   r   rL   r  r   )r  r  r  r  rT  r  r  rU   )r  r  r  r  rT  r  rV   )_low_memory_max_pool2d_offsets_to_indices  s   r  c                    sN  dkrddg|dkrddg}st |tsJ tdks#J tdks+J tdks3J t|dks;J t| dv sEJ |   |  }t |trzt |jjtrz|jj}	tj	d tj
|	 |	 |	 d|	d}
|
  |
 }n| }|d ur|d dkp|d uo|d dk}tdd |D rt| ||||S | ^ }}
|  ^ }| |   t| }tfd	dtd d D tfd
dtd d D 		 }|dkrt| ||||S |  	
fdd}tj|  |  ||d}|r%tj|S |S )Nr   r#   r!   rJ  )r   r   r   r  c                 s   s    | ]}|d kV  qdS r  rU   r  rU   rU   rV   rq   O  r   z3max_pool2d_with_indices_backward.<locals>.<genexpr>c                 3   <    | ]}t |d   t d | d   d    dV  qdS r   r#   Nr  rl   rV  r`  r  rU   rV   rq   \  
    *
c                 3   <    | ]}t |d   t d| d   d    d V  qdS r#   r   Nr  rl   rz  r  rU   rV   rq   `  r  re  c                    sV  | ^ }}}t |
 | tj}|d  }|d  }t t|d  d  d tj}t t|d  d  d tj}t t|d d tj}t t|d d tj}t |t dtj}t |t dtj}t |t tj}t |t tj}d }	tD ]}
t	D ]}t 	|t |
tj}t 	|t |tj}g |t j
t |t |t dtjd ddt j
t |t |t dtjd dd}|} |}t ||}|	d u rt ||t dtj}	qt t t ||t |||}t |t 	|	||	}	qq|	d us)J |	S )Nr   r#   Fr  r  r  )rA   r  rL   r  r   r  r  r  r   ru   r  r+  re  r  r  r   r  )r  r]  rV  rz  
index_testphstartpwstartphendpwendgradientph_pw_phpw
grad_indexindex_actual	grad_partr   r  grad_loaderh_window_sizer  indices_sizer`  rT  pooled_heightpooled_widthr  w_window_sizewidthrU   rV   rJ   o  sl     


#z,max_pool2d_with_indices_backward.<locals>.fnr+  )rX   r4   r   r   r7  rT  r;  r1   r&   rI  r  r   r   decide_layoutr   r   )fallback_max_pool2d_with_indices_backwardr  rr   r  r   r   r  re  )grad_outputrm   r`  r  rT  rf  ra  r  	gO_strider;  x_bufferx_strideis_channels_lastr8  heightr   r  rg  rJ   r   rU   r  rV    max_pool2d_with_indices_backward&  s|   	
 ;r  r  c                    s*   |   ^ }}}|   fdd}|S )Nc              
      s   |\|\ |\}}t t t   tjt |tjt t  tjt |tj}t | fddS )Nc                      s   g    S r]   rU   rU   )h_start_indexrX  rw  r]  w_start_indexr  rU   rV   r    r  z3pad_adaptive_loader.<locals>.load.<locals>.<lambda>)rA   r   r  r  rL   r  r  )r]  
incrementsstart_indicesend_indicesh_end_indexw_end_indexr  pad_valr  )r  rX  rw  r]  r  rV   r    s$   z!pad_adaptive_loader.<locals>.loadr_  )rm   r  r   rV  rz  r  rU   r  rV   pad_adaptive_loader  s   r  c           
      C   sL   t j| ||d}t j|||d}t j| ||d}t j|||d}	||||	fS )N)out_diminp_dim)r   r   )
start_index	end_indexh_inr  r{  r}  r  r  r  r  rU   rU   rV    compute_indices_adaptive_pooling  s
   r  c                    sD   |\}}|\}}	t | |||||	\  fdd}
|
S )Nc                    s   | ^ }}}|} |}|}|}d }	t td td D ]\}
}|||
|g||g||g}|	d u r>|}	q&||	}	q&|	S r  )r   rn  r   )r  r  r]  rq  rr  r  r  r  r  r  rX  rw  r  h_end_index_fnh_start_index_fnkernel_maxes
pooling_fnw_end_index_fnw_start_index_fnrU   rV   rJ     s"   $z _adaptive_pooling_fn.<locals>.fnr  )r  r  r  in_sizes	out_sizesr  r  r  r{  r}  rJ   rU   r  rV   _adaptive_pooling_fn  s   r  c           
         sF   |\}|\}}t | ||||\  fdd}	|	S )Nc                    s   | ^ }}}|} |}|}|}d }	d }
t td td D ]>\}}||||g||g||g}t||  | | tj}|
d u rO|}
ntt||	||
}
|	d u ra|}	q(||	}	q(|
S r  )	r   rn  r   rA   r  rL   r  r  ro  )r  r  r]  rq  rr  r  r  r  r  rs  rt  rX  rw  r  r  r  r  r  r  r  r  r  rU   rV   rJ     s0   $z)_adaptive_pooling_fn_with_idx.<locals>.fnr  )
r  r  r  r  r  r  r  r{  r}  rJ   rU   r  rV   _adaptive_pooling_fn_with_idx  s   #r  c                    s  t tsJ t|dksJ    ^ }}}tjj|}tjj|}|\}}||kr9||kr9t	S |dksA|dkrTg |||}t
|  dS || dkrm|| dkrm|| || g}t|S t|| d |}	t|| d |}
t|||g } }|	|
 }|dkrt|S dd }dd	 }t|||	|
g||g||gtjd
 tt fdd}tj |||d}|S )Nr!   r   r  r#   re  c                 S      t | | |S r]   r   r  r  r  rU   rU   rV   r  c     z)_adaptive_avg_pool2d.<locals>.start_indexc                 S      t | d | | d |S r  r  r  rU   rU   rV   r  f  rL  z'_adaptive_avg_pool2d.<locals>.end_indexr  r  r  r  r  r  c                    s   t  | t | S r]   )rA   truedivr  r5  fn_sumones_loaderrm   rU   rV   rJ   t  s   z _adaptive_avg_pool2d.<locals>.fnr+  )rX   r4   r   r7  r   rB   r   r   r  r\  r  r   r   
avg_pool2dr7   rr   fallback_adaptive_avg_pool2dr  rA   ru   r  	ones_liker1   r   )rm   r  r8  r  r  r{  r}  o_sizer`  h_kernel_maxw_kernel_maxr  r   rg  r  r  rJ   rvrU   r  rV   _adaptive_avg_pool2d@  sR   

	r  c                    s  t tsJ t|dksJ    ^ }}}tjj|}tjj|}|\}}|dks5|dkrRg |||}t	|
  dt	|tj dfS || dkr`|| dkr`tt|| d |}t|| d |}	t|||g }

 }||	 }|dkrt|S dd }dd	 }t||||	g||g||gtjd
t||||	g||g||gtjd
 fdd} fdd}tj |||
d}tj tj||
d}||fS )Nr!   r   r  r#   re  c                 S   r  r]   r  r  rU   rU   rV   r    r  z(adaptive_max_pool2d.<locals>.start_indexc                 S   r  r  r  r  rU   rU   rV   r    rL  z&adaptive_max_pool2d.<locals>.end_indexr  c                        | t tdS Nrm  r  r  r5  )inner_func_max_valrm   rU   rV   inner_fn_max_val  r`  z-adaptive_max_pool2d.<locals>.inner_fn_max_valc                    r  r  r  r5  )inner_func_max_idxrm   rU   rV   inner_fn_max_idx  r`  z-adaptive_max_pool2d.<locals>.inner_fn_max_idxr+  )rX   r4   r   r7  r   rB   r   r   r  r  r   r   rL   r  
ValueErrorr7   rr   fallback_adaptive_max_pool2dr  rA   r  r  r1   r   )rm   r  r8  r  r  r{  r}  r  r  r  r  r   rg  r  r  r  r  r  rirU   )r  r  rm   rV   adaptive_max_pool2d  sl   
		r  c                    sN      t  d    fdd}|S )Nr#   c                    s   g | }t | }t   }t || | t ||  }t |tj}t |t d tj}t ||t  tjS r  )	rA   r  r   r  r   rL   r  r  r  )r]  r   samplei_expr
alpha_exprseq_ir  r  r   in_sz	kernel_szout_szsamplessamples_loaderrU   rV   r    s   z)_fractional_pooling_offsets.<locals>.load)r   r  )r  r  r  r  r   r  rU   r  rV   _fractional_pooling_offsets  s   r  c                    s   |    |  ^ }\}}|\}}|| dkr!t| ||S tjt|g|d}	|	dd |	dd|   fdd}
t|||g }tj	| 
 |  tj|
dd	|d
}tj	| 
 tjtj|
dd	|d
}||fS )Nre  )r  r  r  r  r   rZ  r#   c              	      s   | ^ }}}t  ||}t ||}d }d }ttd td D ]I\}	}
g |||	 ||
 }|rft ||	  | |
 tj}|d u rT|}nt t 	t 
||t |||}|d u rm|}q*t ||}q*|rx|S |S r  )rA   r  r   rn  r   r  rL   r  r  or_ro  r  r  )r  rp  r]  rq  rr  r  r  rs  rt  rX  rw  r  r  
h_index_fninp_hinp_wr`  
w_index_fnr  rU   rV   rJ     s,   $z!fractional_max_pool2d.<locals>.fnFry  r+  T)r7  r   fallback_fractional_max_pool2dr   r   r  r  rr   r1   r   r   r   rL   r  )rm   r`  r  random_samplesr8  kernel_hkernel_wr{  r}  gen_offsets_for_dimrJ   r  r  r  rU   r  rV   fractional_max_pool2d  sB   

r  c                    s       ^ }}}tjj|}tjj|}|^ }}}	|| dkr9||	 dkr9t|| ||	 gddS t||}
t||	}dd fdd}t||
|g||g||	gt	j
d  fd	d
}tj  |t|d}|S )Nr   r#   )divisor_overridec                 S   s   t | | t|S r]   )r   r   r  r  rU   rU   rV   r  M  r`  z0upsample_nearest2d_backward.<locals>.start_indexc                    s    | d ||S r  rU   r  )r  rU   rV   r  P  rZ  z.upsample_nearest2d_backward.<locals>.end_indexr  c                    s    | t S r]   )r  r5  )r  rm   rU   rV   rJ   \  r  z'upsample_nearest2d_backward.<locals>.fnr+  )r7  r   rB   r   r   r  r  r7   r  rA   ru   r1   r   r   r   rr   )rm   r  
input_sizerA  rB  r8  r  r  out_hout_wr  r  r  rJ   r  rU   )r  r  rm   rV   upsample_nearest2d_backward;  s6   

	r
  rU   c              
   C      t | ||||||ddS )Nr!   rZ  _avg_poolndrm   r`  r  rT  ra  count_include_padr  rU   rU   rV   r  q     
r  c              
   C   r  )Nr{   rZ  r  r  rU   rU   rV   
avg_pool3d  r  r  c                    s  	s	sdg t t 		t t| ts!J tks)J t	ks1J tks9J t|  d d fv sIJ |   |  d   }|   d  t 	fddtD  \}	}
tszt|
rt	| dd
d}n| 
 
d	}t|t|	 }|  ttj}|d
krdkrt}ndkrt}ntd || 	 |S 	fdd|r|r|rd| nd| 
fdd}n	
fdd}tj|  ||d}|S )Nr   r#   r!   c              	      s"   g | ]}t | | qS rU   )rd  r   )ra  rV  r`  rT  r  rU   rV   rn     s    z_avg_poolnd.<locals>.<listcomp>r  rZ  TFre  r{   zUnknown dim: c                    s   | d   }|  d   d }t jfddtD  D ]% fddtD }|g ||}|d u r=|}qt||}q|S )Nc                    s   g | ]}t  | qS rU   )r   r   )r`  rU   rV   rn     r  z/_avg_poolnd.<locals>.fn_sum.<locals>.<listcomp>c                    s,   g | ]} | |  |  |  qS rU   rU   r   )r   rX  rT  r  rU   rV   rn     s   , )r   rn  r   rA   ru   )r  r  r]  totalr   r  )r   r`  rT  r  )r   rX  rV   r    s    z_avg_poolnd.<locals>.fn_sumr/  c                    s   t | t  S r]   )rA   r,  r  r5  )r   r  r  r  rU   rV   rJ     rL  z_avg_poolnd.<locals>.fnc           	         s   | d   }|  d  }g }t D ]<}|| |  |  }t||  | |  } sBt|d}t|| }t|| tj}|| qt	
tj|}t| |S r  )r   r   MinMaxrA   r  rL   r  r   r   r  r,  r  )	r  r]  rq  divide_factorsr   hstarthendfactordivide_factor)r  r   r  rV  r`  rT  r  r  rU   rV   rJ     s    r+  )r=   rX   r4   r   r   r7  r   r   r   r[  r  rr   r   r   r  r2  r,  fallback_avg_pool2dfallback_avg_pool3dr  r1   r   r   )rm   r`  r  rT  ra  r  r  r   r8  r{  
ceil_modeshad_paddingr  rg  fallbackrJ   r  rU   )ra  r  r   r   r  rV  r`  rT  r  r  r  rV   r    sr   




 

r  c                    s  d u sdksJ dssddgt | tsJ t |ts$J tdks,J tdks4J tdks<J t| dv sFJ |   | ^ }td|\}	}
td|\}}|  d pwd pw|
pw||  ^ }	
t| }| }t	fddt
d d D t	fddt
d d D  }|d	krt| ||S fd
d  	
fdd}tj|  |||d}|S )Nr   divisor must be not zeror!   rJ  r#   c                 3   r  r  r  r  r  rU   rV   rq   <  r  z&avg_pool2d_backward.<locals>.<genexpr>c                 3   r  r  r  r  r  rU   rV   rq   @  r  re  c              	      sX  t d tj}t d tj}t d tj}t d tj}t d tj}t d tj}t t | ||}t t |||}	t t ||t t  tj|}
t t |	|t t tj|}t 	|t dtj}t 	|	t dtj}	t |
t  tj}
t |t tj}t t |
|t ||	}|S )z{
        This computes the scaling factor that we will divide an element
        by when `count_include_pad=False`
        r   r#   )
rA   r  rL   r  r+  r,  r  ru   r  r  )r  r  stride_hstride_wpad_hpad_wr  r  r  wstartr  wendr  )r  r`  rT  r  r  rU   rV   !compute_pool_size_without_paddingS  s,   

z>avg_pool2d_backward.<locals>.compute_pool_size_without_paddingc                    sR  | ^ }}}|d  }|d  }t t|d  
d  
d tj}t t|d  
d  
d tj}t t|
d d tj}t t|
d d tj}t |t dtj}t |t dtj}t |t tj}t |t 	tj}d }tD ]}	tD ]}
t 	|t |	tj}t 	|t |
tj}d ur}nssd d  }n ||}t 
g |t jt |t |t dtjddt jt |t |t dtj	dd|}t t ||t ||}|d u rt ||t dtj}qt |t 	|||}qq|d us'J |S )Nr   r#   Fr  r  )rA   r  r   rL   r  r  r  r  r   ru   r  r  r+  r   r  r  r  )r  r]  rV  rz  r  r  r  r  r  r  r  r  r  r  partr  )r&  r  r  r  r  r  r`  rT  r  r  r  r  rU   rV   rJ   o  sv     
	


*zavg_pool2d_backward.<locals>.fnr+  )rX   r4   r   r   r7  rd  r  rr   r   r  r   fallback_avg_pool2d_backwardr1   r   r   )r  rm   r`  r  rT  ra  r  r  r8  r{  r|  r}  r~  r   r  r   rg  rJ   r  rU   )r&  r  r  r  r  r  r  r`  rT  r  r  r  r  r  rV   avg_pool2d_backward  s^   "Ar)  c                    s  d u sdksJ ds	
sg d
t | tsJ t |ts$J t	dks,J tdks4J t
dks<J t| dv sFJ |   | ^ }td	
|\}	}
td	
|\}}td	
|\}}|  t
p|
p|p||  ^ }t| }|	 }	fdd	t
dD \  }|d
krt| |	
|S 	
fdd  	
fdd}tj|  |||d}|S )Nr   r  )r   r   r   r{   )r|   r}   r#   r!   c                 3   s6    | ] t  fd dt  d D V  qdS )c                 3   s<    | ]}t |   t d |       dV  qdS r  r  r  )r   r`  r  rU   rV   rq     r  z0avg_pool3d_backward.<locals>.<genexpr>.<genexpr>r!   N)r  r   )rl   r  )r   rV   rq     s    
z&avg_pool3d_backward.<locals>.<genexpr>}   c              	      s  dd D \}}}dd D \}}}dd D \}	}
}dd t | ||g|||g|||gD \}}}dd t |||g|	|
|g g|||gD \}}}dd |||fD \}}}dd t |||g gD \}}}ttt||t||t||}|S )	Nc                 s       | ]
}t |tjV  qd S r]   r  r  rU   rU   rV   rq     r   zQavg_pool3d_backward.<locals>.compute_pool_size_without_padding.<locals>.<genexpr>c                 s   r+  r]   r  rL  rU   rU   rV   rq     r   c                 s   r+  r]   r  r   rU   rU   rV   rq      rF  c                 s   s*    | ]\}}}t t |||V  qd S r]   )rA   r+  r,  )rl   r  r  padrU   rU   rV   rq     s
    
c              
   s   s>    | ]\}}}}t t ||t t |tj|V  qd S r]   )rA   r  ru   r  rL   r  )rl   r  r   r   r,  rU   rU   rV   rq   
  s    

c                 s   &    | ]}t |t d tjV  qdS rK  rA   r  r  rL   r  )rl   r  rU   rU   rV   rq     
    
c                 s   *    | ]\}}t |t |tjV  qd S r]   rA   r  r  rL   r  )rl   r  r   rU   rU   rV   rq     
    
)r   rA   r,  r+  )pdr  r  stride_dr   r!  pad_dr"  r#  kernel_dr  r  dstartr  r$  dendr  r%  r  )depthr  r`  rT  r  r  rU   rV   r&    s8   $z>avg_pool3d_backward.<locals>.compute_pool_size_without_paddingc                    sJ  | ^ }}}}dd t |||gD \}}}dd t |||gD \}}}dd t |||gD \}}	}
dd |||fD \}}}dd t ||	|
g	
gD \}}	}
d }tD ]}tD ]}tD ]}dd t |||g|||gD \}}}d ur}nssd d	  d
  }n |||}tg |tjt|t|td	tj		ddtjt|t|	td	tj	
ddtjt|t|
td	tj	dd|}t
t
t||t||	t||
}|d u rt||tdtj}qjt|t|||}qjqdq^|d us#J |S )Nc                 s   s    | ]	\}}|| V  qd S r]   rU   )rl   r   r,  rU   rU   rV   rq   $  rb  z2avg_pool3d_backward.<locals>.fn.<locals>.<genexpr>c                 s   s2    | ]\}}}t t|| | |tjV  qd S r]   rA   r  r   rL   r  )rl   r   r   r  rU   rU   rV   rq   &  s
    
c                 s   s,    | ]\}}t t||d  tjV  qdS r  r:  )rl   r   r  rU   rU   rV   rq   +  s
    
c                 s   r-  rK  r.  )rl   pstartrU   rU   rV   rq   0  r/  c                 s   r0  r]   r1  )rl   pend
pooled_dimrU   rU   rV   rq   4  r2  c                 s   r0  r]   )rA   ru   r  rL   r  )rl   r;  p_rU   rU   rV   rq   @  r2  r   r#   r!   Fr  r  )r   r   rA   r  r  r  r+  r  rL   r  r   r  r  r  ru   )r  r]  r  rV  rz  pdstartr  r  pdendr  r  r  pd_r  r  r3  r  r  r  r'  r  )r&  r  d_window_sizer  r  r  r  r`  rT  pooled_depthr  r  r  r  rU   rV   rJ   "  s    	

8zavg_pool3d_backward.<locals>.fnr+  )rX   r4   r   r   r7  rd  r  r   rr   r   r   fallback_avg_pool3d_backwardr1   r   r   )r  rm   r`  r  rT  ra  r  r  r8  d_outceil_mode_dr{  ceil_mode_hr}  ceil_mode_wr   r  r   rg  rJ   r  rU   )r&  r  rB  r9  r  r  r  r  r  r`  rT  rC  r  r  r  r  r  rV   avg_pool3d_backward  s^   &%WrI  c                 C   s   |   }t|tr|g}n|stt|}t|dkr*t|dv s(J d| g S t|}tt|D ]5}|| dk rL||  t|rHt|nd7  < d||   krZt|k sin t|dkrg|| dksiJ q4tt|t|ksxJ d|S )Nr   )rU   r8  rY  zinvalid axis: r#   zreduction axis not unique)r   rX   r   r   r   rs   rr   rt   )rm   r  r   r   rU   rU   rV   _validate_reduction_axis  s    
 :rJ  c          
         s   |d ur	t | |} |  tt| |}g }g g }g ttD ]}||v r5| ||  q"| ||  q" fdd}r^t}	D ]}tj	j
|	|< qTn|}	|   t|  |pm|  |  ||	|dS )Nc                    s   t |t ks
J rt  t ksJ  fddD  t  t ks)J d gt  t |  }tt t|D ]\}}|||< q@|S )Nc                    r  rU   rU   r   r  rU   rV   rn     ro   z9_make_reduction_inner.<locals>.loader.<locals>.<listcomp>)r   r   r   r   )r  reduction_indexr  r  varinner_loaderkeepdimskept_idxreduced_idxr   r  rV   r    s   
z%_make_reduction_inner.<locals>.loader)r   	dst_dtyperW  r*  r)  reduction_ranges)r   r   rt   rJ  r   r   r   rr   r   r   r   r  r   r   r   )
rm   r  rO  r   r  
kept_sizesreduced_sizesr   r  r  rU   rM  rV   _make_reduction_inner  s<   



rV  r%  c                    s   dd d fdd}|S )NFr   c                   sB   t | ||| d}tjd| d|}t|jjtr|  |S )Nr  rO  r   r  )r%  
input_noderU   )rV  r2   r   rX   r;  rF  )rm   r  rO  r   r   r  r  r%  rU   rV   r8    s   zmake_reduction.<locals>.innerr   rU   )r%  r  r8  rU   rY  rV   make_reduction  s   rZ  c                C   sJ   |d ur	t | |} |  }t| |}t|  |  f|  f|  |dS )N)r   dtypes	inner_fnsr   r  )r   r   r  r   r   r   r  )rm   r  r   r   rU   rU   rV   _make_scan_inner  s   

r]  r   c                   s   |d ur	t | |} |   t| |}|  }|tjtjfv r$t | tj} t| ||}t	 fdd|D }t
j||  |  d}t|t| }t t|||S )Nc                 3   r   r]   rU   r   r   rU   rV   rq     r   zmean.<locals>.<genexpr>r  )r   r   rJ  r   rL   r2  r1  r  sum_r?   r&   r,   r   r+   r   rr   div)rm   r  keepdimr   output_dtype
sum_resultdenomrU   r^  rV   r)    s   

r)  c           
         s   |d u rd}|    t| |}t| |dd}|r|  tt| |}t|||}t fdd|D }|r>t	|| d}t
j||  |  d}t|t|  }t||}	|s^|	fS |rb|nt||}|	|fS )Nr#   T)ra  c                 3   r   r]   rU   r   r^  rU   rV   rq     r   z var_mean_sum_.<locals>.<genexpr>r   r  )r   rJ  r)  rF  squarer+  r_  r?   r   r  r&   r,   r   r   r+   r   rr   r`  r  )
rm   r  
correctionra  return_meanx_meandiffsrc  rd  x_varrU   r^  rV   var_mean_sum_  s&   

rk  c                 C   sV   t | |}t| ||d d d}|d }t|d }t|tjo*t|tjk o*t|dkS )NrW  r)  rS  r#   )	rJ  rV  r?   rX   r   r}  r   r$   unroll_reductions_threshold)rm   r  ra  r   r)  reduction_numelrU   rU   rV   use_two_step_variance  s   


rn  c                   s    d u rd t | ||d d d}|d}|d |d tjjd|fd|  d|\}}}	|  |  |  t| |}t	fdd	|D d
d  fdd}
t
|
|}|rj|  ||fS |fS )Nr#   rW  r*  rR  rW  welford_reduce)r\  r%  r   c                 3   r   r]   rU   r   r^  rU   rV   rq   8  r   z$var_mean_welford_.<locals>.<genexpr>c                 S   s4   t | tjr| jstt| tj|S t	| |S r]   )
rX   r   r   	is_numberrA   r   r  rL   r  r  rw  rU   rU   rV   get_constant_or_index_expr:  s   z5var_mean_welford_.<locals>.get_constant_or_index_exprc                    s4    }}t d}| t |||  S r  )rA   r  r  )r;  cNzero)rf  r   rq  rnumelrU   rV   r4  ?  s   

z#var_mean_welford_.<locals>.scale_fnrU   )rV  r  r&   WelfordReductionr   r   rF  r   rJ  r?   r9  )rm   r  rf  ra  rg  r   r  r)  m2r   r4  rL  rU   )rf  r   rq  ru  r   rV   var_mean_welford_"  s6   




rx  c                   s   |    t }t| |dd} t| ||||d}t| ||dr&tdi |ntdi |}t fdd|D }|s>|d S |S )	NFra  )rm   r  rf  ra  rg  )r  ra  c                 3   s    | ]
}t | d dV  qdS )Fra  NrF  rk   	out_dtyperU   rV   rq   ]  r   z#var_mean_helper_.<locals>.<genexpr>r   rU   )r   r   r   r   rn  rk  rx  rs   )rm   r  rf  ra  rg  compute_dtyper   r  rU   ry  rV   var_mean_helper_M  s    	r|  )rf  ra  c                C   r  )NFr  rf  ra  rg  r|  rm   r  rf  ra  rU   rU   rV   var_a     
r  c                C   r  )NTr}  r~  r  rU   rU   rV   var_meanh  r  r  c                 C   st   |dk rt t| | |S |dkrtd|S |dkr| S t | |d |}t||}|d dkr8t|| }|S )Nr   r#   r!   )pow_recursiverA   r  r  r,  )rm   r  r   r  rU   rU   rV   r  o  s   r  c                 C      t | |S r]   )rA   powr   r   rU   rU   rV   
pow_native~  r  r  )r   c                    sV  t trtkrt tS t trdkrt S t tr,dkr,t S tdd  fD }t|}t toQd  k oIdk n  pQ|oQdk}|ro   fdd	}t	j
    |  d
S t  tr dkr}tdS  dkrt rtS |rt  trt S t trt S t S t S )Nr3  r#   c                 s   s$    | ]}t |tjr| V  qd S r]   )rX   r&   r4   r   rk   rU   rU   rV   rq     r  zpow.<locals>.<genexpr>i    r   c                    s   t |   S r]   )r  r   r5  r   r   r  rU   rV   rJ     r`  zpow.<locals>.fnr+  r!   )rX   r  r   r  sqrtr\  r
  r   r  r1   r   r   r   r   r   r  r   exp2fallback_pow_scalarfallback_pow_tensor_scalarfallback_pow_tensor_tensorr  )r   r   r   is_integer_powembed_exponentrJ   rU   r  rV   r    s@   
"







r  c                 C   s   t | tr	| j}n| }t |tr|j}t |tjs3tj|  |  |	 | 
 dj}t |tjs3J t |tjrR| sR| sRt |jtjsR|  |j|_| S tjj|||d | S )Nr+  unsafe_alias)rX   r4   r;  r&   rG  r1   r   r   r   r  r   is_input_bufferis_module_buffer	NopKernelrF  r  realize_into)changedr  r  changed_datarU   rU   rV   r    s:   

r  c                 C   s   t | t| |S r]   )r  r  )rm   r  rU   rU   rV   fill_  r~  r  c                 C   @   | |u r| S t ||  }t||  }t||  }t| |S r]   r|  r   r   r   r   r   r  )ri  r  rz  rU   rU   rV   r       
r  c                 C   r  r]   )rA   floordivr  rU   rU   rV   r    r  r  c                 C   r  r]   )rA   truncdivr  rU   rU   rV   r    r  r  c                 C   s   t | ot |}t| ot|}|dkr(|rJ d|r!t| |S tt| |S |dkr@|r2J d|r9t| |S tt| |S t| |S )Nr  z5floordiv operands can not be boolean at the same timer  z5truncdiv operands can not be boolean at the same time)r   r   r  r  r`  r  r  )r   r   rounding_modeboth_integerboth_booleanrU   rU   rV   div_mode  s   
r  c                 C   s8   t | ot |}|rt| |S ttjj}t|| |S r]   )r   logical_andr/   rp  r,  r  r9  )r   r   	both_boolrJ   rU   rU   rV   r,  	  s
   
r,  c              	   C   s   t | tjrt| jS t | tjrt|  S t | tjr| S t | tjs'dS t	j
j|  }t|' ttjdd | j|   }W d   n1 sPw   Y  W d   n1 s_w   Y  t |t	j
jjsnJ t |jtjrx|jS dS )z:Try convert an arbitrary IR node into an ir.Constant valueNallow_indexingT)rX   r&   
MutableBoxget_constant_valuer;  r  r  r   LoopsrL   	_inductorops_handlerExtractConstantsHandlerr   rB   set_ops_handlerr   objectr  r*  inner_fn_argsvirtualizedOpsValuer   )rm   r  r   rU   rU   rV   r    s(   
 r  c                 C   s|   t dd | |fD }|rt| |S t| }d ur3|jdkr)ttd|j}nd|j }t| |S dd }t|| |S )Nc                 s   s     | ]}t |pt|V  qd S r]   )r   r   rk   rU   rU   rV   rq   4  rE  zdiv_prim.<locals>.<genexpr>r   infr/  c                  W   r  r]   )rA   r  r   rU   rU   rV   rJ   A  r  zdiv_prim.<locals>.fn)	r   r  r  r   mathcopysignr  r,  r9  )r   r   is_integraldivisorr  rJ   rU   rU   rV   div_prim2  s   



r  c                 C   s    t | |ftjd\} }t| |S r  )r  r   INT_TO_FLOATr  r  rU   rU   rV   r`  G  s   


r`  c                 C   s4   t | pt| }|rdd }ndd }t|| |S )Nc                 S   r  r]   )rA   modr  rU   rU   rV   rJ   Y  rw  zfmod.<locals>.fnc                 S   r  r]   )rA   fmodr  rU   rU   rV   rJ   ^  rw  )r   r   r9  )r   r   r  rJ   rU   rU   rV   r  S  s
   
r  c                C   B   t |  st|  r|d u rtj}td|d}|| |||dS )Nr  r[  r   r   r   r   rL   r  rZ  rm   r  rO  r   rJ   rU   rU   rV   r_  d     

r_  c                 C      t |  st|  r|d u rtj}t|  dkr.|dv s!J |p&|  }t| |ddS dd }t| ||d}t	j
jd
i |d|i\}|d u rRt| ||d	S |S )Nr   r   r  Tra  c                 S      | \}|\}t ||fS r]   )rA   ru   a_tupleb_tupler   r   rU   rU   rV   
combine_fn     zcumsum.<locals>.combine_fnr  r   r  r   r   rU   )r   r   r   rL   r  r   r   r   r]  r&   Scanr   fallback_cumsumrm   r  r   r  r   r  rU   rU   rV   cumsumv      

r  c                 C   r  )Nr   r  Tra  c                 S   r  r]   )rA   r,  r  rU   rU   rV   r    r  zcumprod.<locals>.combine_fnr  r  r  rU   )r   r   r   rL   r  r   r   r   r]  r&   r  r   fallback_cumprodr  rU   rU   rV   cumprod  r  r  c                 C   sv   dd }|   }t|  dkr|dv sJ t| S t| ||d}tjjdi |d|i\}|d u r9t| |dS |S )	Nc              	   S   s\   | \}|\}t ||}t ||}||kt | B }t |t t || | |fS r]   )rA   r  r  r  r  log1pexp)r  r  r   r   min_vmax_vr  rU   rU   rV   log_add_exp_helper  s   $z(logcumsumexp.<locals>.log_add_exp_helperr   r  r  r  rZ  rU   )	r   r   r   r\  r]  r&   r  r   fallback_logcumsumexp)rm   r   r  r   r   r  rU   rU   rV   logcumsumexp  s   r  c                 C   s   t |  dkr|dv sJ t| t| tjdfS |  }tjd|dd}|tj	u r,dn|j
r5t|jnt|j}t| ||d}|tjf|d< |  d	d
 f|d< tjjdi |d|i\}}|d u rlt| |dS ||fS )Nr   r  r   argmaxFr   arg_break_ties_leftr  r[  c                 S      dS NrindexrU   r  rU   rU   rV   r        zcummax.<locals>.<lambda>r\  r  rZ  rU   )r   r   r\  rc  rL   r  r   r&   get_reduction_combine_fnr   rk  rl  r  rn  r]  r  r  r   fallback_cummax)rm   r  r   r  r  r   r   r  rU   rU   rV   cummax  (   


r  c                 C   s   t |  dkr|dv sJ t| t| tjdfS |  }tjd|dd}|tj	u r,dn|j
r5t|jnt|j}t| ||d}|tjf|d	< |  d
d f|d< tjjdi |d|i\}}|d u rlt| |dS ||fS )Nr   r  r   argminFr  Tr  r[  c                 S   r  r  rU   r  rU   rU   rV   r    r  zcummin.<locals>.<lambda>r\  r  rZ  rU   )r   r   r\  rc  rL   r  r   r&   r  r   rk  rl  r  rn  r]  r  r  r   fallback_cummin)rm   r  r   r  	max_valuer   r   r  rU   rU   rV   cummin  r  r  c                C   r  )Nr  r[  r   r  r  rU   rU   rV   r    r  r  c                 C   s   t | tj} td| ||dS )Nr   r  rO  )r   rL   r   rZ  rm   r   ra  rU   rU   rV   
reduce_any  s   r  c                 C   2   |d urt | ||dt| ||dfS t | d |dS Nr  )reduce_amaxreduce_argmaxr  rU   rU   rV   
reduce_max	  
   r  c                 C   r  r  )reduce_aminreduce_argminr  rU   rU   rV   
reduce_min  r  r  xor_sumr  r  r  r[  r  
logical_or)r5  r(  stabler   
descendingc          
   	   C   s:  |d u rd}|   }|  }tt||}t|dkr't| td|tj|fS t|r/|| nd}tj	j
|ttjjsFt| |||dS t|ddtj|dd}dgt| }t|r`|||< t||}t||}tjj|| j|jf|  | f||||d\}	}|	d u rt| |||dS |d usJ |	t|tjfS )NFr   r#   r  )r  r  r   r   rc  )r   r[  r\  r   r  r  r  )r   r   r   r   r\  r  rL   r  rB   r   r   statically_known_ltrn  int16r  sort_fallbackrd  rq  r   r&   Sortr   r   r  r   )
rm   r  r   r  r  r   r  r  
view_shaper   rU   rU   rV   sort_stable0  s>   



	r  c                 C   s   t | d||dS )NFr  )r  )rm   r   r  rU   rU   rV   sortX  r~  r  c                 C   s   t | |tj|dS )Nr   r   r7  r  r   r  )r   r   r7  rU   rU   rV   register_pointwise_numeric]  s   r  c                 C   s   t | tjddS )NT)r   r  r  r^  rU   rU   rV    register_pointwise_numeric_ldf64f  s
   r   r  logical_not)r(  )r   r   r  identity)rD  pointwise_overrides_datac                 #   s    t |  t|  jd }|d u rd S  fdd}t|tjjr6| D ]}t||}| j||fV  q#d S | j||fV  d S )Nc                    s    j d u r	t| S d S r]   )tritonre  r^  r  rU   rV   make_triton_fallback  s   
z6_get_pointwise_overrides.<locals>.make_triton_fallback)	r  rb   r   rX   rL   rY   rv   rx   r   )nsr   r   r  olnamer  rU   r  rV   _get_pointwise_overrides  s   
r  r  c                    s,   | t |< t|   fdd}t| | d S )Nc                     sB    | i |}g }t | d |D ]\}}|t||dd q|S )Nr   Tr  )r   r   r  )r   r   resultsmut_resultsr   r  outplace_oprU   rV   rJ     s
   z$register_foreach_inplace.<locals>.fn)rI   rH   ru   r   )aten_opoutplace_aten_opr  rJ   rU   r  rV   register_foreach_inplace  s   
r  c                    s   t | d d fdd}|S )Nr^  c                     s.    | i |}t || d  }t| d |S r  )r   r   r  )r   r   r  r  rU   rV   rJ   ?  s   zregister_inplace.<locals>.fn)r   )r  r  rJ   rU   r  rV   register_inplace>  s   
r  c                 C   r  r]   rU   r  rU   rU   rV   sym_constrain_rangeh  r  r  c                 C   &   t jjjd }t|tjsJ |jjS Nr  	rB   r   r  r/  rX   rL   r  rJ  r  r   r   r  rU   rU   rV   sym_sizem  s   r  c                 C   r  r  r  r  rU   rU   rV   
sym_stride~  s   r  c                 C   s   |   S r]   )r  )r   rU   rU   rV   	sym_numel  r  r  c                 C   r  r]   )r   Addr   rU   rU   rV   sym_sum  s   
r  c                 O   r  )NzHelpful for debuggingre   )r*  r   r   rU   rU   rV   foobar  r  r  c                 C   s   |    t| S r]   )rF  r\  r   rU   rU   rV   _realize  s   r  c                 C   s   |    t| | | S r]   )rF  r&   ResizeStorageBytes)variabler  rU   rU   rV   resize_storage_bytes_  s   r  c                 C   s"   |    |   tt| |S r]   )rF  r4   r   r&   SetSourceTensorKernel)r*  source_tensorrU   rU   rV   set__source_tensor  s   r"  c                 C   r  r]   r  )ri  r  rU   rU   rV   
fsdp_copy_  r  r#  c          	         sv  t | tsJ t |ttfsJ |d u rtj}|tjkr#td| |tjkr0t	|dks0J |tj
kr=t	|dks=J |  |  }|  }t | jtjrV| j | _t rvtjjjrvt|rhtdnt|rst|jndndtjjdrt|||dS t | gd	g}|!  tj"#||}t$||||%  fd
d}t&j'|||t|d}|S )Nzunsupported memory format: r|   r}   nanTr  r   r  r#   c                    sH   |  t  tj}t tj}t ||}t | fddS )Nc                      s
    gS r]   rU   rU   )
flat_indexflat_loaderrU   rV   r    s   
 z*resize.<locals>.inner_fn.<locals>.<lambda>)rA   r  rL   r  r  r  )r  flat_index_exprlimitr  r&  	old_numelout_indexeruninitalized_val)r%  rV   r*    s
   zresize.<locals>.inner_fnr+  )(rX   r4   rr   rs   rL   contiguous_formatpreserve_formatr  channels_lastr   channels_last_3dr  r   r  r;  r&   r  r  r  r`   r  fill_uninitialized_memoryr   r  r   rn  r  rB   r   r   statically_known_equalsr  r  r  r   stride_ordered_for_memory_formatr  r  r1   r   )	rm   r   r[  r   r   x_flat
out_strider*  r   rU   r)  rV   resize  sT   



	r6  )auto_functionalizedc                 C   sB   ddl m} ||}tj| ||i ||d dd | D S )Nr   )kernel_side_table)
kernel_idxgridtma_descriptor_metadatakernel_argsc                 S   s    i | ]\}}t |tr||qS rU   r   )rl   rB  r  rU   rU   rV   r     rt  z'triton_kernel_wrap_.<locals>.<dictcomp>)*torch._higher_order_ops.triton_kernel_wrapr8  get_constant_argsr&   UserDefinedTritonKernelr   )r9  constant_args_idxr:  r;  r   r8  constant_argsrU   rU   rV   triton_kernel_wrap_  s   	

rB  c                 C   sj   t dd | g|D r$d}tjjjdd  }r | d| }|tj_tj	| |||}t
ttj	|S )Nc                 s   r  r]   r  rk   rU   rU   rV   rq     r  zcond.<locals>.<genexpr>z"control flow operator: torch.cond.r  r  )r   rB   r   r  r/  r0  r  r&   Conditionalr   rr   mapr4   )predtrue_fnfalse_fnoperandsrh   r  r  rU   rU   rV   rg     s   rg   c                 C   sh   t dd || D r#d}tjjjdd  }r| d| }|tj_tj	| |||}t
ttj	|S )Nc                 s   r  r]   r  rk   rU   rU   rV   rq   $  s
    
zwhile_loop.<locals>.<genexpr>z(control flow operator: torch.while_loop.r  r  )r   rB   r   r  r/  r0  r  r&   	WhileLoopr   rr   rD  r4   )cond_fnbody_fncarried_inputsadditional_inputsrh   r  r  rU   rU   rV   
while_loop"  s   rN  subgraph_fn
identifierc                 C   s   t j| |}tttj|S r]   )r&   InvokeSubgraphr   rr   rD  r4   )rO  rP  rH  r  rU   rU   rV   invoke_subgraph1  s   rR  r  c                    s   ddl m m}  fddt||D }|| |fdd}t|d |d d}td	d
 |D |d< tdd
 |D |d< tjj	d|dd|}|d d u rUt
d|S )Nr#   )InputDescriptorlower_pointwise_subgraphc                    s    g | ]} |  | d qS )r  )r   r   rk   )rS  rU   rV   rn   ;  s    z$associative_scan.<locals>.<listcomp>c                    s    g t | t |R  S r]   )r  r  )lhsrhs)lowered_combine_fnrU   rV   wrapped_combine_fnA  s
   z,associative_scan.<locals>.wrapped_combine_fnr   r  c                 s       | ]}|  V  qd S r]   r  rk   rU   rU   rV   rq   H  r   z#associative_scan.<locals>.<genexpr>r[  c                 s   rY  r]   r  rk   rU   rU   rV   rq   I  r   r\  F)r  can_fallback_to_atenz/Unable to generate code for associative_scan oprU   )subgraph_loweringrS  rT  r   r   r]  rs   r&   r  r   r  )r  xsr   rT  subgraph_inputsrX  r   r  rU   )rS  rW  rV   associative_scan7  s$   


r^  c                 C   r  r]   rU   )tokensrU   rU   rV   _sink_tokensT  r  r`  c                 O   s   t jj|g|R i |}ddlm} ||||}|d usJ tjj| }|d u r,|fS t	t j
tj|}t|ttfs@||fS |g|R S )Nr   )get_effect_key)r&   EffectfulKernelr   torch._higher_order_ops.effectsra  rB   r   effectful_opsr  r<  MultiOutputr4   rX   rr   rs   )tokenr   r   r   r  ra  effect_typeeffectful_kernelrU   rU   rV   with_effectsY  s   ri  )register_comm_loweringsr  )quantized_lowerings)mkldnn_lowerings)jagged_lowerings)NN)NNNNFN)Fr]   )r   r   r  r#   Tr8  )r   r   r#   rY  )T)NTF)r   NNr#   )r  FF)r!   F)NNN)Nr/  N)r  )NNNN)rU   r   FTNr   )r  F(  r  r   r   loggingr  r2  r  r  collectionsr   typingr   r   r   r   r   r   r	   r
   r   unittest.mockr   r   rL   $torch.ao.quantization.fx._decomposedtorch.fxtorch.utils._pytreer`   _pytreer  (torch._higher_order_ops.associative_scanr   r=  r   torch._prims_commonr   r   r   r   r   r   r   r   r   r   r   torch.fx.experimental.sym_noder   r   torch.utils._sympy.functionsr   r   r   r   r    _dynamo.utilsr"    r$   r%   r&   r'   decompositionr(   r)   r*   r+   r,   r-   r.   r/   r0   r1   r2   r3   r4   r5   r6   r7   r8   r9   r:   r;   r<   r=   r>   r?   r@   r  rA   rB   FALLBACK_ALLOW_LIST	getLoggerr  r  rC   r  __annotations__rD   rY   rZ   rt   rE   rp  tr_c10drf  rF   rG   rH   rI   quantized_decomposedr\   r[   ri   rj   rz   r  r  r  r)  bmmconvolutionconvolution_backwardmax_pool2d_with_indicesr  mmrD  rE  _int_mmrC  rB  r  r  r  r2  r  r!  	complex32	complex64r   r1  r   r   r   r   r   r   r   r   r   r   r   r	  r   r  r  r9  rV  r   r   rg  rj  ru  rq  rx  r   r|  
device_putr}  r  r  r  r  r   aliasdetachdetach_liftview_ofr  r   r  r  r  r  r  r  r  r  r  rh  r  r   r  r  r  _unsafe_viewreshaper  slicer  r  r  quantize_per_channelr(  dequantize_per_channelr.  quantize_per_tensorr  r3  dequantize_per_tensorr5  r  r>  r@  r\  r  r  r  r  r  r  r  r  r  r  r  r  re  	lru_cacher  r  r  r  r  r  r  r  r  rngprimsr  r  r  	bernoullir  r  r  r  r  r  r   r  r  r  r  r  randintforce_stride_orderr
  r  r  r  r  lookup_seedr  randomr  r  r   r"  r#  r6  r.  r?  rA  re  rP   rZ  _adaptive_avg_pool3dadaptive_max_pool3dfractional_max_pool3dmax_pool3d_with_indicesuniformexponential_pdist_forwardsoft_margin_loss_backward_cdist_forward_cdist_backward
_trilinearsegment_reduce_segment_reduce_backwardhistc	histogrambin_ct_histogramdd_bin_edges_histogramdd_from_bin_ctsaddbmm_addmm_activation
_cudnn_rnn_cudnn_rnn_backward_embedding_bag_embedding_bag_forward_only_embedding_bag_backward*_embedding_bag_per_sample_weights_backward_fused_moving_avg_obs_fq_helper*_fused_moving_avg_obs_fq_helper_functional max_pool3d_with_indices_backward_adaptive_avg_pool2d_backward_adaptive_avg_pool3d_backwardadaptive_max_pool2d_backwardadaptive_max_pool3d_backwardfractional_max_pool2d_backwardfractional_max_pool3d_backwardreplication_pad1d_backwardreplication_pad2d_backwardupsample_linear1d_backwardupsample_bicubic2d_backwardupsample_trilinear3d_backwardgrid_sampler_2d_backward_pdist_backwardr  r  kthvaluetopkr  median	nanmedianrandpermresize_
resize_as__linalg_detlinalg_householder_productlinalg_inv_exlinalg_ldl_factor_exlinalg_ldl_solve	linalg_lulinalg_lu_factor_exlinalg_lu_solvelinalg_matrix_exp	linalg_qr_linalg_slogdet_linalg_solve_exlinalg_solve_triangular_linalg_svd	lu_unpackormqr_linalg_check_errorslinalg_pinvatol_rtol_tensor_linalg_eightriangular_solvelinalg_cholesky_excholesky_inversecholesky_solvegeqrf_fft_r2cnonzerogcd_thnn_fused_lstm_cell_prims	rng_primsrun_and_save_rng_staterun_with_rng_statemasked_scattermasked_scatter_backwardr  angle_efficientzerotensor(_sparse_coo_tensor_with_dims_and_tensors	to_sparse
_to_sparser   r  '_scaled_dot_product_efficient_attentionrR  #_scaled_dot_product_flash_attention,_scaled_dot_product_flash_attention_backward#_scaled_dot_product_cudnn_attention,_scaled_dot_product_cudnn_attention_backward+_scaled_dot_product_flash_attention_for_cpu4_scaled_dot_product_flash_attention_for_cpu_backward_flash_attention_forward_flash_attention_backward_efficient_attention_forward_efficient_attention_backwardindex_reducer]  r\  r_  r  rd  rh  rp  rq  scalar_tensorr  
LongTensorr  r  r  r  r  r  r  r  r  rc  r  
zeros_liker  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r	  fallback__unsafe_masked_indexr  ,fallback__unsafe_masked_index_put_accumulater  r  r  r  r  r$  r"  r&  r   r:  r>  r@  rH  rI  r  rK  r]  rM  rN  rW  r[  rd  rh  rl  r  r  r  r  r  r  r  r  r  r  r  r  r   r  r
  r  r  r  r  r(  rI  rD  rJ  rV  rZ  r]  r)  rk  rn  rx  r|  rL  r  r  r  r  r  Tensor_Tensorr  Scalarr  Tensor_Scalarr  r  r  r  r  r  r`  r  r,  r   r  r  true_divider  r  r  r_  r  r  r  r  r  r  r  r  r  r  r  r   r  r  r  r  r  r  r'  r  r(  r  r  r  r  r  ru   r  r  r  r   rsqrtr  r  expm1relur  r  re  r+  cossinabsbitwise_andbitwise_left_shiftbitwise_not
bitwise_orbitwise_right_shiftbitwise_xorlgammaerfspecial_erfr  tantanhr  r  r  logical_xorr  r  	clamp_min	clamp_maxnegr  	remaindersignsignbit	_neg_viewler  r  ro  re  necoshsinhacosacoshasinasinhatan2atanatanhr  erfcerfinvhypotlog10log2	nextaftercodegen.commonrD  r  r  r   r   r   r7  _foreach_addforeach_add_listforeach_add_scalar_foreach_mulforeach_mul_listforeach_mul_scalar_foreach_sub_foreach_neg_foreach_abs_foreach_powScalarAndTensor_foreach_divforeach_div_listforeach_div_scalar_foreach_sqrt_foreach_rsqrt_foreach_maximum_foreach_minimum_foreach_clamp_min_foreach_clamp_max_foreach_reciprocal_foreach_sign_foreach_copyr  _foreach_add__foreach_mul__foreach_div_r  add_bitwise_and_bitwise_left_shift_bitwise_not_bitwise_or_bitwise_right_shift_bitwise_xor_mul_div_Tensor_modelogical_and_logical_not_logical_or_logical_xor_sub_relu_sigmoid___and__
__lshift____or__
__rshift____xor____iand____ilshift____ior____irshift____ixor__r  r  r  r  r   methodfuncr  r  _inductor_testrF  r  inductorr  set_source_Tensorr"  fsdpr#  r6  *torch._higher_order_ops.auto_functionalizer7  rB  higher_orderrg   rN  rR  Subgraphr^  r`  ri  comm_loweringrj  r  rk  register_quantized_opsregister_woq_mm_opsrl  register_onednn_fusion_opsrm  register_jagged_opsrU   rU   rU   rV   <module>   s  
,4<0
,&
	

G
8
5\P
4


#












1
E2,$-)|6



		
%2
&






	
	(, 	
]/Q


C
8



.


%!I%

[




!*&t/


*1"*H
!
 
)5


B


N


E-


s
	 $

	 D1
+


/
#
"	











'	













@
"
"
