o
    ߗi1                 -   @   s/  d dl Z d dlmZ d dlmZ d dlmZmZmZm	Z	m
Z
 d dlZd dlmZ d dlmZmZmZ d dlmZmZmZmZ d dlmZ d dlmZmZ d d	lmZmZmZmZm Z m!Z!m"Z"m#Z#m$Z$m%Z% d d
l&m'Z'm(Z(m)Z)m*Z*m+Z+ d dl,m-Z-m.Z. d dl/m0Z1 ej2j3Z3ej45dddZ6dd Z7de fddZ8dd Z9dd Z:e7e3j;e3j<ge+ dddej=ddfddZ>e7e3j?j@e3j?jAge+ dd ZBe7e3jCj@e3jCjAge+ ddd d!ZCe7e3jDe+ d"d# ZDe7e3jEj@e3jEjAe3jFj@e3jFjAge+d$d%d&d' ZGe7e3jHj@e3jHjAge+ d(d) ZHd*d+ ZIe7e3jJj@e3jJjAge+ d,d- ZKe7e3jLj@e3jLjAge+ d.d/ ZMe7e3jNjOdd0d1d2ZPe7e3jNj@ejQdddd3d4d5ZRe7e3jSj@e3jSjAge+ ejQdddd3d6d7ZTe7e3jSjUe3jSjVge+ ejQdddd3d8d9ZWe7e3jXj@e3jXjAge+ ddddd3d:d;ZYe7e3jZj@e3jZjAge+ d<d= Z[e7e3j\j@dd>d?Z]d@dA Z^e7e3j_j@dBdC Z`e7e3ja			ddDedEedFedGee dHeeb dIeejc fdJdKZde7e3je	ddLedMedNedIeejc fdOdPZfe7e3jgdQdQddRdDedLedMedNedIeejc f
dSdTZhe7e3ji					 	Q	ddUejdVejdGee dWee dIeejc dXejdYekdZekd[ejfd\d]Zle7e3jmj@d^d_d`edaekdbedcejddebdeejdfefdgdhZne7e3joj@d^d_d`edaekdbedcejddebdeejdfefdidjZpe+ e7e3jqj@dkdl Zre7e3jsj@dddd dddmdneddebdoee d%ee dpee dqekdrejdfefdsdtZte7e3juj@e3jujvge+ dudv Zwe7e3jujxddwdxZye7e3jzj@e3jzjvge+ dydz Z{e7e3jzjxdd{d|Z|e7e3j}j@d}d~ Z~e7e3j}jAdd Ze7e3jj@dd Ze7e3jjdd Ze7e3jj@dd Ze7e3jj@ddddddddZe7e3jj@dddZe7e3jj@dddZe7e3jj@dddZe7e3jj@dd Ze7e3jjdd Zd`edebfddZd`ededebfddZ	^ddedebdejfddZddedebdebfddZdededejdebfddZ	ddebdedDedebfddZdebfddZe7e3jj@e3jjge+ddddedebdejfddZe7e3jj@e3jjAge+ dDedfefddZe7e3jge+dddDefddZdedfefddZe7e3je+ d`ededejdfefddZe7e3je+ dd`ededejdfefddZe7e3je+ dd`edejdfefddZe7e3je+ dd`edejdfefddÄZe7e3jj@ddedejdejfddƄZe7e3jj@e3jjAge+ dDededfefddɄZe7e3jj@ddedejfdd˄Ze7e3jj@e3jjAge+ddd΃dddϜd`edejdejdfe	eeef fdd҄Ze7e3jj@e3jjAge+ ddӜdedededejdfef
ddՄZe7e3jj@e3jjAge+ddd׃d^d؜dedejdfe	eeef fddۄZe7e3jj@e3jjAge+ddd΃d^ddݜdedejdejdfe	eeef fdd߄Ze7e3jj@e3jjAge+ d^dddedededejdejdfefddZe7e3je+ddd׃	^	^ddededejdejdfe	eeef f
ddZdebdfe	ejejf fddZe7e3jj@e3jjAge+ddddedebdfe	eef fddZe7e3jj@e3jjge+dddd̓dedfe	eeeef fddZe7e3jj@		^	ddedejdejdeeb fddZdededfe	eek eek f fddZdededeeb dfe	eef fddZdDededfejfd dZe7e3jd^dddddddededejdejdee dee dee dee dfe	eeeef fddZe7e3jj@e3jjAgd^ddddededejdejdejdee dfefdd	Ze7e3je+d
dd^d	^		d d`ededejdejdejdfe	eef fddZe7e3jj@dd Ze7e3jŃe+ 	^	d!dDedededejdejdfefddZŐdd ZƐdd Ze7e3jȃe+ dd Ze7e3jʃe+ dd Zːdd Ze7e3j̓e+ddd  Ze7e3jσe+dd!d" ZАd#d$ Ze7e3j҃e+ d%d& Ze7e3jԃe+ d'd( Ze7e3jj@e3jje3jj@e3jjge+dd)d* Zِd+d, Ze7e3jۃe+ d-d. Ze7e3j݃e+ d/d0 Ze7e3jj@e3jje3jj@e3jjge+dd1d2 Ze7e3je+ d"d`ed4edfefd5d6Ze7e3je+ d7ed`ed4ed8edfef
d9d:Ze7e3jj@e3jjAge+ dQdQd;d<d=Ze7e3jj@e3jjAge+ dd0d>d?Ze7e3jjd#dAdBZe7e3jjd#dCdDZe7e3jj@e3jjAge+ ddEdFZe7e3jj@		ddGdHZe7e3je+ dIdJ ZdKdL Zd$dNdOZ	ddPejdEejdQe
eek ekf dRe
eek ekf dSe
eek ekf dTejdUekdVee
eek ekf  fdWdXZdYdZ Ze7e3jj@dPejdEejdGeej d[eej d\eej d]ejd^ed_efd`daZe7e3jj@dPejdEejdGejdQeek dReek dSeek dTejdVeek dUekfdbdcZejj	rej45ddddZe7ej2j jj@dedf Ze7ej2j jj@dgdh Zejj	r?ej45diddZe7ej2jjdjdk Z	ej45dlddZ
e7ej2jjj@dmdn Ze7ej2jjj@e7ej2jjjdodp Ze7ej2jjj@e7ej2jjj@dqdr Zej45dsddZe7ej2jj	t	u	v	d%dwdxZdydz Ze7e3jj@	t	u		^	d&d{d|Zd}d~ Ze7e3jj@dd Ze7e3je+ 	t	u		^	d&ddZe7e3j e+ddd Z!e7e3j"j@dd Z#e7e3j$j@dd Z%e7e3j&j@dd Z'e7e3j(e+ddd Z)dedebfddZ*e7e3j+e+dd%dd Z,e7e3j-e+ddd Z.e7e3j/e+dd%dd Z0e7e3j1e+ddd Z2e7e3j3jdddZ4e7e3j5j@e3j5jAge+ dd Z6e7e3j7j@e3j7jAge+ dddekdekfddZ7e7e3j8je3j9jgdd Z:e7e3j;j@gdd Z<e7e3j=j@e3j=jAge+ dQdQd;ddZ>e7e3j?j@e3j@j@gdddddZAe7e3jBj@gdddddZCe7e3jDge+ dd ZEe7e3jFgdd ZGe7e3jHgdd ZIe7e3jJgdd ZKe7e3jLgdd ZMe7e3jNgdd ZOe7e3jPj@dd ZQe7e3jRe+ dd ZSe7e3jTj@		 				d'ddZUe7e3jVj@dd ZWdddZXe7e3jYj@e3jYjAge+ d(dddÐdĄZZe7e3j[j@e3j\j@gdŐdƄ Z]e7e3j[jxe3j[j^e3j\jxe3j\j^e3j_j@e3j_j`ge+d$d%d)dǐdȄZae7e3jbj@dɐdʄ Zce7e3jdj@dːd̄ Zee7e3jfj@d͐d΄ Zge7e3jhjie3jjjie3jhje3jjje3jkj@e3jlj@e3jmj@gdϐdЄ Zne7e3jojie3jpjie3joje3jpjgdvdѐd҄Zqe7e3jrj@e3jrjsgdӐdԄ ZtdՐdք Zue7e3jvje3jvjigdאd؄ Zwe7e3jxje3jxjigdِdڄ Zye7e3jzj@dېd܄ Z{e7e3j|je3j|jigdݐdބ Z}e7e3j~je3j~jigdߐd Ze7e3jj@dd Ze7e3jje+ dvdfefddZe7e3jge+ 	d*ddZe7e3jg	d*ddZe7e3jg	d*ddZe7e3jj@e3jj@gdddZe7e3jjidd Ze7e3jj@dd Ze7e3jdd Ze7e3je+ dd Ze7e3jdd Ze7e3jj@dddZe7e3jj@dd ZdddZe7e3jj@dd  Zdd Zdd Zdd Zdd Z	ddDed	ekd
ekdekdekdekdekdekdekdekdekdekdekdekdekdekdekdekdekdekdebdejf,ddZdd  ZdDeded	ekd
ekdekdekdekdekdekdekdekdekdekdekdekdekdekdekdebf&d!d"Zd#d$ Ze7e3jj@d%d& Ze7e3jj@	t	u	v	d%d'd(Ze7e3jj@d)d* Ze7e3je+dd%	t	u	v	d%d+d,Ze7e3je+dd-d. ZdDed/efd0d1ZG d2d3 d3eZdDed/ed4ekfd5d6Ze7e3jj@d7d8 Ze7e3je+ d9d: Ze7e3je+dd;d<d= Ze7e3jj@gd>d? Ze7e3jj@					d+d@dAZe7e3jjkdBdC Ze7e3jj@dDdE Ze7e3jj@d,dFdGZddaekdHekdIejfdJdKZdLdM ZdNdO Ze7e3jj@ddPdQZÐddRdSZĐddTdUZŐdVdW ZƐddXdYZǐd-dZd[Ze7e3jj@d\d] Ze7e3j˃d^d_ Ze7e3j͐je3j͐je3j͐je3j͐jge+ dd`daZe7e3jӐje3jӐje3jӐje3jӐjgddbdcZe7e3jg	d			d.deedfedgedhediejdjejdkee fdldmZe7e3jg	d			d.deedfedgednee doejdhediejdjejdkee fdpdqZe7e3jg	ddredeedfedgededsedteduedvekdwekdhediejdxedyedkee fdzd{Ze7e3jg	d			d/deedfedgedhediejd|ee dkee fd}d~Ze7e3jg		ddredeedfedgededsedhediejd|ee dkee fddZe7e3jg	d		d0deedfedgednee doejdiejdkee fddZe7e3jg		d1dredeedfedgednee dedsedxedyedhedeej diejdkee fddZe7e3jg	ddredeedfedgededsedxedyednedteduedvekdwekdhediejdkee f ddZe7e3jg					d+deedfedgedtee duee dvekdwekdhediejdjejdkee deek deek dee dee fddZe7e3jg			ddredeedfedgededsedteduedvekdwekdhediejdxedyedkee deek deek f"ddZe7e3jg					d2deedfedgedGee dee dee deek deek dhedekdoejdkee dee dee deek fddZe7e3jg			d-dredeedfedgedGee dee dee dejdejdsedhedxedyedekdejdkee deek dejf$ddZe7e3jj@g				d3d`ejdNejdejdejdGeej deej dIeejc dejfddZe7e3jje3jjge+ dddZe7e3jjdddZe7e3jj@e3jjAge+ ddd0ddZdd Zdd Ze7e3jj@e3jj@gdddZe7e3jj@e3jj@gdddZe7e3jj@e3jj@g		ddedee
ekejf  dee
ekejf  dee dee f
ddZe7e3j j@e3jj@gdddZ e7e3jj@e3jje3jj`e3jjgd4ddZdd Ze7e3jj@		dddZe7e3j	j@dd Z	e7e3j
j@dÐdĄ Z
dŐdƄ ZdǐdȄ Ze7e3jj@e3jj@gd(dɐdʄZe7e3jj@d5dːd̄Ze7e3jj@d6d͐d΄Ze7e3je+ 	d7dϐdЄZe7e3jj@e3jj`ge+d$d%d)dѐd҄ZejZdӐdԄ Ze7e3jj@dՐdք Ze7e3jj@dאd؄ Ze7e3jj@dِdڄ Ze7e3jj@dېd܄ Ze7e3jje3jj ge+ dddݜdސd߄Z!e7e3j"ge+ d8ddZ#e7e3j$j@e3j%j@g		dddZ&e7e3j'j@dd Z'e7e3j(j@e3j(jAge+ dddZ(e7ej2j3j)dd Z)e7ej2j3j*dd Z*e7e3j+e+ dddddddZ,dd Z-e7e3j.	d9ddZ/e7e3j0	d9ddZ1e7e3j2	d9ddZ3e7e3j4e+ dddddZ5e7e3j6e+ dekd`edfefddZ7e7e3j8d`efddZ9e7e3j:j@	dd:d$edpee d eek defddZ;dd Z<dd Z=e<e3j> e<e3j? e<e3j@ e<e3jA e<e3jB e<e3jC e<e3jD e<e3jE e<e3jF e=e3jG e=e3jH e=e3jI e=e3jJ e=e3jK e=e3jL e=e3jM e=e3jN e=e3jO e=e3jP e=e3jQ e=e3jR dd	 ZSe7e3jTe+ d
d ZTe7e3jUe+ dQdddZUe7e3jVe+ dQdddZVeSe3jTZWeSe3jUZXeSe3jVZYd dl,Zd dlZZd dl[Zdd Z\e\  dS (;      N)Enum)wraps)ListOptionalSequenceTupleUnion)SymBoolSymFloatTensor)_add_op_to_registry_convert_out_paramsglobal_decomposition_table
meta_table)
OpOverload)_prim_elementwise_meta$ELEMENTWISE_PRIM_TYPE_PROMOTION_KIND)
BoolLikecorresponding_complex_dtypecorresponding_real_dtypeelementwise_dtypesELEMENTWISE_TYPE_PROMOTION_KIND	FloatLikeIntLikemake_contiguous_strides_forNumber
TensorLike)_maybe_convert_to_dtype_maybe_resize_out_resize_output_check_safe_copy_outout_wrapper)_broadcast_shapes_maybe_broadcast)_pytreeatenIMPLMetac                    s    fdd}|S )Nc                    s$   t    fdd}t|  S )Nc                    s   t t|   d S N)r   r   opfn W/home/ubuntu/transcripts/venv/lib/python3.10/site-packages/torch/_meta_registrations.pyregister3   s   z0register_meta.<locals>.wrapper.<locals>.register)r   pytree	tree_map_)r,   r/   r)   r+   r.   wrapper0   s   zregister_meta.<locals>.wrapperr-   )r*   r2   r-   r)   r.   register_meta/   s   	r3   type_promotionc                    s>   t j|d| i\}  fdd|D }t| }t|dtjiS )Ntype_promotion_kindc                    s   g | ]}t | qS r-   )r   .0xresult_dtyper-   r.   
<listcomp>E       z$elementwise_meta.<locals>.<listcomp>r4   )utilsr   r#   r   r   DEFAULT)r4   args_r-   r9   r.   elementwise_meta<   s   
rA   c                 C   s(   t jt jt jt jt jt ji}|| | S r(   )torch	complex32halfcfloatfloatcdoubledoubleget)dtypefrom_complexr-   r-   r.   toRealValueTypeP   s
   rL   c                    s2   t tg|R   t k fdd d S )Nc                      s   d d  S )Nzoutput with shape z# doesn't match the broadcast shape r-   r-   broadcasted_shape
self_shaper-   r.   <lambda>]       z)check_inplace_broadcast.<locals>.<lambda>)tupler"   rB   _check)rO   
args_shaper-   rM   r.   check_inplace_broadcastY   s
   rU   Fc	           	         s  t tjrt dkdd  t tjr$t dkdd  tdd fD rMtt  d u r> ntt	 fdd npRt t tj
s[J tt tfdd t tsqJ tdkd	d  tjf|d
||dS )Nr   c                   S      dS Nz:linspace only supports 0-dimensional start and end tensorsr-   r-   r-   r-   r.   rP   q       z(meta_linspace_logspace.<locals>.<lambda>c                   S   rV   rW   r-   r-   r-   r-   r.   rP   v   rX   c                 s   s    | ]}t |tV  qd S r(   )
isinstancecomplex)r7   argr-   r-   r.   	<genexpr>y   s    z)meta_linspace_logspace.<locals>.<genexpr>c                         d  d S )Nzlinspace(): inferred dtype z& can't be safely cast to passed dtype r-   r-   )default_complex_dtyperJ   r-   r.   rP      rQ   c                      s*   dt j dt  j dt j dS )Nz4received an invalid combination of arguments - got (, ))type__name__r-   )endstartstepsr-   r.   rP      s    c                   S   rV   )Nz$number of steps must be non-negativer-   r-   r-   r-   r.   rP      rX   meta)rJ   layoutdevice
pin_memoryrequires_grad)rY   rB   r   rS   dimanyr=   r   get_default_dtypeis_complex_dtyperJ   _check_typer   empty)	rd   rc   re   baserJ   rh   rg   ri   rj   r-   )r^   rJ   rc   rd   re   r.   meta_linspace_logspacea   sH   

rr   c                    sN   t  jt jk fdd t |  dko  dk dd  |  jS )Nc                         d j  S )Nz2take(): Expected a long tensor for index, but got rJ   r-   indexr-   r.   rP          zmeta_take.<locals>.<lambda>r   c                   S   rV   )Nz*take(): tried to take from an empty tensorr-   r-   r-   r-   r.   rP      rX   )rB   rS   rJ   long_check_indexnumel	new_emptyshape)selfrv   r-   ru   r.   	meta_take   s   

r~   rk   c                   sh   j }j }t||kdd  t dko dk fdd tjj}|S )Nc                   S   rV   )Nz=linalg.cross: inputs must have the same number of dimensions.r-   r-   r-   r-   r.   rP      rX   zlinalg_cross.<locals>.<lambda>   c                      s"   d  d   d   S )Nzlinalg.cross: inputs dimension z must have length 3. Got  and sizer-   rk   otherr}   r-   r.   rP      s
   )ndimrB   rS   r   r"   r|   r{   )r}   r   rk   x_dy_d	out_shaper-   r   r.   linalg_cross   s   
r   c                 C   s$   t | d t| d tj| tjdS )Nzlinalg.matrix_expmemory_format)squareCheckInputscheckFloatingOrComplexrB   
empty_likecontiguous_formatr}   r-   r-   r.   linalg_matrix_exp   s   

r   valuesindicesc                 C   sV   t j| j| j| jd}t j| j| jt jd}|  dkr'| jdkr't|| j ||fS )Nrh   rJ   r   )	rB   rp   r|   rh   rJ   int64rz   r   maybe_wrap_dim)r}   rk   r   r   r-   r-   r.   	cummaxmin   s
   r   c                 C   s   t || j t|  S r(   )r   r   rB   r   
contiguous)r}   rk   r-   r-   r.   logcumsumexp   s   r   c                    s  |j }t|}|| }tt|}dd t|D }	|D ]}
d|	|
< qg g }}|D ]}
|	|
 s6||
 q*||
 q*|| }t|}|  |d | }|j fdddd |||d   }||}dgt|j|d   }|	|}|
d}||d< |}tt|D ]}|||  ||d	 < q| 	|} d
d t|D }d	}|d	 }|dkr|| d ||| < ||||  9 }|d	8 }|dkst||D ]}| d	||  ||| < q| |||  S )Nc                 S      g | ]}d qS Fr-   r7   r@   r-   r-   r.   r;      rQ   z_exec_fft.<locals>.<listcomp>Tc                        |  S r(   r-   r8   self_stridesr-   r.   rP          z_exec_fft.<locals>.<lambda>keyreverser   r      c                 S   r   r   r-   r   r-   r-   r.   r;     rQ   )r   lenlistrangeappendstridesortpermuter|   reshaper   
as_stridedstorage_offset)outr}   	out_sizesrk   forwardr   signal_ndim
batch_dimsdim_permuteis_transformed_dimdleftright	batch_endtmpinputbatched_sizes
batch_sizebatched_out_sizesiout_stridesbatch_numelr-   r   r.   	_exec_fft   sL   





r   c                    sb   | j jsJ | j}| |}|s|S |d d  }|   |j fdddd t|| |||}|S )Nc                    r   r(   r-   r   r   r-   r.   rP      r   zmeta_fft_c2c.<locals>.<lambda>Tr   )rJ   
is_complexr|   r{   r   r   r   )r}   rk   normalizationr   r   outputsorted_dimsr-   r   r.   meta_fft_c2c  s   
r   c                 C   sR   | j jsJ t|  }|r|d }|| d d }|||< | j|t| j dS )Nr      r   rt   )rJ   is_floating_pointr   r   r{   r=   r   )r}   rk   r   onesidedoutput_sizeslast_dimlast_dim_halfsizer-   r-   r.   meta_fft_r2c&  s   r   )	generatorc                C   s   t |t| gS r(   )r   rB   Size)nr   r   r-   r-   r.   meta_randperm6  s   r   rJ   rg   rh   ri   c                C      t j| ||||dS Nr   rB   rp   )r   rJ   rg   rh   ri   r-   r-   r.   meta_randperm_default;  s   	
r   c                C   s   t j|||||dS r   r   )highr   rJ   rg   rh   ri   r-   r-   r.   meta_randintI  s   
r   c                C   s   t j|||||dS r   r   )lowr   r   rJ   rg   rh   ri   r-   r-   r.   meta_randint_lowY  s   
r   c                C   r   r   r   )r   rJ   rg   rh   ri   r-   r-   r.   meta_rand_defaultj  s   
r   c                 C   s8   | j jsJ t|  }|||d < | j|t| j dS )Nr   rt   )rJ   r   r   r   r{   rL   )r}   rk   r   lastdimr   r-   r-   r.   meta_fft_c2rr  s   r   c                 C   sf   ddl m} || st| dkrtdt|tr1|| |}|  | kr1t	j
||   | S )Nr   )free_unbacked_symbolsr   zQmore than one element of the written-to tensor refers to a single memory location)%torch.fx.experimental.symbolic_shapesr   rB   _debug_has_internal_overlapRuntimeErrorrY   r   tor   r%   expand_copydefault)r}   srcnon_blockingr   intermediater-   r-   r.   
meta_copy_{  s   
r   c                 C   sX   t |  }t |  }||  krdn|| ||  }||d ||| ||fS Nr   )r   r   r   rk   insert)tensorrk   result_sizesresult_strides
new_strider-   r-   r.   inferUnsqueezeGeometry  s    r   c                 C   s0   t ||  d }t| |\}}| || | S r   )r   rk   r   as_strided_)r}   rk   g_sizes	g_stridesr-   r-   r.   meta_unsqueeze_  s   r   r   weight_metabias_activation_opt	out_dtypec           	      C   s   t | j}|d ur|d|dksJ d|d| dd ks%J |d|d< t| jdks7J dd| df}|d urQ| jtjkrM|tjksQJ d| j||d u r[| jn|d	||}|S )	Nr   zoutput size mismatchr   r   r   z*we can only handle the squashed input case9out_dtype is only supported for i8i8->i32 linear operatorrt   )
r   r|   r   r   rJ   rB   int8int32r{   r   )	r   r   r   r   r   r   r   transposed_stridesr   r-   r-   r.   meta_sparse_structured_linear  s$   
	r  mat1	mat1_metamat2c                 C   s   t | jdks	J t |jdksJ t |jdksJ | d|dd ks)J | d|dg}|d urF|jtjkrB|tjksFJ d|j||d u rP|jn|d}|S )Nr   r   r   r   rt   r   r|   r   rJ   rB   r   r   r{   )r  r  r  r   r   r   r-   r-   r.   meta_sparse_structured_mm  s   r  r   )alphabetar   c          	      C   s   t | jdksJ dt |jdksJ t |jdksJ t |jdks&J | d|dks4J d|d|dd ksBJ |d|dg}|d ur_|jtjkr[|tjks_J d|j||d u ri|jn|d}|S )Nr   zEonly input broadcasted to columns of mat1 * mat2 product is supportedr   r   r   rt   r  )	r   r  r  r  r  r	  r   r   r   r-   r-   r.   meta_sparse_structured_addmm  s,   r
  compressed_Adense_Br  transpose_resultalg_idsplit_ksplit_k_one_kernelc	                 C   s  |j tjtjtjtjtjhv sJ d| j |j ksJ dt|jdks(J d| j tjtjfv }	|	r5dnd}
|	rA|	 rAJ d|
d}|
d	}|  d
 |
|  }|d urb||
dksbJ |d urx|	rt|tjtjtjtjhv sxJ d|r~||fn||f}|j||dS )Nz;_cslt_sparse_mm only supports fp16, bf16, int8, and fp8e4m3zinputs must have the same dtyper   z'_cslt_sparse_mm only supports 2d inputs
   	   z.dense input must be transposed for 8bit dtypesr   r      z\out_dtype is not supported for {compressed_A.dtype} x {dense_B.dtype} -> {out_dtype} matmul!rt   )rJ   rB   float32float16bfloat16r   float8_e4m3fnr   r|   is_contiguousr   rz   r   r{   )r  r  r   r  r   r  r  r  r  is_8bit_input_typecompression_factorkr   moutput_shaper-   r-   r.   meta__cslt_sparse_mm  sJ   

r  T)include_selfr}   rk   rv   sourcereducer  returnc                C   s   t j| t jdS Nr   )rB   r   r   r}   rk   rv   r   r!  r  r-   r-   r.   meta_index_reduce6  s   
r%  c                C      | S r(   r-   r$  r-   r-   r.   meta_index_reduce_C  s   
r'  c                 C   s.   t |  }|  dkr| ||< | |S )Nr   )r   r   rk   rz   r{   )r}   rk   rv   result_sizer-   r-   r.   meta_index_selectQ  s   
r)  )lengthsr   offsetsaxisunsafeinitialdatar*  r+  r,  r-  c          
         sf   |d urt d fdd}|d ur||jS |d ur/|jd d |jd d f }	||	S td)Nz?segment_reduce(): indices based reduction is not supported yet.c                    s(   t j| j d d   jdt jdS )Nr   rf   rJ   rh   r   )rB   rp   r|   rJ   r   )lengths_shaper,  r/  r-   r.   segment_reduce_lengths_tensork  s   z:meta_segment_reduce.<locals>.segment_reduce_lengths_tensorr   r   z<segment_reduce(): Either lengths or offsets must be defined.)NotImplementedErrorr|   r   )
r/  r!  r*  r   r+  r,  r-  r.  r3  r1  r-   r2  r.   meta_segment_reduceZ  s   
r5  c                 C   
   |  dS Nr-   r{   r   r-   r-   r.   meta_max~     
r9  c                 C   6   t | j|f}t| ||}| || j|tjdfS Nrt   r=   reduction_dimsr|   _compute_reduction_shaper{   rB   rx   r}   rk   keepdimr  r-   r-   r.   meta_max_dim  
   rB  c                 C   r6  r7  r8  r   r-   r-   r.   meta_min  r:  rD  c                 C   r;  r<  r=  r@  r-   r-   r.   meta_min_dim  rC  rE  c                 C   s4   |   r
t| j}n	t| tjd\}}tj| |dS )Nr5   rt   )r   r   rJ   r   r   INT_TO_FLOATrB   r   )r}   r:   r@   r-   r-   r.   
meta_angle  s   
rH  c                 C   s$   t ||  | j |t | S r(   )rB   _resize_output_r   rh   copy_angle)r}   r   r-   r-   r.   meta_angle_out  s   rL  c                 C      d S r(   r-   )valr-   r-   r.   assert_async     rO  c                 C   rM  r(   r-   )rN  
assert_msgr-   r-   r.   assert_async_meta  rP  rR  c                 C   rM  r(   r-   )sr-   r-   r.   
print_meta  rP  rT  rJ   rg   rh   ri   r   c                 C   s   t jdddS )Nr   rf   rh   r   rU  r-   r-   r.   make_dep_token  s   	rW  c                 C   s4   ddl m} t| ttfrtd|| ||d d S )Nr   )constrain_range'Constraining SymFloat or Symbool is nyiminmax)r   rX  rY   r
   r	   
ValueError)r   r[  r\  rX  r-   r-   r.   sym_constrain_range  s   r^  c                 C      t j| ||d |S NrZ  )r%   r^  r   r[  r\  	dep_tokenr-   r-   r.   functional_sym_constrain_range     rc  c                 C   s   ddl m} |d u r|d u rt|  d S t| ttfr tdt| t	u r>|d ur1t
| |k |d ur<t
| |k d S || ||d d S )Nr   )_constrain_range_for_sizerY  rZ  )r   re  rB   _check_is_sizerY   r
   r	   r]  ra   intrS   )r   r[  r\  re  r-   r-   r.   sym_constrain_range_for_size  s   
rh  c                 C   r_  r`  )r%   rh  ra  r-   r-   r.   'functional_sym_constrain_range_for_size  rd  ri  c                 C   s   |S r(   r-   )rN  rQ  rb  r-   r-   r.   functional_assert_async_meta  rP  rj  f_namec                 C   sX   |   dksJ | d| d| dks*J | d| d d| d dd S )Nr   z3: The input tensor must have at least 2 dimensions.r   z5: A must be batches of square matrices, but they are  by 	 matrices)rk   r   )r}   rk  r-   r-   r.   r     s    r   Anamec                    s   t j jk fdd t j jk fdd t  d dk fdd t  ddk fdd d S )Nc                         dj  d j  dS )Nz:Expected b and A to be on the same device, but found b on z
 and A on 	 instead.rV  r-   ro  r}   r-   r.   rP   
  
   z(linearSolveCheckInputs.<locals>.<lambda>c                      rq  )Nz=Expected b and A to have the same dtype, but found b of type z and A of type rr  rt   r-   rs  r-   r.   rP     rt  r   rl  c                      s   d  d d  d dS )Nz3A must be batches of square matrices, but they are rl  rm  r   rn  r   r-   ro  r-   r.   rP     s
   c                      s:   d d  d d  d d d d d 
S )NzIncompatible matrix sizes for z: each A matrix is r   rm  z but each b matrix is rl  r   r-   ro  rp  r}   r-   r.   rP   "  s   )rB   rS   rh   rJ   r   )r}   ro  rp  r-   rv  r.   linearSolveCheckInputs  s    


rw  tallow_low_precision_dtypesc                    s^   | j  t|  p|   fdd |s-t tjtjtjtjfv  fdd d S d S )Nc                          d  S )Nz<: Expected a floating point or complex tensor as input. Got r-   r-   rJ   rk  r-   r.   rP   3      z(checkFloatingOrComplex.<locals>.<lambda>c                      rz  )Nz*: Low precision dtypes not supported. Got r-   r-   r{  r-   r.   rP   8  r|  )	rJ   rB   rS   r   r   rF   rH   rE   rG   )rx  rk  ry  r-   r{  r.   r   +  s   r   arg_namec                    s"   t |  dk fdd d S )Nr   c                          d  dS )Nz: The input tensor z! must have at least 2 dimensions.r-   r-   r}  rk  r-   r.   rP   @  rQ   zcheckIsMatrix.<locals>.<lambda>)rB   rS   rk   )ro  rk  r}  r-   r  r.   checkIsMatrix=  s   
r  Br   c                    sZ   t   t tr ddkn	 ddk fdd d S )Nrl  r   c                      sH    drdnd d  d d  d d d d d d	S )
Nz2: Incompatible shapes of A and B for the equation zAX = BzXA = Bz (rl  r8   r   r   r`   r   r-   ro  r  rk  r   r-   r.   rP   I  s   
z#checkInputsSolver.<locals>.<lambda>)r   r  rB   rS   r   )ro  r  r   rk  r-   r  r.   checkInputsSolverD  s   

*r  resultfn_nameresult_namec                    s&   t jjk fdd d S )Nc                	      s$     d d dj  dj  	S )Nz: Expected z5 and input tensors to be on the same device, but got z on z and input on rV  r-   r  r   r  r  r-   r.   rP   Y  s   z!checkSameDevice.<locals>.<lambda>)rB   rS   rh   )r  r  r   r  r-   r  r.   checkSameDeviceQ  s   
r  UPLOc                    s8      }tt dko|dkp|dk fdd d S )Nr   ULc                      
   d  S )Nz1Expected UPLO argument to be 'L' or 'U', but got r-   r-   r  r-   r.   rP   d     
 zcheckUplo.<locals>.<lambda>)upperrB   rS   r   )r  UPLO_uppercaser-   r  r.   	checkUplo`  s
   
r  eigenvalueseigenvectorsr  	compute_vc                 C   sp   t | d t| t| j}|r | |}||t|dd n| dg}|  | j|t| j	d}||fS )Nzlinalg.eighF	row_majorr   rt   )
r   r  r   r|   r{   r   r   poprL   rJ   )ro  r  r  r|   vecsvalsr-   r-   r.   meta__linalg_eighh  s   


r  c                 C   s@   t | d t| jr| jnt| j}| j| jd d |dS )Nzlinalg.eigvalsr   rt   r   r=   rn   rJ   r   r{   r|   )r   complex_dtyper-   r-   r.   meta__linalg_eigvals{  s   


r  c                 C   sX   t | d t| jr| jnt| j}| j| jd d |d}| j| j|d}||fS )Nz
linalg.eigr   rt   r  )r   r  r   vectorsr-   r-   r.   meta_linalg_eig  s   


r  r   c                 C   s   | j jtjdddS )Nr   rl  r   )mTclonerB   r   	transpose)r   r-   r-   r.   cloneBatchedColumnMajor     r  r  c                 C   s   t | S r(   )r  )r}   ro  r  r-   r-   r.   _cholesky_solve_helper  s   r  c                    sP   t jdkfdd t  jdk fdd t d\}}t|||S )Nr   c                         d j  dS )Nz-b should have at least 2 dimensions, but has  dimensions insteadr   r-   r   r-   r.   rP     r|  z cholesky_solve.<locals>.<lambda>c                      r  )Nz-u should have at least 2 dimensions, but has r  r  r-   ru  r-   r.   rP     r|  cholesky_solve)rB   rS   r   !_linalg_broadcast_batch_dims_namer  )r}   ro  r  self_broadcastedA_broadcastedr-   rs  r.   r    s   

r  c                 C   s.   |   dkrtj| tjdS t| d t| S )Nr   r   cholesky)rz   rB   r   legacy_contiguous_formatr   r  r}   r  r-   r-   r.   r    s   
r  c                 C   s   t | d t| S )Ncholesky_inverse)r   r  r  r-   r-   r.   r    s   
r  check_errorsc                 C   sf   t | d t| d | j}t|}t|d}| |}||| | j|d|d  tjd}||fS )Nzlinalg.choleskyFr   r   rt   )	r   r   r|   r   r   r{   r   rB   r   )ro  r  r  A_shaper   	L_stridesr  infosr-   r-   r.   linalg_cholesky_ex  s   



r  tauc                    s  t jdkdd  t ddkdd  t ddkdd  t jj dkfd	d jdkr[jd d }jd d  t  |k fd
d t jjkfdd tdd t jjtjddjj	dS )Nr   c                   S   rV   )NzHtorch.linalg.householder_product: input must have at least 2 dimensions.r-   r-   r-   r-   r.   rP     rX   z,linalg_householder_product.<locals>.<lambda>rl  r   c                   S   rV   )Nzbtorch.linalg.householder_product: input.shape[-2] must be greater than or equal to input.shape[-1]r-   r-   r-   r-   r.   rP     rX   c                   S   rV   )Nz`torch.linalg.householder_product: input.shape[-1] must be greater than or equal to tau.shape[-1]r-   r-   r-   r-   r.   rP     rX   r   c                         dj  d j  S )Nzptorch.linalg.householder_product: Expected tau to have one dimension less than input, but got tau.ndim equal to  and input.ndim is equal to r  r-   r   r  r-   r.   rP     
   c                      r  )Nzltorch.linalg.householder_product: Expected batch dimensions of tau to be equal to input.shape[:-2], but got r-   r-   actual_batch_tau_shaper-   r.   rP        c                      r  )Nz,torch.linalg.householder_product: tau dtype z does not match input dtype rt   r-   r  r-   r.   rP     s   
z torch.linalg.householder_productr  Fr  r   r   rJ   rh   )
rB   rS   r   r   r|   rJ   r  empty_stridedr   rh   )r   r  expected_batch_tau_shaper-   )r  r   r  r.   linalg_householder_product  sD   


r  c                 C   s^   t | d t| ddd | | j}|| jt| jdd | j| jd d tjd}||fS )Nzlinalg.inv_exF)ry  r  rl  rt   r   r   r{   r|   r   r   rB   r   )ro  r  r  r  r-   r-   r.   linalg_inv_ex_meta	  s   
r  LDpivotsinfo)	hermitianr  r  c                C   st   t | d t| d tj| jt| jdd| j| jd}| j| jd d tj	d}| j| jd d tj	d}|||fS )Nztorch.linalg.ldl_factor_exFr  r  r   rt   rl  )
r   r   rB   r  r|   r   rJ   rh   r{   rg  )r}   r  r  r  r  r  r-   r-   r.   linalg_ldl_factor_ex_meta  s   


r  )r  c                   s   t d td t d t jdk fdd jd d }t|jkfdd ttj	fdd tj	 j	k fdd t
 \}}tj|t|d	d
 j	 jdS )Nztorch.linalg.ldl_solver   c                      r  )NzMtorch.linalg.ldl_solve: Expected B to have at least 2 dimensions, but it has r  r  r-   )r  r-   r.   rP   8     z'linalg_ldl_solve_meta.<locals>.<lambda>r   c                      r  )Nzjtorch.linalg.ldl_solve: Expected LD.shape[:-1] and pivots.shape to be the same, but got pivots with shape  insteadr|   r-   r  r-   r.   rP   @  r  c                      rs   )Nz<torch.linalg.ldl_solve: Expected pivots to be integers. Got rt   r-   r  r-   r.   rP   G  rw   c                      r  )Nz!torch.linalg.ldl_solve: LD dtype z does not match b dtype rt   r-   )r  r  r-   r.   rP   K      Fr  r  )r   r   rw  rB   rS   r   r|   r=   is_integer_dtyperJ   _linalg_broadcast_batch_dimsr  r   rh   )r  r  r  r  expected_pivots_shapeB_broadcast_sizer@   r-   )r  r  r  r.   linalg_ldl_solve_meta*  s6   
	






r  Pr  )pivotr  c          	         s   t  jdk fdd t j}|d }|d }t||}||d< |r+ |}n dg}||d<  |}||d< ||d<  |}|||fS )Nr   c                      r  )Nz@linalg.lu: Expected tensor with 2 or more dimensions. Got size: r  r  r-   ru  r-   r.   rP   [  r|  z linalg_lu_meta.<locals>.<lambda>rl  r   r   )rB   rS   r   r   r|   r[  r{   )	ro  r  sizesr  r   r  r  r  r  r-   ru  r.   linalg_lu_metaV  s$   





r  LU)r  r  c          	         s   t  jdk fdd t j}|d }|d }t j|t|dd j jd}|	  t
|||d<  j|t jd	}|	   j|t jd	}|||fS )
Nr   c                      r  )NzFtorch.lu_factor: Expected tensor with 2 or more dimensions. Got size: r  r  r-   ru  r-   r.   rP   |  r|  z*linalg_lu_factor_ex_meta.<locals>.<lambda>rl  r   Fr  r  rt   )rB   rS   r   r   r|   r  r   rJ   rh   r  r[  r{   rg  )	ro  r  r  r  r  r   r  r  r  r-   ru  r.   linalg_lu_factor_ex_metar  s&   



r  )r   adjointr  c                   s   t d tj jk fdd tjtjkdd  td t |d tddkdd  tjd d jkfdd t	 \}}tj
|t|| d	 j jd
}| dkru|su| ru| }|S )Nztorch.linalg.lu_solvec                      rq  )NzPlinalg.lu_solve: Expected LU and B to have the same dtype, but found LU of type  and B of type r  rt   r-   )r  r  r-   r.   rP     rt  z&linalg_lu_solve_meta.<locals>.<lambda>c                   S   rV   )NzElinalg.lu_solve: pivots should be a Tensor of scalar type torch.int32r-   r-   r-   r-   r.   rP     rX   zlinalg.lu_solver   c                   S   rV   )NzYlinalg.lu_solve: Number of pivots per batch should be same as the dimension of the matrixr-   r-   r-   r-   r.   rP     rX   c                      r  )Nzclinalg.lu_solve: Expected LU.shape[:-1] and pivots.shape to be the same, but got pivots with shape r  r  r-   r  r-   r.   rP     r  r  r  r   )r   rB   rS   rJ   rg  r   r  r   r|   r  r  r   rh   rz   r   conj)r  r  r  r   r  r  r@   r  r-   )r  r  r  r.   linalg_lu_solve_meta  s<   




r  unpack_dataunpack_pivotsc                    s   t  jdk fdd |rt |jt jkdd  t j}|d }|d }t||}||d< |r9 |}n dg}|rX||d<  |}	||d< ||d<  |}
n dg}	 dg}
||	|
fS )Nr   c                      r  )NzFtorch.lu_unpack: Expected tensor with 2 or more dimensions. Got size: r  r  r-   r  r-   r.   rP     r|  z lu_unpack_meta.<locals>.<lambda>c                   S   rV   )Nztorch.lu_unpack: LU_pivots is expected to be a contiguous tensor of torch.int32 dtype.
Note: this function is intended to be used with the output produced by torch.linalg.lu_factorr-   r-   r-   r-   r.   rP        rl  r   r   )	rB   rS   r   rJ   r   r   r|   r[  r{   )r  r  r  r  r  r  r   r  r  r  r  r-   r  r.   lu_unpack_meta  s4   





r  modec                    sd    dkrd}d}||fS  dkrd}d}||fS  dkr$d}d}||fS t d fdd ||fS )NreducedTcompleteFrc                         d  dS )Nzqr received unrecognized mode 'z=' but expected one of 'reduced' (default), 'r', or 'complete'r-   r-   r  r-   r.   rP     s   z _parse_qr_mode.<locals>.<lambda>rB   rS   )r  	compute_qr  r-   r  r.   _parse_qr_mode  s"   	
r  QRr  c                 C   s   t | d t| d t|\}}| jd }| jd }t||}|r>t| j}|r*|n||d< | |}||t|dd n| dg}t| j}	|sM|sO|n||	d< | |	}
|
|	t|	dd ||
fS )Nz	linalg.qrrl  r   Fr  r   )	r  r   r  r|   r[  r   r{   r   r   )ro  r  r  reduced_moder  r   r  Q_shaper  R_shaper  r-   r-   r.   linalg_qr_meta  s"   








r  sign	logabsdetc                 C   s   t | d t| dd | j}| |d d }| j|d d t| jd}tj|t|d| j| j	d}| j|d d tj
d}||||fS )Nzlinalg.slogdetFrl  rt   r  r   )r   r   r|   r{   rL   rJ   rB   r  r   rh   r   )ro  r|   r  r  r  r  r-   r-   r.   _linalg_slogdet*  s   
r  full_matrices
compute_uvdriverc                 C   s   t | d t| d t| jd d }| jd }| jd }t||}|r]|||r*|n|g }| |}	|	|t|dd ||rB|n||g }
| |
}t| dk}||
t|
|d n| dg}	| dg}| j||g t	| j
d}|	||fS )	Nz
linalg.svdrl  r   Fr  cudar   rt   )r  r   r   r|   r[  r{   r   r   device_hintrL   rJ   )ro  r  r  r  r   r  r   r  U_shaper  V_shapeVis_cudaSr-   r-   r.   _linalg_svd_meta>  s$   







r  arg1arg2c                 C   sn   | j d d }|j d d }t||}t|}|| d| dg7 }t|}||d|dg7 }||fS )Nrl  r   )r|   r"   r   r   )r  r   arg1_batch_sizesarg2_batch_sizesexpand_batch_portionarg1_expand_sizearg2_expand_sizer-   r-   r.   r  d  s   
r  c                 C   sV   |rt | || t| |\}}|| jkr| n| |}||jkr"|n||}||fS r(   )rw  r  r|   expand)r  r   rp  r  r  arg1_broadcastedarg2_broadcastedr-   r-   r.   r  u  s   r  r   c                 C   s6   | j d d }|jdkp| jd |jko|j |k}|S )Nr   r   )r|   r   )r   r   expected_batched_rhs_shapevector_caser-   r-   r.   linalg_solve_is_vector_rhs  s
   
r  )r   r  r  r  r  r  c                   sn  t  d t jjk fdd t }|r dn}	t |	|d t|	 \}
}t|p6| dd  |rC|
d d n|
}tj|t	|| jj
d} j} j}tj|t	|d j j
d} j|d d tjd} j|d d	 tjd}||||f}||||f}td
d |D rt||D ]\}}t||j ||j|  t||dd q|S )Nzlinalg.solvec                      s   d j  dj  dS )NzKlinalg.solve: Expected A and B to have the same dtype, but found A of type r  r  rt   r-   ro  r  r-   r.   rP     rt  z"_linalg_solve_ex.<locals>.<lambda>r   c                   S   rV   )Nzlinalg.solve: Vector broadcasting of the left hand side is not supported for left=False. In this case linalg.solve is equivalent to B / A.squeeze(-1)r-   r-   r-   r-   r.   rP     r  r  Frt   rl  c                 s   s    | ]}|d uV  qd S r(   r-   r6   r-   r-   r.   r\         z#_linalg_solve_ex.<locals>.<genexpr>)	copy_fromcopy_toexact_dtype)r   rB   rS   rJ   r  	unsqueezer  r  r  r   rh   r|   r   r{   r   allzipr   r   r   r    )ro  r  r   r  r  r  r  r  r
  B_B_broad_shaper@   result_shaperesult_r|   r   LU_pivots_info_r   resr  or-   r  r.   _linalg_solve_ex  sL   



r  )r   unitriangularr   r  r   c          	      C   s   |d u r
|  dg}t|tsJ t| ||d t|| d \}}|dd o+| }|r6t||j	}|S t
||j	rL||ddj	 |dd |S )Nr   zlinalg.solve_triangularrl  r   )r{   rY   r   r  r  r  r  is_conjr   r|   r   resize_
transpose_)	ro  r  r  r   r  r   r  A_avoid_copy_Ar-   r-   r.   linalg_solve_triangular_meta  s   
r$  XM)r  r  c           	         s   t jdkfdd t  jdk fdd t d  jt jkrOt \}}t j|t|ddj	j
d}t j|t|dd j	 j
d}||fS  jt jks[ jt jkrjt }d	g}||fS t dd
d  ||fS )Nr   c                      r  )NzMtorch.triangular_solve: Expected b to have at least 2 dimensions, but it has r  r  r-   r   r-   r.   rP     r  z'triangular_solve_meta.<locals>.<lambda>c                      r  )NzMtorch.triangular_solve: Expected A to have at least 2 dimensions, but it has r  r  r-   ru  r-   r.   rP     r  triangular_solveFr  r  r   c                   S   rV   )Nz+triangular_solve: Got an unexpected layout.r-   r-   r-   r-   r.   rP     rX   )rB   rS   r   rw  rg   stridedr  r  r   rJ   rh   
sparse_csr
sparse_bsrr   r{   )	r}   ro  r  r  r  self_broadcast_sizeA_broadcast_sizesolutioncloned_coefficientr-   rs  r.   triangular_solve_meta  s<   	




r/  c                 C   sp   t | d t| d | | jd d }| | j}|| jt| jdd | j| jd d tjd}|||fS )Nz
linalg.detrl  Fr  r   rt   r  )ro  detr  r  r-   r-   r.   _linalg_det_meta  s   


r1  c                    s  t jdkdd  t jdkdd  |rdndt j jd kfdd t j jd kfdd t jd jd kd	d  t jj d
kfdd t jjkfdd jdkrjd d }jd d t |kfdd jd d  t  |k fdd t jjkfdd t jjkfdd tdd tdd t jjtjddjjdS )Nr   c                   S   rV   )Nz3torch.ormqr: input must have at least 2 dimensions.r-   r-   r-   r-   r.   rP   0  rX   zormqr.<locals>.<lambda>c                   S   rV   )Nz3torch.ormqr: other must have at least 2 dimensions.r-   r-   r-   r-   r.   rP   3  rX   rl  r   c                      r  )Ntorch.ormqr: other.shape[z0] must be greater than or equal to tau.shape[-1]r-   r-   left_size_conditionr-   r.   rP   9  rw   c                      r  )Nr2  z"] must be equal to input.shape[-2]r-   r-   r3  r-   r.   rP   =  rw   c                   S   rV   )NzHtorch.ormqr: tau.shape[-1] must be less than or equal to input.shape[-1]r-   r-   r-   r-   r.   rP   B  rX   r   c                      r  )Nz[torch.ormqr: Expected tau to have one dimension less than input, but got tau.ndim equal to r  r  r-   r  r-   r.   rP   G  r  c                      r  )Nzhtorch.ormqr: Expected other to have the same number of dimensions as input, but got other.ndim equal to r  r  r-   r   r   r-   r.   rP   N  r  c                      r  )NzWtorch.ormqr: Expected batch dimensions of tau to be equal to input.shape[:-2], but got r-   r-   r  r-   r.   rP   Y  r  c                      r  )NzYtorch.ormqr: Expected batch dimensions of other to be equal to input.shape[:-2], but got r-   r-   )actual_batch_other_shaper-   r.   rP   b  r  c                         d j  dj  S )NzPtorch.ormqr: Expected input and tau to have the same dtype, but input has dtype z and tau has dtype rt   r-   r  r-   r.   rP   j  r  c                      r7  )NzRtorch.ormqr: Expected input and other to have the same dtype, but input has dtype z and other has dtype rt   r-   r5  r-   r.   rP   q  r  ztorch.ormqrr  r   Fr  r  )	rB   rS   r   r|   rJ   r  r  r   rh   )r   r  r   r   r  expected_batch_shaper-   )r6  r  r   r4  r   r  r.   ormqr&  sn   	







r9  c                   s   t td  k fdd j}| d k}|}| }|r3td|D ]}|o0|dk}q&ntd|D ]}|oB|dk}q8t |pI| fdd d S )Nr   c                      s   dd   dt  S )Nzpadding size is expected to be r   z, but got: r   r-   )rk   paddingr-   r.   rP         z,_padding_check_valid_input.<locals>.<lambda>r   r   c                      s    d d  d d  dj  S )Nz	Expected r   zD or r   zcD (batch mode) tensor with possibly 0 batch size and other non-zero dimensions for input, but got: r  r-   )rk   r   r-   r.   rP     s   )rB   rS   r   r   r   r   )r   r;  rk   	input_dimis_batch_modevalid_batch_modevalid_non_batch_moder   r-   )rk   r   r;  r.   _padding_check_valid_input  s$   rA  c                   s   d}d d}j dkrd} d7  |d7 }t|dd |\|}   |rHtk o>k  fdd tdkfdd j dkra|fS ||fS )	Nr   r   r   r   c                         d d d  dj  S NzcArgument #4: Padding size should be less than the corresponding input dimension, but got: padding (r_   ) at dimension 
 of input r  r-   dim_wr   pad_lpad_rr-   r.   rP        z_pad1d_common.<locals>.<lambda>c                      r]   )Nz
input (W: z%) is too small. Calculated output W: r-   r-   )input_woutput_wr-   r.   rP     rQ   r   )r   r   rA  rB   rS   r{   )r   r;  is_reflection	dim_planenbatchnplaner-   )rG  r   rK  rL  rH  rI  r.   _pad1d_common  s0   




rQ  c                 C      t | |ddS NTrM  rQ  r   r;  r-   r-   r.   meta_reflection_pad1d     rW  c                 C   rR  NFrT  rU  rV  r-   r-   r.   meta_replication_pad1d  rX  rZ  c                   s   d |st t|dkdd  jdkr d7  |\ }|  |r=t |k o3|k  fdd t  k fdd jS )Nr   r   c                   S   rV   )Nz padding size is expected to be 2r-   r-   r-   r-   r.   rP     rX   z(_pad1d_backward_common.<locals>.<lambda>r   c                      rB  rC  r  r-   rF  r-   r.   rP     rJ  c                         d d   S Nz(grad_output width unexpected. Expected: , Got: r   r-   rG  grad_outputrL  r-   r.   rP     r<   rB   rS   r   r   r   r{   r|   )r_  r   r;  rM  rK  r-   )rG  r_  r   rL  rH  rI  r.   _pad1d_backward_common  s$   

ra  
grad_inputc                 C      t | ||ddS rS  ra  r_  r   r;  r-   r-   r.   meta_reflection_pad1d_backward     rf  c                 C   rc  rY  rd  re  r-   r-   r.   meta_replication_pad1d_backward  rg  rh  c                   s2  dd d}d}t |dd j}|dkr'd}d7  d7  |d7 }|\	
|} 
   	 |rptk oS	k 	fdd t
k ofk  
fdd tdkpydkfd	d jd
kr|fS ||fS )Nr   r   r   r      c                      rB  rC  r  r-   rF  r-   r.   rP     rJ  z_pad2d_common.<locals>.<lambda>c                         d d d  dj  S NzcArgument #6: Padding size should be less than the corresponding input dimension, but got: padding (r_   rD  rE  r  r-   dim_hr   pad_bpad_tr-   r.   rP     rJ  c                      s   d  d d d S )Nz
input (H:  W: z%) is too small. Calculated output H: r-   r-   )input_hrK  output_hrL  r-   r.   rP   &  s
   r   rA  r   r   rB   rS   r{   )r   r;  rM  
dim_slicesrO  r   rP  r-   )rm  rG  r   rq  rK  rr  rL  rn  rH  rI  ro  r.   _pad2d_common  sB   




ru  c                 C   rR  rS  ru  rV  r-   r-   r.   meta_reflection_pad2d2  rX  rw  c                 C   rR  rY  rv  rV  r-   r-   r.   meta_replication_pad2d8  rX  rx  c                    s   dd d}d}|j }| dkr!|d }d7  d7  |d7 }|\}}}}	|| }
|  }| }|| |	 || | tkfdd t k fdd ||j S )Nr   r   r   ri  c                      r[  r\  r   r-   r^  r-   r.   rP   ^  r<   z%meta_pad2d_backward.<locals>.<lambda>c                      r[  Nz)grad_output height unexpected. Expected: r]  r   r-   rm  r_  rr  r-   r.   rP   b  r<   )r|   rk   rB   rS   r   r{   )r_  r}   r;  rN  rO  rO   rH  rI  ro  rn  rP  rq  rK  r-   )rm  rG  r_  rr  rL  r.   meta_pad2d_backward>  s2   
r{  c             	      s  ddd d}t |dd jdk}|r+d}d7 d7  d7  |d7 }|\
|}    
   	|rtk odk fdd tk ow
k 
fd	d tk ok  fd
d t	dkpdkpdk	fdd |r||	fS |	fS )Nr   r   r   r   r      c                      rB  rC  r  r-   rF  r-   r.   rP     rJ  z_pad3d_common.<locals>.<lambda>c                      rj  rk  r  r-   rl  r-   r.   rP     rJ  c                      rj  )NzcArgument #8: Padding size should be less than the corresponding input dimension, but got: padding (r_   rD  rE  r  r-   )dim_dr   pad_bkpad_fr-   r.   rP     rJ  c                      s(   d  d d d d d S )Nz
input (D:  H: rp  z%) is too small. Calculated output D: r-   r-   )input_drq  rK  output_drr  rL  r-   r.   rP     s   rs  )r   r;  rM  rN  
batch_moderO  rP  r-   )r}  rm  rG  r   r  rq  rK  r  rr  rL  rn  r~  r  rH  rI  ro  r.   _pad3d_commong  sP   





r  c                 C   rR  rS  r  rV  r-   r-   r.   meta_reflection_pad3d  rX  r  c                 C   rR  rY  r  rV  r-   r-   r.   meta_replication_pad3d  rX  r  c                    s(  t t|dkdd  |jdksJ j|jksJ ddd |jdkr2d7 d7  d7  |\}}}}}}| }	|}
|}|	| | |
| | || | t kfdd t kfd	d t  k fd
d ||jS )N   c                   S   rV   )Nz padding size is expected to be 6r-   r-   r-   r-   r.   rP     rX   z%meta_pad3d_backward.<locals>.<lambda>r   r   r   r|  c                      r[  r\  r   r-   r^  r-   r.   rP     r<   c                      r[  ry  r   r-   rz  r-   r.   rP     r<   c                      r[  )Nz(grad_output depth unexpected. Expected: r]  r   r-   )r}  r_  r  r-   r.   rP     r<   r`  )r_  r   r;  rH  rI  ro  rn  r  r~  r  rq  rK  r-   )r}  rm  rG  r_  r  rr  rL  r.   meta_pad3d_backward  s<   




r  r   pc                 C   s^   t |  dd  | d}|dkr| dgjt jdS | ||d  d fjt jdS )Nc                   S   rV   )Nz(_pdist_forward requires contiguous inputr-   r-   r-   r-   r.   rP     rX   z%meta__pdist_forward.<locals>.<lambda>r   r   r   r   )rB   rS   r  r   r{   r   r  )r}   r  r   r-   r-   r.   meta__pdist_forward  s   
r  gradpdistc                 C   s8   t | dd  t | dd  t j|t jdS )Nc                   S   rV   )Nz._pdist_backward requires self to be contiguousr-   r-   r-   r-   r.   rP     rX   z&meta__pdist_backward.<locals>.<lambda>c                   S   rV   )Nz/_pdist_backward requires pdist to be contiguousr-   r-   r-   r-   r.   rP     rX   r   )rB   rS   r  r   r  )r  r}   r  r  r-   r-   r.   meta__pdist_backward  s   r  )r	  r  c          	         s     d}  d} d}|||ft  dkdd  t dkdd  tj j  ko=jkn   fdd  j}j|d |d td kocd kfd	d   S )
Nr   r   r   r   c                   S   rV   Nzbatch1 must be a 3D tensorr-   r-   r-   r-   r.   rP     rX   zmeta_baddbmm.<locals>.<lambda>c                   S   rV   Nzbatch2 must be a 3D tensorr-   r-   r-   r-   r.   rP     rX   c                      s   dj  d j  dj  S )Nz+Input dtypes must be the same, got: input: z
, batch1: z
, batch2: rt   r-   )batch1batch2r}   r-   r.   rP         c                	      &   d d d d  d d  d	S Nz@Expected size for first two dimensions of batch2 tensor to be: [r_   z] but got: [r   r   ].r-   r-   batch2_sizesbscontraction_sizer-   r.   rP     s   )r   r  rB   rS   rk   rJ   r|   r{   )	r}   r  r  r	  r  dim1dim2dim3batch1_sizesr-   )r  r  r  r  r  r}   r.   meta_baddbmm  s&   


r  c                C      t |  S r(   rB   r   r   r}   r   r-   r-   r.   meta_bernoulli  s   r        ?c                 C   r&  r(   r-   r}   r  r   r-   r-   r.   meta_bernoulli_  rP  r  c                 C   r  r(   r  r  r-   r-   r.   meta_bernoulli_p$  rX  r  c                 C   
   t | S r(   rB   r   r  r-   r-   r.   meta_poisson*  r:  r  c                 C   s6   t |
|  k dd  t j| t jd}t | |fS )Nc                   S   rV   )NzJError in fused_moving_avg_obs_fake_quant_cpu: ch_axis must be < self.dim()r-   r-   r-   r-   r.   rP   B  rX   z6meta__fused_moving_avg_obs_fq_helper.<locals>.<lambda>rt   )rB   rS   rk   r   bool)r}   observer_onfake_quant_onrunning_minrunning_maxscale
zero_pointaveraging_const	quant_min	quant_maxch_axisper_row_fake_quantsymmetric_quantmaskr-   r-   r.   $meta__fused_moving_avg_obs_fq_helper0  s   
r  c                    sn   t |  dkdd  t | dkdd  | j\ |j\t  k fdd | S )Nr   c                   S   rV   )Nza must be 2Dr-   r-   r-   r-   r.   rP   K  rX   zmeta_mm.<locals>.<lambda>c                   S   rV   )Nzb must be 2Dr-   r-   r-   r-   r.   rP   L  rX   c                	      s   d d  d d d	S )Nz/a and b must have same reduction dim, but got [r_   z] X [r  r-   r-   M1M2Nr  r-   r.   rP   Q  s    )rB   rS   rk   r|   r{   abr-   r  r.   meta_mmH  s   

r  c                    s0   |rt  fddtjD S tj S )Nc                 3   s&    | ]}| vrj | nd V  qdS )r   Nr  r7   r   dimsr}   r-   r.   r\   X  s   $ z+_compute_reduction_shape.<locals>.<genexpr>)rR   r   r   r=   compute_reduction_output_shaper|   )r}   r  rA  r-   r  r.   r?  V  s   r?  strc                 C   sD   t | tjjr| jjS t| dr t| jdr | jjdkr | jjS dS )Nrh   ra   rf   r  )rY   rB   _subclasses
FakeTensorfake_devicera   hasattrrh   )r   r-   r-   r.   r  a  s   
r  input_tensorr   r;  dilationis_transposedgroupsoutput_paddingc                 C   s  dt dt dt dt dt dt fdd}dt dt dt dt dt d	t dt fd
d}	|jdd  }
| jdd  }|r<||jd  }n|jd }|jd | | jd krQtd| jd |g}t|tre|gt| }nt|dkrt|d gt| }t|tr|gt| }nt|dkr|d gt| }t|tr|gt| }nt|dkr|d gt| }d }|rt|tr|gt| }nt|dkr|d gt| }n|}tt|D ]2}|r||	|| || || |
| || ||  q|||| || || |
| ||  q|S )Nlnr  r   r  rS  r"  c                 S   s$   | d|  ||d   d | d S )a  
        Formula to apply to calculate the length of some dimension of the output

        See: https://pytorch.org/docs/stable/generated/torch.nn.Conv2d.html

        Args:
            ln: length of the dimension
            p: padding in that dim
            d: dilation in that dim
            k: kernel size in that dim
            s: stride in that dim
        Returns:
            The output length
        r   r   r-   )r  r  r   r  rS  r-   r-   r.   _formulax  s   $z+calc_conv_nd_return_shape.<locals>._formular*   c                 S   s(   | d | d|  ||d   | d S )a  
        Formula to apply to calculate the length of some dimension of the output
        if transposed convolution is used.
        See: https://pytorch.org/docs/stable/generated/torch.nn.ConvTranspose2d.html

        Args:
            ln: length of the dimension
            p: padding in that dim
            d: dilation in that dim
            k: kernel size in that dim
            s: stride in that dim
            op: output padding in that dim

        Returns:
            The output length
        r   r   r-   )r  r  r   r  rS  r*   r-   r-   r.   _formula_transposed  s   (z6calc_conv_nd_return_shape.<locals>._formula_transposedr   r   r   zInvalid channel dimensions)rg  r|   r   rY   r   r   r   r   )r  r   r   r;  r  r  r  r  r  r  kernel_sizer  out_channels	ret_shapeoutput_padding_listr   r-   r-   r.   calc_conv_nd_return_shapen  sZ   "
&




"r  c                 C      t j| t jkS r(   rB   _prims_commonsuggest_memory_formatchannels_lasttenr-   r-   r.   is_channels_last     r  running_meanrunning_vartrainingexponential_average_factorepsilonc                    s    j }|d ur
|j n|j }	|d ur|j n|j }
 fdd} |j| d}|r4 |	} |
}n
 d} d}|||fS )Nc                      s(   t  rtjS  jtjdrtjS tjS r#  )r  rB   r  r  r   r-   r  r-   r.   pick_memory_format  s
   z2meta_miopen_batch_norm.<locals>.pick_memory_formatr   r   )r|   r{   r   )r  r   r   r  r  r  r  r  r   save_mean_shapesave_var_shaper  r   	save_meansave_varr-   r  r.   meta_miopen_batch_norm  s   



r  c	              	      sf    fdd}	t  ||||||r|nd }
d}d} |dkr%d|
|<  |
}|j|	 d}|S )Nc                      s^   t  dkrt strtjS nt rtjS  jtjdr#tjS  jtjdr-tjS d S Nr  r   )r  r  rB   r  r  r   preserve_formatr-   r  r   r-   r.   r  		  s   z%meta_conv.<locals>.pick_memory_formatr   r   r   )r  r   r{   r   )r  r   r   r   r;  r  r  r  r  r  	shape_outinput_channels_dimoutput_channels_dimr   r-   r  r.   	meta_conv  s$   

r  mkldnnc
              	   C   sH   t | ||||d|g }
| |
}tj}|  dkrtj}|j|d}|S )NFr|  r   )r  r{   rB   r  rk   channels_last_3dr   )r  r   r   r;  r   r  r  attrscalars	algorithmr  r   out_memory_formatr-   r-   r.   meta_mkldnn_convolution_default/	  s   
r  c                 C   s$   |  g | jd d |jd R S Nr   r   r{   r|   )r  r   r   r  r  r  r-   r-   r.   meta_linear_pointwise_defaultF	  s   $r  mklc                 C   s$   |  g | jd d |jd R S r  r  )r  packed_weightorig_weightr   r   r-   r-   r.   meta_mkl_linearQ	  s   r  onednnc              	   C   sJ   t | ||||	d|
d }|tjtjfv sJ | j||d}|jtjd}|S )NFrt   r   )r  rB   r  r  r{   r   r  )r8   x_scalex_zpww_scalew_zpr   r   r;  r  r  output_scaleoutput_zero_pointoutput_dtyper  r  r  r  r   r-   r-   r.   meta_qconv2d_pointwise[	  s   
r  c                 C   s>   t | j}|jd |d< |	tjtjfv sJ | j||	d}|S )Nr   r   rt   )r   r|   rB   r  r  r{   )r8   r  r  r  r  r  r   r  r	  r
  post_op_namepost_op_argspost_op_algorithmr  r   r-   r-   r.   meta_qlinear_pointwise~	  s
   
r  c                 C   s&   t | j}|jd |d< | |}|S )Nr   r   )r   r|   r{   )r8   r  r   r  r   r-   r-   r.   meta_linear_dynamic_fp16	  s   

r  	quantizedr-   r   r   c                 C   sr   t | |||||\}}}|  dkr| dnd}	tj}
|  dkr(|||g}n|	|||g}tj|| j| j|
dS Nri  r   r   r0  )#max_pool2d_checks_and_compute_shaperk   r   rB   r  rp   rJ   rh   r   r  r   r;  r  	ceil_modenInputPlaneoutputHeightoutputWidthrO  r   r   r-   r-   r.   meta_quantized_max_pool2d	  s$   r  c                    s4   t   koj k fdd d S )Nc                      s8   d  d d dd   d dj   S )NzExpected a tensor of dimension z and tensor.size[z] == r_   zbut got : dimension z] = rk   r|   r-   rk   dim_sizer   r   r-   r.   rP   	  s    z check_dim_size.<locals>.<lambda>)rB   rS   rk   r|   )r   rk   r  r   r-   r  r.   check_dim_size	  s   r  c                 C   sb  dd }|d|\}}	t t|dv dd  t|dkr#||	}
}nt|dkr3|d |d }
}n|d	|\}
}|d
|\}}t |d u pJ|dkdd  |  dkrZ| dnd}| d}| d}| d}t||||
d|}t||	||d|}t| }t| ||	|
|||dd|||||| |  dkr|||g}n||||g}t j	|| j
| j|dS )Nc                    D   t t|dv  fdd |d }t|dkr|n|d }||fS )Nr   r   c                      r  )Nzavg_pool2d: 4 must either be a single int, or a tuple of two intsr-   r-   rp  r-   r.   rP   	  rw   z1meta_avg_pool2d.<locals>.unpack.<locals>.<lambda>r   r   rB   rS   r   rp  rN  HWr-   r#  r.   unpack	     

zmeta_avg_pool2d.<locals>.unpackr  r   r   r   c                   S   rV   NzOavg_pool2d: stride must either be omitted, a single int, or a tuple of two intsr-   r-   r-   r-   r.   rP   	  rX   z!meta_avg_pool2d.<locals>.<lambda>r   r   r   r;  c                   S   rV   Nzdivisor must be not zeror-   r-   r-   r-   r.   rP   	  rX   ri  r  rl  r   r   r0  )rB   rS   r   rk   r   pooling_output_shaper=   r  pool2d_shape_checkrp   rJ   rh   )r   r  r   r;  r  count_include_paddivisor_overrider(  kHkWdHdWpadHpadWrO  r  inputHeight
inputWidthr  r  r   r   r-   r-   r.   meta_avg_pool2d	  sb   
	




r:  c                 C   sj   t | ||||||dd|	|
|||| |  }|	}t|||d | t|||d | t|||d | d S )Nr   r   r   )r/  rk   r  )r   
gradOutputrO  r2  r3  r4  r5  r6  r7  r  r8  r9  r  r  
mem_formatr   nOutputPlaner-   r-   r.   avg_pool2d_backward_shape_check
  s,   r>  c                 C   s  t t|dkpt|dkdd  |d }t|dkr|n|d }	t t|dkp5t|dkp5t|dkdd  t|dkrB|n|d }
t|dkrN|	nt|dkrV|
n|d }t t|dkpgt|dkdd  |d }t|dkrx|n|d }t |d u p|dkdd  |j}| d	kr|d
 nd}|d }|d }|d }t||||
d|}t||	||d|}t|}t|| |||	|
||||||||| t j	||j
|j|dS )Nr   r   c                   S   rV   )NzKavg_pool2d: kernel_size must either be a single int, or a tuple of two intsr-   r-   r-   r-   r.   rP   V
  rX   z*meta_avg_pool2d_backward.<locals>.<lambda>r   c                   S   rV   r+  r-   r-   r-   r-   r.   rP   \
  rX   c                   S   rV   )NzGavg_pool2d: padding must either be a single int, or a tuple of two intsr-   r-   r-   r-   r.   rP   b
  rX   c                   S   rV   r,  r-   r-   r-   r-   r.   rP   i
  rX   ri  r  r-  rl  r   r0  )rB   rS   r   r|   rk   r.  r=   r  r>  rp   rJ   rh   )gradOutput_r   r  r   r;  r  r0  r1  r2  r3  r4  r5  r6  r7  
input_sizerO  r  r8  r9  r  r  r<  r-   r-   r.   meta_avg_pool2d_backwardH
  sj   "(
rA  c                 C   s
  t t|dv dd  |d }t|dkr|n|d }t|dkr$|n|d }	t | p2t|dv dd  |s;|n|d }
|sC|nt|dkrK|
n|d }|sS|	nt|dkr[|
n|d }t t|dv dd  |d }t|dkrw|n|d }t|dkr|n|d }t | jd	v d
d  t | p|dkdd  | d}| d}| d}| d}| d}t||||
d|}t||||d|}t||	||d|}t| ||||	|
|||||ddd||||||ddd | jdkr| ||||fS | |||||fS )Nr   r   c                   S   rV   NzFavg_pool3d: kernel_size must be a single int, or a tuple of three intsr-   r-   r-   r-   r.   rP   
  rX   z!meta_avg_pool3d.<locals>.<lambda>r   r   r   c                   S   rV   NzJavg_pool3d: stride must be omitted, a single int, or a tuple of three intsr-   r-   r-   r-   r.   rP   
  rX   c                   S   rV   NzBavg_pool3d: padding must be a single int, or a tuple of three intsr-   r-   r-   r-   r.   rP   
  rX   ri  r|  c                   S   rV   Nz9non-empty 4D or 5D (batch mode) tensor expected for inputr-   r-   r-   r-   r.   rP   
  rX   c                   S   rV   r,  r-   r-   r-   r-   r.   rP   
  rX   r  r-  rl  r   zavg_pool3d()T)check_input_sizeri  )rB   rS   r   r   r   r.  pool3d_shape_checkr{   )r   r  r   r;  r  r0  r1  kTr2  r3  dTr4  r5  padTr6  r7  rO  nslicesitimeiheightiwidthotimeoheightowidthr-   r-   r.   meta_avg_pool3d
  s   
  






rT  c                 C   s  t t|dv dd  |d }t|dkr|n|d }	t|dkr$|n|d }
t | p2t|dv dd  |s;|n|d }|sC|	nt|dkrK|n|d }|sS|
nt|dkr[|n|d }t t|dv dd  |d }t|dkrw|n|d }t|dkr|n|d }t |jd	v d
d  t | p|dkdd  |d}|d}|d}|d}t||||d|}t||	||d|}t||
||d|}t|| |||	|
||||||||||||d ||jS )NrB  c                   S   rV   rC  r-   r-   r-   r-   r.   rP   
  rX   z*meta_avg_pool3d_backward.<locals>.<lambda>r   r   r   c                   S   rV   rD  r-   r-   r-   r-   r.   rP   
  rX   c                   S   rV   rE  r-   r-   r-   r-   r.   rP     rX   rF  c                   S   rV   rG  r-   r-   r-   r-   r.   rP     rX   c                   S   rV   r,  r-   r-   r-   r-   r.   rP     rX   r  r-  rl  r   zavg_pool3d_backward())	rB   rS   r   r   r   r.  avg_pool3d_backward_shape_checkr{   r|   )r_  r   r  r   r;  r  r0  r1  rJ  r2  r3  rK  r4  r5  rL  r6  r7  rM  rN  rO  rP  otime_for_shape_checkoheight_for_shape_checkowidth_for_shape_checkr-   r-   r.   meta_avg_pool3d_backward
  st   
  




rY  c                    sZ   t  jdkp jdk fdd  jd d t| }t }t j| j j	|dS )Nr   ri  c                      rs   )Nz"Expected 3D or 4D tensor, but got r  r-   r   r-   r.   rP   ;  rw   z*meta_adaptive_avg_pool2d.<locals>.<lambda>rl  r0  )
rB   rS   r   r|   rR   r=   r  rp   rJ   rh   )r}   output_sizer  r   r-   r   r.   meta_adaptive_avg_pool2d7  s   

r[  c                    s@   t  jdkp jdk fdd   jd d t| S )Nri  r|  c                      rs   )Nz"Expected 4D or 5D tensor, but got r  r-   r   r-   r.   rP   M  rw   z*meta_adaptive_avg_pool3d.<locals>.<lambda>r-  )rB   rS   r   r{   r|   rR   )r}   rZ  r-   r   r.   meta_adaptive_avg_pool3dI  s
   
r\  c                    s    j }td|D ]t dk fdd qt|dkp$|dkfdd tj jk fdd tj}trDtj}	j
j|d	S )
Nr   r   c                      s   d j  d dS )Nz{adaptive_avg_pool2d_backward(): Expected grad_output to have non-zero                       size for non-batch dimensions,  with dimension  being emptyr  r-   )grad_outr   r-   r.   rP   X  s
    z4meta__adaptive_avg_pool2d_backward.<locals>.<lambda>r   ri  c                      rs   )NzBadaptive_avg_pool2d_backward(): Expected 3D or 4D tensor, but got r  r-   r   r-   r.   rP   ]  rw   c                      r  Nexpected dtype z! for `grad_output` but got dtype rt   r-   )r_  r}   r-   r.   rP   a  r  r   )r   r   rB   rS   r   rJ   r   r  r  r{   r|   r   )r_  r}   r   r   r-   )r_  r   r}   r.   "meta__adaptive_avg_pool2d_backwardR  s$   

rb  c                 C   s   t | d tj|tjdS )Nadaptive_avg_pool3d_backwardr   )!_adaptive_pool_empty_output_checkrB   r   r  r_  r}   r-   r-   r.   "meta__adaptive_avg_pool3d_backwardi  s   
rf  r_  c                    s<   j }td|D ]tdk fdd qd S )Nr   r   c                      s     dj  d dS )Nzc(): Expected grad_output to have non-zero size for non-batch dimensions, but grad_output has sizes r]  r^  r  r-   r}  r_  r   r-   r.   rP   u  s
   z3_adaptive_pool_empty_output_check.<locals>.<lambda>)r   r   rB   rS   r   )r_  r}  r   r-   rg  r.   rd  p  s   rd  c                    s"  j }t|dv fdd td|D ] t dk fdd qtt|dkdd  d}d}d}j d	krGd}|d7 }|d }|\}}j d
krm|||f}|}	j|tjd}
|	|
fS ||||f}t	}|j
|d}	j|tjdj
|d}
|	|
fS )Nr   ri  c                      rs   )Nz:adaptive_max_pool2d(): Expected 3D or 4D tensor, but got: r  r-   r   r-   r.   rP     rw   z*meta_adaptive_max_pool2d.<locals>.<lambda>r   r   c                         dj  d  dS )Nzjadaptive_max_pool2d(): Expected input to have non-zero size for non-batch dimensions, but input has sizes r]  r^  r  r-   r   r   r-   r.   rP     
   r   c                   S   rV   )NzCadaptive_max_pool2d(): internal error: output_size.size() must be 2r-   r-   r-   r-   r.   rP     rX   ri  r   rt   r   )r   rB   rS   r   r   r   r{   r   r=   r  r   )r   rZ  r   dimHsizeBsizeDosizeHosizeWr   r   r   r   r-   rk  r.   meta_adaptive_max_pool2d|  sD   







rr  c                    sd    j }t|dv  fdd t d tj jk fdd t}jj	|dS )Nrh  c                      rs   )NzKadaptive_max_pooling2d_backward(): Expected 3D or 4D grad_output, but got: r  r-   r_  r-   r.   rP     rw   z3meta_adaptive_max_pool2d_backward.<locals>.<lambda>adaptive_max_pool2d_backwardc                      r  r`  rt   r-   r_  r   r-   r.   rP     r  r   )
r   rB   rS   rd  rJ   r=   r  r{   r|   r   )r_  r   r   r   r   r-   ru  r.   !meta_adaptive_max_pool2d_backward  s   



rv  c                    s   j }t|dv fdd td|D ] t dk fdd qtt|dkdd  d}d}d}|d	krFd}|d7 }|}|\}}}|d
kr[||||f}	n|||||f}	|	}
j|	tjd}|
|fS )NrF  c                      rs   )Nz:adaptive_max_pool3d(): Expected 4D or 5D tensor, but got: r  r-   ri  r-   r.   rP     rw   z*meta_adaptive_max_pool3d.<locals>.<lambda>r   r   c                      rj  )Nzjadaptive_max_pool3d(): Expected input to have non-zero size for non-batch dimensions, but input has sizes r]  r^  r  r-   rk  r-   r.   rP     rl  r   c                   S   rV   )NzCadaptive_max_pool3d(): internal error: output_size.size() must be 3r-   r-   r-   r-   r.   rP     rX   r|  ri  rt   )r   rB   rS   r   r   r   r{   r   )r   rZ  r   dimDrn  ro  osizeTrp  rq  r   r   r   r-   rk  r.   meta_adaptive_max_pool3d  s8   





ry  c                 C   s   t | d ||jS )Nadaptive_max_pool3d_backward)rd  r{   r|   )r_  r   r   r-   r-   r.   !meta_adaptive_max_pool3d_backward  s   
r{  c                 C   s   |d u rt d| |S )Nz:cannot repeat_interleave a meta tensor without output_size)r   r{   )repeatsrZ  r-   r-   r.   meta_repeat_interleave_Tensor  s   
r}  c                 C   s:   | j jsJ |j jsJ t| j|j}| j|t| j dS r<  )rJ   r   r"   r|   r{   r   )realimagr   r-   r-   r.   meta_complex  s   r  )
fill_valuer   r  c                C   s   | j ||  ftjdS r<  )r{   rk   rB   rx   )r}   r   r  r-   r-   r.   nonzero_static  s   r  c              
      s  t tdd  g }tD ]q\ d ur|t jt jt jt jt jfv dd  jt jt jfv rv }t	|t 
j jkfdd tjD ]#t 
j j  k fdd ||d qQq| q| q|t t	jkfdd dd lm} t|j t	jk rd  t	jk sd}d	}D ]|dkrǈd urd}q|dkr҈d u rd
}qd ur nqd}|sg }g }tD ]\ d ur|  | qtD ]\ d u r|  | q||g }	g }
g }tD ]&\}d u rB|r8|
j|  q"|	j|  q"tj}q"|	| |
 S )Nc                   S   rV   )Nz#at least one index must be providedr-   r-   r-   r-   r.   rP     rX   z#meta_index_Tensor.<locals>.<lambda>c                   S   rV   )Nz?tensors used as indices must be long, int, byte or bool tensorsr-   r-   r-   r-   r.   rP     rX   c                      rs   )N)too many indices for tensor of dimension r  r-   r   r-   r.   rP     rw   c                	      s$   dj  d  dj  d  S )NzThe shape of the mask z
 at index z0 does not match the shape of the indexed tensor r  r-   )r   rv   jr  r}   r-   r.   rP     s
    r   c                      s   dj  dt  dS )Nr  z (got r`   )r   r   r-   )r   r}   r-   r.   rP   *  r<  r   Fr   T)rB   rS   r  	enumeraterJ   rx   rg  r   nonzeror   ry   r   r   r|   r   selecttorch._refs_refsr   r#   r   r{   )r}   r   r  r  refsstatehas_contiguous_subspacer  transposed_indicesbefore_shapeafter_shapereplacement_shaperk   r-   )r   rv   r   r  r  r}   r.   meta_index_Tensor	  s   








r  c                 C   sT   d }d }d }|
d r|  | }|
d r|  | }|
d r%|  |}|||fS )Nr   r   r   r{   r   )grad_output_input_weight_bias_sizes_optr   r;  r  
transposedr  r  output_maskbackend_grad_inputbackend_grad_weightbackend_grad_biasr-   r-   r.   meta_convolution_backwardm  s   

r  c                   s     d} d}| ||f} t  dkdd  t dkdd  t  d dk fdd t  d dk fd	d t|  d|ko^|  d|kd
d  | |   S )Nr   r   r   c                   S   rV   r  r-   r-   r-   r-   r.   rP     rX   zmeta_addbmm.<locals>.<lambda>c                   S   rV   r  r-   r-   r-   r-   r.   rP     rX   r   c                         d  d d d S )Nz8batch1 and batch2 must have same number of batches, got r   r   r   r-   r  r  r-   r.   rP     r  c                
      6   d  d d  d d d d d d	S )Nz#Incompatible matrix sizes for bmm (r   r8   r   r   r`   r   r-   r  r-   r.   rP     
   c                   S   rV   )Nz.self tensor does not match matmul output shaper-   r-   r-   r-   r.   rP     rX   )r   r  rB   rS   rk   r{   )r}   r  r  r	  r  r  r  r-   r  r.   meta_addbmm  s$   

r  )
grad_scale	found_infc       	            s4   | |||||fD ] t t t fdd qd S )Nc                         dt   S Nz'exponent must be a tensor list but got ra   r-   lr-   r.   rP     r|  z#meta__fused_adam_.<locals>.<lambda>rB   rS   rY   r   )r}   gradsexp_avgsexp_avg_sqsmax_exp_avg_sqsstate_stepslrbeta1beta2weight_decayepsamsgradmaximizer  r  r-   r  r.   meta__fused_adam_  s   
r  c       	            sZ   | |||||fD ] t t t fdd qdd }|| ||||||||fS )Nc                      r  r  r  r-   r  r-   r.   rP     r|  z"meta__fused_adam.<locals>.<lambda>c                 S   s   dd | D S )Nc                 S   s   g | ]}t |qS r-   r  )r7   rx  r-   r-   r.   r;     r<   z=meta__fused_adam.<locals>.empty_like_list.<locals>.<listcomp>r-   )tensor_listr-   r-   r.   empty_like_list  s   z)meta__fused_adam.<locals>.empty_like_listr  )r}   r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r-   r  r.   meta__fused_adam  s   
r  c                    s   t   dkdd  t  dkdd  t  jt ju  fdd t jt ju fdd t  ddk fd	d  j ddft jd
S )Nr   c                   S   rV   )Nza must be a 2D tensorr-   r-   r-   r-   r.   rP     rX   zmeta__int_mm.<locals>.<lambda>c                   S   rV   )Nzb must be a 2D tensorr-   r-   r-   r-   r.   rP     rX   c                      rs   )Nzexpected self to be int8, got rt   r-   )r  r-   r.   rP     rw   c                      rs   )Nzexpected mat2 to be int8, got rt   r-   )r  r-   r.   rP     rw   r   r   c                
      r  )Nz'Incompatible matrix sizes for _int_mm (r   r8   r   r   r`   r   r-   r  r-   r.   rP     r  rt   )rB   rS   rk   rJ   r   r   r{   r   r  r-   r  r.   meta__int_mm  s   



 r  c                    st   t   dkdd  t  jt ju  fdd  d} dd } j|d ||d  d	|d ft jd
S )Nr   c                   S   rV   Nzw must be a 2D tensorr-   r-   r-   r-   r.   rP     rX   z2meta__convert_weight_to_int4pack.<locals>.<lambda>c                      rs   Nzexpected w to be uint8, got rt   r-   r  r-   r.   rP      rw   r   r      r      rt   )rB   rS   rk   rJ   uint8r   r{   r   r  inner_k_tilesr   r  r-   r  r.    meta__convert_weight_to_int4pack  s   



r  c                    s`   t   dkdd  t  jt ju  fdd  d} d} j||d ft jdS )Nr   c                   S   rV   r  r-   r-   r-   r-   r.   rP     rX   z:meta__convert_weight_to_int4pack_for_cpu.<locals>.<lambda>c                      rs   Nzexpected w to be int32, got rt   r-   r  r-   r.   rP     rw   r   r   rt   )rB   rS   rk   rJ   r   r   r{   r  r  r-   r  r.   (meta__convert_weight_to_int4pack_for_cpu  s   




r  c                    s   t  dkdd  t   dkdd  t jt jt jt jfv fdd t  jt ju  fdd j	d 	dd	 jd
S )Nr   c                   S   rV   Nzx must be a 2D tensorr-   r-   r-   r-   r.   rP      rX   z*meta__weight_int4pack_mm.<locals>.<lambda>ri  c                   S   rV   )Nzw must be a 4D tensorr-   r-   r-   r-   r.   rP   !  rX   c                      rs   Nz#expected x to be f32/f16/bf16, got rt   r-   r   r-   r.   rP   $  rw   c                      rs   r  rt   r-   r  r-   r.   rP   (  rw   r   r  rt   )
rB   rS   rk   rJ   r  r  r  r   r{   r   r8   r  q_group_sizeq_scale_and_zerosr-   r  r8   r.   meta__weight_int4pack_mm  s   


"r  c                    s   t  dkdd  t   dkdd  t jt jt jt jfv fdd t  jt ju  fdd j	d 	djdS )	Nr   c                   S   rV   r  r-   r-   r-   r-   r.   rP   /  rX   z2meta__weight_int4pack_mm_for_cpu.<locals>.<lambda>c                   S   rV   r  r-   r-   r-   r-   r.   rP   0  rX   c                      rs   r  rt   r-   r   r-   r.   rP   3  rw   c                      rs   r  rt   r-   r  r-   r.   rP   7  rw   r   rt   )
rB   rS   rk   rJ   r  r  r  r  r{   r   r  r-   r  r.    meta__weight_int4pack_mm_for_cpu-  s   


r  c                    s   t  dkdd  t jt jt jt jfv fdd t   dkdd  t  jt ju  fdd j	d 	djdS )	Nr   c                   S   rV   r  r-   r-   r-   r-   r.   rP   >  rX   z*meta__weight_int8pack_mm.<locals>.<lambda>c                      rs   r  rt   r-   r   r-   r.   rP   A  rw   c                   S   rV   r  r-   r-   r-   r-   r.   rP   C  rX   c                      rs   )Nzexpected w to be int8, got rt   r-   r  r-   r.   rP   F  rw   r   rt   )
rB   rS   rk   rJ   r  r  r  r   r{   r   )r8   r  q_scalesr-   r  r.   meta__weight_int8pack_mm<  s   


r  c           	         s  t  dkfdd t  dkfdd t ddkfdd t tjdd  t tjdd  t |d	kd
d  t  dv  fdd d}d}jd d }jd d }tt 	||}|
||g |S )Nr   c                         d    dS )Nz1cdist only supports at least 2D tensors, X1 got: Dr   r-   )x1r-   r.   rP   O  rQ   z$meta_cdist_forward.<locals>.<lambda>c                      r  )Nz1cdist only supports at least 2D tensors, X2 got: r  r   r-   )x2r-   r.   rP   S  rQ   r   c                      r  )Nz4X1 and X2 must have the same number of columns. X1: r   z X2: r   r-   )r  r  r-   r.   rP   W  r  c                   S   rV   )Nz=cdist only supports floating-point dtypes, X1 got: {x1.dtype}r-   r-   r-   r-   r.   rP   [  rX   c                   S   rV   )Nz=cdist only supports floating-point dtypes, X2 got: {x2.dtype}r-   r-   r-   r-   r.   rP   _  rX   r   c                   S   rV   )Nz)cdist only supports non-negative p valuesr-   r-   r-   r-   r.   rP   a  rX   Nr   r   c                      r  )Nz%possible modes: None, 1, 2, but was: r-   r-   )compute_moder-   r.   rP   d  r  rl  )rB   rS   rk   r   r=   is_float_dtyperJ   r|   r   broadcast_shapesextendr{   )	r  r  r  r  r1r2batch_tensor1batch_tensor2r  r-   )r  r  r  r.   meta_cdist_forwardK  s@   









r  c                 C   s   |j d }|j d }|j d }|j d d }|j d d }	tt||	}
|
 }|||g t|
}|dksE|dksE|dksE|dkrJt|S |t|j krV|	|}tj
|tjdS )Nr   rl  r   r   )r|   r   rB   r  copyr  mathprod
zeros_liker  r   r   )r  r  r  r  cdistc1r  r  r  r  r  tensor1_expand_sizebatch_productr-   r-   r.   meta_cdist_backwardo  s   



 

r  c	                    s  t  jt jt jfv  fdd t jt jt jfv fdd t tjfdd d}	|rEt |	dkdd  |	d8 }	|	d}
t	d\}}}d urt ||kd	d  t j
dkfd
d t    k fdd fdddd fdd}tdkr  d}  }||kr |	d}nS d}nM||
|}|||fv s|sӈ d}nd}|	}jd }||kr|rt |dkdd  |d8 }|jd }n| }|
|||fS )Nc                      rs   )Nz(expected indices to be long or int, got rt   r-   )r   r-   r.   rP     rw   z$meta_embedding_bag.<locals>.<lambda>c                      rs   )Nz(expected offsets to be long or int, got rt   r-   )r+  r-   r.   rP     rw   c                      rs   )Nz/expected weight to be floating point type, got rt   r-   )r   r-   r.   rP     rw   r   r   c                   S   rV   Nz1include_last_offset: numBags should be at least 1r-   r-   r-   r-   r.   rP     rX   r   c                   S   rV   )Nz@embedding_bag: per_sample_weights only supported with mode='sum'r-   r-   r-   r-   r.   rP     rX   c                      r  )Nz1expected per_sample_weights to be 1D tensor, got r  r  r-   )per_sample_weightsr-   r.   rP     r|  c                      s   d   d    dS )Nz%expected per_sample_weights.numel() (z$ to be the same as indices.numel() (r`   rz   r-   )r   r  r-   r.   rP     s   c                    s    | ||o| ddkS Nr   r   r   r   r  r   padding_idx)is_fast_path_index_selectr-   r.   is_fast_path_index_select_scale  s   z;meta_embedding_bag.<locals>.is_fast_path_index_select_scalec                 S   s<   | j tjks| j tjko| ddko|ddko|dk S Nr   r   )rJ   rB   rF   rD   r   )r   r   r  r-   r-   r.   r    s   z5meta_embedding_bag.<locals>.is_fast_path_index_selectc                    s"   |d ur| |||S  | ||S r(   r-   r  )r  r  r-   r.   is_fast_path  s   z(meta_embedding_bag.<locals>.is_fast_pathcpuc                   S   rV   r  r-   r-   r-   r-   r.   rP     rX   )rB   rS   rJ   rx   rg  r=   r  r   r{   r   r   rz   r  r|   )r   r   r+  scale_grad_by_freqr  sparser  include_last_offsetr  num_bagsr   MODE_SUM	MODE_MEANMODE_MAXr  
offset2bagbag_sizemax_indicesfast_path_sumnumBagsr-   )r   r  r  r+  r  r   r.   meta_embedding_bag  sv   









r  c                 G   sB   t | ||g|R  \}}}}t|dkr|| }||||fS )Nr  )r  r  r{   r   )r   r   r+  r?   r   r  r  r  r-   r-   r.   meta_embedding_bag_forward_only  s   r   c                 C   s.   |r|S | j js| j jr| j S |rtjS | j S r(   )rJ   r   r   rB   rx   )r   rJ   promote_int_to_longr-   r-   r.   _get_reduction_dtype  s   r  rt   c                C   s6   t | |dd}t| j|}t| ||}| j||dS )NT)r  rt   )r  r=   r>  r|   r?  r{   )r   r  rA  rJ   r
  r  r-   r-   r.   meta_nansum  s   r  c                 C   s$   t | jtt|  }| |S r(   )r=   r  r|   rR   r   rk   r{   )r   r  r-   r-   r.   meta_median  s   
r  c                 C   sL   t | dkrtd t| j|f}t| ||}| || j|tjdfS )Nr  zmedian CUDA with indices outputrt   )	r  r=   alert_not_deterministicr>  r|   r?  r{   rB   rx   )r   rk   rA  r  r-   r-   r.   meta_median_mode_dim  s   
r  c                 C   r&  r(   r-   r   r-   r-   r.   meta_logical_not_)  rP  r  c                    sd   t t|  kdd  t|   }d| t| j   fddttD }| |S )Nc                   S   rV   )NzZNumber of dimensions of repeat dims can not be smaller than number of dimensions of tensorr-   r-   r-   r-   r.   rP   2  rX   zmeta_repeat.<locals>.<lambda>r  c                    s   g | ]
} | |  qS r-   r-   r  padded_sizer|  r-   r.   r;   9  r  zmeta_repeat.<locals>.<listcomp>)rB   rS   r   rk   rR   r|   r   r{   )r}   r|  num_new_dimensionstarget_sizer-   r  r.   meta_repeat.  s   
r  c                 C   r&  r(   r-   r   r-   r-   r.   
meta_zero_=  rP  r  c                 C   s   t |tjrt| j|j | S r(   )rY   rB   r   rU   r|   r}   r   r-   r-   r.   meta_binop_inplaceB  s   r  c                 C   sf   dd }dd }dd }|| r||rt d|| r$||s$t dt|tjr1t| j|j | S )	a*  
    Some checks for inplace ops.
    Checks for promotion rules for some dtypes.
    int.add/sub_(float) and bool.add/sub_(others) are rejected.
    Promoting in these in-place operations would require reallocating
    and copying over elements, hence not allowed.
    Checks for alpha param.
    c                 S       t | trt| jS t | tS r(   )rY   r   r=   r  rJ   r   r[   r-   r-   r.   is_integerice     

z.meta_binop_inplace_alpha.<locals>.is_integericc                 S   r  r(   )rY   r   r=   r  rJ   r   r  r-   r-   r.   
is_floatick  r  z,meta_binop_inplace_alpha.<locals>.is_floaticc                 S   r  r(   )rY   r   r=   is_boolean_dtyperJ   r   r  r-   r-   r.   is_booleanicq  r  z.meta_binop_inplace_alpha.<locals>.is_booleanicz]Promotion of int.add/sub_(float) in in-place ops are not possible due to element size change.z_Promotion of book.add/sub_(others) in in-place ops are not possible due to element size change.)r   rY   rB   r   rU   r|   )r}   r   r  r  r  r  r-   r-   r.   meta_binop_inplace_alphaS  s   r  c                 K      t | tjdS Nr4   rA   r   r>   )r}   kwargsr-   r-   r.   
meta_round  s   r  c                    sl   t tj fdd tt jr&t tj fdd d S t tt fdd d S )Nc                           dj  S )Nz7: Expected input tensor to have an integral dtype. Got rt   r-   )r  r}   r-   r.   rP     rQ   z#shift_dtype_check.<locals>.<lambda>c                      r  )Nz6: Expected shift value to have an integral dtype. Got rt   r-   r  rN  r-   r.   rP     rQ   c                      s     d S )Nz): Expected shift value to be an int. Got r-   r-   r  r-   r.   rP     r|  )rB   rS   r=   r  rJ   rY   r   r   r  r}   rN  r-   r   r.   shift_dtype_check  s   

r!  c                 C      t d| | t| |tjdS )Nrshiftr  r!  rA   r   r>   r  r-   r-   r.   meta_rshifts     r%  c                 C   r"  )Nlshiftr  r$  r  r-   r-   r.   meta_lshifts  r&  r(  c                 C      |  | jS r(   r  r   r-   r-   r.   	meta_zero     r*  c                 C   r&  r(   r-   r}   rN  r-   r-   r.   
meta_fill_  rP  r-  c                 C   r  r(   r  r,  r-   r-   r.   	meta_fill     
r.  c                 C   r&  r(   r-   r   r-   r-   r.   
meta_relu_  rP  r0  c                 C      t | |tjdS r  r  )r}   r   r  r-   r-   r.   meta__add_relu     r2        ?UUUUUU?c                 C   r  r(   r  r}   noiselowerr  r  r   r-   r-   r.   meta_rrelu_with_noise  s   
r9  c                 C   s   t | t |fS r(   r  r6  r-   r-   r.    meta_rrelu_with_noise_functional  s   r:  c                 C   r&  r(   r-   )r}   r8  r  r  r   r-   r-   r.   meta_rrelu_with_noise_  s   r;  c                 C   r  r(   r  r}   r   r   
accumulater-   r-   r.   meta_index_put  r/  r>  c                 C   s   t | j|j | S r(   rU   r|   )r}   r  valuer-   r-   r.   meta_masked_fill_  s   rA  c                 C   s    |  |  jt| d}|S r#  )r{   r   r   r=   r  )r}   r  r  masked_scaler-   r-   r.   meta__masked_scale  s   rC  c                    s@   t |jt jt jfv dd  t  jjk fdd  S )Nc                   S   rV   )NzMask must be bool or uint8r-   r-   r-   r-   r.   rP     rX   z&meta_masked_scatter_.<locals>.<lambda>c                      r7  )NzEmasked_scatter: expected self and source to have same dtypes but got r   rt   r-   r}   r   r-   r.   rP     s
    )rB   rS   rJ   r  r  )r}   r  r   r-   rD  r.   meta_masked_scatter_  s   
rE  c                 C   s*   t | |\} }tj| tjd}t|||S r#  )r#   rB   r   r   rE  )r}   r  r   r   r-   r-   r.   meta_masked_scatter  s   rF  c                 C   s
   |  |S r(   r8  )r}   r  r  r-   r-   r.   meta_masked_scatter_backward
  r/  rG  c                 C   r&  r(   r-   r<  r-   r-   r.   meta_index_put_  rP  rH  c                 C   r)  r(   )viewr|   r   r-   r-   r.   
meta_alias  r+  rJ  c                    s   t |  dkdd  t | dkdd  |  }|  |d |d |d } d }||ft  d koB d k fdd |}|sqd urqt  dkd	d  t  kfd
d |S )Nr   c                   S   rV   r  r-   r-   r-   r-   r.   rP     rX   z)common_meta_baddbmm_bmm.<locals>.<lambda>c                   S   rV   r  r-   r-   r-   r-   r.   rP     rX   r   r   r   c                	      r  r  r-   r-   r  r-   r.   rP   (  s    c                   S   rV   )Nzself must be a 3D tensorr-   r-   r-   r-   r.   rP   1  rX   c                      s   d  d   S )Nz*Expected an input tensor shape with shape z but got shape: r   r-   )rZ  self_baddbmmr-   r.   rP   4  r  )rB   rS   rk   r   r{   )r  r  is_bmmrK  r  res_rowsres_colsr   r-   )r  r  r  rZ  rK  r.   common_meta_baddbmm_bmm  s*   


rO  c                 C   s   t | |dS )NT)rO  )r}   r  r-   r-   r.   meta_bmm:  r+  rP  c                 C   s<   | | }| | }|dkrt |dk t |dk kr|d8 }|S r  )r  )r8   yqr  r-   r-   r.   div_rtn?  s
    rS  c                 C   sZ   t | | | ||d   d |r|d nd |d }|r+|d | | | kr+|d8 }|S r  )rS  )	inputSize
kernelSizerH  rI  r   r  r  
outputSizer-   r-   r.   pooling_output_shape_pad_lrI  s*   

	rW  c                    sl   t |dkdd  t dkfdd t d   d d k fdd t| | |S )Nr   c                   S   rV   )Nzstride should not be zeror-   r-   r-   r-   r.   rP   e  rX   z&pooling_output_shape.<locals>.<lambda>c                      r  )Nz'pad must be non-negative, but got pad: r-   r-   )padr-   r.   rP   f  r  r   r   c                      s   d d d  S )NzApad should be at most half of effective kernel size, but got pad=z, kernel_size=z and dilation=r-   r-   r  rU  rX  r-   r.   rP   i  s
   )rB   rS   rW  )rT  rU  rX  r   r  r  r-   rY  r.   r.  d  s   r.  c              	      sN     }tdkodkdd  t|dko|dkdd  t|dko+|dkdd   ddko= ddk}|tjkrWt|dkoQ|oQ d	dkd
d  n"t|d	krf ddkrf|pr|dkor|or d	dk fdd td 
kod 	k	
fdd tdkodkfdd d S )Nr   c                   S   rV   )NzCkernel size should be greater than zero, but got kH: {kH}, kW: {kW}r-   r-   r-   r-   r.   rP     rX   z$pool2d_shape_check.<locals>.<lambda>c                   S   rV   )Nz>stride should be greater than zero, but got dH: {dH}, dW: {dW}r-   r-   r-   r-   r.   rP     rX   c                   S   rV   )Nz\dilation should be greater than zero, but got dilationH: {dilationH}, dilationW: {dilationW}r-   r-   r-   r-   r.   rP     rX   r   r   ri  r   c                   S   rV   )NzExpected 4D (batch mode) tensor expected for input with channels_last layout with optional 0 dim batch size for input, but got: {input.size()}r-   r-   r-   r-   r.   rP     rX   c                         d    S )NzYExpected 3D or 4D (batch mode) tensor with optional 0 dim batch size for input, but got: r   r-   ri  r-   r.   rP     r|  c                      s   d d d d  S )NzKpad should be smaller than or equal to half of kernel size, but got padW = z	, padH = z, kW = z, kH = r-   r-   )r2  r3  r6  r7  r-   r.   rP     s    c                      s*   d d  d d d d dS NzGiven input size: (r8   z). Calculated output size: (z). Output size is too smallr-   r-   )r8  r9  r  r=  r  r  r-   r.   rP     s    )rk   rB   rS   r   r  )r   r2  r3  r4  r5  r6  r7  	dilationH	dilationWr  r8  r9  r  r  r   r   
valid_dimsr-   )r   r8  r9  r2  r3  r  r=  r  r  r6  r7  r.   r/  s  sB   

r/  rM  rJ  r2  r3  rK  r4  r5  pTpHpW	dilationTr\  r]  rN  rO  rP  rQ  rR  rS  rH  c              
      s  	j }tdkodkodkfdd tdko&dko& dk fdd tdko<dko<dkfdd t|dv 	fdd t|D ]|dkradkraqVt	dk	fd	d qV|rt
kokok
fd
d td kod kod kfdd tdkodkodk
fdd d S )Nr   c                         d d  d S )Nz5kernel size should be greater than zero, but got kT: z, kH: z, kW: r-   r-   )r2  rJ  r3  r-   r.   rP        z$pool3d_shape_check.<locals>.<lambda>c                      rc  )Nz0stride should be greater than zero, but got dT: z, dH: z, dW: r-   r-   )r4  rK  r5  r-   r.   rP     rd  c                      rc  )Nz9dilation should be greater than zero, but got dilationT: z, dilationH: z, dilationW: r-   r-   )r\  rb  r]  r-   r.   rP     rd  rF  c                      r  )Nz/: Expected 4D or 5D tensor for input, but got: r  r-   )r  r   r-   r.   rP     rQ   r|  c                      s     dj  d dS )NzZ: Expected input's non-batch dimensions to have positive length, but input has a shape of z and non-batch dimension z has length zero!)r|   r   r-   )r  r   r   r-   r.   rP     s
   c                      s*   d d  d d d d dS )Nzinput image (T: r  rp  z ) smaller than kernel size (kT:  kH:  kW: r`   r-   r-   )rO  rN  rP  r2  rJ  r3  r-   r.   rP     s   r   c                      s(   d d d  d d d S )NzHpad should be smaller than or equal to half of kernel size, but got kT: rf  re  z padT: z padW: z padH: r-   r-   )r2  rJ  r3  r`  r_  ra  r-   r.   rP     s   r   c                      s6   d d d  d d d d d dS r[  r-   r-   )rO  rN  rP  rM  rR  rQ  rS  r-   r.   rP     s   )r   rB   rS   r   r   )r   rM  rJ  r2  r3  rK  r4  r5  r_  r`  ra  rb  r\  r]  rN  rO  rP  rQ  rR  rS  r  rH  r   r-   )r4  rK  r5  r\  rb  r]  r  r   rO  r   rN  rP  r2  rJ  r3  rM  rR  rQ  rS  r`  r_  ra  r.   rI    sJ   	"rI  c                 C   s   | j }t| |||||||	|
|||||||||||| t|||d | t|||d | t|||d | t|||d | t|||d | t|||d | t|||d | t|||d | d S )Nri  r   r   r   r   rI  r  )r   r_  r   rM  rJ  r2  r3  rK  r4  r5  r_  r`  ra  rb  r\  r]  rN  rO  rP  rQ  rR  rS  r  r   r-   r-   r.   max_pool3d_backward_shape_check  s@   rh  c                 C   s   | j }t| ||||||||	|
|ddd|||||||d t|||d | t|||d | t|||d | t|||d | d S )Nr   Tri  r   r   rg  )r   r_  rM  rJ  r2  r3  rK  r4  r5  r_  r`  ra  rN  rO  rP  rQ  rR  rS  r  r   r-   r-   r.   rU  L  s:   rU  c                 C   sB  dd }|d|\}}t t|dv dd  t|dkr#||}	}
n|d|\}	}
|d	|\}}|d
|\}}| d}| d}| d}t| }|t jkr^t |  dkdd  n|t jkrpt |  dv dd  nt ddd  t	||||	||}t	||||
||}t
| |||	|
|||||||||| |||fS )Nc                    r   )Nr!  c                      r  )Nzmax_pool2d: r"  r-   r-   r#  r-   r.   rP     rw   zEmax_pool2d_checks_and_compute_shape.<locals>.unpack.<locals>.<lambda>r   r   r$  r%  r-   r#  r.   r(    r)  z3max_pool2d_checks_and_compute_shape.<locals>.unpackr  r*  c                   S   rV   )NzOmax_pool2d: stride must either be omitted, a single int, or a tuple of two intsr-   r-   r-   r-   r.   rP     rX   z5max_pool2d_checks_and_compute_shape.<locals>.<lambda>r   r   r;  r  r-  rl  r   ri  c                   S   rV   )NzMnon-empty 4D (batch mode) tensor expected for input with channels_last layoutr-   r-   r-   r-   r.   rP     rX   rh  c                   S   rV   )Nz9non-empty 3D or 4D (batch mode) tensor expected for inputr-   r-   r-   r-   r.   rP     rX   Fc                   S   rV   )Nz?Unsupport memory format. Supports only ChannelsLast, Contiguousr-   r-   r-   r-   r.   rP     rX   )rB   rS   r   r   r=   r  r  rk   r   r.  r/  )r   r  r   r;  r  r  r(  r2  r3  r4  r5  r6  r7  r\  r]  r  r8  r9  r   r  r  r-   r-   r.   r    sb   		









r  c                    s   t |||||\}tj jk fdd |jfdd}	|	  |	| t}
tjjjj	|
dS )Nc                      r  )NzExpected dtype z  for `gradOutput` but got dtype rt   r-   re  r-   r.   rP     r  z7meta_max_pool2d_with_indices_backward.<locals>.<lambda>c                    s:   t | d   t | d  t | d  d S )Nr   r   r   )r  )rx  )r=  r   r  r  r-   r.   _check_dim_size  s   z>meta_max_pool2d_with_indices_backward.<locals>._check_dim_sizer0  )
r  rB   rS   rJ   r   r=   r  rp   r|   rh   )r_  r}   r  r   r;  r  r  r   r  ri  r   r-   )r_  r=  r   r  r  r}   r.   %meta_max_pool2d_with_indices_backward  s.   

rj  c                 C   s   t | |||||\}}}|  dkr| dnd}	t| }
|  dkr*|||g}n|	|||g}tj|| j| j|
dtj|tj	| j|
dfS r  )
r  rk   r   r=   r  rB   rp   rJ   rh   r   r  r-   r-   r.   meta_max_pool2d_with_indices  s2   
rk  c           
   	      s  t jdv fdd j}t|d |D ] t  dkd  d  d qt td	kd
d  t t|d	kdd  d}dd|dkr_d}nd}t jjkdd  t jdkfdd d}d}d	 t ||kd t ||kdd  t  d	k fdd t |d d  d kfdd t |d d  d kfdd  dkr|||d |d g}	n	||d |d g}	t j|	jj	dt j|	t j
j	dfS )Nrh  c                      rs   )Nz:fractional_max_pool2d: Expected 3D or 4D tensor, but got: r  r-   r   r-   r.   rP   %  rw   z,meta_fractional_max_pool2d.<locals>.<lambda>r   r   z^fractional_max_pool2d: Expected input to have non-zero  size for non-batch dimenions, but got r]  z emptyr   c                   S   rV   )NzNfractional_max_pool2d: kernel_size musteither be a single int or tuple of Intsr-   r-   r-   r-   r.   rP   3  rX   c                   S   rV   )NzOfractional_max_pool2d: output_size must either be a single int or tuple of Intsr-   r-   r-   r-   r.   rP   8  rX   r-  rl  r   ri  r   c                   S   rV   )Nz6Expect _random_samples to have the same dtype as inputr-   r-   r-   r-   r.   rP   F  rX   c                      rs   )Nz1Expect _random samples to have 3 dimensions got, r  r-   )random_samplesr-   r.   rP   J  rw   z=Expect _random_samples.size(0) no less then input batch size.c                   S   rV   )Nz<Expect _random_samples.size(1) equals to input channel size.r-   r-   r-   r-   r.   rP   V  rX   c                      r  )Nz/Expect _random_samples.size(2) equals to 2 got .r-   r-   )r   r-   r.   rP   X  rw   c                         dd  d  S )Nz%fractional_max_pool2d: kernel height r   z' is too large relative to input height r-   r-   )input_heightr  r-   r.   rP   \  r  c                      rn  )Nz$fractional_max_pool2d: kernel width r   z& is too large relative to input width r-   r-   )input_widthr  r-   r.   rP   `  r  rJ   rh   )rB   rS   r   r   r   r   rJ   rk   rp   rh   r   )
r}   r  rZ  rl  r   input_channelsinput_batchr   cr   r-   )r   ro  rp  r  rl  r}   r.   meta_fractional_max_pool2d!  s   










ru  c                 C   s  t t|dv dd  |d }t|dkr|n|d }t|dkr$|n|d }t | p2t|dv dd  |s;|n|d }	|sC|nt|dkrK|	n|d }
|sS|nt|dkr[|	n|d }t t|dv dd  |d }t|dkrw|n|d }t|dkr|n|d }t t|dv d	d  |d }t|dkr|n|d }t|dkr|n|d }t | jd
v dd  | jdkr| dnd}| d}| d}| d}| d}t||||	||}t||||
||}t||||||}t| |||||	|
|||||||||||||d | jdkot| t j	k}| jdkr:| 
d}|  o2|jt j	d}||||f}n|||||f}| |}| j|t jd}|r_|jt j	d}|jt j	d}||fS )NrB  c                   S   rV   NzMmax_pool3d: kernel_size must either be a single int, or a tuple of three intsr-   r-   r-   r-   r.   rP     rX   z.meta_max_pool3d_with_indices.<locals>.<lambda>r   r   r   c                   S   rV   NzQmax_pool3d: stride must either be omitted, a single int, or a tuple of three intsr-   r-   r-   r-   r.   rP     rX   c                   S   rV   NzImax_pool3d: padding must either be a single int, or a tuple of three intsr-   r-   r-   r-   r.   rP     rX   c                   S   rV   NzJmax_pool3d: dilation must be either a single int, or a tuple of three intsr-   r-   r-   r-   r.   rP     rX   rF  c                   S   rV   rG  r-   r-   r-   r-   r.   rP     rX   r|  r  r-  rl  r   zmax_pool3d_with_indices()ri  r   rt   )rB   rS   r   r   r   r.  rI  r=   r  r  r  r  r{   r   r   )r   r  r   r;  r  r  rJ  r2  r3  rK  r4  r5  r_  r`  ra  rb  r\  r]  rO  rM  rN  rO  rP  rQ  rR  rS  r  input_channels_last_checkr   r   r   r-   r-   r.   meta_max_pool3d_with_indicesv  s   

  







r|  c                 C   s^  t t|dv dd  |d }t|dkr|n|d }	t|dkr$|n|d }
t | p2t|dv dd  |s;|n|d }|sC|	nt|dkrK|n|d }|sS|
nt|dkr[|n|d }t t|dv dd  |d }t|dkrw|n|d }t|dkr|n|d }t t|dv d	d  |d }t|dkr|n|d }t|dkr|n|d }t |jd
v dd  |d}|d}|d}|d}| d}| d}| d}t|| ||||	|
|||||||||||||||d |jdkot|t jk}|jdkr|	d}|
  o|j
t jd}||j}|r-|jt jd}|S )NrB  c                   S   rV   rv  r-   r-   r-   r-   r.   rP     rX   z7meta_max_pool3d_with_indices_backward.<locals>.<lambda>r   r   r   c                   S   rV   rw  r-   r-   r-   r-   r.   rP     rX   c                   S   rV   rx  r-   r-   r-   r-   r.   rP     rX   c                   S   rV   ry  r-   r-   r-   r-   r.   rP     rX   rF  c                   S   rV   rG  r-   r-   r-   r-   r.   rP     rX   r  r-  rl  r   z"max_pool3d_with_indices_backward()r|  ri  r   )rB   rS   r   r   r   rh  r=   r  r  r  r  r{   r|   r   )r_  r   r  r   r;  r  r  r   rJ  r2  r3  rK  r4  r5  r_  r`  ra  rb  r\  r]  rM  rN  rO  rP  rQ  rR  rS  r  r{  rb  r-   r-   r.   %meta_max_pool3d_with_indices_backward  s   
  









r}  gridc                    s   t j jk fdd t jt jko jt jk fdd t jd  jd k fdd t  jd jd k fdd tdjD ]t j dkfd	d qPd S )
Nc                      r  )NzNgrid_sampler(): expected input and grid to be on same device, but input is on z and grid is on rV  r-   r~  r   r-   r.   rP   I  r  z+check_grid_sampler_common.<locals>.<lambda>c                      r  )NzTgrid_sampler(): expected input and grid to have torch.strided layout, but input has z and grid has )rg   r-   r  r-   r.   rP   P  r  r   c                      r  )NzZgrid_sampler(): expected grid and input to have same batch size, but got input with sizes  and grid with sizes r  r-   r  r-   r.   rP   W  r  r   r   c                      s   dj d  d j S )Nz+grid_sampler(): expected grid to have size r   z, in last dimension, but got grid with sizes )r   r|   r-   r  r-   r.   rP   ^  s   c                      rj  )NzYgrid_sampler(): expected input to have non-empty spatial dimensions, but input has sizes r]  r^  r  r-   rk  r-   r.   rP   g  rl  )rB   rS   rh   rg   r(  r|   r   r   )r   r~  r-   )r~  r   r   r.   check_grid_sampler_commonF  s,   
r  c                   @   s   e Zd ZdZdZdZdS )GridSamplerInterpolationr   r   r   N)rb   
__module____qualname__BILINEARNEARESTBICUBICr-   r-   r-   r.   r  n  s    r  interpolation_modec                    sP   t jdkoj jk fdd t jdko |tjjk dd  d S )Nr|  c                      r  )Nzdgrid_sampler(): expected 5D input and grid with same number of dimensions, but got input with sizes r  r  r-   r  r-   r.   rP   w  s
   z'check_grid_sampler_3d.<locals>.<lambda>c                   S   rV   )Nz<grid_sampler(): bicubic interpolation only supports 4D inputr-   r-   r-   r-   r.   rP     rX   )rB   rS   r   r  r  r@  )r   r~  r  r-   r  r.   check_grid_sampler_3dt  s   

r  c           
      C   s:   |d }|rt j|t jd}nd }t j|t jd}	||	fS Nr   r   )rB   r  r   r   
r_  r   r~  r  padding_modealign_cornersr  input_requires_gradrb  	grad_gridr-   r-   r.   grid_sampler_2d_backward_meta  s   
r  c           
      C   s\   t | | t| || | jd }| jd }|jd }|jd }|jd }	| |||||	fS )Nr   r   r   r   )r  r  r|   r{   )
r   r~  r  r  r  r  Cout_Dout_Hout_Wr-   r-   r.   grid_sampler_3d  s   
	




r  r  c           
      C   sP   t || t||| |d }|rtj|tjd}nd }tj|tjd}	||	fS r  )r  r  rB   r  r  r   r  r-   r-   r.   grid_sampler_3d_backward  s   
r  c                 O   s:   | dd }|st|}||d< tj| g|R i |S )NrJ   )rI   r=   	get_dtyperB   rp   )r   r  r?   r  rJ   r-   r-   r.   full  s
   
r  c                 C   s   |t jkrJt |d u dd  t jd|d u r| jn|||d u r"| jn||d}| jr8||  | 	 | 
  n||  |  d |d |S tjj| |||||d}|d |S )Nc                   S   rV   )Nz9memory format option is only supported by strided tensorsr-   r-   r-   r-   r.   rP     rX   zzeros_like.<locals>.<lambda>r   r   TrU  )rB   
sparse_coorS   rp   rJ   rh   	is_sparsesparse_resize_and_clear_r   
sparse_dim	dense_dimrk   _coalesced_r%   r   r   fill_)r}   rJ   rg   rh   ri   r   r  r-   r-   r.   r    s:   
	

	r  c           	         s   ddl m}  }t|dkdd   dkr n |   }t| |kp1||k  fdd dkrAn| t }t } |    }| = | = 	|||S )Nr   guard_size_obliviousc                   S   rV   )Nz-select() cannot be applied to a 0-dim tensor.r-   r-   r-   r-   r.   rP     rX   zmeta_select.<locals>.<lambda>c                      s   d d   d  S )Nzselect(): index z! out of range for tensor of size z at dimension r   r-   rk   rv   r}   r-   r.   rP     s
    )
r   r  rk   rB   ry   r   r   r   r   r   )	r}   rk   rv   r  r   r   new_sizer   new_storage_offsetr-   r  r.   meta_select  s(   
r  c                 C   r  r(   r=   clone_preserve_strides)r}   r   rk   rv   r-   r-   r.   meta_select_scatter  r/  r  c                 C   r  r(   r  )r}   r   rk   rd   rc   stepr-   r-   r.   meta_slice_scatter!  r/  r  dim_post_exprwrap_scalarc                 C   sb   |dkr
|sJ d}| }|d }| |k s| |kr'J d|  d| d| d| dk r/| |7 } | S )Nr   r   zdim z out of bounds (r_   r`   r-   )rk   r  r  r[  r\  r-   r-   r.   r   '  s   ,r   c                 C   s   |   dkrdS | j| S r  r  )rx  rk   r-   r-   r.   ensure_nonempty_size3  s   r  c                    st   t  d}t  d}t||kdd  t|D ] kr7tttk fdd qd S )Nr   c                   S   rV   )NzDIndex tensor must have the same number of dimensions as input tensorr-   r-   r-   r-   r.   rP   =  rX   z$gather_shape_check.<locals>.<lambda>c                      s$   d dj  dj  d   S )Nz!Size does not match at dimension z expected index  to be no larger than self  apart from dimension r  r-   rk   r   rv   r}   r-   r.   rP   C  s    )r\  rk   rB   rS   r   r  )r}   rk   rv   	self_dims
index_dimsr-   r  r.   gather_shape_check8  s   r  c                    sb   ddl m} t||  }|  dk}|s+t jtjk fdd t	| |  | 
 jS )Nr   r  c                      rs   )Nz2gather(): Expected dtype int64 for index, but got rt   r-   ru   r-   r.   rP   Q  rw   zmeta_gather.<locals>.<lambda>)r   r  r   rk   rz   rB   rS   rJ   rx   r  r{   r|   )r}   rk   rv   sparse_gradr  wrapped_dimis_index_emptyr-   ru   r.   meta_gatherH  s   

r  c                 C   s   |r*| dkrdS | dkrdS | dkrdS | dkrdS | d	kr d
S t ddd  d S | dkr0dS | dkr6dS t ddd  d S )Nsum
REDUCE_ADDr  REDUCE_MULTIPLYmeanREDUCE_MEANamaxREDUCE_MAXIMUMaminREDUCE_MINIMUMFc                   S   rV   )Nz=reduce argument must be either sum, prod, mean, amax or amin.r-   r-   r-   r-   r.   rP   f  rX   z#get_operator_enum.<locals>.<lambda>addmultiplyc                   S   rV   )Nz/reduce argument must be either add or multiply.r-   r-   r-   r-   r.   rP   n  rX   r  )reduce_use_new_optionsr-   r-   r.   get_operator_enumX  s,   r  c                    sd   ddl m} || dkrt|jtjk fdd |d ur0t|j|jk fdd d S d S )Nr   r  c                      
     dS )Nz"(): Expected dtype int64 for indexr-   r-   method_namer-   r.   rP   y  r  z,scatter_gather_dtype_check.<locals>.<lambda>c                      r  )Nz0(): Expected self.dtype to be equal to src.dtyper-   r-   r  r-   r.   rP     r  )r   r  rz   rB   rS   rJ   rx   )r  r}   rv   src_optr  r-   r  r.   scatter_gather_dtype_checks  s   



r  c                 C   s
   t | dS r   )r\  r   r-   r-   r.   ensure_nonempty_dim  s   
r  c           	         s0  ddl m} | dkrd S tt t kdd  d}t }t|D ]}t|}| kr:q.|t|krEd} nq.|scd urct|D ]}t|}|t|krbd} nqPd urtt t kdd  t|  fdd d S t|  fd	d d S )
Nr   r  c                   S   rV   NzCIndex tensor must have the same number of dimensions as self tensorr-   r-   r-   r-   r.   rP     rX   z%scatter_shape_check.<locals>.<lambda>FTc                   S   rV   r  r-   r-   r-   r-   r.   rP     rX   c                      s&   dj  dj  d  dj   S )NExpected index r  r  z and to be no larger than src r  r-   rk   rv   r}   r  r-   r.   rP     s    c                      s   dj  dj  d   S )Nr  r  r  r  r-   r  r-   r.   rP     s    )	r   r  rz   rB   rS   r  rk   r   r  )	r}   rk   rv   r  r  is_wrong_shaper  r   index_d_sizer-   r  r.   scatter_shape_check  sJ   

r  c                 C   sD   t ||  }td| || t| ||| |d ur t|| d S d S )Nscatter)r   rk   r  r  r  )r}   rk   rv   r   r  r  r  r-   r-   r.   scatter_meta_impl  s   r  c                 C   s   t | |||d | | jS Nr  r  r{   r|   r}   rk   rv   r   r-   r-   r.   meta_scatter_add  s   r  c                 C   s   t | |||d | S r  r  r  r-   r-   r.   meta_scatter_add_  rd  r  c                 C   s0   t |tjr|nd }t| |||| | | jS r(   )rY   rB   r   r  r{   r|   r}   rk   rv   src_or_valuer!  r   r-   r-   r.   meta_scatter  s   
r  c                 C   s(   t |tjr|nd }t| |||| | S r(   )rY   rB   r   r  r  r-   r-   r.   meta_scatter_  s   	r          queryr   r@  	dropout_p	is_causalreturn_debug_maskr  c                 C   s
  |  d}|  d}|  d}	|  d}
| d}| dd}t|dd}tj|||	ftj| jd}|rb|
dkr=dnd}t|	| }|dkrMd}n|dkrSd}tj|||	|f| j	| jd}n
tjd| j	| jd}||d d |	|tjd	tj
d
dtjd	tj
d
d|f	S )Nr   r   r   r   rq  @         r-   rf   )r   r  rB   r   rp   rF   rh   r  ceilrJ   rx   )r  r   r@  r  r  r  r  r   	num_headsmax_seqlen_batch_qhead_dimmax_seqlen_batch_kquery_t	attention	logsumexpblocksize_cmax_seqlen_k
debug_maskr-   r-   r.   (meta__scaled_dot_product_flash_attention  sF   






r  	attn_biascompute_log_sumexpc	              	   C   s   |  d}	|  d}
|  d}| d}|  d}| d}tj|	|
||f| j| jd}tj|	|
|ftj| jd}tjdtjdd}tjdtjdd}||d d ||||d f	S )Nr   r   r   r   rq  r-   rf   )r   rB   rp   rJ   rh   rF   rx   )r  r   r@  r  r  r  r  r  r  r  r&  S_QS_KVD_QKD_Vr  
logsum_expseedoffsetr-   r-   r.   (meta__scaled_dot_product_cudnn_attention$  s0   





r  r_  r  	cum_seq_q	cum_seq_kmax_qmax_kphilox_seedphilox_offsetc                 C   sX   t |dddd}t |dddd}t |dddd}|||fS r  )rB   r   r  )r_  r  r   r@  r   r  r  r  r  r  r  r  r   r  r  grad_qgrad_kgrad_vr-   r-   r.   'meta__scaled_dot_product_flash_backwardO  s   
r  	attn_maskc                 C   s\   |  d}|  d}|  d}	|  d}
t| }tj||	|ftj| jddd}||fS )Nr   r   r   r   rq  )r   rB   r   rp   rF   rh   r  )r  r   r@  r  r  r  r  r   r  r  r  r  r  r-   r-   r.   0meta__scaled_dot_product_flash_attention_for_cpuk  s$   





r  c
                 C   s   | d}
| d}| d}| d}| d}tj|
|||fd|j|jd}tj|
|||fd|j|jd}tj|
|||fd|j|jd}|||fS )Nr   r   r   r   r   r   r   r   rq  )r   rB   empty_permutedrJ   rh   )r_  r  r   r@  r   r  r  r  r  r  r   r  r  len_qlen_kr  r  r  r-   r-   r.   9meta__scaled_dot_product_flash_attention_for_cpu_backward  s0   








r  c                 C   s   |  dd} | dd}| dd}| d}| d}	|d}
| d}| d}|d}tj||	||| j| jd}|rHt|	d d nd}tj|||ftj| jd}| dd}tjdtj	d	d}tjdtj	d	d}||||fS )
Nr   r   r   rl  r   rq  r  r-   rf   )
r  r   rB   rp   rJ   rh   r  r  rF   rx   )r  r   r@  r  r  r  r  r  r  r&  r  r  KKvr  logsumexp_dimr  r  r  r-   r-   r.   ,meta__scaled_dot_product_efficient_attention  s(   





r  grad_input_maskc                 C   s  | d}| d}| d}| d}| d}| d}tj||||fd|j|jd}tj||||fd|j|jd}tj||||fd|j|jd}d }|d ur|
d r| d}|d dkrb|n|d |d  }t|  }||d< tj||j|jd}|d	d |f }||||fS )
Nr   r   r   r   r  rq  r   r  .)r   rB   r	  rJ   rh   r   rp   )r_  r  r   r@  r  r   r  r   r  r  r  r  r  r   r  r  r  
head_dim_vr  r  r  r  	grad_biaslastDimlastDimAligned	new_sizesr-   r-   r.   +meta__scaled_dot_product_efficient_backward  sF   









 
r  c                 C   s(   t |}t |}t |}|||fS r(   r  )r_  r  r   r@  r   r  r   r  r  r  r  r  r  r  r  r  r  r  r  r-   r-   r.   'meta__scaled_dot_product_cudnn_backward!  s   



r  window_size_leftwindow_size_right	seqused_kalibi_slopesc                 C   s  |d u r	|  dn| d }|d u r|  dn|}|d u r#| dn|}|  d}|  d}t| }tj|||ftj| jd}|	rn|dkrIdnd}t|| }|dkrYd}n|dkr_d}tj||||f| j	| jd}n
tjd| j	| jd}||tjd	tj
d
dtjd	tj
d
d|fS )Nr   r   rl  r   rq  r  r  r  r-   rf   )r   rz   rB   r   rp   rF   rh   r  r  rJ   rx   )r  r   r@  r  r  r  r  r  r  r  r  r  r  r  r  r   r  r  r  r  r  r  r  r  r  r-   r-   r.   meta__flash_attention_forward>  s<   



r  c                 C   s(   t |}t |}t |}|||fS r(   r  )r_  r  r   r@  r   r  r  r  r  r  r  r  r   r  r  r  r  
grad_querygrad_key
grad_valuer-   r-   r.   meta__flash_attention_backward  s   



r!  cu_seqlens_qcu_seqlens_kmax_seqlen_qr  custom_mask_typecausal_diagonalseqlen_kwindow_sizec                 C   s  |  d}|  d}| d}|  d}|  d}| d}tj||||| j| jd}|d ur6| dd n|}|}|d urF|d usDJ |}|d urL|n|}|
rYt|d d nd}tj|||ftj| jd}tjdtjdd}tjdtjdd}||||||fS )	Nr   r   rl  r   rq  r  r-   rf   )	r   rB   rp   rJ   rh   r  r  rF   rx   )r  r   r@  r   r"  r#  r$  r  r  r%  r  r  r&  r'  r(  r  r&  r  r  r  r  r  logsumexp_batch_dimactual_max_seqlen_qactual_max_seqlen_kr  r  r  r  r-   r-   r.   !meta__efficient_attention_forward  s.   





r,  bias_requires_gradnum_splits_keyshared_storage_dqdkdvc                 C   sL  |rSt |jd |jd kdd  t |jd |jd kdd  t jg |jdd d|jd |jd R |j|jd	}|d
d}|d
d}|d
d}nt |}t |}t |}|d ur|d}|d dkrs|n|d |d  }t	| }||d< t j||j|jd	}|dd |f }nt jd|jd}||||fS )Nr   c                   S   rV   )Nz,seqlen must match for `shared_storage_dqdkdvr-   r-   r-   r-   r.   rP     rX   z4meta__efficient_attention_backward.<locals>.<lambda>r   c                   S   rV   )Nz3embedding dim must match for `shared_storage_dqdkdvr-   r-   r-   r-   r.   rP     rX   r   rl  r   rq  r-  r   r  .r-   rV  )
rB   rS   r|   rp   rJ   rh   r  r   r   r   )r_  r  r   r@  r   r"  r#  r$  r  r  r  r   r  r%  r-  r  r.  r/  chunkr  r  r   r  r  r  r  r-   r-   r.   "meta__efficient_attention_backward  s:   *



 r1  scale_ascale_bscale_resultuse_fast_accumc                    s<  dd }t  dko dkfdd t |jo$|jfdd tdkrdd	 }	d
d }
dd }t |	 pJ|fdd t |
 p\|fdd t dd dkfdd t dd dkodd dkfdd t jt jkojt jkdd  j\ }d	 dkr	 dkrnMt  dkoĈ dkfdd d krddkrddkrdkrt 
 o
 dd  nt d fdd |d ur|nj}t jdd|jdS )Nc                 S   s   | t jt jt jt jfv S r(   )rB   r  float8_e5m2float8_e4m3fnuzfloat8_e5m2fnuzrt   r-   r-   r.   is_fp8_type  s   z#meta_scaled_mm.<locals>.is_fp8_typer   c                      s   d   d    S )Nz%Inputs must be 2D but got self.dim()=z and mat2.dim()=r   r-   r  r}   r-   r.   rP   $  r<  z meta_scaled_mm.<locals>.<lambda>c                      r  )Nz8Expected both inputs to be fp8 types but got self.dtype=z and mat2.dtype=rt   r-   r:  r-   r.   rP   (  r  r  c                 S   s   | d | d ko| d dkS r  r-   r  r-   r-   r.   is_row_major-     z$meta_scaled_mm.<locals>.is_row_majorc                 S   s   | d dko| d dkS r  r-   r  r-   r-   r.   is_col_major0  r  z$meta_scaled_mm.<locals>.is_col_majorc                 S   s   |  ddkp|  ddkS r  r   )	tensor_2dr-   r-   r.   has_zero_dim3  r<  z$meta_scaled_mm.<locals>.has_zero_dimc                      rZ  )Nz#self must be row_major, got stride r  r-   r   r-   r.   rP   8  r|  c                      rZ  )Nz#mat2 must be col_major, got stride r  r-   r  r-   r.   rP   <  r|  r   r  r   c                      s   d  d S )NzBExpected self.size(1) to be divisible by 16, but got self.size(1)=r   r   r-   r   r-   r.   rP   @  rQ   c                      rs   )Nz>Expected both dimensions of mat2 to be divisble by 16 but got r  r-   r@  r-   r.   rP   D  rw   c                   S   rV   )Nz6Both scale_a and scale_b must be float (fp32) tensors.r-   r-   r-   r-   r.   rP   J  rX   c                      s   d   d  S )NzLFor non-tensorwise scaling, scale tensors must be 2D, but got scale_a.dim()=z and scale_b.dim()=r   r-   )r2  r3  r-   r.   rP   U  r<  c                   S   rV   )Nz@Both scale_a and scale_b must be contiguous for rowwise scaling.r-   r-   r-   r-   r.   rP   a  rX   Fc                      sB   d  d d d d d d d d d dS )	Nz}Invalid scaling configuration. For tensorwise scaling, both scales should be scalar. For rowwise scaling, scale_a should be (z, 1), scale_b should be (1, z). Got scale_a.size()=(r   r_   r   z) and scale_b.size()=(r`   r   r-   )r  r   r2  r3  r-   r.   rP   g  s   rq  )rB   rS   rk   rJ   r  r   r   r  r|   rz   r  rp   rh   )r}   r  r2  r3  r   r4  r   r5  r9  r;  r=  r?  r  
_out_dtyper-   )r  r  r   r2  r3  r}   r.   meta_scaled_mm  sn   


"


 rB  c                 C   s    t | ||||dd | | jS NT)r  r  r}   rk   rv   r   r!  r  r-   r-   r.   meta_scatter_reduce_twot  s   rE  c                 C   s   t | ||||dd | S rC  r  rD  r-   r-   r.   meta_scatter_reduce__two{  s   rF  c                   sh   t d    k odkn   fdd   dkr&t j|t j jdS t j d|t j jdS )Nr   r   c                      rZ  )Nz@The probabilty distributions dimensions must be 1 or 2, but got r   r-   ri  r-   r.   rP     r|  z"meta_multinomial.<locals>.<lambda>r   rq  )rB   rS   rk   rp   rx   rh   r   )r   num_samplesreplacementr   r-   ri  r.   meta_multinomial  s   
rI  c                 C   s   d}| D ]}||9 }q|S r   r-   )vsr  vr-   r-   r.   multiply_integers  s   
rL  c                    s   t tkfdd d  t t k fdd t tdd dd  D o9tdd D fdd d d \}}||gR S )Nc                         d  dt  S )Nz%It is expected output_size equals to , but got size r:  r-   )num_spatial_dimsrZ  r-   r.   rP     r  z'upsample_common_check.<locals>.<lambda>r   c                      rM  )Nz$It is expected input_size equals to rN  r:  r-   )expected_input_dimsr@  r-   r.   rP     r  c                 s       | ]}|d kV  qdS r   Nr-   )r7   rS  r-   r-   r.   r\     r  z(upsample_common_check.<locals>.<genexpr>c                      r]   )NzDInput and output sizes should be greater than 0, but got input size z and output size r-   r-   )r@  rZ  r-   r.   rP     s
    )rB   rS   r   r  )r@  rZ  rO  rO  channelsr-   )rP  r@  rO  rZ  r.   upsample_common_check  s   

*rT  c                    sZ   t   dkpt  dd   fdd t  |dd} |jt	 dS )Nr   r   c                      rZ  )Nz>Non-empty 3D data tensor expected but got a tensor with sizes r   r-   ri  r-   r.   rP     r|  z$upsample_nearest1d.<locals>.<lambda>rO  r   
rB   rS   rz   rL  r   rT  r{   r   r=   r  )r   rZ  scalesfull_output_sizer-   ri  r.   upsample_nearest1d     


rY  c           	         s   t   dkpt  dd   fdd t  |dd} |}t } j	\}}}} j
jdkr?|dk r?t j}|j|d	}|S )
Nr   r   c                      rZ  Nz>Non-empty 4D data tensor expected but got a tensor with sizes r   r-   ri  r-   r.   rP     r|  z$upsample_nearest2d.<locals>.<lambda>r   rU  r  ri  r   )rB   rS   rz   rL  r   rT  r{   r=   r  r|   rh   ra   r   r   )	r   rZ  scales_hscales_wrX  r   r   r@   
n_channelsr-   ri  r.   upsample_nearest2d  s   



r_  rZ  r@  r\  r]  c                    st   t ||dd tjdkfdd tdD ]t  k fdd q|jt	dS )Nr   rU  ri  c                      rs   )NzFExpected grad_output to be a tensor of dimension 4 but got: dimension r  r-   rs  r-   r.   rP     rw   z-upsample_nearest2d_backward.<locals>.<lambda>c                
      s&   d d   d d  S )NzCExpected grad_output to have the same shape as output; output.size(z) = z but got grad_output.size(r   r-   rX  r_  r   r-   r.   rP     s   r   )
rT  rB   rS   r   r   r   r{   r   r=   r  )r_  rZ  r@  r\  r]  r-   r`  r.   upsample_nearest2d_backward  s   

	ra  c                    sZ   t   dkpt  dd   fdd t  |dd} |jt	 dS )Nr   r   c                      rZ  )Nz>Non-empty 5D data tensor expected but got a tensor with sizes r   r-   ri  r-   r.   rP     r|  z$upsample_nearest3d.<locals>.<lambda>r   rU  r   rV  )r   rZ  scales_dr\  r]  rX  r-   ri  r.   upsample_nearest3d  rZ  rc  c           
      C   s   t | t j| t jd}}|d urQ|d urQt|tsJ t|ts$J |j}| }	t||}t||}|||	 |||	 t	||d t	||d ||fS ||fS )Nrt   )r  r  )
rB   r   r   rY   r   r|   r   r   r   r    )
r}   stablerk   
descendingr   r   rK  r   r   
out_strider-   r-   r.   	meta_sort  s   	

rg  c                    s  t jdkfdd t jjkfdd dd urPt jdkfdd t  kfdd t jjkfdd t jdkfd	d d
   t   k fdd t tfddfD dd  d S )Nr   c                          j  dS Nz != 2r  r-   input_gatesr-   r.   rP   +  rw   z%rnn_cell_checkSizes.<locals>.<lambda>c                         j  d j  S N != r  r-   )hidden_gatesrk  r-   r.   rP   .      r   c                      rh  )Nz != 1r  r-   )
input_biasr-   r.   rP   2  rw   c                      s      d  S rm  r  r-   )
gates_sizerq  r-   r.   rP   5  rp  c                      rl  rm  r  r-   )hidden_biasrq  r-   r.   rP   9  rp  c                      rh  ri  r  r-   )prev_hiddenr-   r.   rP   ;  rw   r   c                
      s,      dd d d d  d
S )Nrn  r   z * z // z (aka r`   )rz   r   r-   )expected_prev_hidden_numelfactorrr  rk  rt  r-   r.   rP   ?  s   , c                 3   s    | ]	}|j  j kV  qd S r(   rV  r6   rj  r-   r.   r\   B  s
    

z&rnn_cell_checkSizes.<locals>.<genexpr>c                   S   rV   )Nz%expected all inputs to be same devicer-   r-   r-   r-   r.   rP   F  rX   )rB   rS   r   r|   r   rz   r  )rk  ro  rq  rs  rv  rt  r-   )ru  rv  rr  rs  ro  rq  rk  rt  r.   rnn_cell_checkSizes#  s8   





rw  c                 C   sL   t | |||d| tj| tjd}tj|tjd}tj|tjd}|||fS )Nri  r   )rw  rB   r   r   )rk  ro  cxrq  rs  	workspacehycyr-   r-   r.   _thnn_fused_lstm_cell_metaJ  s
   
r|  c                 C   s(  t |dk}|rt |}|d }| jd }n|
r| jd n| jd }|
r)| jd n| jd }d}|r4dnd}|dkr<|n|}|rG||| g}n|
rP|||| gn|||| g}| |}|	| ||g}|d u rptjd| jd}n||}||	| ||g}|rdnd}| j|tjd}|||||fS )Nr   r   r   r   rV  rt   )r   r|   r{   rB   rp   rh   r  )r   r   weight_stride0
weight_bufhxrx  r  hidden_size	proj_size
num_layersbatch_firstdropouttrainbidirectionalbatch_sizesdropout_stateis_input_packed
seq_length
mini_batchbatch_sizes_sumnum_directionsout_sizer   r   
cell_shaper{  rz  reserve_shapereserver-   r-   r.   
_cudnn_rnnY  s2   

r  c                 C   s   |r| j d n| j d }|r| j d n| j d }|
}|r!|||gn|||g}| |}|d u r8tjd| jd}n||j }|d u rKtjd| jd}n||j }tjd| jtjd}||||fS )Nr   r   rV  r   )r|   r{   rB   rp   rh   r  )r   w0w1w2w3hx_cx_r   r  r  r  r  
has_biasesr  r  r  r  r  output_chanelsr   r   rz  r{  ry  r-   r-   r.   mkldnn_rnn_layer  s    
r  c                    sT   | j dkrt dkp dk fdd d S t|  dk fdd d S )Nr   r   c                      rz  )Nz4: Expected reduction dim -1 or 0 for scalar but got r-   r-   rk   r  r-   r.   rP     r|  z'zero_numel_check_dims.<locals>.<lambda>c                      r~  )Nz: Expected reduction dim z to have non-zero size.r-   r-   r  r-   r.   rP     rQ   )r   rB   ry   r   )r}   rk   r  r-   r  r.   zero_numel_check_dims  s   
r  c                    sF   |d urt || }t||  d S t| dk fdd d S )Nr   c                      r  )Nz@: Expected reduction dim to be specified for input.numel() == 0.r-   r-   r#  r-   r.   rP     r  z%check_argmax_argmin.<locals>.<lambda>)r   rk   r  rB   rS   rz   )rp  r}   rk   r-   r#  r.   check_argmax_argmin  s   

r  c                 C   sD   t d| | t| j|d ur|fnd }t| ||}| j|tjdS )Nargmaxrt   )r  r=   r>  r|   r?  r{   rB   r   )r}   rk   rA  r  r|   r-   r-   r.   argmax_argmin_meta  s   r  c                 C   s$   |t jkrt j}t jd||||dS )Nr-   r   )rB   jaggedr(  rp   )rS  rJ   rg   rh   ri   r-   r-   r.   scalar_tensor  s
   

r  c                 C   s   t ||  dd}|  dkrdn| |}t|dko||kdd  t| j}t|dkr3|||< | || j|tj	dfS )NTr  r   r   c                   S   rV   )Nzk not in range for dimensionr-   r-   r-   r-   r.   rP     rX   ztopk_meta.<locals>.<lambda>rt   )
r   rk   r   rB   rS   r   r|   r   r{   r   )r}   r  rk   largestsorted	sliceSizetopKSizer-   r-   r.   	topk_meta  s   
r  c           
      C   s@   |d us|d usJ d|  }|   }	tj||	j|	j|	jdS )Nz;segment_reduce(): Either lengths or offsets must be defined)rJ   rh   rg   )r   rB   r   rJ   rh   rg   )
r  r   r/  r!  r*  r+  r,  r.  data_contiggrad_contigr-   r-   r.   meta__segment_reduce_backward  s   r  c                    s   t  |  dd |  dkr|  nd}t|dko||k fdd t| jd   | j d d   }|rF|  dkrF| d | || j|tj	dfS )NTr  r   r   c                      r  )Nz9kthvalue(): selected number k out of range for dimension r-   r-   r   r-   r.   rP     r  zkthvalue_meta.<locals>.<lambda>rt   )
r   rk   r   rB   rS   r   r|   r   r{   r   )r}   r  rk   rA  dimSizer|   r-   r   r.   kthvalue_meta  s   
$r  c                 C   s   | d ur| n|}t | dkdd  | }| d ur(t |  |kdd  |d ur8t | |kdd  t | |kdd  t | |kdd  t | dkdd  t | |d	 |d
  d kdd  d S )Nr   c                   S   rV   N r-   r-   r-   r-   r.   rP     rX   z(checkLSTMBackwardSizes.<locals>.<lambda>c                   S   rV   r  r-   r-   r-   r-   r.   rP     rX   c                   S   rV   r  r-   r-   r-   r-   r.   rP      rX   c                   S   rV   r  r-   r-   r-   r-   r.   rP   !  rX   c                   S   rV   r  r-   r-   r-   r-   r.   rP   "  rX   c                   S   rV   r  r-   r-   r-   r-   r.   rP   #  rX   r   r   ri  c                   S   rV   r  r-   r-   r-   r-   r.   rP   $  rX   )rB   rS   rk   r   rz   )grad_hygrad_cyrx  r{  ry  defined_gradexp_sizer-   r-   r.   checkLSTMBackwardSizes  s   ,r  c           	      C   s`   | d u r
|d u r
dS t | |||| tj|td}tj|td}|r)|jdddnd }|||fS )NNNNr   r   F)rA  )r  rB   r   legacy_contiguous_memory_formatr  )	r  r  rx  r{  ry  has_bias
grad_gatesgrad_cxr  r-   r-   r.   #_thnn_fused_lstm_cell_backward_impl(  s   
r  c                 C   sf   d }d }d }|d r| |  }|d s|d r.| |d| df}| |d}|||fS )Nr   r   r   r   r  )r  r  r  r  rb  grad_weightr  r-   r-   r.   linear_backward6  s   
r  c                    s   t jdkrjd ||  dksJ dj d| dd   fdd	}jd ||  }jd
 | }jd | }g jd d |||R }|}|j| d}|S )Nr   r-  r   z'Invalid input shape for pixel_shuffle: z with upscale_factor = c                 S   r  r(   r  r  r-   r-   r.   r  I  r  z,meta_pixel_shuffle.<locals>.is_channels_lastc                      sL    rt dkrtjS tjS jtjdrtjS jtjdr$tjS d S r  )r  rB   r   r  r  r  r-   r  r}   r-   r.   r  L  s   z.meta_pixel_shuffle.<locals>.pick_memory_formatrl  r   r   )r   r|   r{   r   )r}   upscale_factorr  r  HrWrr   r   r-   r  r.   meta_pixel_shuffleC  s   & 
r  c                 C   sZ   |  | j}| |j}| |j}| |j}| |j}| |j}|||||||fS r(   r  )r   weight0weight1weight2weight3r  cx_tmpr   hy_cy_grad_output_r_optgrad_hy_r_optgrad_cy_r_optr   r  r  r  r  r  r  r  r  ry  diff_xdiff_hxdiff_cxdiff_w1diff_w2diff_br-   r-   r.   mkldnn_rnn_layer_backwarda  s   r  )	out_int32r   c                C   s   t j| |rt jnt jd S r<  )rB   r   r   r   r   )r}   
boundariesr  r   r-   r-   r.   meta_bucketize  s
   r  d   c                    s   dt dkrt fdd tt t fdd t dk fdd tttfdd tttfd	d tkd
d  tj jj	dS )Nzhistc()r  c                      r  )Nz%"histogram_cpu" not implemented for ''rt   r-   ri  r-   r.   rP     r|  zmeta_histc.<locals>.<lambda>c                      s    dt   S )Nz#: argument 'bins' must be int, not r  r-   binsr  r-   r.   rP     rp  r   c                      rz  )Nz: bins must be > 0, but got r-   r-   r  r-   r.   rP     r|  c                           dt  S )Nz%: argument 'min' must be Number, not r  r-   )r  r[  r-   r.   rP     rp  c                      r  )Nz%: argument 'max' must be Number, not r  r-   )r  r\  r-   r.   rP     rp  c                   S   rV   )Nz&{fn_name}: max must be larger than minr-   r-   r-   r-   r.   rP     rX   r   )
r  rB   rS   r   rY   r   r   rp   rh   rJ   )r   r  r[  r\  r-   )r  r  r   r\  r[  r.   
meta_histc  s*   
r  c                    sd   t   |dd}t  dkptdd   dd  D  fdd  |jt	 d	S )
Nr   rU  r   c                 s   rQ  rR  r-   )r7   r   r-   r-   r.   r\     r  z,meta_upsample_bimode2d_aa.<locals>.<genexpr>r   c                      rZ  r[  r   r-   ri  r-   r.   rP     r|  z+meta_upsample_bimode2d_aa.<locals>.<lambda>r   )
rT  r   rB   rS   rz   r  r{   r   r=   r  )r   rZ  r  r\  r]  rX  r-   ri  r.   meta_upsample_bimode2d_aa  s   

(

r  c                 C   s\   t | dkdd  t | dkdd  t |jjdd  t |jjdd  d S )Nr   c                   S   rV   )Nz%found_inf must be a 1-element tensor.r-   r-   r-   r-   r.   rP     rX   z<_amp_foreach_non_finite_check_and_unscale_.<locals>.<lambda>c                   S   rV   )Nz%inv_scale must be a 1-element tensor.r-   r-   r-   r-   r.   rP     rX   c                   S   rV   )Nz!found_inf must be a float tensor.r-   r-   r-   r-   r.   rP     rX   c                   S   rV   )Nz!inv_scale must be a float tensor.r-   r-   r-   r-   r.   rP     rX   )rB   rS   rz   rJ   r   )r}   r  	inv_scaler-   r-   r.   *_amp_foreach_non_finite_check_and_unscale_  s   r  c                 C   s   t |  }| |S r(   )r   r   r{   )r}   nanposinfneginfr(  r-   r-   r.   
nan_to_num  s   
r  c                 C   s   | j tjtjtjtjhvsJ d| j  d| j}t||}t||}||kr)| S t| 	 }t| 
 }|| || ||< ||< || || ||< ||< | || | S )Nz>torch.transpose_: in-place transposition is not supported for z layout)rg   rB   r)  
sparse_cscr*  
sparse_bscr   r   r   r   r   r   )r}   dim0r  ndimsr   r   r-   r-   r.   r!    s(   	

r!  c                 C   sz   | j }| jr"|  }|  }|dkr|dks!J d| d| dn|  dks0J d| dt| d|dk r:dS dS )	Nr   r   zEt_ expects a tensor with <= 2 sparse and 0 dense dimensions, but got z sparse and z dense dimensionsz6t_ expects a tensor with <= 2 dimensions, but self is r  r   )r   r  r  r  rk   r!  )r}   r  r  r  r-   r-   r.   t_  s   
r  )r  r   sidesorterc                   s   t tjdkpjd d  jd d k fdd t d u p)jjkfdd t |dkp9| d |rAt jnt j}t t jrSt j |d	 S t j
d	|jd
S )Nr   r   c                      s   dt j dt  j S )Nztorch.searchsorted(): boundaries tensor should be 1 dimension or the first N-1 dimensions of boundaries tensor and input value tensor must match, but we got boundaries tensor z and input value tensor r   r|   r-   )r}   sorted_sequencer-   r.   rP     s
   z#meta_searchsorted.<locals>.<lambda>c                      s,   dt  j dd urt j S g  S )Nz[torch.searchsorted(): boundary and sorter must have the same size, but got boundary tensor z and got sorter tensor r  r-   )r  r  r-   r.   rP   "  s   r   zetorch.searchsorted(): side and right can't be set to opposites, got side of left while right was Truert   r-   rq  )rB   rS   r   r|   r   r   rY   r   r   r   rp   rh   )r  r}   r  r   r  r  rJ   r-   )r}   r  r  r.   meta_searchsorted  s"   
r  c                    s(   t  t jt jt jfv fdd d S )Nc                      r  )Nz/Unsupported input type encountered for isin(): r-   r-   rt   r-   r.   rP   :  r  z3_check_for_unsupported_isin_dtype.<locals>.<lambda>)rB   rS   r  
complex128	complex64rt   r-   rt   r.   !_check_for_unsupported_isin_dtype7  s   
r  c                 C   s:   |	rt | ||||||||
|
S t| ||||||||
|
S r(   )r%   _embedding_bag_sparse_backward!meta_embedding_bag_dense_backward)r  r   r+  r  r  maximum_indicesnum_weightsr  r  r  r  r  r-   r-   r.   meta_embedding_bag_backward>  s2   r  c
                    sf   t  jt jt jt jt jfv  fdd td\}
}}||kr't |d u  | 	df}|S )Nc                      rs   )Nz$Unsupported input type encountered: rt   r-   r  r-   r.   rP   x  rw   z3meta_embedding_bag_dense_backward.<locals>.<lambda>r   r   )
rB   rS   rJ   r  r  r  float64r   r{   r   )r  r   r  r  r  r  r  r  r  r  r  r  r  index_grad_weightr-   r  r.   r  i  s   
r  c                 C   s   t d\}}}	| d}
t||kd t|  dk t| dk |d}t| dk t|d|
k | |f}|S )Nr   r   zHembedding_bag_backward: per_sample_weights only supported for mode='sum'r   r   )r   r   rB   rS   rk   r{   )r  r   r   r+  r  r  r  r  r  r  embedding_featuresrG  r   r-   r-   r.   .meta_embedding_bag_per_sample_weights_backward  s   


r  )assume_uniqueinvertc                C   sx   t t| tpt|tdd  t| tst j| |jd} t|ts*t j|| jd}t| j t|j t j| t j	dS )Nc                   S   rV   )Nz<At least one of elements and test_elements must be a Tensor.r-   r-   r-   r-   r.   rP     rX   zmeta_isin.<locals>.<lambda>rV  rt   )
rB   rS   rY   r   r   rh   r  rJ   r   r  )elementstest_elementsr  r  r-   r-   r.   	meta_isin  s   



r  r   c                 C   s4   t | dkdd  t|tjd\}}t j||dS )Nr   c                   S   rV   )Nz,polygamma(n, x) does not support negative n.r-   r-   r-   r-   r.   rP     rX   z meta_polygamma.<locals>.<lambda>rF  rt   )rB   rS   r   r   rG  r   )r   r}   r@   r:   r-   r-   r.   meta_polygamma  s   
r  c                 C   s   t d)Nz.Tensor.item() cannot be called on meta tensors)r   r   r-   r-   r.   meta_local_scalar_dense  s   r   max_lengthspadding_valuec                 C   s\   t |dksJ t |dksJ |d jd d }|d }||g| jdd  R }| |S r  )r   r|   r{   )r   r+  r  r  r  r  r  r-   r-   r.   $meta__jagged_to_padded_dense_forward  s   
r  c                 C      t | t dd }|S )Nc                 S   r  r  rA   r   rG  r   r-   r-   r.   _f  s   z)_create_unary_float_meta_func.<locals>._fr3   r!   funcr  r-   r-   r.   _create_unary_float_meta_func     r
  c                 C   r  )Nc                 S   r1  r  r  )r8   rQ  r-   r-   r.   r    r3  z*_create_binary_float_meta_func.<locals>._fr  r  r-   r-   r.   _create_binary_float_meta_func  r  r  c                    s<   t   fdd} j d}||_ttt||}|S )Nc                    s(    | g|R i |}t | j|j | S r(   r?  )r}   r?   r  r   r+   r-   r.   _fn  s   z#_register_inplace_meta.<locals>._fnr@   )r   rb   r3   getattrr%   )r,   r  inplace_namer-   r+   r.   _register_inplace_meta  s   r  c                    sh   t j jk fdd  g}ttr,t jjkfdd | t|dtjiS )Nc                      r  )Nra  z for `end`, but got dtype rt   r-   )rc   rd   r-   r.   rP     r  zlerp.<locals>.<lambda>c                      r7  )Nra  z for `weight`, but got dtype rt   r-   )rd   r   r-   r.   rP     r  r4   )	rB   rS   rJ   rY   r   r   rA   r   r>   )rd   rc   r   r?   r-   )rc   rd   r   r.   lerp
  s    



r  )r@  c                C   s   t | ||tjdS r  r  r   tensor1tensor2r@  r-   r-   r.   addcmul  s   
r  c                C   s8   t t|jot|j dd  t| ||tjdS )Nc                   S   rV   )N)zFInteger division with addcdiv is no longer supported, and in a future zErelease addcdiv will perform a true division of tensor1 and tensor2. z4The historic addcdiv behavior can be implemented as zA(input + value * torch.trunc(tensor1 / tensor2)).to(input.dtype) zfor integer inputs and as z6(input + value * tensor1 / tensor2) for float inputs. z?The future addcdiv behavior is just the latter implementation: z4(input + value * tensor1 / tensor2), for all dtypes.r-   r-   r-   r-   r.   rP   -  rX   zaddcdiv.<locals>.<lambda>r  )rB   rS   r=   r  rJ   rA   r   r>   r  r-   r-   r.   addcdiv%  s   

r  c                  C   s4  i } dD ]}t | }|D ]}|| vr|| | |< qq|  D ]y\}}t|tjjr*qt|ts1J |tjj	j
| tj| drR|t d v rQt| dq|jrVq| dv r]qd| v rjt|| qd| v rwt|| qd| v rt|| qd	| v rt|| qt|| qd S )
N)rf   post_autogradpre_autogradCompositeImplicitAutogradrf   z is a CompositeImplicitAutograd op, we shouldn't register meta function for it. Instead, we should let the decomposition run and write meta kernels for the base operators.>   aten::cloneaten::copy_aten::rot90aten::_to_copyaten::empty_stridedaten::constant_pad_ndaten::as_strided_scatterzmkldnn::zmkl::zonednn::zquantized::)r   itemsrY   rB   _opsHigherOrderOperatorr   py_impl_CDispatchKeyr'   %_dispatch_has_kernel_for_dispatch_keyrp  r   is_view2_meta_lib_dont_use_me_use_register_meta_for_mkldnnimpl/_meta_lib_dont_use_me_use_register_meta_for_mkl2_meta_lib_dont_use_me_use_register_meta_for_onednn5_meta_lib_dont_use_me_use_register_meta_for_quantized'_meta_lib_dont_use_me_use_register_meta)activate_meta_tablera   registryopoop_overloadr,   r-   r-   r.   activate_metaI  sN   r3  r   r  r(   )NNNFr   r   F)NN)Tru  )r  )r  T)FF)TT)r  )FTN)TFF)TF)r   )r  N)r"  r  )r-   r   r  F)r-   r   FTN)Fr   FNFr   )NF)r   F)r4  r5  FN)NNNNN)r   NNr   )NNF)r  FFN)r  FNN)r  FN)FN)FNNNN)NNNF)Nr   FNN)NNNN)r   TT)NNr   N)r  r   r   )r   )r  (]  r  enumr   	functoolsr   typingr   r   r   r   r   rB   torch._prims_commonr  r=   r	   r
   r   torch._decompr   r   r   r   
torch._opsr   torch._primsr   r   r   r   r   r   r   r   r   r   r   r   torch._prims_common.wrappersr   r   r   r    r!   r  r"   r#   torch.utilsr$   r0   opsr%   libraryLibraryr.  r3   rA   rL   rU   linspacelogspacer(  rr   taker   r   r~   r   r   cummaxcumminr   r   r   _fft_c2cr   _fft_r2cr   randpermgenerator_outr   rx   r   randintr   r   low_outr   randr   _fft_c2rr   rJ  r   r   
unsqueeze_r   _sparse_semi_structured_linearr  rJ   r  _sparse_semi_structured_mmr  _sparse_semi_structured_addmmr
  _cslt_sparse_mmr  rg  r  index_reducer%  index_reduce_r'  index_selectr)  segment_reducer5  r\  	unary_outr9  rk   rB  r[  rD  rE  rK  rH  rL  _assert_asyncrO  msgrR  _printrT  _make_dep_tokenrW  r^  _functional_sym_constrain_rangerc  rh  (_functional_sym_constrain_range_for_sizeri  _functional_assert_asyncrj  r   rw  r   r  r  r  r  _linalg_eighr  r  _linalg_eigvalslinalg_eigvalsr  
linalg_eigr  r  r  r  r  r  r  r  linalg_inv_exr  linalg_ldl_factor_exr  linalg_ldl_solver  	linalg_lur  linalg_lu_factor_exr  linalg_lu_solver  	lu_unpackr  r  	linalg_qrr  r  r  _linalg_svdr  r  r  r  r  linalg_solve_triangularr$  r'  r/  _linalg_detr1  r9  rA  rQ  reflection_pad1drW  replication_pad1drZ  ra  reflection_pad1d_backwardrf  replication_pad1d_backwardrh  ru  reflection_pad2drw  replication_pad2drx  reflection_pad2d_backwardrb  replication_pad2d_backwardr{  r  reflection_pad3dr  replication_pad3dr  reflection_pad3d_backwardreplication_pad3d_backwardr  _pdist_forwardrF   r  _pdist_backwardr  baddbmmr  	bernoullir  
bernoulli_r  r  r  poissonr  _fused_moving_avg_obs_fq_helperr  mmr  r?  r  r  r  miopen_batch_normr  convolutionr  r%  _has_mkldnnr)  r  _convolution_pointwiser  _linear_pointwiser  has_mklr+  r  _mkl_linearr  r,  r  qconv2d_pointwiser  qlinear_pointwiser   r  linear_dynamic_fp16linear_relu_dynamic_fp16r  r-  r  
max_pool2dr  r  
avg_pool2dr:  r>  avg_pool2d_backwardrA  
avg_pool3drT  avg_pool3d_backwardrY  _adaptive_avg_pool2dr[  _adaptive_avg_pool3dr\  _adaptive_avg_pool2d_backwardrb  _adaptive_avg_pool3d_backwardrf  rd  adaptive_max_pool2drr  rt  rv  adaptive_max_pool3dry  rz  r{  repeat_interleaver}  rZ   r  r  rv   _unsafe_indexr  convolution_backwardr  addbmmr  _fused_adam__fused_adamw_r  _fused_adamr  _int_mmr  _convert_weight_to_int4packr  #_convert_weight_to_int4pack_for_cpur  _weight_int4pack_mmr  _weight_int4pack_mm_for_cpur  _weight_int8pack_mmr  _cdist_forwardr  _cdist_backwardr  _embedding_bagr  _embedding_bag_forward_onlyr   r  nansumr  median	nanmedianr  
dim_valuesr  r   r  logical_not_r  repeatr  zero_r  mul_Scalardiv_logical_and_logical_or_logical_xor_r  add_sub_r  rounddecimalsr  r!  
__rshift__r%  
__lshift__r(  zeror*  r  r-  fillr.  relu_r0  	_add_relur2  rrelu_with_noiser9  rrelu_with_noise_functionalr:  rrelu_with_noise_r;  	index_put_unsafe_index_putr>  masked_fill_rA  _masked_scalerC  masked_scatter_rE  masked_scatterrF  masked_scatter_backwardrG  
index_put_rH  aliasrJ  rO  bmmrP  rS  rW  r.  r/  rI  rh  rU  r   max_pool2d_with_indices_backwardrj  max_pool2d_with_indicesrk  fractional_max_pool2dru  max_pool3d_with_indicesr|   max_pool3d_with_indices_backwardr}  r  r  r  grid_sampler_2d_backwardr  r  r  r  r  r  r  select_scatterr  slice_scatterr  r   r  r  gatherr  r  r  r  r  r  scatter_addr  scatter_add_r  r  r   r@  r!  value_reducer  scatter_r  #_scaled_dot_product_flash_attentionr  #_scaled_dot_product_cudnn_attentionr  ,_scaled_dot_product_flash_attention_backwardr  +_scaled_dot_product_flash_attention_for_cpur  4_scaled_dot_product_flash_attention_for_cpu_backwardr  '_scaled_dot_product_efficient_attentionr  0_scaled_dot_product_efficient_attention_backwardr  ,_scaled_dot_product_cudnn_attention_backwardr  _flash_attention_forwardr  _flash_attention_backwardr!  _efficient_attention_forwardr,  _efficient_attention_backwardSymIntr1  
_scaled_mmrB  scatter_reducetwotwo_outrE  scatter_reduce_rF  multinomialrI  rL  rT  rY  _upsample_nearest_exact1dr_  _upsample_nearest_exact2dra  "_upsample_nearest_exact2d_backwardrc  _upsample_nearest_exact3dr   rd  values_stablerg  rw  _thnn_fused_lstm_cellr|  r  r  r  r  r  argminr  r  topkr  _segment_reduce_backwardr  kthvaluer  r   r  r  r  r  pixel_shuffler  r  	bucketize
Tensor_outr  histcr  _upsample_bilinear2d_aa_upsample_bicubic2d_aar  r  r  r!  r  searchsortedr  r  _embedding_bag_backwardr  _embedding_bag_dense_backwardr  *_embedding_bag_per_sample_weights_backwardr  isinr  	polygammar  _local_scalar_denser   _jagged_to_padded_dense_forwardr  r
  r  special_airy_aispecial_bessel_y0special_bessel_y1special_modified_bessel_i0special_modified_bessel_i1special_modified_bessel_k0special_modified_bessel_k1!special_scaled_modified_bessel_k0!special_scaled_modified_bessel_k1special_chebyshev_polynomial_tspecial_chebyshev_polynomial_uspecial_chebyshev_polynomial_vspecial_chebyshev_polynomial_w&special_shifted_chebyshev_polynomial_t&special_shifted_chebyshev_polynomial_u&special_shifted_chebyshev_polynomial_v&special_shifted_chebyshev_polynomial_wspecial_hermite_polynomial_hspecial_hermite_polynomial_hespecial_laguerre_polynomial_lspecial_legendre_polynomial_pr  r  r  r  lerp_addcmul_addcdiv_torch._refs.nn.functionaltorch._refs.specialr3  r-   r-   r-   r.   <module>   s  0
	8	6



	
!"	
2



#
	

	











	




'



"

2
*
*
"7
(&"
%


	
;

/Z&5 ?'$,



e

%	
,"M,
H
TN



.


*(c$
#d	







-


!
T	
]>	
6L+&
T

ge( 

	, $1	








8	*	
		
*'	
7	
	
<	
	
0	
7d



'7'

"
.

*


"
	




E