o
    "i                 -   @   sX*  d dl Z d dlmZ d dlmZmZmZmZmZ d dl	Z	d dl
mZ d dl	mZmZmZ d dlmZmZmZmZ d dlmZ d dlmZmZ d dl
mZmZmZmZmZmZm Z m!Z! d d	l"m#Z#m$Z$m%Z%m&Z&m'Z' d d
l(m)Z)m*Z* d dl+m,Z- e	j.j/Z/e	j01dddZ2dd Z3defddZ4dd Z5dd Z6e3e/j7e/j8ge' ddde	j9ddfddZ:e3e/j;j<e/j;j=ge' dd Z>e3e/j?j<e/j?j=ge' dddd Z?e3e/j@e' d!d" Z@e3e/jAj<e/jAj=e/jBj<e/jBj=ge'd#d$d%d& ZCe3e/jDj<e/jDj=ge' d'd( ZDd)d* ZEe3e/jFj<e/jFj=ge' d+d, ZGe3e/jHj<e/jHj=ge' d-d. ZIe3e/jJjKdd/d0d1ZLe3e/jJj<e	jMdddd2d3d4ZNe3e/jOj<e/jOj=ge' e	jMdddd2d5d6ZPe3e/jOjQe/jOjRge' e	jMdddd2d7d8ZSe3e/jTj<e/jTj=ge' ddddd2d9d:ZUe3e/jVj<e/jVj=ge' d;d< ZWe3e/jXj<dd=d>ZYd?d@ ZZe3e/j[j<dAdB Z\e3e/j]			ddCedDedEedFee dGee^ dHee	j_ fdIdJZ`e3e/ja	ddKedLedMedHee	j_ fdNdOZbe3e/jcdPdPddQdCedKedLedMedHee	j_ f
dRdSZde3e/je				ddTe	jdUe	jdFee dVee dHee	j_ dWeffdXdYZge3e/jhj<dZd[d\ed]eid^ed_e	jd`e^daefdbefdcddZje3e/jkj<dZd[d\ed]eid^ed_e	jd`e^daefdbefdedfZle' e3e/jmj<dgdh Zne3e/joj<dddd dddidjed`e^dkee d$ee dlee dmeidnefdbefdodpZpe3e/jqj<e/jqjrge' dqdr Zse3e/jqjtddsdtZue3e/jvj<e/jvjrge' dudv Zwe3e/jvjtddwdxZxe3e/jyj<dydz Zze3e/jyj=d{d| Z{e3e/j|j<d}d~ Z}e3e/j|j~dd Ze3e/jj<dd Ze3e/jj<ddddddddZe3e/jj<dddZe3e/jj<dddZe3e/jj<dddZe3e/jj<dd Ze3e/jj~dd Zd\ede^fddZd\edede^fddZ	Zddede^deffddZddede^de^fddZdededefde^fddZ	dde^dedCede^fddZde^fddZe3e/jj<e/jjge'dd		Zddede^deffddZe3e/jj<e/jj=ge' dCedbefddZe3e/jge'dddCefddZdedbefddZe3e/je' d\ededefdbefddZe3e/je' dd\ededefdbefddZe3e/je' dd\edefdbefddZe3e/je' dd\edefdbefddZe3e/jj<ddedefdeffddZe3e/jj<e/jj=ge' dCededbefddńZe3e/jj<ddedeffddǄZe3e/jj<e/jj=ge'dddʃddd˜d\edefdefdbeeeef fdd΄Ze3e/jj<e/jj=ge' ddϜdedededefdbef
ddфZe3e/jj<e/jj=ge'dddӃdZdԜdedefdbeeeef fddׄZe3e/jj<e/jj=ge'dddʃdZddٜdedefdefdbeeeef fddۄZe3e/jj<e/jj=ge' dZddܜdedededefdefdbefdd߄Ze3e/je'dddӃ	Z	Zddededefdefdbeeeef f
ddZde^dbeefeff fddZe3e/jj<e/jj=ge'dd	ddede^dbeeef fddZe3e/jj<e/jjge'ddddɃdedbeeeeef fddZe3e/jj<		Z	ddedefdefdee^ fddZdededbeeei eei f fddZdededee^ dbeeef fddZdCededbeffddZe3e/jdZdddddddededefdefdee dee dee dee dbeeeeef fdd Ze3e/jj<e/jj=gdZddddededefdefdefdee dbefddZe3e/je'dd	Z		dd\ededefdefdefdbeeef fd	d
Ze3e/jj<dd Ze3e/je' 	Z	ddCedededefdefdbefddZdd Zdd Ze3e/jăe' dd Ze3e/jƃe' dd Zǐdd Ze3e/jɃe'ddd Ze3e/j˃e'ddd Z̐dd Ze3e/j΃e' d d! Ze3e/jЃe' d"d# Ze3e/jj<e/jje/jj<e/jjge'dd$d% ZՐd&d' Ze3e/j׃e' d(d) Ze3e/jكe' d*d+ Ze3e/jj<e/jje/jj<e/jjge'dd,d- Ze3e/jރe' dd\ed/edbefd0d1Ze3e/je' d2ed\ed/eߐd3edbef
d4d5Ze3e/jj<e/jj=ge' dPdPd6d7d8Ze3e/jj<e/jj=ge' dd/d9d:Ze3e/jj߃dd<d=Ze3e/jjdd>d?Ze3e/jj<		dd@dAZe3e/je' dBdC ZdDdE ZddGdHZ	dܐdIe	jdDe	jdJeeei eif dKeeei eif dLeeei eif dMefdNeidOeeeei eif  fdPdQZdRdS Ze3e/jj<dIe	jdDe	jdFe	jdJeei dKeei dLeei dMefdOeei dNeifdTdUZe	jj	r6e	j01dVddZe3e	j.jjj<dWdX Ze3e	j.jjj<dYdZ Ze	jjre	j01d[ddZe3e	j.jj d\d] Ze	j01d^ddZe3e	j.jjj<d_d` Ze3e	j.jjj<e3e	j.jjjdadb Ze	j01dcddZ	e3e	j.j
j	d	e	f	ddgdhZdidj Ze3e/jj<	d	e		Z	ddkdlZdmdn Ze3e/jj<dodp Ze3e/je' 	d	e		Z	ddqdrZe3e/je'ddsdt Ze3e/jj<dudv Ze3e/jj<dwdx Ze3e/jj<dydz Ze3e/je'dd{d| Zd}ede^fd~dZe3e/j e'dd$dd Z!e3e/j"e'ddd Z#e3e/j$e'dd$dd Z%e3e/j&e'ddd Z'e3e/j(jdܐddZ)e3e/j*j<e/j*j=ge' dd Z+e3e/j,j<e/j,j=ge' dddeideifddZ,e3e/j-je/j.jgdd Z/e3e/j0j<gdd Z1e3e/j2j<e/j2j=ge' dPdPd6ddZ3e3e/j4j<gdddddZ5e3e/j6j<gdddddZ7e3e/j8ge' dd Z9e3e/j:gdd Z;e3e/j<gdd Z=e3e/j>gdd Z?e3e/j@j<dd ZAe3e/jBe' dd ZCe3e/jDj<		 				dddZEe3e/jFj<dd ZGdߐddZHe3e/jIj<e/jIj=ge' dddddZJe3e/jKj<e/jLj<gdd ZMe3e/jKjte/jKjNe/jLjte/jLjNe/jOj<e/jOjPge'd#d$dddZQe3e/jRj<dd ZSe3e/jTj<dd ZUe3e/jVj<dd ZWe3e/jXjYe/jZjYe/jXje/jZje/j[j<e/j\j<e/j]j<gdd Z^e3e/j_jYe/j`jYe/j_je/j`jgdfddZae3e/jbj<e/jbjcgdd Zddd Zee3e/jfje/jfjYgdÐdĄ Zge3e/jhje/jhjYgdŐdƄ Zie3e/jjj<dǐdȄ Zke3e/jlje/jljYgdɐdʄ Zme3e/jnje/jnjYgdːd̄ Zoe3e/jpj<d͐d΄ Zqe3e/jrj<e/jsj<gdڐdϐdЄZte3e/jujYdѐd҄ Zve3e/jwj<dӐdԄ Zxe3e/jydՐdք Zze3e/j{e' dאd؄ Z|e3e/j}dِdڄ Z~e3e/jj<dڐdېd܄Ze3e/jj<dݐdބ ZdܐdߐdZe3e/jj<dd Zdd Zdd Zdd Zdd Z	ddCedeideideideideideideideideideideideideideideideideideideide^deff,dd Zdd ZdCed}edeideideideideideideideideideideideideideideideide^f&ddZdd Ze3e/jj<dd Ze3e/jj<	dd	d
Ze3e/jj<dd Ze3e/je' dd Zdd Ze3e/je' dd Ze3e/je'dd$	d	e	f	dddZe3e/je'ddd ZdCedefddZG dd deZdCededeifddZe3e/jj<dd  Ze3e/je' d!d" Ze3e/je'dd#d$d% Ze3e/jj<gd&d' Ze3e/jj<					dd(d)Ze3e/jjid*d+ Ze3e/jj<d,d- Ze3e/jj<dd.d/Zdd]eid0eid1effd2d3Zd4d5 Zd6d7 Ze3e/jj<dڐd8d9Zdڐd:d;Zdܐd<d=Zd>d? Zdܐd@dAZddBdCZe3e/jj<dDdE Ze3e/jdFdG Ze3e/jje/jje/jje/jjge' dܐdHdIZe3e/jje/jje/jje/jjgdܐdJdKZe3e/jg	dܐdLedMedNedOededPedQedRedSeidTeidUeߐdVefdWedXedYee fdZd[Ze3e/jg	\			ddMedNedOedUeߐdVefd]ee dYee fd^d_Ze3e/jg		dސdLedMedNedOededPedUeߐdVefd]ee dYee fd`daZe3e/jg		ddLedMedNedOedbee dedPedWedXedUeߐdceef dVefdYee fdddeZe3e/jg			dېdLedMedNedOededPedQedRedSeidTeidUeߐdVefdWedXedYee dfeei dgeei f"dhdiZe3e/jg			ddLedMedNedOedFee djee dkee dle	j͐dme	j͐dPedUeߐdWedXedneidoefdYee dpeei dqeff$drdsZe3e/jj<g						dd\e	jdMe	jdFee	j dHee	j_ dtee	j duee	j dvee	j dweffdxdyZe3e/jѐje/jѐjge' dߐdzd{Ze3e/jՐj҃dߐd|d}Ze3e/jj<e/jj=ge' ddd/d~dZؐdd Zِdd Ze3e/jj<e/jj<gdܐddZe3e/jj<e/jj<gdސddZe3e/jj<e/jj<g		dސd}edeeeie	jf  deeeie	jf  dee dee f
ddZe3e/jj<e/jj<gdېddZe3e/jj<e/jje/jjPe/jjgdddZe3e/jjdddddZdd Ze3e/jj<	dސddZe3e/jj<dd Ze3e/jj<dd Zdd Zdd Ze3e/jj<e/jj<gdddZe3e/jj<dddZe3e/jj<dddZe	jZdd Ze3e/jj<dd Ze3e/jj<dd Ze3e/jj<dd Ze3e/jj<dd Ze3e/jje/jjge' dddddZ e3e/jge' dddZe3e/jj<e/jj<g	dސddZe3e/jj<dd Ze3e/jj<e/jj=ge' dېddZe3e	j.j/jdd Ze3e	j.j/j	dd Z	e3e/j
e' dddddddÄZdĐdń Ze3e/j	ddƐdǄZe3e/j	ddȐdɄZe3e/je' dddʜdːd̄Ze3e/je' deid\edbefdΐdτZe3e/jj<dАdф Ze3e/jd\efdҐdӄZdԐdՄ Zd֐dׄ Zee/j ee/j ee/j ee/j ee/j ee/j  ee/j! ee/j" ee/j# ee/j$ ee/j% ee/j& ee/j' ee/j( ee/j) ee/j* ee/j+ ee/j, ee/j- ee/j. ee/j/ d dl(Z	d dl0Z	d dl1Z	dؐdل Z2e2  dS (      N)Enum)ListOptionalSequenceTupleUnion)SymBoolSymFloatTensor)_add_op_to_registry_convert_out_paramsglobal_decomposition_table
meta_table)
OpOverload)_prim_elementwise_meta$ELEMENTWISE_PRIM_TYPE_PROMOTION_KIND)corresponding_complex_dtypecorresponding_real_dtypeelementwise_dtypesELEMENTWISE_TYPE_PROMOTION_KINDIntLikemake_contiguous_strides_forNumber
TensorLike)_maybe_convert_to_dtype_maybe_resize_out_resize_output_check_safe_copy_outout_wrapper)_broadcast_shapes_maybe_broadcast)_pytreeatenIMPLMetac                    s    fdd}|S )Nc                    s$   t    fdd}t|  S )Nc                    s   t t|   d S N)r   r   opfn W/home/ubuntu/SoloSpeech/.venv/lib/python3.10/site-packages/torch/_meta_registrations.pyregister0   s   z0register_meta.<locals>.wrapper.<locals>.register)r   pytree	tree_map_)r)   r,   r&   r(   r+   wrapper-   s   zregister_meta.<locals>.wrapperr*   )r'   r/   r*   r&   r+   register_meta,   s   	r0   type_promotionc                    s>   t j|d| i\}  fdd|D }t| }t|dtjiS )Ntype_promotion_kindc                    s   g | ]}t | qS r*   )r   .0xresult_dtyper*   r+   
<listcomp>B       z$elementwise_meta.<locals>.<listcomp>r1   )utilsr   r    r   r   DEFAULT)r1   args_r*   r6   r+   elementwise_meta9   s   
r>   c                 C   s(   t jt jt jt jt jt ji}|| | S r%   )torch	complex32halfcfloatfloatcdoubledoubleget)dtypefrom_complexr*   r*   r+   toRealValueTypeM   s
   rI   c                    s2   t tg|R   t k fdd d S )Nc                      s   d d  S )Nzoutput with shape z# doesn't match the broadcast shape r*   r*   broadcasted_shape
self_shaper*   r+   <lambda>Z       z)check_inplace_broadcast.<locals>.<lambda>)tupler   r?   _check)rL   
args_shaper*   rJ   r+   check_inplace_broadcastV   s
   rR   Fc	           	         s  t tjrt dkdd  t tjr$t dkdd  tdd fD rMtt  d u r> ntt	 fdd npRt t tj
s[J tt tfdd t tsqJ tdkd	d  tjf|d
||dS )Nr   c                   S      dS Nz:linspace only supports 0-dimensional start and end tensorsr*   r*   r*   r*   r+   rM   n       z(meta_linspace_logspace.<locals>.<lambda>c                   S   rS   rT   r*   r*   r*   r*   r+   rM   s   rU   c                 s   s    | ]}t |tV  qd S r%   )
isinstancecomplex)r4   argr*   r*   r+   	<genexpr>v   s    z)meta_linspace_logspace.<locals>.<genexpr>c                         d  d S )Nzlinspace(): inferred dtype z& can't be safely cast to passed dtype r*   r*   )default_complex_dtyperG   r*   r+   rM      rN   c                      s*   dt j dt  j dt j dS )Nz4received an invalid combination of arguments - got (, ))type__name__r*   )endstartstepsr*   r+   rM      s    c                   S   rS   )Nz$number of steps must be non-negativer*   r*   r*   r*   r+   rM      rU   meta)rG   layoutdevice
pin_memoryrequires_grad)rV   r?   r
   rP   dimanyr:   r   get_default_dtypeis_complex_dtyperG   _check_typer   empty)	ra   r`   rb   baserG   re   rd   rf   rg   r*   )r[   rG   r`   ra   rb   r+   meta_linspace_logspace^   sH   

ro   c                    sN   t  jt jk fdd t |  dko  dk dd  |  jS )Nc                         d j  S )Nz2take(): Expected a long tensor for index, but got rG   r*   indexr*   r+   rM          zmeta_take.<locals>.<lambda>r   c                   S   rS   )Nz*take(): tried to take from an empty tensorr*   r*   r*   r*   r+   rM      rU   )r?   rP   rG   long_check_indexnumel	new_emptyshape)selfrs   r*   rr   r+   	meta_take   s   

r{   rh   c                   sh   j }j }t||kdd  t dko dk fdd tjj}|S )Nc                   S   rS   )Nz=linalg.cross: inputs must have the same number of dimensions.r*   r*   r*   r*   r+   rM      rU   zlinalg_cross.<locals>.<lambda>   c                      s"   d  d   d   S )Nzlinalg.cross: inputs dimension z must have length 3. Got  and sizer*   rh   otherrz   r*   r+   rM      s
   )ndimr?   rP   r   r   ry   rx   )rz   r   rh   x_dy_d	out_shaper*   r   r+   linalg_cross   s   
r   c                 C   s$   t | d t| d tj| tjdS )Nzlinalg.matrix_expmemory_format)squareCheckInputscheckFloatingOrComplexr?   
empty_likecontiguous_formatrz   r*   r*   r+   linalg_matrix_exp   s   

r   valuesindicesc                 C   sV   t j| j| j| jd}t j| j| jt jd}|  dkr'| jdkr't|| j ||fS )Nre   rG   r   )	r?   rm   ry   re   rG   int64rw   r   maybe_wrap_dim)rz   rh   r   r   r*   r*   r+   	cummaxmin   s
   r   c                 C   s   t || j t|  S r%   )r   r   r?   r   
contiguous)rz   rh   r*   r*   r+   logcumsumexp   s   r   c                    s  |j }t|}|| }tt|}dd t|D }	|D ]}
d|	|
< qg g }}|D ]}
|	|
 s6||
 q*||
 q*|| }t|}|  |d | }|j fdddd |||d   }||}dgt|j|d   }|	|}|
d}||d< |}tt|D ]}|||  ||d	 < q| 	|} d
d t|D }d	}|d	 }|dkr|| d ||| < ||||  9 }|d	8 }|dkst||D ]}| d	||  ||| < q| |||  S )Nc                 S      g | ]}d qS Fr*   r4   r=   r*   r*   r+   r8      rN   z_exec_fft.<locals>.<listcomp>Tc                        |  S r%   r*   r5   self_stridesr*   r+   rM          z_exec_fft.<locals>.<lambda>keyreverser|   r      c                 S   r   r   r*   r   r*   r*   r+   r8     rN   )r   lenlistrangeappendstridesortpermutery   reshaper   
as_stridedstorage_offset)outrz   	out_sizesrh   forwardr   signal_ndim
batch_dimsdim_permuteis_transformed_dimdleftright	batch_endtmpinputbatched_sizes
batch_sizebatched_out_sizesiout_stridesbatch_numelr*   r   r+   	_exec_fft   sL   





r   c                    sb   | j jsJ | j}| |}|s|S |d d  }|   |j fdddd t|| |||}|S )Nc                    r   r%   r*   r   r   r*   r+   rM     r   zmeta_fft_c2c.<locals>.<lambda>Tr   )rG   
is_complexry   rx   r   r   r   )rz   rh   normalizationr   r   outputsorted_dimsr*   r   r+   meta_fft_c2c  s   
r   c                 C   sR   | j jsJ t|  }|r|d }|| d d }|||< | j|t| j dS )Nr|      r   rq   )rG   is_floating_pointr   r   rx   r:   r   )rz   rh   r   onesidedoutput_sizeslast_dimlast_dim_halfsizer*   r*   r+   meta_fft_r2c#  s   r   )	generatorc                C   s   t |t| gS r%   )r   r?   Size)nr   r   r*   r*   r+   meta_randperm3  s   r   rG   rd   re   rf   c                C      t j| ||||dS Nr   r?   rm   )r   rG   rd   re   rf   r*   r*   r+   meta_randperm_default8  s   
r   c                C   s   t j|||||dS r   r   )highr   rG   rd   re   rf   r*   r*   r+   meta_randintA  s   
r   c                C   s   t j|||||dS r   r   )lowr   r   rG   rd   re   rf   r*   r*   r+   meta_randint_lowK  s   
r   c                C   r   r   r   )r   rG   rd   re   rf   r*   r*   r+   meta_rand_default\  s   
r   c                 C   s8   | j jsJ t|  }|||d < | j|t| j dS )Nr|   rq   )rG   r   r   r   rx   rI   )rz   rh   r   lastdimr   r*   r*   r+   meta_fft_c2rd  s   r   c                 C   sR   t | dkrtdt|tr'|| |}|  | kr'tj	||   | S )Nr   zQmore than one element of the written-to tensor refers to a single memory location)
r?   _debug_has_internal_overlapRuntimeErrorrV   r
   tor   r"   expand_copydefault)rz   srcnon_blockingintermediater*   r*   r+   
meta_copy_m  s   
r   c                 C   sX   t |  }t |  }||  krdn|| ||  }||d ||| ||fS Nr   )r   r   r   rh   insert)tensorrh   result_sizesresult_strides
new_strider*   r*   r+   inferUnsqueezeGeometry  s    r   c                 C   s0   t ||  d }t| |\}}| || | S r   )r   rh   r   as_strided_)rz   rh   g_sizes	g_stridesr*   r*   r+   meta_unsqueeze_  s   r   r   weight_metabias_activation_opt	out_dtypec           	      C   s   t | j}|d ur|d|dksJ d|d| dd ks%J |d|d< t| jdks7J dd| df}|d urQ| jtjkrM|tjksQJ d| j||d u r[| jn|d	||}|S )	Nr   zoutput size mismatchr   r|   r   z*we can only handle the squashed input case9out_dtype is only supported for i8i8->i32 linear operatorrq   )
r   ry   r   r   rG   r?   int8int32rx   r   )	r   r   r   r   r   r   r   transposed_stridesr   r*   r*   r+   meta_sparse_structured_linear  s$   
	r   mat1	mat1_metamat2c                 C   s   t | jdks	J t |jdksJ t |jdksJ | d|dd ks)J | d|dg}|d urF|jtjkrB|tjksFJ d|j||d u rP|jn|d}|S )Nr   r   r   r   rq   r   ry   r   rG   r?   r   r   rx   )r   r   r   r   r   r   r*   r*   r+   meta_sparse_structured_mm  s   r  r   )alphabetar   c          	      C   s   t | jdksJ dt |jdksJ t |jdksJ t |jdks&J | d|dks4J d|d|dd ksBJ |d|dg}|d ur_|jtjkr[|tjks_J d|j||d u ri|jn|d}|S )Nr   zEonly input broadcasted to columns of mat1 * mat2 product is supportedr   r   r   rq   r  )	r   r   r   r   r  r  r   r   r   r*   r*   r+   meta_sparse_structured_addmm  s,   r  compressed_Adense_Br  transpose_resultc                 C   s   |j tjtjtjtjhv sJ d| j |j ksJ dt|jdks&J d| j tjk}|r0dnd}|d}|d}	| 	 d	 ||  }
|d urS|
|dksSJ |d urg|rc|tjtjtj
hv sgJ d
|rm|	|
fn|
|	f}|j||d}|S )Nz2_cslt_sparse_mm only supports fp16, bf16, and int8zinputs must have the same dtyper   z'_cslt_sparse_mm only supports 2d inputs
   	   r   r      z?out_dtype is only supported for i8i8->fp16, bf16, or i32 matmulrq   )rG   r?   float32float16bfloat16r   r   ry   r   rw   r   rx   )r  r  r   r  r   r  is_int8_input_typecompression_factorkr   moutput_shaperesultr*   r*   r+   meta__cslt_sparse_mm  s8   	

r  T)include_selfrz   rh   rs   sourcereducer  returnc                C   s   t j| t jdS Nr   )r?   r   r   rz   rh   rs   r  r  r  r*   r*   r+   meta_index_reduce  s   
r  c                C      | S r%   r*   r  r*   r*   r+   meta_index_reduce_!  s   
r  c                 C   s.   t |  }|  dkr| ||< | |S )Nr   )r   r   rh   rw   rx   )rz   rh   rs   result_sizer*   r*   r+   meta_index_select/  s   
r   )lengthsr   offsetsaxisunsafeinitialdatar!  r"  r#  r$  c          
         sf   |d urt d fdd}|d ur||jS |d ur/|jd d |jd d f }	||	S td)Nz?segment_reduce(): indices based reduction is not supported yet.c                    s(   t j| j d d   jdt jdS )Nr   rc   rG   re   r   )r?   rm   ry   rG   r   )lengths_shaper#  r&  r*   r+   segment_reduce_lengths_tensorI  s   z:meta_segment_reduce.<locals>.segment_reduce_lengths_tensorr|   r   z<segment_reduce(): Either lengths or offsets must be defined.)NotImplementedErrorry   r   )
r&  r  r!  r   r"  r#  r$  r%  r*  r(  r*   r)  r+   meta_segment_reduce8  s   
r,  c                 C   
   |  dS Nr*   rx   r   r*   r*   r+   meta_max\     
r0  c                 C   6   t | j|f}t| ||}| || j|tjdfS Nrq   r:   reduction_dimsry   _compute_reduction_shaperx   r?   ru   rz   rh   keepdimr  r*   r*   r+   meta_max_dimb  
   r9  c                 C   r-  r.  r/  r   r*   r*   r+   meta_minl  r1  r;  c                 C   r2  r3  r4  r7  r*   r*   r+   meta_min_dimr  r:  r<  c                 C   s4   |   r
t| j}n	t| tjd\}}tj| |dS )Nr2   rq   )r   r   rG   r   r   INT_TO_FLOATr?   r   )rz   r7   r=   r*   r*   r+   
meta_angle|  s   
r?  c                 C   s$   t ||  | j |t | S r%   )r?   _resize_output_r   re   copy_angle)rz   r   r*   r*   r+   meta_angle_out  s   rC  c                 C      d S r%   r*   )valr*   r*   r+   assert_async     rF  c                 C   rD  r%   r*   )rE  
assert_msgr*   r*   r+   assert_async_meta  rG  rI  c                 C   rD  r%   r*   )sr*   r*   r+   
print_meta  rG  rK  rG   rd   re   rf   r   c                 C   s   t jdddS )Nr   rc   re   r   rL  r*   r*   r+   make_dep_token  s   	rN  c                 C   4   ddl m} t| ttfrtd|| ||d d S )Nr   )constrain_range'Constraining SymFloat or Symbool is nyiminmax)%torch.fx.experimental.symbolic_shapesrP  rV   r	   r   
ValueError)r   rS  rT  rP  r*   r*   r+   sym_constrain_range     rW  c                 C      t j| ||d |S NrR  )r"   rW  r   rS  rT  	dep_tokenr*   r*   r+   functional_sym_constrain_range     r]  c                 C   rO  )Nr   )_constrain_range_for_sizerQ  rR  )rU  r_  rV   r	   r   rV  )r   rS  rT  r_  r*   r*   r+   sym_constrain_range_for_size  rX  r`  c                 C   rY  rZ  )r"   r`  r[  r*   r*   r+   'functional_sym_constrain_range_for_size  r^  ra  c                 C   s   |S r%   r*   )rE  rH  r\  r*   r*   r+   functional_assert_async_meta  rG  rb  f_namec                 C   sX   |   dksJ | d| d| dks*J | d| d d| d dd S )Nr   z3: The input tensor must have at least 2 dimensions.r|   z5: A must be batches of square matrices, but they are  by 	 matrices)rh   r   )rz   rc  r*   r*   r+   r     s    r   Anamec                    s   t j jk fdd t j jk fdd t  d dk fdd t  ddk fdd d S )Nc                         dj  d j  dS )Nz:Expected b and A to be on the same device, but found b on z
 and A on 	 instead.rM  r*   rg  rz   r*   r+   rM     
   z(linearSolveCheckInputs.<locals>.<lambda>c                      ri  )Nz=Expected b and A to have the same dtype, but found b of type z and A of type rj  rq   r*   rk  r*   r+   rM     rl  r|   rd  c                      s   d  d d  d dS )Nz3A must be batches of square matrices, but they are rd  re  r|   rf  r   r*   rg  r*   r+   rM     s
   c                      s:   d d  d d  d d d d d 
S )NzIncompatible matrix sizes for z: each A matrix is r|   re  z but each b matrix is rd  r   r*   rg  rh  rz   r*   r+   rM     s   )r?   rP   re   rG   r   )rz   rg  rh  r*   rn  r+   linearSolveCheckInputs  s    


ro  tallow_low_precision_dtypesc                    s^   | j  t|  p|   fdd |s-t tjtjtjtjfv  fdd d S d S )Nc                          d  S )Nz<: Expected a floating point or complex tensor as input. Got r*   r*   rG   rc  r*   r+   rM   	      z(checkFloatingOrComplex.<locals>.<lambda>c                      rr  )Nz*: Low precision dtypes not supported. Got r*   r*   rs  r*   r+   rM     rt  )	rG   r?   rP   r   r   rC   rE   rB   rD   )rp  rc  rq  r*   rs  r+   r     s   r   arg_namec                    s"   t |  dk fdd d S )Nr   c                          d  dS )Nz: The input tensor z! must have at least 2 dimensions.r*   r*   ru  rc  r*   r+   rM     rN   zcheckIsMatrix.<locals>.<lambda>)r?   rP   rh   )rg  rc  ru  r*   rw  r+   checkIsMatrix  s   
rx  Br   c                    sZ   t   t tr ddkn	 ddk fdd d S )Nrd  r|   c                      sH    drdnd d  d d  d d d d d d	S )
Nz2: Incompatible shapes of A and B for the equation zAX = BzXA = Bz (rd  r5   r|   r   r]   r   r*   rg  ry  rc  r   r*   r+   rM   $  s   
z#checkInputsSolver.<locals>.<lambda>)r   rx  r?   rP   r   )rg  ry  r   rc  r*   rz  r+   checkInputsSolver  s   

*r{  r  fn_nameresult_namec                    s&   t jjk fdd d S )Nc                	      s$     d d dj  dj  	S )Nz: Expected z5 and input tensors to be on the same device, but got z on z and input on rM  r*   r|  r   r  r}  r*   r+   rM   1  s   z!checkSameDevice.<locals>.<lambda>)r?   rP   re   )r|  r  r   r}  r*   r~  r+   checkSameDevice,  s   
r  UPLOc                    s8      }tt dko|dkp|dk fdd d S )Nr   ULc                      
   d  S )Nz1Expected UPLO argument to be 'L' or 'U', but got r*   r*   r  r*   r+   rM   <     
 zcheckUplo.<locals>.<lambda>)upperr?   rP   r   )r  UPLO_uppercaser*   r  r+   	checkUplo8  s
   
r  eigenvalueseigenvectorsr  	compute_vc                 C   sp   t | d t| t| j}|r | |}||t|dd n| dg}|  | j|t| j	d}||fS )Nzlinalg.eighF	row_majorr   rq   )
r   r  r   ry   rx   r   r   poprI   rG   )rg  r  r  ry   vecsvalsr*   r*   r+   meta__linalg_eigh@  s   


r  c                 C   s@   t | d t| jr| jnt| j}| j| jd d |dS )Nzlinalg.eigvalsr|   rq   r   r:   rk   rG   r   rx   ry   )r   complex_dtyper*   r*   r+   meta__linalg_eigvalsW  s   


r  c                 C   sX   t | d t| jr| jnt| j}| j| jd d |d}| j| j|d}||fS )Nz
linalg.eigr|   rq   r  )r   r  r   vectorsr*   r*   r+   meta_linalg_eigc  s   


r  r   c                 C   s   | j jtjdddS )Nr   rd  r|   )mTcloner?   r   	transpose)r   r*   r*   r+   cloneBatchedColumnMajorq  s   r  r  c                 C   s   t | S r%   )r  )rz   rg  r  r*   r*   r+   _cholesky_solve_helperu  s   r  c                    sP   t jdkfdd t  jdk fdd t d\}}t|||S )Nr   c                         d j  dS )Nz-b should have at least 2 dimensions, but has  dimensions insteadr   r*   r   r*   r+   rM     rt  z cholesky_solve.<locals>.<lambda>c                      r  )Nz-u should have at least 2 dimensions, but has r  r  r*   rm  r*   r+   rM     rt  cholesky_solve)r?   rP   r   !_linalg_broadcast_batch_dims_namer  )rz   rg  r  self_broadcastedA_broadcastedr*   rk  r+   r  {  s   

r  c                 C   s.   |   dkrtj| tjdS t| d t| S )Nr   r   cholesky)rw   r?   r   legacy_contiguous_formatr   r  rz   r  r*   r*   r+   r    s   
r  c                 C   s   t | d t| S )Ncholesky_inverse)r   r  r  r*   r*   r+   r    s   
r  check_errorsc                 C   sf   t | d t| d | j}t|}t|d}| |}||| | j|d|d  tjd}||fS )Nzlinalg.choleskyFr   r   rq   )	r   r   ry   r   r   rx   r   r?   r   )rg  r  r  A_shaper   	L_stridesr  infosr*   r*   r+   linalg_cholesky_ex  s   



r  tauc                    s  t jdkdd  t ddkdd  t ddkdd  t jj dkfd	d jdkr[jd d }jd d  t  |k fd
d t jjkfdd tdd t jjtjddjj	dS )Nr   c                   S   rS   )NzHtorch.linalg.householder_product: input must have at least 2 dimensions.r*   r*   r*   r*   r+   rM     rU   z,linalg_householder_product.<locals>.<lambda>rd  r|   c                   S   rS   )Nzbtorch.linalg.householder_product: input.shape[-2] must be greater than or equal to input.shape[-1]r*   r*   r*   r*   r+   rM     rU   c                   S   rS   )Nz`torch.linalg.householder_product: input.shape[-1] must be greater than or equal to tau.shape[-1]r*   r*   r*   r*   r+   rM     rU   r   c                         dj  d j  S )Nzptorch.linalg.householder_product: Expected tau to have one dimension less than input, but got tau.ndim equal to  and input.ndim is equal to r  r*   r   r  r*   r+   rM     
   c                      r  )Nzltorch.linalg.householder_product: Expected batch dimensions of tau to be equal to input.shape[:-2], but got r*   r*   actual_batch_tau_shaper*   r+   rM        c                      r  )Nz,torch.linalg.householder_product: tau dtype z does not match input dtype rq   r*   r  r*   r+   rM        
z torch.linalg.householder_productr  Fr  r   r   rG   re   )
r?   rP   r   r   ry   rG   r  empty_stridedr   re   )r   r  expected_batch_tau_shaper*   )r  r   r  r+   linalg_householder_product  sD   


r  c                 C   s^   t | d t| ddd | | j}|| jt| jdd | j| jd d tjd}||fS )Nzlinalg.inv_exF)rq  r  rd  rq   r   r   rx   ry   r   r   r?   r   )rg  r  r  r  r*   r*   r+   linalg_inv_ex_meta  s   
r  LDpivotsinfo)	hermitianr  r  c                C   st   t | d t| d tj| jt| jdd| j| jd}| j| jd d tj	d}| j| jd d tj	d}|||fS )Nztorch.linalg.ldl_factor_exFr  r  r|   rq   rd  )
r   r   r?   r  ry   r   rG   re   rx   int)rz   r  r  r  r  r  r*   r*   r+   linalg_ldl_factor_ex_meta  s   


r  )r  c                   s   t d td t d t jdk fdd jd d }t|jkfdd ttj	fdd tj	 j	k fdd t
 \}}tj|t|d	d
 j	 jdS )Nztorch.linalg.ldl_solver   c                      r  )NzMtorch.linalg.ldl_solve: Expected B to have at least 2 dimensions, but it has r  r  r*   )ry  r*   r+   rM        z'linalg_ldl_solve_meta.<locals>.<lambda>r|   c                      r  )Nzjtorch.linalg.ldl_solve: Expected LD.shape[:-1] and pivots.shape to be the same, but got pivots with shape  insteadry   r*   r  r*   r+   rM     r  c                      rp   )Nz<torch.linalg.ldl_solve: Expected pivots to be integers. Got rq   r*   r  r*   r+   rM     rt   c                      r  )Nz!torch.linalg.ldl_solve: LD dtype z does not match b dtype rq   r*   )ry  r  r*   r+   rM   #      Fr  r  )r   r   ro  r?   rP   r   ry   r:   is_integer_dtyperG   _linalg_broadcast_batch_dimsr  r   re   )r  r  ry  r  expected_pivots_shapeB_broadcast_sizer=   r*   )ry  r  r  r+   linalg_ldl_solve_meta  s6   







r  Pr  )pivotr  c          	         s   t  jdk fdd t j}|d }|d }t||}||d< |r+ |}n dg}||d<  |}||d< ||d<  |}|||fS )Nr   c                      r  )Nz@linalg.lu: Expected tensor with 2 or more dimensions. Got size: r  r  r*   rm  r*   r+   rM   3  rt  z linalg_lu_meta.<locals>.<lambda>rd  r|   r   )r?   rP   r   r   ry   rS  rx   )	rg  r  sizesr  r   r  r  r  r  r*   rm  r+   linalg_lu_meta.  s$   





r  LU)r  r  c          	         s   t  jdk fdd t j}|d }|d }t j|t|dd j jd}|	  t
|||d<  j|t jd	}|	   j|t jd	}|||fS )
Nr   c                      r  )NzFtorch.lu_factor: Expected tensor with 2 or more dimensions. Got size: r  r  r*   rm  r*   r+   rM   Q  rt  z*linalg_lu_factor_ex_meta.<locals>.<lambda>rd  r|   Fr  r  rq   )r?   rP   r   r   ry   r  r   rG   re   r  rS  rx   r  )	rg  r  r  r  r  r   r  r  r  r*   rm  r+   linalg_lu_factor_ex_metaJ  s&   



r  )r   adjointr  c                   s   t d tj jk fdd tjtjkdd  td t |d tddkdd  tjd d jkfdd t	 \}}tj
|t|| d	 j jd
}| dkru|su| ru| }|S )Nztorch.linalg.lu_solvec                      ri  )NzPlinalg.lu_solve: Expected LU and B to have the same dtype, but found LU of type  and B of type r  rq   r*   )ry  r  r*   r+   rM   y  rl  z&linalg_lu_solve_meta.<locals>.<lambda>c                   S   rS   )NzElinalg.lu_solve: pivots should be a Tensor of scalar type torch.int32r*   r*   r*   r*   r+   rM     rU   zlinalg.lu_solver|   c                   S   rS   )NzYlinalg.lu_solve: Number of pivots per batch should be same as the dimension of the matrixr*   r*   r*   r*   r+   rM     rU   c                      r  )Nzclinalg.lu_solve: Expected LU.shape[:-1] and pivots.shape to be the same, but got pivots with shape r  r  r*   r  r*   r+   rM     r  r  r  r   )r   r?   rP   rG   r  r   r{  r   ry   r  r  r   re   rw   r   conj)r  r  ry  r   r  r  r=   r  r*   )ry  r  r  r+   linalg_lu_solve_metak  s<   




r  unpack_dataunpack_pivotsc                    s   t  jdk fdd |rt |jt jkdd  t j}|d }|d }t||}||d< |r9 |}n dg}|rX||d<  |}	||d< ||d<  |}
n dg}	 dg}
||	|
fS )Nr   c                      r  )NzFtorch.lu_unpack: Expected tensor with 2 or more dimensions. Got size: r  r  r*   r  r*   r+   rM     rt  z lu_unpack_meta.<locals>.<lambda>c                   S   rS   )Nztorch.lu_unpack: LU_pivots is expected to be a contiguous tensor of torch.int32 dtype.
Note: this function is intended to be used with the output produced by torch.linalg.lu_factorr*   r*   r*   r*   r+   rM        rd  r|   r   )	r?   rP   r   rG   r   r   ry   rS  rx   )r  r  r  r  r  r  r   r  r  r  r  r*   r  r+   lu_unpack_meta  s4   





r  modec                    sd    dkrd}d}||fS  dkrd}d}||fS  dkr$d}d}||fS t d fdd ||fS )NreducedTcompleteFrc                         d  dS )Nzqr received unrecognized mode 'z=' but expected one of 'reduced' (default), 'r', or 'complete'r*   r*   r  r*   r+   rM     s   z _parse_qr_mode.<locals>.<lambda>r?   rP   )r  	compute_qr  r*   r  r+   _parse_qr_mode  s"   	
r  QRr  c                 C   s   t | d t| d t|\}}| jd }| jd }t||}|r>t| j}|r*|n||d< | |}||t|dd n| dg}t| j}	|sM|sO|n||	d< | |	}
|
|	t|	dd ||
fS )Nz	linalg.qrrd  r|   Fr  r   )	rx  r   r  ry   rS  r   rx   r   r   )rg  r  r  reduced_moder  r   r  Q_shaper  R_shaper  r*   r*   r+   linalg_qr_meta  s"   








r  sign	logabsdetc                 C   s   t | d t| dd | j}| |d d }| j|d d t| jd}tj|t|d| j| j	d}| j|d d tj
d}||||fS )Nzlinalg.slogdetFrd  rq   r  r|   )r   r   ry   rx   rI   rG   r?   r  r   re   r   )rg  ry   r  r  r  r  r*   r*   r+   _linalg_slogdet  s   
r  full_matrices
compute_uvdriverc                 C   s   t | d t| d t| jd d }| jd }| jd }t||}|r]|||r*|n|g }| |}	|	|t|dd ||rB|n||g }
| |
}t| dk}||
t|
|d n| dg}	| dg}| j||g t	| j
d}|	||fS )	Nz
linalg.svdrd  r|   Fr  cudar   rq   )rx  r   r   ry   rS  rx   r   r   device_hintrI   rG   )rg  r  r  r  r   r  r   r  U_shaper  V_shapeVis_cudaSr*   r*   r+   _linalg_svd_meta  s$   







r  arg1arg2c                 C   sn   | j d d }|j d d }t||}t|}|| d| dg7 }t|}||d|dg7 }||fS )Nrd  r|   )ry   r   r   r   )r  r  arg1_batch_sizesarg2_batch_sizesexpand_batch_portionarg1_expand_sizearg2_expand_sizer*   r*   r+   r  <  s   
r  c                 C   sV   |rt | || t| |\}}|| jkr| n| |}||jkr"|n||}||fS r%   )ro  r  ry   expand)r  r  rh  r  r  arg1_broadcastedarg2_broadcastedr*   r*   r+   r  L  s   r  r   c                 C   s6   | j d d }|jdkp| jd |jko|j |k}|S )Nr|   r   )ry   r   )r   r   expected_batched_rhs_shapevector_caser*   r*   r+   linalg_solve_is_vector_rhs^  s
   
r  )r   r  r  r  r  r  c                   sn  t  d t jjk fdd t }|r dn}	t |	|d t|	 \}
}t|p6| dd  |rC|
d d n|
}tj|t	|| jj
d} j} j}tj|t	|d j j
d} j|d d tjd} j|d d	 tjd}||||f}||||f}td
d |D rt||D ]\}}t||j ||j|  t||dd q|S )Nzlinalg.solvec                      s   d j  dj  dS )NzKlinalg.solve: Expected A and B to have the same dtype, but found A of type r  r  rq   r*   rg  ry  r*   r+   rM   u  rl  z"_linalg_solve_ex.<locals>.<lambda>r|   c                   S   rS   )Nzlinalg.solve: Vector broadcasting of the left hand side is not supported for left=False. In this case linalg.solve is equivalent to B / A.squeeze(-1)r*   r*   r*   r*   r+   rM     r  r  Frq   rd  c                 s   s    | ]}|d uV  qd S r%   r*   r3   r*   r*   r+   rY         z#_linalg_solve_ex.<locals>.<genexpr>)	copy_fromcopy_toexact_dtype)r   r?   rP   rG   r  	unsqueezer{  r  r  r   re   ry   r   rx   r   allzipr   r   r   r   )rg  ry  r   r  r  r  r  r  r  B_B_broad_shaper=   result_shaperesult_ry   r   LU_pivots_info_r   resr  or*   r  r+   _linalg_solve_exf  sL   



r  )r   unitriangularr   r  r   c          	      C   s   |d u r
|  dg}t|tsJ t| ||d t|| d \}}|dd o+| }|r6t||j	}|S t
||j	rL||ddj	 |dd |S )Nr   zlinalg.solve_triangularrd  r|   )rx   rV   r   r{  r  r  is_contiguousis_conjr   ry   r   resize_
transpose_)	rg  ry  r  r   r  r   r  A_avoid_copy_Ar*   r*   r+   linalg_solve_triangular_meta  s   
r  solutioncloned_coefficientr  c           	         s   t jdkfdd t  jdk fdd t d  jt jkrOt \}}t j|t|ddj	j
d}t j|t|dd j	 j
d}||fS  jt jks[ jt jkrjt }d	g}||fS t dd
d  ||fS )Nr   c                      r  )NzMtorch.triangular_solve: Expected b to have at least 2 dimensions, but it has r  r  r*   r   r*   r+   rM     r  z'triangular_solve_meta.<locals>.<lambda>c                      r  )NzMtorch.triangular_solve: Expected A to have at least 2 dimensions, but it has r  r  r*   rm  r*   r+   rM     r  triangular_solveFr  r  r   c                   S   rS   )Nz+triangular_solve: Got an unexpected layout.r*   r*   r*   r*   r+   rM     rU   )r?   rP   r   ro  rd   stridedr  r  r   rG   re   
sparse_csr
sparse_bsrr   rx   )	rz   rg  r  r  r  self_broadcast_sizeA_broadcast_sizer  r  r*   rk  r+   triangular_solve_meta  s<   	




r&  c                 C   sp   t | d t| d | | jd d }| | j}|| jt| jdd | j| jd d tjd}|||fS )Nz
linalg.detrd  Fr  r|   rq   r  )rg  detr  r  r*   r*   r+   _linalg_det_meta  s   


r(  c                    s  t jdkdd  t jdkdd  |rdndt j jd kfdd t j jd kfdd t jd jd kd	d  t jj d
kfdd t jjkfdd jdkrjd d }jd d t |kfdd jd d  t  |k fdd t jjkfdd t jjkfdd tdd tdd t jjtjddjjdS )Nr   c                   S   rS   )Nz3torch.ormqr: input must have at least 2 dimensions.r*   r*   r*   r*   r+   rM     rU   zormqr.<locals>.<lambda>c                   S   rS   )Nz3torch.ormqr: other must have at least 2 dimensions.r*   r*   r*   r*   r+   rM     rU   rd  r|   c                      r  )Ntorch.ormqr: other.shape[z0] must be greater than or equal to tau.shape[-1]r*   r*   left_size_conditionr*   r+   rM     rt   c                      r  )Nr)  z"] must be equal to input.shape[-2]r*   r*   r*  r*   r+   rM     rt   c                   S   rS   )NzHtorch.ormqr: tau.shape[-1] must be less than or equal to input.shape[-1]r*   r*   r*   r*   r+   rM     rU   r   c                      r  )Nz[torch.ormqr: Expected tau to have one dimension less than input, but got tau.ndim equal to r  r  r*   r  r*   r+   rM     r  c                      r  )Nzhtorch.ormqr: Expected other to have the same number of dimensions as input, but got other.ndim equal to r  r  r*   r   r   r*   r+   rM   #  r  c                      r  )NzWtorch.ormqr: Expected batch dimensions of tau to be equal to input.shape[:-2], but got r*   r*   r  r*   r+   rM   .  r  c                      r  )NzYtorch.ormqr: Expected batch dimensions of other to be equal to input.shape[:-2], but got r*   r*   )actual_batch_other_shaper*   r+   rM   7  r  c                         d j  dj  S )NzPtorch.ormqr: Expected input and tau to have the same dtype, but input has dtype z and tau has dtype rq   r*   r  r*   r+   rM   ?  r  c                      r.  )NzRtorch.ormqr: Expected input and other to have the same dtype, but input has dtype z and other has dtype rq   r*   r,  r*   r+   rM   F  r  ztorch.ormqrr  r   Fr  r  )	r?   rP   r   ry   rG   r  r  r   re   )r   r  r   r   r  expected_batch_shaper*   )r-  r  r   r+  r   r  r+   ormqr  sn   	







r0  c                   s   t td  k fdd j}| d k}|}| }|r3td|D ]}|o0|dk}q&ntd|D ]}|oB|dk}q8t |pI| fdd d S )Nr   c                      s   dd   dt  S )Nzpadding size is expected to be r   z, but got: r   r*   )rh   paddingr*   r+   rM   Z      z,_padding_check_valid_input.<locals>.<lambda>r   r   c                      s    d d  d d  dj  S )Nz	Expected r   zD or r   zcD (batch mode) tensor with possibly 0 batch size and other non-zero dimensions for input, but got: r  r*   )rh   r   r*   r+   rM   o  s   )r?   rP   r   r   r   r   )r   r2  rh   	input_dimis_batch_modevalid_batch_modevalid_non_batch_moder   r*   )rh   r   r2  r+   _padding_check_valid_inputW  s$   r8  c                   s   d}d d}j dkrd} d7  |d7 }t|dd |\|}   |rHtk o>k  fdd tdkfdd j dkra|fS ||fS )	Nr   r   r~   r}   c                         d d d  dj  S NzcArgument #4: Padding size should be less than the corresponding input dimension, but got: padding (r\   ) at dimension 
 of input r  r*   dim_wr   pad_lpad_rr*   r+   rM        z_pad1d_common.<locals>.<lambda>c                      rZ   )Nz
input (W: z%) is too small. Calculated output W: r*   r*   )input_woutput_wr*   r+   rM     rN   r   )r   r   r8  r?   rP   rx   )r   r2  is_reflection	dim_planenbatchnplaner*   )r>  r   rB  rC  r?  r@  r+   _pad1d_commonv  s0   




rH  c                 C      t | |ddS NTrD  rH  r   r2  r*   r*   r+   meta_reflection_pad1d     rN  c                 C   rI  NFrK  rL  rM  r*   r*   r+   meta_replication_pad1d  rO  rQ  c                   s   d |st t|dkdd  jdkr d7  |\ }|  |r=t |k o3|k  fdd t  k fdd jS )Nr   r   c                   S   rS   )Nz padding size is expected to be 2r*   r*   r*   r*   r+   rM     rU   z(_pad1d_backward_common.<locals>.<lambda>r~   c                      r9  r:  r  r*   r=  r*   r+   rM     rA  c                         d d   S Nz(grad_output width unexpected. Expected: , Got: r   r*   r>  grad_outputrC  r*   r+   rM     r9   r?   rP   r   r   r   rx   ry   )rV  r   r2  rD  rB  r*   )r>  rV  r   rC  r?  r@  r+   _pad1d_backward_common  s$   

rX  
grad_inputc                 C      t | ||ddS rJ  rX  rV  r   r2  r*   r*   r+   meta_reflection_pad1d_backward     r]  c                 C   rZ  rP  r[  r\  r*   r*   r+   meta_replication_pad1d_backward  r^  r_  c                   s2  dd d}d}t |dd j}|dkr'd}d7  d7  |d7 }|\	
|} 
   	 |rptk oS	k 	fdd t
k ofk  
fdd tdkpydkfd	d jd
kr|fS ||fS )Nr   r   r   r}      c                      r9  r:  r  r*   r=  r*   r+   rM     rA  z_pad2d_common.<locals>.<lambda>c                         d d d  dj  S NzcArgument #6: Padding size should be less than the corresponding input dimension, but got: padding (r\   r;  r<  r  r*   dim_hr   pad_bpad_tr*   r+   rM     rA  c                      s   d  d d d S )Nz
input (H:  W: z%) is too small. Calculated output H: r*   r*   )input_hrB  output_hrC  r*   r+   rM     s
   r~   r8  r   r   r?   rP   rx   )r   r2  rD  
dim_slicesrF  r   rG  r*   )rd  r>  r   rh  rB  ri  rC  re  r?  r@  rf  r+   _pad2d_common  sB   




rl  c                 C   rI  rJ  rl  rM  r*   r*   r+   meta_reflection_pad2d  rO  rn  c                 C   rI  rP  rm  rM  r*   r*   r+   meta_replication_pad2d  rO  ro  c                    s   dd d}d}|j }| dkr!|d }d7  d7  |d7 }|\}}}}	|| }
|  }| }|| |	 || | tkfdd t k fdd ||j S )Nr   r   r   r`  c                      rR  rS  r   r*   rU  r*   r+   rM   3  r9   z%meta_pad2d_backward.<locals>.<lambda>c                      rR  Nz)grad_output height unexpected. Expected: rT  r   r*   rd  rV  ri  r*   r+   rM   7  r9   )ry   rh   r?   rP   r   rx   )rV  rz   r2  rE  rF  rL   r?  r@  rf  re  rG  rh  rB  r*   )rd  r>  rV  ri  rC  r+   meta_pad2d_backward  s2   
rr  c             	      s  ddd d}t |dd jdk}|r+d}d7 d7  d7  |d7 }|\
|}    
   	|rtk odk fdd tk ow
k 
fd	d tk ok  fd
d t	dkpdkpdk	fdd |r||	fS |	fS )Nr~   r   r   r   r}      c                      r9  r:  r  r*   r=  r*   r+   rM   Y  rA  z_pad3d_common.<locals>.<lambda>c                      ra  rb  r  r*   rc  r*   r+   rM   `  rA  c                      ra  )NzcArgument #8: Padding size should be less than the corresponding input dimension, but got: padding (r\   r;  r<  r  r*   )dim_dr   pad_bkpad_fr*   r+   rM   g  rA  c                      s(   d  d d d d d S )Nz
input (D:  H: rg  z%) is too small. Calculated output D: r*   r*   )input_drh  rB  output_dri  rC  r*   r+   rM   o  s   rj  )r   r2  rD  rE  
batch_moderF  rG  r*   )rt  rd  r>  r   rx  rh  rB  ry  ri  rC  re  ru  rv  r?  r@  rf  r+   _pad3d_common<  sP   





r{  c                 C   rI  rJ  r{  rM  r*   r*   r+   meta_reflection_pad3d{  rO  r}  c                 C   rI  rP  r|  rM  r*   r*   r+   meta_replication_pad3d  rO  r~  c                    s(  t t|dkdd  |jdksJ j|jksJ ddd |jdkr2d7 d7  d7  |\}}}}}}| }	|}
|}|	| | |
| | || | t kfdd t kfd	d t  k fd
d ||jS )N   c                   S   rS   )Nz padding size is expected to be 6r*   r*   r*   r*   r+   rM     rU   z%meta_pad3d_backward.<locals>.<lambda>r~   r   r   rs  c                      rR  rS  r   r*   rU  r*   r+   rM     r9   c                      rR  rp  r   r*   rq  r*   r+   rM     r9   c                      rR  )Nz(grad_output depth unexpected. Expected: rT  r   r*   )rt  rV  ry  r*   r+   rM     r9   rW  )rV  r   r2  r?  r@  rf  re  rv  ru  rx  rh  rB  r*   )rt  rd  r>  rV  ry  ri  rC  r+   meta_pad3d_backward  s<   




r  r   pc                 C   s^   t |  dd  | d}|dkr| dgjt jdS | ||d  d fjt jdS )Nc                   S   rS   )Nz(_pdist_forward requires contiguous inputr*   r*   r*   r*   r+   rM     rU   z%meta__pdist_forward.<locals>.<lambda>r   r   r   r   )r?   rP   r  r   rx   r   r  )rz   r  r   r*   r*   r+   meta__pdist_forward  s   
r  gradpdistc                 C   s8   t | dd  t | dd  t j|t jdS )Nc                   S   rS   )Nz._pdist_backward requires self to be contiguousr*   r*   r*   r*   r+   rM     rU   z&meta__pdist_backward.<locals>.<lambda>c                   S   rS   )Nz/_pdist_backward requires pdist to be contiguousr*   r*   r*   r*   r+   rM     rU   r   )r?   rP   r  r   r  )r  rz   r  r  r*   r*   r+   meta__pdist_backward  s   r  )r  r  c          	         s     d}  d} d}|||ft  dkdd  t dkdd  tj j  ko=jkn   fdd  j}j|d |d td kocd kfd	d   S )
Nr   r   r   r~   c                   S   rS   Nzbatch1 must be a 3D tensorr*   r*   r*   r*   r+   rM     rU   zmeta_baddbmm.<locals>.<lambda>c                   S   rS   Nzbatch2 must be a 3D tensorr*   r*   r*   r*   r+   rM     rU   c                      s   dj  d j  dj  S )Nz+Input dtypes must be the same, got: input: z
, batch1: z
, batch2: rq   r*   )batch1batch2rz   r*   r+   rM         c                	      &   d d d d  d d  d	S Nz@Expected size for first two dimensions of batch2 tensor to be: [r\   z] but got: [r   r   ].r*   r*   batch2_sizesbscontraction_sizer*   r+   rM     s   )r   r  r?   rP   rh   rG   ry   rx   )	rz   r  r  r  r  dim1dim2dim3batch1_sizesr*   )r  r  r  r  r  rz   r+   meta_baddbmm  s&   


r  c                C      t |  S r%   r?   r   r   )rz   r   r*   r*   r+   meta_bernoulli  s   r        ?c                 C   r  r%   r*   rz   r  r   r*   r*   r+   meta_bernoulli_  rG  r  c                 C   r  r%   r  r  r*   r*   r+   meta_bernoulli_p  rO  r  c                 C   s6   t |
|  k dd  t j| t jd}t | |fS )Nc                   S   rS   )NzJError in fused_moving_avg_obs_fake_quant_cpu: ch_axis must be < self.dim()r*   r*   r*   r*   r+   rM     rU   z6meta__fused_moving_avg_obs_fq_helper.<locals>.<lambda>rq   )r?   rP   rh   r   bool)rz   observer_onfake_quant_onrunning_minrunning_maxscale
zero_pointaveraging_const	quant_min	quant_maxch_axisper_row_fake_quantsymmetric_quantmaskr*   r*   r+   $meta__fused_moving_avg_obs_fq_helper  s   
r  c                    sn   t |  dkdd  t | dkdd  | j\ |j\t  k fdd | S )Nr   c                   S   rS   )Nza must be 2Dr*   r*   r*   r*   r+   rM     rU   zmeta_mm.<locals>.<lambda>c                   S   rS   )Nzb must be 2Dr*   r*   r*   r*   r+   rM     rU   c                	      s   d d  d d d	S )Nz/a and b must have same reduction dim, but got [r\   z] X [r  r*   r*   M1M2Nr  r*   r+   rM      s    )r?   rP   rh   ry   rx   abr*   r  r+   meta_mm  s   

r  c                    s0   |rt  fddtjD S tj S )Nc                 3   s&    | ]}| vrj | nd V  qdS )r   Nr  r4   r   dimsrz   r*   r+   rY   '  s   $ z+_compute_reduction_shape.<locals>.<genexpr>)rO   r   r   r:   compute_reduction_output_shapery   )rz   r  r8  r*   r  r+   r6  %  s   r6  strc                 C   s   t | tjjr| jjS dS )Nr  )rV   r?   _subclasses
FakeTensorfake_devicer^   )r   r*   r*   r+   r  0  s   r  input_tensorr   r2  dilationis_transposedgroupsoutput_paddingc                 C   s  dt dt dt dt dt dt fdd}dt dt dt dt dt d	t dt fd
d}	|jdd  }
| jdd  }|r<||jd  }n|jd }|jd | | jd krQtd| jd |g}t|tre|gt| }nt|dkrt|d gt| }t|tr|gt| }nt|dkr|d gt| }t|tr|gt| }nt|dkr|d gt| }d }|rt|tr|gt| }nt|dkr|d gt| }n|}tt|D ]2}|r||	|| || || |
| || ||  q|||| || || |
| ||  q|S )Nlnr  r   r  rJ  r  c                 S   s$   | d|  ||d   d | d S )a  
        Formula to apply to calculate the length of some dimension of the output

        See: https://pytorch.org/docs/stable/generated/torch.nn.Conv2d.html

        Args:
            ln: length of the dimension
            p: padding in that dim
            d: dilation in that dim
            k: kernel size in that dim
            s: stride in that dim
        Returns:
            The output length
        r   r   r*   )r  r  r   r  rJ  r*   r*   r+   _formulaA  s   $z+calc_conv_nd_return_shape.<locals>._formular'   c                 S   s(   | d | d|  ||d   | d S )a  
        Formula to apply to calculate the length of some dimension of the output
        if transposed convolution is used.
        See: https://pytorch.org/docs/stable/generated/torch.nn.ConvTranspose2d.html

        Args:
            ln: length of the dimension
            p: padding in that dim
            d: dilation in that dim
            k: kernel size in that dim
            s: stride in that dim
            op: output padding in that dim

        Returns:
            The output length
        r   r   r*   )r  r  r   r  rJ  r'   r*   r*   r+   _formula_transposedR  s   (z6calc_conv_nd_return_shape.<locals>._formula_transposedr   r   r   zInvalid channel dimensions)r  ry   r   rV   r   r   r   r   )r  r   r   r2  r  r  r  r  r  r  kernel_sizer  out_channels	ret_shapeoutput_padding_listr   r*   r*   r+   calc_conv_nd_return_shape7  sZ   "
&




"r  c                 C      t j| t jkS r%   r?   _prims_commonsuggest_memory_formatchannels_lasttenr*   r*   r+   is_channels_last     r  c	              	      sf    fdd}	t  ||||||r|nd }
d}d} |dkr%d|
|<  |
}|j|	 d}|S )Nc                      s^   t  dkrt strtjS nt rtjS  jtjdr#tjS  jtjdr-tjS d S Nr  r   )r  r  r?   r  r  r   preserve_formatr*   r  r   r*   r+   pick_memory_format  s   z%meta_conv.<locals>.pick_memory_formatr   r   r   )r  r   rx   r   )r  r   r   r   r2  r  r  r  r  r  	shape_outinput_channels_dimoutput_channels_dimr   r*   r  r+   	meta_conv  s$   

r  mkldnnc
              	   C   sH   t | ||||d|g }
| |
}tj}|  dkrtj}|j|d}|S )NFrs  r   )r  rx   r?   r  rh   channels_last_3dr   )r  r   r   r2  r   r  r  attrscalars	algorithmr  r   out_memory_formatr*   r*   r+   meta_mkldnn_convolution_default  s   
r  c                 C   s$   |  g | jd d |jd R S Nr|   r   rx   ry   )r  r   r   r  r  r  r*   r*   r+   meta_linear_pointwise_default  s   $r  mklc                 C   s$   |  g | jd d |jd R S r  r  )r  packed_weightorig_weightr   r   r*   r*   r+   meta_mkl_linear  s   r  onednnc              	   C   sJ   t | ||||	d|
d }|tjtjfv sJ | j||d}|jtjd}|S )NFrq   r   )r  r?   r  r  rx   r   r  )r5   x_scalex_zpww_scalew_zpr   r   r2  r  r  output_scaleoutput_zero_pointoutput_dtyper  r  r  r  r   r*   r*   r+   meta_qconv2d_pointwise	  s   
r  c                 C   s>   t | j}|jd |d< |	tjtjfv sJ | j||	d}|S )Nr   r|   rq   )r   ry   r?   r  r  rx   )r5   r  r  r  r  r  r   r  r  r  post_op_namepost_op_argspost_op_algorithmr  r   r*   r*   r+   meta_qlinear_pointwise'	  s
   
r  	quantizedr*   r   r   c                 C   sr   t | |||||\}}}|  dkr| dnd}	tj}
|  dkr(|||g}n|	|||g}tj|| j| j|
dS Nr`  r   r~   r'  )#max_pool2d_checks_and_compute_shaperh   r   r?   r  rm   rG   re   r   r  r   r2  r  	ceil_modenInputPlaneoutputHeightoutputWidthrF  r   r   r*   r*   r+   meta_quantized_max_pool2dC	  s$   r  c                    s4   t   koj k fdd d S )Nc                      s8   d  d d dd   d dj   S )NzExpected a tensor of dimension z and tensor.size[z] == r\   zbut got : dimension z] = rh   ry   r*   rh   dim_sizer   r   r*   r+   rM   e	  s    z check_dim_size.<locals>.<lambda>)r?   rP   rh   ry   )r   rh   r  r   r*   r  r+   check_dim_sizeb	  s   r  c                 C   sb  dd }|d|\}}	t t|dv dd  t|dkr#||	}
}nt|dkr3|d |d }
}n|d	|\}
}|d
|\}}t |d u pJ|dkdd  |  dkrZ| dnd}| d}| d}| d}t||||
d|}t||	||d|}t| }t| ||	|
|||dd|||||| |  dkr|||g}n||||g}t j	|| j
| j|dS )Nc                    D   t t|dv  fdd |d }t|dkr|n|d }||fS )Nr   r   c                      r  )Nzavg_pool2d: 4 must either be a single int, or a tuple of two intsr*   r*   rh  r*   r+   rM   w	  rt   z1meta_avg_pool2d.<locals>.unpack.<locals>.<lambda>r   r   r?   rP   r   rh  rE  HWr*   r	  r+   unpackt	     

zmeta_avg_pool2d.<locals>.unpackr  r   r   r   c                   S   rS   NzOavg_pool2d: stride must either be omitted, a single int, or a tuple of two intsr*   r*   r*   r*   r+   rM   	  rU   z!meta_avg_pool2d.<locals>.<lambda>r   r   r   r2  c                   S   rS   Nzdivisor must be not zeror*   r*   r*   r*   r+   rM   	  rU   r`  r  rd  r|   r~   r'  )r?   rP   r   rh   r   pooling_output_shaper:   r  pool2d_shape_checkrm   rG   re   )r   r  r   r2  r  count_include_paddivisor_overrider  kHkWdHdWpadHpadWrF  r  inputHeight
inputWidthr  r   r   r   r*   r*   r+   meta_avg_pool2dj	  sb   
	




r   c                 C   sj   t | ||||||dd|	|
|||| |  }|	}t|||d | t|||d | t|||d | d S )Nr   r~   r   )r  rh   r  )r   
gradOutputrF  r  r  r  r  r  r  r  r  r  r  r   
mem_formatr   nOutputPlaner*   r*   r+   avg_pool2d_backward_shape_check	  s,   r$  c                 C   s  t t|dkpt|dkdd  |d }t|dkr|n|d }	t t|dkp5t|dkp5t|dkdd  t|dkrB|n|d }
t|dkrN|	nt|dkrV|
n|d }t t|dkpgt|dkdd  |d }t|dkrx|n|d }t |d u p|dkdd  |j}| d	kr|d
 nd}|d }|d }|d }t||||
d|}t||	||d|}t|}t|| |||	|
||||||||| t j	||j
|j|dS )Nr   r   c                   S   rS   )NzKavg_pool2d: kernel_size must either be a single int, or a tuple of two intsr*   r*   r*   r*   r+   rM   	  rU   z*meta_avg_pool2d_backward.<locals>.<lambda>r   c                   S   rS   r  r*   r*   r*   r*   r+   rM   	  rU   c                   S   rS   )NzGavg_pool2d: padding must either be a single int, or a tuple of two intsr*   r*   r*   r*   r+   rM   	  rU   c                   S   rS   r  r*   r*   r*   r*   r+   rM   
  rU   r`  r  r  rd  r|   r'  )r?   rP   r   ry   rh   r  r:   r  r$  rm   rG   re   )gradOutput_r   r  r   r2  r  r  r  r  r  r  r  r  r  
input_sizerF  r  r  r  r  r   r"  r*   r*   r+   meta_avg_pool2d_backward	  sj   "(
r'  c                 C   s
  t t|dv dd  |d }t|dkr|n|d }t|dkr$|n|d }	t | p2t|dv dd  |s;|n|d }
|sC|nt|dkrK|
n|d }|sS|	nt|dkr[|
n|d }t t|dv dd  |d }t|dkrw|n|d }t|dkr|n|d }t | jd	v d
d  t | p|dkdd  | d}| d}| d}| d}| d}t||||
d|}t||||d|}t||	||d|}t| ||||	|
|||||ddd||||||ddd | jdkr| ||||fS | |||||fS )Nr   r~   c                   S   rS   NzFavg_pool3d: kernel_size must be a single int, or a tuple of three intsr*   r*   r*   r*   r+   rM   :
  rU   z!meta_avg_pool3d.<locals>.<lambda>r   r   r   c                   S   rS   NzJavg_pool3d: stride must be omitted, a single int, or a tuple of three intsr*   r*   r*   r*   r+   rM   B
  rU   c                   S   rS   NzBavg_pool3d: padding must be a single int, or a tuple of three intsr*   r*   r*   r*   r+   rM   J
  rU   r`  rs  c                   S   rS   Nz9non-empty 4D or 5D (batch mode) tensor expected for inputr*   r*   r*   r*   r+   rM   R
  rU   c                   S   rS   r  r*   r*   r*   r*   r+   rM   W
  rU   r  r  rd  r|   zavg_pool3d()T)check_input_sizer`  )r?   rP   r   r   r   r  pool3d_shape_checkrx   )r   r  r   r2  r  r  r  kTr  r  dTr  r  padTr  r  rF  nslicesitimeiheightiwidthotimeoheightowidthr*   r*   r+   meta_avg_pool3d-
  s   
  






r:  c                 C   s  t t|dv dd  |d }t|dkr|n|d }	t|dkr$|n|d }
t | p2t|dv dd  |s;|n|d }|sC|	nt|dkrK|n|d }|sS|
nt|dkr[|n|d }t t|dv dd  |d }t|dkrw|n|d }t|dkr|n|d }t |jd	v d
d  t | p|dkdd  |d}|d}|d}|d}t||||d|}t||	||d|}t||
||d|}t|| |||	|
||||||||||||d ||jS )Nr(  c                   S   rS   r)  r*   r*   r*   r*   r+   rM   
  rU   z*meta_avg_pool3d_backward.<locals>.<lambda>r   r   r   c                   S   rS   r*  r*   r*   r*   r*   r+   rM   
  rU   c                   S   rS   r+  r*   r*   r*   r*   r+   rM   
  rU   r,  c                   S   rS   r-  r*   r*   r*   r*   r+   rM   
  rU   c                   S   rS   r  r*   r*   r*   r*   r+   rM   
  rU   r  r  rd  r|   zavg_pool3d_backward())	r?   rP   r   r   r   r  avg_pool3d_backward_shape_checkrx   ry   )rV  r   r  r   r2  r  r  r  r0  r  r  r1  r  r  r2  r  r  r3  r4  r5  r6  otime_for_shape_checkoheight_for_shape_checkowidth_for_shape_checkr*   r*   r+   meta_avg_pool3d_backward
  st   
  




r?  c                    sZ   t  jdkp jdk fdd  jd d t| }t }t j| j j	|dS )Nr~   r`  c                      rp   )Nz"Expected 3D or 4D tensor, but got r  r*   r   r*   r+   rM   
  rt   z*meta_adaptive_avg_pool2d.<locals>.<lambda>rd  r'  )
r?   rP   r   ry   rO   r:   r  rm   rG   re   )rz   output_sizer  r   r*   r   r+   meta_adaptive_avg_pool2d
  s   

rA  c                    s@   t  jdkp jdk fdd   jd d t| S )Nr`  rs  c                      rp   )Nz"Expected 4D or 5D tensor, but got r  r*   r   r*   r+   rM   
  rt   z*meta_adaptive_avg_pool3d.<locals>.<lambda>r  )r?   rP   r   rx   ry   rO   )rz   r@  r*   r   r+   meta_adaptive_avg_pool3d
  s
   
rB  c                    s    j }td|D ]t dk fdd qt|dkp$|dkfdd tj jk fdd tj}trDtj}	j
j|d	S )
Nr   r   c                      s   d j  d dS )Nz{adaptive_avg_pool2d_backward(): Expected grad_output to have non-zero                       size for non-batch dimensions,  with dimension  being emptyr  r*   )grad_outr   r*   r+   rM   
  s
    z4meta__adaptive_avg_pool2d_backward.<locals>.<lambda>r~   r`  c                      rp   )NzBadaptive_avg_pool2d_backward(): Expected 3D or 4D tensor, but got r  r*   r   r*   r+   rM   
  rt   c                      r  Nzexpected dtype z! for `grad_output` but got dtype rq   r*   )rE  rz   r*   r+   rM   
  r  r   )r   r   r?   rP   r   rG   r   r  r  rx   ry   r   )rE  rz   r   r   r*   )rE  r   rz   r+   "meta__adaptive_avg_pool2d_backward
  s$   

rG  c                 C   s   t | d tj|tjdS )Nadaptive_avg_pool3d_backwardr   )!_adaptive_pool_empty_output_checkr?   r   r  rV  rz   r*   r*   r+   "meta__adaptive_avg_pool3d_backward  s   
rK  rV  c                    s<   j }td|D ]tdk fdd qd S )Nr   r   c                      s     dj  d dS )Nzc(): Expected grad_output to have non-zero size for non-batch dimensions, but grad_output has sizes rC  rD  r  r*   ru  rV  r   r*   r+   rM     s
   z3_adaptive_pool_empty_output_check.<locals>.<lambda>)r   r   r?   rP   r   )rV  ru  r   r*   rL  r+   rI    s   rI  c                    s"  j }t|dv fdd td|D ] t dk fdd qtt|dkdd  d}d}d}j d	krGd}|d7 }|d }|\}}j d
krm|||f}|}	j|tjd}
|	|
fS ||||f}t	}|j
|d}	j|tjdj
|d}
|	|
fS )Nr~   r`  c                      rp   )Nz:adaptive_max_pool2d(): Expected 3D or 4D tensor, but got: r  r*   r   r*   r+   rM     rt   z*meta_adaptive_max_pool2d.<locals>.<lambda>r   r   c                         dj  d  dS )Nzjadaptive_max_pool2d(): Expected input to have non-zero size for non-batch dimensions, but input has sizes rC  rD  r  r*   r   r   r*   r+   rM   #  
   r   c                   S   rS   )NzCadaptive_max_pool2d(): internal error: output_size.size() must be 2r*   r*   r*   r*   r+   rM   +  rU   r`  r~   rq   r   )r   r?   rP   r   r   r   rx   r   r:   r  r   )r   r@  r   dimHsizeBsizeDosizeHosizeWr   r   r   r   r*   rP  r+   meta_adaptive_max_pool2d  sD   







rW  c                    sd    j }t|dv  fdd t d tj jk fdd t}jj	|dS )NrM  c                      rp   )NzKadaptive_max_pooling2d_backward(): Expected 3D or 4D grad_output, but got: r  r*   rV  r*   r+   rM   N  rt   z3meta_adaptive_max_pool2d_backward.<locals>.<lambda>adaptive_max_pool2d_backwardc                      r  rF  rq   r*   rV  r   r*   r+   rM   U  r  r   )
r   r?   rP   rI  rG   r:   r  rx   ry   r   )rV  r   r   r   r   r*   rZ  r+   !meta_adaptive_max_pool2d_backwardH  s   



r[  c                    s   j }t|dv fdd td|D ] t dk fdd qtt|dkdd  d}d}d}|d	krFd}|d7 }|}|\}}}|d
kr[||||f}	n|||||f}	|	}
j|	tjd}|
|fS )Nr,  c                      rp   )Nz:adaptive_max_pool3d(): Expected 4D or 5D tensor, but got: r  r*   rN  r*   r+   rM   b  rt   z*meta_adaptive_max_pool3d.<locals>.<lambda>r   r   c                      rO  )Nzjadaptive_max_pool3d(): Expected input to have non-zero size for non-batch dimensions, but input has sizes rC  rD  r  r*   rP  r*   r+   rM   g  rQ  r~   c                   S   rS   )NzCadaptive_max_pool3d(): internal error: output_size.size() must be 3r*   r*   r*   r*   r+   rM   o  rU   rs  r`  rq   )r   r?   rP   r   r   r   rx   r   )r   r@  r   dimDrS  rT  osizeTrU  rV  r   r   r   r*   rP  r+   meta_adaptive_max_pool3d\  s8   





r^  c                 C   s   t | d ||jS )Nadaptive_max_pool3d_backward)rI  rx   ry   )rV  r   r   r*   r*   r+   !meta_adaptive_max_pool3d_backward  s   
r`  c                 C   s   |d u rt d| |S )Nz:cannot repeat_interleave a meta tensor without output_size)r   rx   )repeatsr@  r*   r*   r+   meta_repeat_interleave_Tensor  s   
rb  c                 C   s:   | j jsJ |j jsJ t| j|j}| j|t| j dS r3  )rG   r   r   ry   rx   r   )realimagr   r*   r*   r+   meta_complex  s   re  )
fill_valuer   rf  c                C   s   | j ||  ftjdS r3  )rx   rh   r?   ru   )rz   r   rf  r*   r*   r+   nonzero_static  s   rg  c              
      s  t tdd  g }tD ]q\ d ur|t jt jt jt jt jfv dd  jt jt jfv rv }t	|t 
j jkfdd tjD ]#t 
j j  k fdd ||d qQq| q| q|t t	jkfdd dd lm} t|j t	jk rd  t	jk sd}d	}D ]|dkrǈd urd}q|dkr҈d u rd
}qd ur nqd}|sg }g }tD ]\ d ur|  | qtD ]\ d u r|  | q||g }	g }
g }tD ]&\}d u rB|r8|
j|  q"|	j|  q"tj}q"|	| |
 S )Nc                   S   rS   )Nz#at least one index must be providedr*   r*   r*   r*   r+   rM     rU   z#meta_index_Tensor.<locals>.<lambda>c                   S   rS   )Nz?tensors used as indices must be long, int, byte or bool tensorsr*   r*   r*   r*   r+   rM     rU   c                      rp   )N)too many indices for tensor of dimension r  r*   r   r*   r+   rM     rt   c                	      s$   dj  d  dj  d  S )NzThe shape of the mask z
 at index z0 does not match the shape of the indexed tensor r  r*   )r   rs   jr  rz   r*   r+   rM     s
    r   c                      s   dj  dt  dS )Nrh  z (got r]   )r   r   r*   )r   rz   r*   r+   rM     r3  r   Fr   T)r?   rP   r  	enumeraterG   ru   r  r   nonzeror   rv   r   r   ry   r   selecttorch._refs_refsr   r    r   rx   )rz   r   r  rk  refsstatehas_contiguous_subspacer  transposed_indicesbefore_shapeafter_shapereplacement_shaperh   r*   )r   rs   r   ri  r  rz   r+   meta_index_Tensor  s   








rv  c                 C   sT   d }d }d }|
d r|  | }|
d r|  | }|
d r%|  |}|||fS )Nr   r   r   rx   r   )grad_output_input_weight_bias_sizes_optr   r2  r  
transposedr  r  output_maskbackend_grad_inputbackend_grad_weightbackend_grad_biasr*   r*   r+   meta_convolution_backward	  s   

r  c                   s     d} d}| ||f} t  dkdd  t dkdd  t  d dk fdd t  d dk fd	d t|  d|ko^|  d|kd
d  | |   S )Nr   r   r~   c                   S   rS   r  r*   r*   r*   r*   r+   rM   -  rU   zmeta_addbmm.<locals>.<lambda>c                   S   rS   r  r*   r*   r*   r*   r+   rM   .  rU   r   c                         d  d d d S )Nz8batch1 and batch2 must have same number of batches, got r   r   r   r*   r  r  r*   r+   rM   1  r  c                
      6   d  d d  d d d d d d	S )Nz#Incompatible matrix sizes for bmm (r   r5   r   r   r]   r   r*   r  r*   r+   rM   5  
   c                   S   rS   )Nz.self tensor does not match matmul output shaper*   r*   r*   r*   r+   rM   <  rU   )r   r  r?   rP   rh   rx   )rz   r  r  r  r  r  r  r*   r  r+   meta_addbmm'  s$   

r  )
grad_scale	found_infc       	            s4   | |||||fD ] t t t fdd qd S )Nc                         dt   S Nz'exponent must be a tensor list but got r^   r*   lr*   r+   rM   W  rt  z#meta__fused_adam_.<locals>.<lambda>r?   rP   rV   r   )rz   gradsexp_avgsexp_avg_sqsmax_exp_avg_sqsstate_stepslrbeta1beta2weight_decayepsamsgradmaximizer  r  r*   r  r+   meta__fused_adam_A  s   
r  c       	            sZ   | |||||fD ] t t t fdd qdd }|| ||||||||fS )Nc                      r  r  r  r*   r  r*   r+   rM   q  rt  z"meta__fused_adam.<locals>.<lambda>c                 S   s   dd | D S )Nc                 S   s   g | ]}t |qS r*   r?   r   )r4   rp  r*   r*   r+   r8   u  r9   z=meta__fused_adam.<locals>.empty_like_list.<locals>.<listcomp>r*   )tensor_listr*   r*   r+   empty_like_listt  s   z)meta__fused_adam.<locals>.empty_like_listr  )rz   r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r*   r  r+   meta__fused_adam[  s   
r  c                    s   t   dkdd  t  dkdd  t  jt ju  fdd t jt ju fdd t  ddk fd	d  j ddft jd
S )Nr   c                   S   rS   )Nza must be a 2D tensorr*   r*   r*   r*   r+   rM     rU   zmeta__int_mm.<locals>.<lambda>c                   S   rS   )Nzb must be a 2D tensorr*   r*   r*   r*   r+   rM     rU   c                      rp   )Nzexpected self to be int8, got rq   r*   )r  r*   r+   rM     rt   c                      rp   )Nzexpected mat2 to be int8, got rq   r*   )r  r*   r+   rM     rt   r   r   c                
      r  )Nz'Incompatible matrix sizes for _int_mm (r   r5   r   r   r]   r   r*   r  r*   r+   rM     r  rq   )r?   rP   rh   rG   r   r   rx   r   r  r*   r  r+   meta__int_mm  s   



 r  c                    sp   t   dkdd  t  jt ju  fdd  d} d} j|d ||d  d	|d ft jd
S )Nr   c                   S   rS   Nzw must be a 2D tensorr*   r*   r*   r*   r+   rM     rU   z2meta__convert_weight_to_int4pack.<locals>.<lambda>c                      rp   Nzexpected w to be int32, got rq   r*   r  r*   r+   rM     rt   r   r      r      rq   )r?   rP   rh   rG   r   r   rx   )r  inner_k_tilesr   r  r*   r  r+    meta__convert_weight_to_int4pack  s   




r  c                    s   t  dkdd  t   dkdd  t jt jt jt jfv fdd t  jt ju  fdd j	d 	dd	 jd
S )Nr   c                   S   rS   Nzx must be a 2D tensorr*   r*   r*   r*   r+   rM     rU   z*meta__weight_int4pack_mm.<locals>.<lambda>r`  c                   S   rS   )Nzw must be a 4D tensorr*   r*   r*   r*   r+   rM     rU   c                      rp   Nz#expected x to be f32/f16/bf16, got rq   r*   r   r*   r+   rM     rt   c                      rp   r  rq   r*   r  r*   r+   rM     rt   r   r  rq   )
r?   rP   rh   rG   r  r  r  r   rx   r   )r5   r  q_group_sizeq_scale_and_zerosr*   r  r5   r+   meta__weight_int4pack_mm  s   


"r  c                    s   t  dkdd  t jt jt jt jfv fdd t   dkdd  t  jt ju  fdd j	d 	djdS )	Nr   c                   S   rS   r  r*   r*   r*   r*   r+   rM     rU   z*meta__weight_int8pack_mm.<locals>.<lambda>c                      rp   r  rq   r*   r   r*   r+   rM     rt   c                   S   rS   r  r*   r*   r*   r*   r+   rM     rU   c                      rp   )Nzexpected w to be int8, got rq   r*   r  r*   r+   rM     rt   r   rq   )
r?   rP   rh   rG   r  r  r  r   rx   r   )r5   r  q_scalesr*   r  r+   meta__weight_int8pack_mm  s   


r  c           	         s  t  dkfdd t  dkfdd t ddkfdd t tjdd  t tjdd  t |d	kd
d  t  dv  fdd d}d}jd d }jd d }tt 	||}|
||g |S )Nr   c                         d    dS )Nz1cdist only supports at least 2D tensors, X1 got: Dr}   r*   )x1r*   r+   rM     rN   z$meta_cdist_forward.<locals>.<lambda>c                      r  )Nz1cdist only supports at least 2D tensors, X2 got: r  r}   r*   )x2r*   r+   rM     rN   r|   c                      r  )Nz4X1 and X2 must have the same number of columns. X1: r|   z X2: r   r*   )r  r  r*   r+   rM     r  c                   S   rS   )Nz=cdist only supports floating-point dtypes, X1 got: {x1.dtype}r*   r*   r*   r*   r+   rM     rU   c                   S   rS   )Nz=cdist only supports floating-point dtypes, X2 got: {x2.dtype}r*   r*   r*   r*   r+   rM     rU   r   c                   S   rS   )Nz)cdist only supports non-negative p valuesr*   r*   r*   r*   r+   rM     rU   Nr   r   c                      r  )Nz%possible modes: None, 1, 2, but was: r*   r*   )compute_moder*   r+   rM     r  rd  )r?   rP   rh   r   r:   is_float_dtyperG   ry   r   broadcast_shapesextendrx   )	r  r  r  r  r1r2batch_tensor1batch_tensor2r  r*   )r  r  r  r+   meta_cdist_forward  s@   









r  c                 C   s   |j d }|j d }|j d }|j d d }|j d d }	tt||	}
|
 }|||g t|
}|dksE|dksE|dksE|dkrJt|S |t|j krV|	|}tj
|tjdS )Nr|   rd  r   r   )ry   r   r?   r  copyr  mathprod
zeros_liker  r   r   )r  r  r  r  cdistc1r  r  r  r  r  tensor1_expand_sizebatch_productr*   r*   r+   meta_cdist_backward  s   



 

r  c	                    s<  t  jt jt jfv  fdd t jt jt jfv fdd t tjfdd d}	|rEt |	dkdd  |	d8 }	|	d}
t	d\}}}d urt ||kd	d  t jjkfd
d t j
dkfdd t    k fdd fdddd fdd}tdkrʈ  d}  }||krĈ |	d}nT d}nN||
|}|||fv s|s d}nd}|	}jd }||kr|rt |dkdd  |d8 }|jd }n| }|
|||fS )Nc                      rp   )Nz(expected indices to be long or int, got rq   r*   r   r*   r+   rM     rt   z$meta_embedding_bag.<locals>.<lambda>c                      rp   )Nz(expected offsets to be long or int, got rq   r*   )r"  r*   r+   rM     rt   c                      rp   )Nz/expected weight to be floating point type, got rq   r*   )r   r*   r+   rM     rt   r   r   c                   S   rS   Nz1include_last_offset: numBags should be at least 1r*   r*   r*   r*   r+   rM   !  rU   r~   c                   S   rS   )Nz@embedding_bag: per_sample_weights only supported with mode='sum'r*   r*   r*   r*   r+   rM   +  rU   c                      ri  )Nzexpected weight (z) and per_sample_weights (z) to have same dtyperq   r*   )per_sample_weightsr   r*   r+   rM   /  r9   c                      r  )Nz1expected per_sample_weights to be 1D tensor, got r  r  r*   )r  r*   r+   rM   3  rt  c                      s   d   d    dS )Nz%expected per_sample_weights.numel() (z$ to be the same as indices.numel() (r]   rw   r*   )r   r  r*   r+   rM   7  s   c                    s    | ||o| ddkS Nr   r   r   r   r  r   padding_idx)is_fast_path_index_selectr*   r+   is_fast_path_index_select_scale=  s   z;meta_embedding_bag.<locals>.is_fast_path_index_select_scalec                 S   s<   | j tjks| j tjko| ddko|ddko|dk S Nr   r   )rG   r?   rC   rA   r   )r   r   r  r*   r*   r+   r  B  s   z5meta_embedding_bag.<locals>.is_fast_path_index_selectc                    s"   |d ur| |||S  | ||S r%   r*   r  )r  r  r*   r+   is_fast_pathJ  s   z(meta_embedding_bag.<locals>.is_fast_pathcpuc                   S   rS   r  r*   r*   r*   r*   r+   rM   d  rU   )r?   rP   rG   ru   r  r:   r  r   rx   r   r   rw   r  ry   )r   r   r"  scale_grad_by_freqr  sparser  include_last_offsetr  num_bagsr   MODE_SUM	MODE_MEANMODE_MAXr  
offset2bagbag_sizemax_indicesfast_path_sumnumBagsr*   )r   r  r  r"  r  r   r+   meta_embedding_bag  s~   










r  c                 G   sB   t | ||g|R  \}}}}t|dkr|| }||||fS )Nr  )r  r  rx   r   )r   r   r"  r<   r   r  r  r  r*   r*   r+   meta_embedding_bag_forward_onlym  s   r  c                 C   s.   |r|S | j js| j jr| j S |rtjS | j S r%   )rG   r   r   r?   ru   )r   rG   promote_int_to_longr*   r*   r+   _get_reduction_dtypew  s   r  rq   c                C   s6   t | |dd}t| j|}t| ||}| j||dS )NT)r  rq   )r  r:   r5  ry   r6  rx   )r   r  r8  rG   r  r  r*   r*   r+   meta_nansum  s   r  c                 C   s$   t | jtt|  }| |S r%   )r:   r  ry   rO   r   rh   rx   )r   r  r*   r*   r+   meta_median  s   
r  c                 C   sL   t | dkrtd t| j|f}t| ||}| || j|tjdfS )Nr  zmedian CUDA with indices outputrq   )	r  r:   alert_not_deterministicr5  ry   r6  rx   r?   ru   )r   rh   r8  r  r*   r*   r+   meta_median_mode_dim  s   
r  c                 C   r  r%   r*   r   r*   r*   r+   meta_logical_not_  rG  r  c                    sd   t t|  kdd  t|   }d| t| j   fddttD }| |S )Nc                   S   rS   )NzZNumber of dimensions of repeat dims can not be smaller than number of dimensions of tensorr*   r*   r*   r*   r+   rM     rU   zmeta_repeat.<locals>.<lambda>r  c                    s   g | ]
} | |  qS r*   r*   r  padded_sizera  r*   r+   r8     r  zmeta_repeat.<locals>.<listcomp>)r?   rP   r   rh   rO   ry   r   rx   )rz   ra  num_new_dimensionstarget_sizer*   r  r+   meta_repeat  s   
r  c                 C   r  r%   r*   r   r*   r*   r+   
meta_zero_  rG  r  c                 C      t |tjrt| j|j | S r%   rV   r?   r
   rR   ry   rz   r   r*   r*   r+   meta_binop_inplace  s   r  c                 C   r  r%   r  )rz   r   r  r*   r*   r+   meta_binop_inplace_alpha  s   	r  c                 K      t | tjdS Nr1   )r>   r   r;   )rz   kwargsr*   r*   r+   
meta_round  s   r  c                    sl   t tj fdd tt jr&t tj fdd d S t tt fdd d S )Nc                           dj  S )Nz7: Expected input tensor to have an integral dtype. Got rq   r*   )r|  rz   r*   r+   rM     rN   z#shift_dtype_check.<locals>.<lambda>c                      r  )Nz6: Expected shift value to have an integral dtype. Got rq   r*   r|  rE  r*   r+   rM     rN   c                      s     d S )Nz): Expected shift value to be an int. Got r*   r*   r  r*   r+   rM     rt  )r?   rP   r:   r  rG   rV   r
   r   r|  rz   rE  r*   r  r+   shift_dtype_check  s   

r  c                 C      t d| | t| |tjdS )Nrshiftr  r  r>   r   r;   r  r*   r*   r+   meta_rshifts     r   c                 C   r  )Nlshiftr  r  r  r*   r*   r+   meta_lshifts  r  r  c                 C      |  | jS r%   r  r   r*   r*   r+   	meta_zero     r  c                 C   r  r%   r*   rz   rE  r*   r*   r+   
meta_fill_  rG  r  c                 C   
   t | S r%   r  r  r*   r*   r+   	meta_fill     
r
  c                 C   r  r%   r*   r   r*   r*   r+   
meta_relu_  rG  r  c                 C   r	  r%   r  rz   r   r   
accumulater*   r*   r+   meta_index_put  r  r  c                 C   s   t | j|j | S r%   )rR   ry   )rz   r  valuer*   r*   r+   meta_masked_fill_$  s   r  c                 C   s    |  |  jt| d}|S r  )rx   r   r   r:   r  )rz   r  r  masked_scaler*   r*   r+   meta__masked_scale*  s   r  c                 C   s:   t |jt jt jfv dd  t | j|jkdd  | S )Nc                   S   rS   )NzMask must be bool or uint8r*   r*   r*   r*   r+   rM   5  rU   z&meta_masked_scatter_.<locals>.<lambda>c                   S   rS   )Nzdmasked_scatter: expected self and source to have same dtypes but got {self.dtype} and {source.dtype}r*   r*   r*   r*   r+   rM   9  rU   )r?   rP   rG   r  uint8)rz   r  r  r*   r*   r+   meta_masked_scatter_2  s   
r  c                 C   s*   t | |\} }tj| tjd}t|||S r  )r    r?   r   r   r  )rz   r  r  r   r*   r*   r+   meta_masked_scatter?  s   r  c                 C   s
   |  |S r%   r/  )rz   r  r  r*   r*   r+   meta_masked_scatter_backwardG  r  r  c                 C   r  r%   r*   r  r*   r*   r+   meta_index_put_L  rG  r  c                 C   r  r%   )viewry   r   r*   r*   r+   
meta_aliasQ  r  r  c                    s   t |  dkdd  t | dkdd  |  }|  |d |d |d } d }||ft  d koB d k fdd |}|sqd urqt  dkd	d  t  kfd
d |S )Nr~   c                   S   rS   r  r*   r*   r*   r*   r+   rM   W  rU   z)common_meta_baddbmm_bmm.<locals>.<lambda>c                   S   rS   r  r*   r*   r*   r*   r+   rM   X  rU   r   r   r   c                	      r  r  r*   r*   r  r*   r+   rM   e  s    c                   S   rS   )Nzself must be a 3D tensorr*   r*   r*   r*   r+   rM   n  rU   c                      s   d  d   S )Nz*Expected an input tensor shape with shape z but got shape: r   r*   )r@  self_baddbmmr*   r+   rM   q  r  )r?   rP   rh   r   rx   )r  r  is_bmmr  r  res_rowsres_colsr   r*   )r  r  r  r@  r  r+   common_meta_baddbmm_bmmV  s*   


r  c                 C   s   t | |dS )NT)r  )rz   r   r*   r*   r+   meta_bmmw  r  r   c                 C   s<   | | }| | }|dkrt |dk t |dk kr|d8 }|S r  )r  )r5   yqr  r*   r*   r+   div_rtn|  s
    r#  c                 C   sZ   t | | | ||d   d |r|d nd |d }|r+|d | | | kr+|d8 }|S r  )r#  )	inputSize
kernelSizer?  r@  r   r  r  
outputSizer*   r*   r+   pooling_output_shape_pad_lr  s*   
	r'  c                    sl   t |dkdd  t dkfdd t d   d d k fdd t| | |S )Nr   c                   S   rS   )Nzstride should not be zeror*   r*   r*   r*   r+   rM     rU   z&pooling_output_shape.<locals>.<lambda>c                      r  )Nz'pad must be non-negative, but got pad: r*   r*   )padr*   r+   rM     r  r   r   c                      s   d d d  S )NzApad should be at most half of effective kernel size, but got pad=z, kernel_size=z and dilation=r*   r*   r  r%  r(  r*   r+   rM     s
   )r?   rP   r'  )r$  r%  r(  r   r  r  r*   r)  r+   r    s   r  c              	      sN     }tdkodkdd  t|dko|dkdd  t|dko+|dkdd   ddko= ddk}|tjkrWt|dkoQ|oQ d	dkd
d  n"t|d	krf ddkrf|pr|dkor|or d	dk fdd td 
kod 	k	
fdd tdkodkfdd d S )Nr   c                   S   rS   )NzCkernel size should be greater than zero, but got kH: {kH}, kW: {kW}r*   r*   r*   r*   r+   rM     rU   z$pool2d_shape_check.<locals>.<lambda>c                   S   rS   )Nz>stride should be greater than zero, but got dH: {dH}, dW: {dW}r*   r*   r*   r*   r+   rM     rU   c                   S   rS   )Nz\dilation should be greater than zero, but got dilationH: {dilationH}, dilationW: {dilationW}r*   r*   r*   r*   r+   rM     rU   r   r   r`  r~   c                   S   rS   )NzExpected 4D (batch mode) tensor expected for input with channels_last layout with optional 0 dim batch size for input, but got: {input.size()}r*   r*   r*   r*   r+   rM     rU   c                         d    S )NzYExpected 3D or 4D (batch mode) tensor with optional 0 dim batch size for input, but got: r   r*   rN  r*   r+   rM     rt  c                      s   d d d d  S )NzKpad should be smaller than or equal to half of kernel size, but got padW = z	, padH = z, kW = z, kH = r*   r*   )r  r  r  r  r*   r+   rM     s    c                      s*   d d  d d d d dS NzGiven input size: (r5   z). Calculated output size: (z). Output size is too smallr*   r*   )r  r  r  r#  r  r   r*   r+   rM     s    )rh   r?   rP   r   r  )r   r  r  r  r  r  r  	dilationH	dilationWr  r  r  r  r   r   r   
valid_dimsr*   )r   r  r  r  r  r  r#  r  r   r  r  r+   r    sB   

r  r3  r0  r  r  r1  r  r  pTpHpW	dilationTr,  r-  r4  r5  r6  r7  r8  r9  r.  c              
      s  	j }tdkodkodkfdd tdko&dko& dk fdd tdko<dko<dkfdd t|dv 	fdd t|D ]|dkradkraqVt	dk	fd	d qV|rt
kokok
fd
d td kod kod kfdd tdkodkodk
fdd d S )Nr   c                         d d  d S )Nz5kernel size should be greater than zero, but got kT: z, kH: z, kW: r*   r*   )r  r0  r  r*   r+   rM        z$pool3d_shape_check.<locals>.<lambda>c                      r3  )Nz0stride should be greater than zero, but got dT: z, dH: z, dW: r*   r*   )r  r1  r  r*   r+   rM     r4  c                      r3  )Nz9dilation should be greater than zero, but got dilationT: z, dilationH: z, dilationW: r*   r*   )r,  r2  r-  r*   r+   rM     r4  r,  c                      r  )Nz/: Expected 4D or 5D tensor for input, but got: r  r*   )r|  r   r*   r+   rM     rN   rs  c                      s     dj  d dS )NzZ: Expected input's non-batch dimensions to have positive length, but input has a shape of z and non-batch dimension z has length zero!)ry   r   r*   r|  r   r   r*   r+   rM   #  s
   c                      s*   d d  d d d d dS )Nzinput image (T: rw  rg  z ) smaller than kernel size (kT:  kH:  kW: r]   r*   r*   )r5  r4  r6  r  r0  r  r*   r+   rM   -  s   r   c                      s(   d d d  d d d S )NzHpad should be smaller than or equal to half of kernel size, but got kT: r7  r6  z padT: z padW: z padH: r*   r*   )r  r0  r  r0  r/  r1  r*   r+   rM   5  s   r   c                      s6   d d d  d d d d d dS r+  r*   r*   )r5  r4  r6  r3  r8  r7  r9  r*   r+   rM   =  s   )r   r?   rP   r   r   )r   r3  r0  r  r  r1  r  r  r/  r0  r1  r2  r,  r-  r4  r5  r6  r7  r8  r9  r|  r.  r   r*   )r  r1  r  r,  r2  r-  r|  r   r5  r   r4  r6  r  r0  r  r3  r8  r7  r9  r0  r/  r1  r+   r/    sJ   	"r/  c                 C   s   | j }t| |||||||	|
|||||||||||| t|||d | t|||d | t|||d | t|||d | t|||d | t|||d | t|||d | t|||d | d S )Nr`  r~   r   r   r   r/  r  )r   rV  r   r3  r0  r  r  r1  r  r  r/  r0  r1  r2  r,  r-  r4  r5  r6  r7  r8  r9  r|  r   r*   r*   r+   max_pool3d_backward_shape_checkE  s@   r9  c                 C   s   | j }t| ||||||||	|
|ddd|||||||d t|||d | t|||d | t|||d | t|||d | d S )Nr   Tr`  r~   r   r8  )r   rV  r3  r0  r  r  r1  r  r  r/  r0  r1  r4  r5  r6  r7  r8  r9  r|  r   r*   r*   r+   r;    s:   r;  c                 C   sB  dd }|d|\}}t t|dv dd  t|dkr#||}	}
n|d|\}	}
|d	|\}}|d
|\}}| d}| d}| d}t| }|t jkr^t |  dkdd  n|t jkrpt |  dv dd  nt ddd  t	||||	||}t	||||
||}t
| |||	|
|||||||||| |||fS )Nc                    r  )Nr  c                      r  )Nzmax_pool2d: r  r*   r*   r	  r*   r+   rM     rt   zEmax_pool2d_checks_and_compute_shape.<locals>.unpack.<locals>.<lambda>r   r   r
  r  r*   r	  r+   r    r  z3max_pool2d_checks_and_compute_shape.<locals>.unpackr  r  c                   S   rS   )NzOmax_pool2d: stride must either be omitted, a single int, or a tuple of two intsr*   r*   r*   r*   r+   rM     rU   z5max_pool2d_checks_and_compute_shape.<locals>.<lambda>r   r   r2  r  r  rd  r|   r`  c                   S   rS   )NzMnon-empty 4D (batch mode) tensor expected for input with channels_last layoutr*   r*   r*   r*   r+   rM     rU   rM  c                   S   rS   )Nz9non-empty 3D or 4D (batch mode) tensor expected for inputr*   r*   r*   r*   r+   rM     rU   Fc                   S   rS   )Nz?Unsupport memory format. Supports only ChannelsLast, Contiguousr*   r*   r*   r*   r+   rM     rU   )r?   rP   r   r   r:   r  r  rh   r   r  r  )r   r  r   r2  r  r  r  r  r  r  r  r  r  r,  r-  r  r  r  r   r  r   r*   r*   r+   r    sb   	









r  c                    s   t |||||\}tj jk fdd |jfdd}	|	  |	| t}
tjjjj	|
dS )Nc                      r  )NzExpected dtype z  for `gradOutput` but got dtype rq   r*   rJ  r*   r+   rM     r  z7meta_max_pool2d_with_indices_backward.<locals>.<lambda>c                    s:   t | d   t | d  t | d  d S )Nr~   r   r   )r  )rp  )r#  r   r  r   r*   r+   _check_dim_size  s   z>meta_max_pool2d_with_indices_backward.<locals>._check_dim_sizer'  )
r  r?   rP   rG   r   r:   r  rm   ry   re   )rV  rz   r  r   r2  r  r  r   r  r:  r   r*   )rV  r#  r   r  r   rz   r+   %meta_max_pool2d_with_indices_backward   s.   

r;  c                 C   s   t | |||||\}}}|  dkr| dnd}	t| }
|  dkr*|||g}n|	|||g}tj|| j| j|
dtj|tj	| j|
dfS r  )
r  rh   r   r:   r  r?   rm   rG   re   r   r  r*   r*   r+   meta_max_pool2d_with_indices,  s2   
r<  c           
   	      s  t jdv fdd j}t|d |D ] t  dkd  d  d qt td	kd
d  t t|d	kdd  d}dd|dkr_d}nd}t jjkdd  t jdkfdd d}d}d	 t ||kd t ||kdd  t  d	k fdd t |d d  d kfdd t |d d  d kfdd  dkr|||d |d g}	n	||d |d g}	t j|	jj	dt j|	t j
j	dfS )NrM  c                      rp   )Nz:fractional_max_pool2d: Expected 3D or 4D tensor, but got: r  r*   self_r*   r+   rM   R  rt   z,meta_fractional_max_pool2d.<locals>.<lambda>r~   r   z^fractional_max_pool2d: Expected input to have non-zero  size for non-batch dimenions, but got rC  z emptyr   c                   S   rS   )NzNfractional_max_pool2d: kernel_size musteither be a single int or tuple of Intsr*   r*   r*   r*   r+   rM   `  rU   c                   S   rS   )NzOfractional_max_pool2d: output_size must either be a single int or tuple of Intsr*   r*   r*   r*   r+   rM   e  rU   r  rd  r|   r`  r   c                   S   rS   )Nz6Expect _random_samples to have the same dtype as inputr*   r*   r*   r*   r+   rM   s  rU   c                      rp   )Nz1Expect _random samples to have 3 dimensions got, r  r*   )random_samplesr*   r+   rM   w  rt   z=Expect _random_samples.size(0) no less then input batch size.c                   S   rS   )Nz<Expect _random_samples.size(1) equals to input channel size.r*   r*   r*   r*   r+   rM     rU   c                      r  )Nz/Expect _random_samples.size(2) equals to 2 got .r*   r*   )r   r*   r+   rM     rt   c                         dd  d  S )Nz%fractional_max_pool2d: kernel height r   z' is too large relative to input height r*   r*   )input_heightr  r*   r+   rM     r  c                      rA  )Nz$fractional_max_pool2d: kernel width r   z& is too large relative to input width r*   r*   )input_widthr  r*   r+   rM     r  rG   re   )r?   rP   r   r   r   r   rG   rh   rm   re   r   )
r>  r  r@  r?  r   input_channelsinput_batchr   cr   r*   )r   rB  rC  r  r?  r>  r+   meta_fractional_max_pool2dN  s   










rH  c           	         s  t d tjtjkfdd ttdkfdd \}}tjdv fdd tjjkfdd t	d	jD ] t
 d
k fdd qG }jdkrr|
d
}||||f}|S |
d
}|
d	}|||||f}|S )Nmax_unpooling2d_forward_outc                      rp   )Nz2elements in indices should be type int64 but got: rq   r*   r  r*   r+   rM     rt   z#meta_max_unpool2d.<locals>.<lambda>r   c                         dt   dS )NzMThere should be exactly two elements (height, width) in output_size, but got 
 elements.r1  r*   r@  r*   r+   rM        rM  c                      r  )NzLInput to max_unpooling2d should be a 3d or 4d Tensor, but got a tensor with  dimensions.r  r*   r=  r*   r+   rM     r  c                      r  NzBExpected shape of indices to be same as that of the input tensor (z%) but got indices tensor with shape: r  r*   )r   r>  r*   r+   rM     r  r   r   c                      rO  )NzZmax_unpooling2d(): Expected input to have non-zero size for non-batch dimensions, but got rC   being empty.r  r*   )r   r>  r*   r+   rM     s
   r~   )r:   r  r?   rP   rG   r   r   r   ry   r   r   r   rx   )	r>  r   r@  r8  r9  rz   	nchannelsr  rF  r*   )r   r   r@  r>  r+   meta_max_unpool2d  s@   






	



rR  c                    s  t jt jkdd  t jdv fdd t tdkfdd t tdkfdd t tdkfdd t jjkfd	d td
jD ]t dk fdd qXt d dkod
 dkod dkfdd d S )Nc                   S   rS   )Nz(elements in indices should be type int64r*   r*   r*   r*   r+   rM     rU   z._max_unpooling3d_shape_check.<locals>.<lambda>r,  c                      r  )NzLInput to max_unpooling3d should be a 4d or 5d Tensor, but got a tensor with rN  r  r*   rN  r*   r+   rM     rt  r~   c                      rJ  )NzVThere should be exactly three elements (depth, height, width) in output_size, but got rK  r1  r*   rL  r*   r+   rM     rM  c                      rJ  )NzRThere should be exactly three elements (depth, height, width) in stride, but got: rK  r1  r*   r  r*   r+   rM     rN   c                      rJ  )NzSThere should be exactly three elements (depth, height, width) in padding, but got: rK  r1  r*   )r2  r*   r+   rM     rN   c                      r  rO  r  r*   )r   r   r*   r+   rM     r  r   r   c                      s     dj  d dS )NzI: Expected input to have non-zero size for non-batch dimensions, but got rC  rP  r  r*   r5  r*   r+   rM     s
   r   c                      r  )Nz5strides should be greater than zero, but got stride: r*   r*   r  r*   r+   rM     r  )	r?   rP   rG   r   r   r   ry   r   r   )r   r   r@  r   r2  r|  r*   )r|  r   r   r   r@  r2  r   r+   _max_unpooling3d_shape_check  s@   







	"
rS  c                 C   s   t d t| ||||d |  }|\}}}| jdkr,|d}	||	|||f}
|
S |d}|d}	|||	|||f}
|
S )Nmax_unpooling3d_forward_outzmax_unpooling3d()r`  r   r   )r:   r  rS  r   r   r   rx   )r>  r   r@  r   r2  rz   odepthr8  r9  rQ  r  rF  r*   r*   r+   meta_max_unpool3d  s   





rV  c                 C   s  t t|dv dd  |d }t|dkr|n|d }t|dkr$|n|d }t | p2t|dv dd  |s;|n|d }	|sC|nt|dkrK|	n|d }
|sS|nt|dkr[|	n|d }t t|dv dd  |d }t|dkrw|n|d }t|dkr|n|d }t t|dv d	d  |d }t|dkr|n|d }t|dkr|n|d }t | jd
v dd  | jdkr| dnd}| d}| d}| d}| d}t||||	||}t||||
||}t||||||}t| |||||	|
|||||||||||||d | jdkot| t j	k}| jdkr:| 
d}|  o2|jt j	d}||||f}n|||||f}| |}| j|t jd}|r_|jt j	d}|jt j	d}||fS )Nr(  c                   S   rS   NzMmax_pool3d: kernel_size must either be a single int, or a tuple of three intsr*   r*   r*   r*   r+   rM   /  rU   z.meta_max_pool3d_with_indices.<locals>.<lambda>r   r   r   c                   S   rS   NzQmax_pool3d: stride must either be omitted, a single int, or a tuple of three intsr*   r*   r*   r*   r+   rM   7  rU   c                   S   rS   NzImax_pool3d: padding must either be a single int, or a tuple of three intsr*   r*   r*   r*   r+   rM   ?  rU   c                   S   rS   NzJmax_pool3d: dilation must be either a single int, or a tuple of three intsr*   r*   r*   r*   r+   rM   G  rU   r,  c                   S   rS   r-  r*   r*   r*   r*   r+   rM   O  rU   rs  r  r  rd  r|   zmax_pool3d_with_indices()r`  r   rq   )r?   rP   r   r   r   r  r/  r:   r  r  r	  r  rx   r   r   )r   r  r   r2  r  r  r0  r  r  r1  r  r  r/  r0  r1  r2  r,  r-  rF  r3  r4  r5  r6  r7  r8  r9  r  input_channels_last_checkr   r   r   r*   r*   r+   meta_max_pool3d_with_indices#  s   

  







r]  c                 C   s^  t t|dv dd  |d }t|dkr|n|d }	t|dkr$|n|d }
t | p2t|dv dd  |s;|n|d }|sC|	nt|dkrK|n|d }|sS|
nt|dkr[|n|d }t t|dv dd  |d }t|dkrw|n|d }t|dkr|n|d }t t|dv d	d  |d }t|dkr|n|d }t|dkr|n|d }t |jd
v dd  |d}|d}|d}|d}| d}| d}| d}t|| ||||	|
|||||||||||||||d |jdkot|t jk}|jdkr|	d}|
  o|j
t jd}||j}|r-|jt jd}|S )Nr(  c                   S   rS   rW  r*   r*   r*   r*   r+   rM     rU   z7meta_max_pool3d_with_indices_backward.<locals>.<lambda>r   r   r   c                   S   rS   rX  r*   r*   r*   r*   r+   rM     rU   c                   S   rS   rY  r*   r*   r*   r*   r+   rM     rU   c                   S   rS   rZ  r*   r*   r*   r*   r+   rM     rU   r,  c                   S   rS   r-  r*   r*   r*   r*   r+   rM     rU   r  r  rd  r|   z"max_pool3d_with_indices_backward()rs  r`  r   )r?   rP   r   r   r   r9  r:   r  r  r	  r  rx   ry   r   )rV  r   r  r   r2  r  r  r   r0  r  r  r1  r  r  r/  r0  r1  r2  r,  r-  r3  r4  r5  r6  r7  r8  r9  r  r\  rY  r*   r*   r+   %meta_max_pool3d_with_indices_backward  s   
  









r^  gridc                    s   t j jk fdd t jt jko jt jk fdd t jd  jd k fdd t  jd jd k fdd tdjD ]t j dkfd	d qPd S )
Nc                      r  )NzNgrid_sampler(): expected input and grid to be on same device, but input is on z and grid is on rM  r*   r_  r   r*   r+   rM     r  z+check_grid_sampler_common.<locals>.<lambda>c                      r  )NzTgrid_sampler(): expected input and grid to have torch.strided layout, but input has z and grid has )rd   r*   r`  r*   r+   rM     r  r   c                      r  )NzZgrid_sampler(): expected grid and input to have same batch size, but got input with sizes  and grid with sizes r  r*   r`  r*   r+   rM     r  r|   r   c                      s   dj d  d j S )Nz+grid_sampler(): expected grid to have size r   z, in last dimension, but got grid with sizes )r   ry   r*   r`  r*   r+   rM     s   c                      rO  )NzYgrid_sampler(): expected input to have non-empty spatial dimensions, but input has sizes rC  rD  r  r*   rP  r*   r+   rM     rQ  )r?   rP   re   rd   r!  ry   r   r   )r   r_  r*   )r_  r   r   r+   check_grid_sampler_common  s,   
rb  c                   @   s   e Zd ZdZdZdZdS )GridSamplerInterpolationr   r   r   N)r_   
__module____qualname__BILINEARNEARESTBICUBICr*   r*   r*   r+   rc    s    rc  interpolation_modec                    sP   t jdkoj jk fdd t jdko |tjjk dd  d S )Nrs  c                      r  )Nzdgrid_sampler(): expected 5D input and grid with same number of dimensions, but got input with sizes ra  r  r*   r`  r*   r+   rM   $  s
   z'check_grid_sampler_3d.<locals>.<lambda>c                   S   rS   )Nz<grid_sampler(): bicubic interpolation only supports 4D inputr*   r*   r*   r*   r+   rM   /  rU   )r?   rP   r   rc  rh  r  )r   r_  ri  r*   r`  r+   check_grid_sampler_3d!  s   

rj  c           
      C   s:   |d }|rt j|t jd}nd }t j|t jd}	||	fS Nr   r   )r?   r  r   r   
rV  r   r_  ri  padding_modealign_cornersr}  input_requires_gradrY  	grad_gridr*   r*   r+   grid_sampler_2d_backward_meta3  s   
rq  c           
      C   s\   t | | t| || | jd }| jd }|jd }|jd }|jd }	| |||||	fS )Nr   r   r   r~   )rb  rj  ry   rx   )
r   r_  ri  rm  rn  r  Cout_Dout_Hout_Wr*   r*   r+   grid_sampler_3dF  s   
	




rv  rp  c           
      C   sP   t || t||| |d }|rtj|tjd}nd }tj|tjd}	||	fS rk  )rb  rj  r?   r  r  r   rl  r*   r*   r+   grid_sampler_3d_backwardY  s   
rw  c                 O   s:   | dd }|st|}||d< tj| g|R i |S )NrG   )rF   r:   	get_dtyper?   rm   )r   rf  r<   r  rG   r*   r*   r+   fullq  s
   
ry  c                 C   s   |t jkrJt |d u dd  t jd|d u r| jn|||d u r"| jn||d}| jr8||  | 	 | 
  n||  |  d |d |S tjj| |||||d}|d |S )Nc                   S   rS   )Nz9memory format option is only supported by strided tensorsr*   r*   r*   r*   r+   rM     rU   zzeros_like.<locals>.<lambda>r   r   TrL  )r?   
sparse_coorP   rm   rG   re   	is_sparsesparse_resize_and_clear_r   
sparse_dim	dense_dimrh   _coalesced_r"   r   r   fill_)rz   rG   rd   re   rf   r   r  r*   r*   r+   r  {  s:   
	

	r  c                    s     }t|dkdd   dkr n |   }t |kp'|k  fdd dkr7n| t }t } |    }| = | = |||S )Nr   c                   S   rS   )Nz-select() cannot be applied to a 0-dim tensor.r*   r*   r*   r*   r+   rM     rU   zmeta_select.<locals>.<lambda>c                      s   d d   d  S )Nzselect(): index z! out of range for tensor of size z at dimension r   r*   rh   rs   rz   r*   r+   rM     s
    )rh   r?   rv   r   r   r   r   r   )rz   rh   rs   r   r   new_sizer   new_storage_offsetr*   r  r+   meta_select  s$   
r  c                 C   r	  r%   r:   clone_preserve_strides)rz   r   rh   rs   r*   r*   r+   meta_select_scatter  r  r  c                 C   r	  r%   r  )rz   r   rh   ra   r`   stepr*   r*   r+   meta_slice_scatter  r  r  dim_post_exprwrap_scalarc                 C   sb   |dkr
|sJ d}| }|d }| |k s| |kr'J d|  d| d| d| dk r/| |7 } | S )Nr   r   zdim z out of bounds (r\   r]   r*   )rh   r  r  rS  rT  r*   r*   r+   r     s   ,r   c                 C   s   |   dkrdS | j| S r  r  )rp  rh   r*   r*   r+   ensure_nonempty_size  s   r  c                    st   t  d}t  d}t||kdd  t|D ] kr7tttk fdd qd S )Nr   c                   S   rS   )NzDIndex tensor must have the same number of dimensions as input tensorr*   r*   r*   r*   r+   rM     rU   z$gather_shape_check.<locals>.<lambda>c                      s$   d dj  dj  d   S )Nz!Size does not match at dimension z expected index  to be smaller than self  apart from dimension r  r*   rh   r   rs   rz   r*   r+   rM     s    )rT  rh   r?   rP   r   r  )rz   rh   rs   	self_dims
index_dimsr*   r  r+   gather_shape_check  s   r  c                    sb   ddl m} t||  }|  dk}|s+t jtjk fdd t	| |  | 
 jS )Nr   guard_size_obliviousc                      rp   )Nz2gather(): Expected dtype int64 for index, but got rq   r*   rr   r*   r+   rM     rt   zmeta_gather.<locals>.<lambda>)rU  r  r   rh   rw   r?   rP   rG   ru   r  rx   ry   )rz   rh   rs   sparse_gradr  wrapped_dimis_index_emptyr*   rr   r+   meta_gather  s   

r  c                 C   s   |r*| dkrdS | dkrdS | dkrdS | dkrdS | d	kr d
S t ddd  d S | dkr0dS | dkr6dS t ddd  d S )Nsum
REDUCE_ADDr  REDUCE_MULTIPLYmeanREDUCE_MEANamaxREDUCE_MAXIMUMaminREDUCE_MINIMUMFc                   S   rS   )Nz=reduce argument must be either sum, prod, mean, amax or amin.r*   r*   r*   r*   r+   rM     rU   z#get_operator_enum.<locals>.<lambda>addmultiplyc                   S   rS   )Nz/reduce argument must be either add or multiply.r*   r*   r*   r*   r+   rM     rU   r  )reduce_use_new_optionsr*   r*   r+   get_operator_enum  s,   r  c                    sd   ddl m} || dkrt|jtjk fdd |d ur0t|j|jk fdd d S d S )Nr   r  c                      
     dS )Nz"(): Expected dtype int64 for indexr*   r*   method_namer*   r+   rM   "  r  z,scatter_gather_dtype_check.<locals>.<lambda>c                      r  )Nz0(): Expected self.dtype to be equal to src.dtyper*   r*   r  r*   r+   rM   (  r  )rU  r  rw   r?   rP   rG   ru   )r  rz   rs   src_optr  r*   r  r+   scatter_gather_dtype_check  s   



r  c                 C   s
   t | dS r   )rT  r}   r*   r*   r+   ensure_nonempty_dim,  s   
r  c           	         s0  ddl m} | dkrd S tt t kdd  d}t }t|D ]}t|}| kr:q.|t|krEd} nq.|scd urct|D ]}t|}|t|krbd} nqPd urtt t kdd  t|  fdd d S t|  fd	d d S )
Nr   r  c                   S   rS   NzCIndex tensor must have the same number of dimensions as self tensorr*   r*   r*   r*   r+   rM   8  rU   z%scatter_shape_check.<locals>.<lambda>FTc                   S   rS   r  r*   r*   r*   r*   r+   rM   R  rU   c                      s&   dj  dj  d  dj   S )NExpected index r  r  z and to be smaller than src r  r*   rh   rs   rz   r  r*   r+   rM   V  s    c                      s   dj  dj  d   S )Nr  r  r  r  r*   r  r*   r+   rM   \  s    )	rU  r  rw   r?   rP   r  rh   r   r  )	rz   rh   rs   r  r  is_wrong_shaper  r   index_d_sizer*   r  r+   scatter_shape_check1  sJ   

r  c                 C   sD   t ||  }td| || t| ||| |d ur t|| d S d S )Nscatter)r   rh   r  r  r  )rz   rh   rs   r   r  r  r  r*   r*   r+   scatter_meta_implb  s   r  c                 C   s   t | |||d | | jS Nr  r  rx   ry   rz   rh   rs   r   r*   r*   r+   meta_scatter_addk  s   r  c                 C   s   t | |||d | S r  r  r  r*   r*   r+   meta_scatter_add_q  r^  r  c                 C   s0   t |tjr|nd }t| |||| | | jS r%   )rV   r?   r
   r  rx   ry   rz   rh   rs   src_or_valuer  r   r*   r*   r+   meta_scatterw  s   
r  c                 C   s(   t |tjr|nd }t| |||| | S r%   )rV   r?   r
   r  r  r*   r*   r+   meta_scatter_  s   	r  rE  queryr   r  	logsumexp	cum_seq_q	cum_seq_kmax_qmax_k	dropout_p	is_causalphilox_seedphilox_offsetr  c                 C   sX   t |dddd}t |dddd}t |dddd}|||fS r  )r?   r   r  )rE  r  r   r  r   r  r  r  r  r  r  r  r  r  r  grad_qgrad_kgrad_vr*   r*   r+   'meta__scaled_dot_product_flash_backward  s   
r          	attn_maskc                 C   sv   |  d}|  d}|  d}	|  d}
tj||	||
f| j| jddd}tj||	|ftj| jddd}||fS )Nr   r   r   r~   rD  )r   r?   rm   rG   re   r  rC   )r  r   r  r  r  r  r  r   	num_headsmax_seqlen_batch_qhead_dim	attentionr  r*   r*   r+   0meta__scaled_dot_product_flash_attention_for_cpu  s0   





r  c
                 C   s   | d}
| d}| d}| d}| d}tj|
|||fd|j|jd}tj|
|||fd|j|jd}tj|
|||fd|j|jd}|||fS )Nr   r   r~   r   r   r   r   r~   rD  )r   r?   empty_permutedrG   re   )rE  r  r   r  r   r  r  r  r  r  r   r  r  len_qlen_kr  r  r  r*   r*   r+   9meta__scaled_dot_product_flash_attention_for_cpu_backward  s0   








r  	attn_biasgrad_input_maskc                 C   s  | d}| d}| d}| d}| d}| d}tj||||fd|j|jd}tj||||fd|j|jd}tj||||fd|j|jd}d }|d ur|
d r| d}|d dkrb|n|d |d  }t|  }||d< tj||j|jd}|d	d |f }||||fS )
Nr   r   r   r~   r  rD  r|   r  .)r   r?   r  rG   re   r   rm   )rE  r  r   r  r  r   r  r  r  r  r  r  r  r   r  r  r  
head_dim_vr  r  r  r  	grad_biaslastDimlastDimAligned	new_sizesr*   r*   r+   +meta__scaled_dot_product_efficient_backward  sF   









 
r  window_size_leftwindow_size_rightc                 C   s(   t |}t |}t |}|||fS r%   r  )rE  r  r   r  r   r  r  r  r  r  r  r  r  r  r  r  r  
grad_querygrad_key
grad_valuer*   r*   r+   meta__flash_attention_backwardB  s   



r  cu_seqlens_qcu_seqlens_kmax_seqlen_qmax_seqlen_kcustom_mask_typebias_requires_gradnum_splits_keyshared_storage_dqdkdvc                 C   sL  |rSt |jd |jd kdd  t |jd |jd kdd  t jg |jdd d|jd |jd R |j|jd	}|d
d}|d
d}|d
d}nt |}t |}t |}|d ur|d}|d dkrs|n|d |d  }t	| }||d< t j||j|jd	}|dd |f }nt jd|jd}||||fS )Nr   c                   S   rS   )Nz,seqlen must match for `shared_storage_dqdkdvr*   r*   r*   r*   r+   rM   }  rU   z4meta__efficient_attention_backward.<locals>.<lambda>r~   c                   S   rS   )Nz3embedding dim must match for `shared_storage_dqdkdvr*   r*   r*   r*   r+   rM     rU   r   rd  r|   rD  r  r   r  .r*   rM  )
r?   rP   ry   rm   rG   re   rl  r   r   r   )rE  r  r   r  r   r  r  r  r  r  r  r  r  r  r  r  r  r  chunkr  r  r  r  r  r  r  r*   r*   r+   "meta__efficient_attention_backwarda  s:   *



 r  scale_ascale_bscale_resultuse_fast_accumc                    s8  dd }dd }	dd }
t  dko  dk fdd	 t | d
d	  t |	 j  dd	  t dd dkfdd	 t  dd dko_ dd dk fdd	 t |
joq|
 j fdd	 |d ur|nj}t jd d|jdt jdt j	jdfS )Nc                 S   s   | d | d ko| d dkS r  r*   r  r*   r*   r+   is_row_major     z$meta_scaled_mm.<locals>.is_row_majorc                 S   s   |d dko|d | d kS r  r*   )ry   r   r*   r*   r+   is_col_major  r  z$meta_scaled_mm.<locals>.is_col_majorc                 S   s   | t jt jt jt jfv S r%   )r?   float8_e4m3fnfloat8_e5m2float8_e4m3fnuzfloat8_e5m2fnuzrq   r*   r*   r+   is_fp8_type  s   z#meta_scaled_mm.<locals>.is_fp8_typer   c                      s   d   d    S )Nz%Inputs must be 2D but got self.dim()=z and mat2.dim()=r}   r*   r   rz   r*   r+   rM     r3  z meta_scaled_mm.<locals>.<lambda>c                   S   rS   )Nzself must be row_majorr*   r*   r*   r*   r+   rM     rU   c                   S   rS   )Nzmat2 must be col_majorr*   r*   r*   r*   r+   rM     rU   r   r  r   c                      s   d  d S )NzBExpected self.size(0) to be divisible by 16, but got self.size(1)=r   r   r*   r   r*   r+   rM     rN   c                      rp   )Nz>Expected both dimensions of mat2 to be divisble by 16 but got r  r*   )r   r*   r+   rM     rt   c                      r  )Nz8Expected both inputs to be fp8 types but got self.dtype=z and mat2.dtype=rq   r*   r  r*   r+   rM     r  rD  r*   )
r?   rP   rh   r   ry   r   rG   rm   re   r  )rz   r   r   r   r  r  r  r  r  r  r  
_out_dtyper*   r  r+   meta_scaled_mm  sB   

"
r  c                 C   s    t | ||||dd | | jS NT)r  r  rz   rh   rs   r   r  r  r*   r*   r+   meta_scatter_reduce_two  s   r  c                 C   s   t | ||||dd | S r   r  r  r*   r*   r+   meta_scatter_reduce__two  s   r  c                   sh   t d    k odkn   fdd   dkr&t j|t j jdS t j d|t j jdS )Nr   r   c                      r*  )Nz@The probabilty distributions dimensions must be 1 or 2, but got r}   r*   rN  r*   r+   rM     rt  z"meta_multinomial.<locals>.<lambda>r   rD  )r?   rP   rh   rm   ru   re   r   )r   num_samplesreplacementr   r*   rN  r+   meta_multinomial  s   
r  c                 C   s   d}| D ]}||9 }q|S r   r*   )vsr  vr*   r*   r+   multiply_integers  s   
r	  c                    s   t tkfdd d  t t k fdd t tdd dd  D o9tdd D fdd d d \}}||gR S )Nc                         d  dt  S )Nz%It is expected output_size equals to , but got size r1  r*   )num_spatial_dimsr@  r*   r+   rM     r  z'upsample_common_check.<locals>.<lambda>r   c                      r
  )Nz$It is expected input_size equals to r  r1  r*   )expected_input_dimsr&  r*   r+   rM     r  c                 s       | ]}|d kV  qdS r   Nr*   )r4   rJ  r*   r*   r+   rY     r  z(upsample_common_check.<locals>.<genexpr>c                      rZ   )NzDInput and output sizes should be greater than 0, but got input size z and output size r*   r*   )r&  r@  r*   r+   rM     s
    )r?   rP   r   r
  )r&  r@  r  rF  channelsr*   )r  r&  r  r@  r+   upsample_common_check  s   

*r  c                    sZ   t   dkpt  dd   fdd t  |dd} |jt	 dS )Nr   r   c                      r*  )Nz>Non-empty 3D data tensor expected but got a tensor with sizes r   r*   rN  r*   r+   rM     rt  z$upsample_nearest1d.<locals>.<lambda>r  r   
r?   rP   rw   r	  r   r  rx   r   r:   r  )r   r@  scalesfull_output_sizer*   rN  r+   upsample_nearest1d     


r  c           	         s   t   dkpt  dd   fdd t  |dd} |}t } j	\}}}} j
jdkr?|dk r?t j}|j|d	}|S )
Nr   r   c                      r*  Nz>Non-empty 4D data tensor expected but got a tensor with sizes r   r*   rN  r*   r+   rM   !  rt  z$upsample_nearest2d.<locals>.<lambda>r   r  r  r`  r   )r?   rP   rw   r	  r   r  rx   r:   r  ry   re   r^   r   r   )	r   r@  scales_hscales_wr  r   r   r=   
n_channelsr*   rN  r+   upsample_nearest2d  s   



r  r@  r&  r  r  c                    st   t ||dd tjdkfdd tdD ]t  k fdd q|jt	dS )Nr   r  r`  c                      rp   )NzFExpected grad_output to be a tensor of dimension 4 but got: dimension r  r*   rX  r*   r+   rM   G  rt   z-upsample_nearest2d_backward.<locals>.<lambda>c                
      s&   d d   d d  S )NzCExpected grad_output to have the same shape as output; output.size(z) = z but got grad_output.size(r   r*   r  rV  r   r*   r+   rM   L  s   r   )
r  r?   rP   r   r   r   rx   r   r:   r  )rV  r@  r&  r  r  r*   r  r+   upsample_nearest2d_backward5  s   

	r  c                    sZ   t   dkpt  dd   fdd t  |dd} |jt	 dS )Nr   r   c                      r*  )Nz>Non-empty 5D data tensor expected but got a tensor with sizes r   r*   rN  r*   r+   rM   ^  rt  z$upsample_nearest3d.<locals>.<lambda>r~   r  r   r  )r   r@  scales_dr  r  r  r*   rN  r+   upsample_nearest3dX  r  r   c           
      C   s   t | t j| t jd}}|d urQ|d urQt|tsJ t|ts$J |j}| }	t||}t||}|||	 |||	 t	||d t	||d ||fS ||fS )Nrq   )r  r  )
r?   r   r   rV   r   ry   r   r   r   r   )
rz   stablerh   
descendingr   r   r  r   r   
out_strider*   r*   r+   	meta_sorth  s   	

r$  )rh   r"  c                C   s   t | |||dd S )N)r!  rh   r"  r   )r$  )rz   r!  rh   r"  r*   r*   r+   meta_argsort  s   r%  c                    s  t jdkfdd t jjkfdd dd urPt jdkfdd t  kfdd t jjkfdd t jdkfd	d d
   t   k fdd t tfddfD dd  d S )Nr   c                          j  dS Nz != 2r  r*   input_gatesr*   r+   rM     rt   z%rnn_cell_checkSizes.<locals>.<lambda>c                         j  d j  S N != r  r*   )hidden_gatesr)  r*   r+   rM         r   c                      r&  )Nz != 1r  r*   )
input_biasr*   r+   rM     rt   c                      s      d  S r+  r  r*   )
gates_sizer/  r*   r+   rM     r.  c                      r*  r+  r  r*   )hidden_biasr/  r*   r+   rM     r.  c                      r&  r'  r  r*   )prev_hiddenr*   r+   rM     rt   r   c                
      s,      dd d d d  d
S )Nr,  r   z * z // z (aka r]   )rw   r   r*   )expected_prev_hidden_numelfactorr0  r)  r2  r*   r+   rM     s   , c                 3   s    | ]	}|j  j kV  qd S r%   rM  r3   r(  r*   r+   rY     s
    

z&rnn_cell_checkSizes.<locals>.<genexpr>c                   S   rS   )Nz%expected all inputs to be same devicer*   r*   r*   r*   r+   rM     rU   )r?   rP   r   ry   r   rw   r
  )r)  r-  r/  r1  r4  r2  r*   )r3  r4  r0  r1  r-  r/  r)  r2  r+   rnn_cell_checkSizes  s8   





r5  c                 C   sL   t | |||d| tj| tjd}tj|tjd}tj|tjd}|||fS )Nr`  r   )r5  r?   r   r   )r)  r-  cxr/  r1  	workspacehycyr*   r*   r+   _thnn_fused_lstm_cell_meta  s
   
r:  c                 C   s(  t |dk}|rt |}|d }| jd }n|
r| jd n| jd }|
r)| jd n| jd }d}|r4dnd}|dkr<|n|}|rG||| g}n|
rP|||| gn|||| g}| |}|	| ||g}|d u rptjd| jd}n||}||	| ||g}|rdnd}| j|tjd}|||||fS )Nr   r   r|   r   rM  rq   )r   ry   rx   r?   rm   re   r  )r   r   weight_stride0
weight_bufhxr6  r  hidden_size	proj_size
num_layersbatch_firstdropouttrainbidirectionalbatch_sizesdropout_stateis_input_packed
seq_length
mini_batchbatch_sizes_sumnum_directionsout_sizer   r   
cell_shaper9  r8  reserve_shapereserver*   r*   r+   
_cudnn_rnn  s2   

rP  c                 C   s   |r| j d n| j d }|r| j d n| j d }|
}|r!|||gn|||g}| |}|d u r8tjd| jd}n||j }|d u rKtjd| jd}n||j }tjd| jtjd}||||fS )Nr   r   rM  r   )ry   rx   r?   rm   re   r  )r   w0w1w2w3hx_cx_r   rE  r  r>  r@  
has_biasesrD  rA  rC  rH  rI  output_chanelsr   r   r8  r9  r7  r*   r*   r+   mkldnn_rnn_layer  s    
rY  c                    sT   | j dkrt dkp dk fdd d S t|  dk fdd d S )Nr   r|   c                      rr  )Nz4: Expected reduction dim -1 or 0 for scalar but got r*   r*   rh   r|  r*   r+   rM     rt  z'zero_numel_check_dims.<locals>.<lambda>c                      rv  )Nz: Expected reduction dim z to have non-zero size.r*   r*   rZ  r*   r+   rM     rN   )r   r?   rv   r   )rz   rh   r|  r*   rZ  r+   zero_numel_check_dims  s   
r[  c                    sF   |d urt || }t||  d S t| dk fdd d S )Nr   c                      r  )Nz@: Expected reduction dim to be specified for input.numel() == 0.r*   r*   r	  r*   r+   rM   *  r  z%check_argmax_argmin.<locals>.<lambda>)r   rh   r[  r?   rP   rw   )rh  rz   rh   r*   r	  r+   check_argmax_argmin#  s   

r\  c                 C   sD   t d| | t| j|d ur|fnd }t| ||}| j|tjdS )Nargmaxrq   )r\  r:   r5  ry   r6  rx   r?   r   )rz   rh   r8  r  ry   r*   r*   r+   argmax_argmin_meta.  s   r^  c                 C   s   t jd||||dS )Nr*   r   r   )rJ  rG   rd   re   rf   r*   r*   r+   scalar_tensor6  s   
r_  c                 C   s   t ||  dd}t|dko||  dkr| |ndkdd  |  dkr*dn| |}t|dko8||kdd  t| j}t|dkrL|||< | || j|tj	dfS )	NT)r  r   r   c                   S   rS   )Nzselected index k out of ranger*   r*   r*   r*   r+   rM   C  rU   ztopk_meta.<locals>.<lambda>c                   S   rS   )Nzk not in range for dimensionr*   r*   r*   r*   r+   rM   F  rU   rq   )
r   rh   r?   rP   r   r   ry   r   rx   r   )rz   r  rh   largestsorted	sliceSizetopKSizer*   r*   r+   	topk_meta=  s   $
rd  c                 C   s   | d ur| n|}t | dkdd  | }| d ur(t |  |kdd  |d ur8t | |kdd  t | |kdd  t | |kdd  t | dkdd  t | |d	 |d
  d kdd  d S )Nr   c                   S   rS   N r*   r*   r*   r*   r+   rM   T  rU   z(checkLSTMBackwardSizes.<locals>.<lambda>c                   S   rS   re  r*   r*   r*   r*   r+   rM   W  rU   c                   S   rS   re  r*   r*   r*   r*   r+   rM   Y  rU   c                   S   rS   re  r*   r*   r*   r*   r+   rM   Z  rU   c                   S   rS   re  r*   r*   r*   r*   r+   rM   [  rU   c                   S   rS   re  r*   r*   r*   r*   r+   rM   \  rU   r   r   r`  c                   S   rS   re  r*   r*   r*   r*   r+   rM   ]  rU   )r?   rP   rh   r   rw   )grad_hygrad_cyr6  r9  r7  defined_gradexp_sizer*   r*   r+   checkLSTMBackwardSizesR  s   ,rk  c           	      C   s`   | d u r
|d u r
dS t | |||| tj|td}tj|td}|r)|jdddnd }|||fS )NNNNr   r   F)r8  )rk  r?   r   legacy_contiguous_memory_formatr  )	rg  rh  r6  r9  r7  has_bias
grad_gatesgrad_cxr  r*   r*   r+   #_thnn_fused_lstm_cell_backward_impla  s   
rq  c                 C   sf   d }d }d }|d r| |  }|d s|d r.| |d| df}| |d}|||fS )Nr   r   r   r|   rw  )ry  rx  rz  r}  rY  grad_weightr  r*   r*   r+   linear_backwardo  s   
rs  c                    s   t jdkrjd ||  dksJ dj d| dd   fdd	}jd ||  }jd
 | }jd | }g jd d |||R }|}|j| d}|S )Nr   r  r   z'Invalid input shape for pixel_shuffle: z with upscale_factor = c                 S   r  r%   r  r  r*   r*   r+   r    r  z,meta_pixel_shuffle.<locals>.is_channels_lastc                      sL    rt dkrtjS tjS jtjdrtjS jtjdr$tjS d S r  )r  r?   r   r  r  r  r*   r  rz   r*   r+   r    s   z.meta_pixel_shuffle.<locals>.pick_memory_formatrd  r|   r   )r   ry   rx   r   )rz   upscale_factorr  rr  HrWrr   r   r*   rt  r+   meta_pixel_shuffle|  s   & 
rx  c                 C   sZ   |  | j}| |j}| |j}| |j}| |j}| |j}|||||||fS r%   r  )r   weight0weight1weight2weight3rU  cx_tmpr   hy_cy_grad_output_r_optgrad_hy_r_optgrad_cy_r_optr   r  r>  r@  rW  rC  rD  rE  rA  r7  diff_xdiff_hxdiff_cxdiff_w1diff_w2diff_br*   r*   r+   mkldnn_rnn_layer_backward  s   r  )	out_int32r   c                C   s   t j| |rt jnt jd S r3  )r?   r   r   r   r   )rz   
boundariesr  r   r*   r*   r+   meta_bucketize  s
   r  d   c                    s   dt dkrt fdd tt t fdd t dk fdd tttfdd tttfd	d tkd
d  tj jj	dS )Nzhistc()r  c                      r  )Nz%"histogram_cpu" not implemented for ''rq   r*   rN  r*   r+   rM     rt  zmeta_histc.<locals>.<lambda>c                      s    dt   S )Nz#: argument 'bins' must be int, not r  r*   binsr|  r*   r+   rM     r.  r   c                      rr  )Nz: bins must be > 0, but got r*   r*   r  r*   r+   rM     rt  c                           dt  S )Nz%: argument 'min' must be Number, not r  r*   )r|  rS  r*   r+   rM     r.  c                      r  )Nz%: argument 'max' must be Number, not r  r*   )r|  rT  r*   r+   rM     r.  c                   S   rS   )Nz&{fn_name}: max must be larger than minr*   r*   r*   r*   r+   rM     rU   r   )
r  r?   rP   r   rV   r   r   rm   re   rG   )r   r  rS  rT  r*   )r  r|  r   rT  rS  r+   
meta_histc  s*   
r  c                    sd   t   |dd}t  dkptdd   dd  D  fdd  |jt	 d	S )
Nr   r  r   c                 s   r  r  r*   )r4   r   r*   r*   r+   rY     r  z,meta_upsample_bimode2d_aa.<locals>.<genexpr>r   c                      r*  r  r   r*   rN  r*   r+   rM     rt  z+meta_upsample_bimode2d_aa.<locals>.<lambda>r   )
r  r   r?   rP   rw   r
  rx   r   r:   r  )r   r@  rn  r  r  r  r*   rN  r+   meta_upsample_bimode2d_aa  s   
(

r  c                 C   s\   t | dkdd  t | dkdd  t |jjdd  t |jjdd  d S )Nr   c                   S   rS   )Nz%found_inf must be a 1-element tensor.r*   r*   r*   r*   r+   rM     rU   z<_amp_foreach_non_finite_check_and_unscale_.<locals>.<lambda>c                   S   rS   )Nz%inv_scale must be a 1-element tensor.r*   r*   r*   r*   r+   rM     rU   c                   S   rS   )Nz!found_inf must be a float tensor.r*   r*   r*   r*   r+   rM     rU   c                   S   rS   )Nz!inv_scale must be a float tensor.r*   r*   r*   r*   r+   rM      rU   )r?   rP   rw   rG   r   )rz   r  	inv_scaler*   r*   r+   *_amp_foreach_non_finite_check_and_unscale_  s   r  c                 C   s   t |  }| |S r%   )r   r   rx   )rz   nanposinfneginfr  r*   r*   r+   
nan_to_num  s   
r  c                 C   s   | j tjtjtjtjhvsJ d| j  d| j}t||}t||}||kr)| S t| 	 }t| 
 }|| || ||< ||< || || ||< ||< | || | S )Nz>torch.transpose_: in-place transposition is not supported for z layout)rd   r?   r"  
sparse_cscr#  
sparse_bscr   r   r   r   r   r   )rz   dim0r  ndimsr   r   r*   r*   r+   r    s&   

r  c                 C   sz   | j }| jr"|  }|  }|dkr|dks!J d| d| dn|  dks0J d| dt| d|dk r:dS dS )	Nr   r   zEt_ expects a tensor with <= 2 sparse and 0 dense dimensions, but got z sparse and z dense dimensionsz6t_ expects a tensor with <= 2 dimensions, but self is r  r   )r   r{  r}  r~  rh   r  )rz   r  r}  r~  r*   r*   r+   t_'  s   
r  )r  r   sidesorterc                C   s@   |rt jnt j}t|t jrt j||d S t jd|| jdS )Nrq   r*   rD  )	r?   r   r   rV   r
   r   r   rm   re   )sorted_sequencerz   r  r   r  r  rG   r*   r*   r+   meta_searchsorted9  s   r  c                    s,   t  t jt jt jt jfv fdd d S )Nc                      r  )Nz/Unsupported input type encountered for isin(): r*   r*   rq   r*   r+   rM   H  r  z3_check_for_unsupported_isin_dtype.<locals>.<lambda>)r?   rP   r  r  
complex128	complex64rq   r*   rq   r+   !_check_for_unsupported_isin_dtypeE  s   
r  c
                    sf   t  jt jt jt jt jfv  fdd td\}
}}||kr't |d u  | 	df}|S )Nc                      rp   )Nz$Unsupported input type encountered: rq   r*   r  r*   r+   rM   [  rt   z3meta_embedding_bag_dense_backward.<locals>.<lambda>r~   r   )
r?   rP   rG   r  r  r  float64r   rx   r   )r  r   r  r  maximum_indicesnum_weightsr  r  r  r  r  r  r  index_grad_weightr*   r  r+   !meta_embedding_bag_dense_backwardL  s   
r  c                 C   s   t d\}}}	| d}
t||kd t|  dk t| dk |d}t| dk t|d|
k | |f}|S )Nr~   r   zHembedding_bag_backward: per_sample_weights only supported for mode='sum'r   r   )r   r   r?   rP   rh   rx   )r  r   r   r"  r  r  r  r  r  r  embedding_featuresr  r   r*   r*   r+   .meta_embedding_bag_per_sample_weights_backwardd  s   

r  )assume_uniqueinvertc                C   sx   t t| tpt|tdd  t| tst j| |jd} t|ts*t j|| jd}t| j t|j t j| t j	dS )Nc                   S   rS   )Nz<At least one of elements and test_elements must be a Tensor.r*   r*   r*   r*   r+   rM   |  rU   zmeta_isin.<locals>.<lambda>rM  rq   )
r?   rP   rV   r
   r   re   r  rG   r   r  )elementstest_elementsr  r  r*   r*   r+   	meta_isinw  s   



r  r   c                 C   s4   t | dkdd  t|tjd\}}t j||dS )Nr   c                   S   rS   )Nz,polygamma(n, x) does not support negative n.r*   r*   r*   r*   r+   rM     rU   z meta_polygamma.<locals>.<lambda>r=  rq   )r?   rP   r   r   r>  r   )r   rz   r=   r7   r*   r*   r+   meta_polygamma  s   
r  c                 C   s>   |   ^ }}}}tjg ||||R | j| j| jdS )N)rG   rd   re   )r   r?   rm   rG   rd   re   )r   r  leading_dimsrr  r  r  r*   r*   r+   meta_channel_shuffle  s   r  c                 C   s   t d)Nz.Tensor.item() cannot be called on meta tensors)r   r   r*   r*   r+   meta_local_scalar_dense  s   r  c                 C      t | t dd }|S )Nc                 S   r  r  r>   r   r>  r   r*   r*   r+   _f  s   z)_create_unary_float_meta_func.<locals>._fr0   r   funcr  r*   r*   r+   _create_unary_float_meta_func     r  c                 C   r  )Nc                 S   s   t | |tjdS r  r  )r5   r!  r*   r*   r+   r    s   z*_create_binary_float_meta_func.<locals>._fr  r  r*   r*   r+   _create_binary_float_meta_func  r  r  c                  C   s4  i } dD ]}t | }|D ]}|| vr|| | |< qq|  D ]y\}}t|tjjr*qt|ts1J |tjj	j
| tj| drR|t d v rQt| dq|jrVq| dv r]qd| v rjt|| qd| v rwt|| qd| v rt|| qd	| v rt|| qt|| qd S )
N)rc   post_autogradpre_autogradCompositeImplicitAutogradrc   z is a CompositeImplicitAutograd op, we shouldn't register meta function for it. Instead, we should let the decomposition run and write meta kernels for the base operators.>   aten::cloneaten::copy_aten::rot90aten::_to_copyaten::empty_stridedaten::constant_pad_ndaten::as_strided_scatterzmkldnn::zmkl::zonednn::zquantized::)r   itemsrV   r?   _opsHigherOrderOperatorr   py_impl_CDispatchKeyr$   %_dispatch_has_kernel_for_dispatch_keyrh  r   is_view2_meta_lib_dont_use_me_use_register_meta_for_mkldnnimpl/_meta_lib_dont_use_me_use_register_meta_for_mkl2_meta_lib_dont_use_me_use_register_meta_for_onednn5_meta_lib_dont_use_me_use_register_meta_for_quantized'_meta_lib_dont_use_me_use_register_meta)activate_meta_tabler^   registryopoop_overloadr)   r*   r*   r+   activate_meta  sJ   	r  r   rl  r%   )NNNF)NN)Trm  )r  )r  T)FF)TT)r  )FTN)TFF)TF)r   )r  N)r  r  )r*   r   r  F)r*   r   FTN)Fr   FNFr|   )NF)r|   F)NNNNN)r   NNr   )NNF)r  FNN)FN)NNNNNF)Nr|   FNN)NNNN)r|   TT)r  r   r   )r|   (3  r  enumr   typingr   r   r   r   r   r?   torch._prims_commonr  r:   r   r	   r
   torch._decompr   r   r   r   
torch._opsr   torch._primsr   r   r   r   r   r   r   r   r   r   torch._prims_common.wrappersr   r   r   r   r   rm  r   r    torch.utilsr!   r-   opsr"   libraryLibraryr  r0   r>   rI   rR   linspacelogspacer!  ro   taker   r   r{   r   r   cummaxcumminr   r   r   _fft_c2cr   _fft_r2cr   randpermgenerator_outr   ru   r   randintr   r   low_outr   randr   _fft_c2rr   rA  r   r   
unsqueeze_r   _sparse_semi_structured_linearr  rG   r   _sparse_semi_structured_mmr  _sparse_semi_structured_addmmr  _cslt_sparse_mmr  r  index_reducer  r  index_reduce_r  index_selectr   segment_reducer,  rT  	unary_outr0  rh   r9  rS  r;  r<  rB  r?  rC  _assert_asyncrF  msgrI  _printrK  _make_dep_tokenrN  rW  _functional_sym_constrain_ranger]  r`  (_functional_sym_constrain_range_for_sizera  _functional_assert_asyncrb  r   ro  r   rx  r{  r  r  _linalg_eighr  r  _linalg_eigvalslinalg_eigvalsr  
linalg_eigr  r  r  r  r  r  r  r  linalg_inv_exr  linalg_ldl_factor_exr  linalg_ldl_solver  	linalg_lur  linalg_lu_factor_exr  linalg_lu_solver  	lu_unpackr  r  	linalg_qrr  r  r  _linalg_svdr  r  r  r  r  linalg_solve_triangularr  r   r&  _linalg_detr(  r0  r8  rH  reflection_pad1drN  replication_pad1drQ  rX  reflection_pad1d_backwardr]  replication_pad1d_backwardr_  rl  reflection_pad2drn  replication_pad2dro  reflection_pad2d_backwardrY  replication_pad2d_backwardrr  r{  reflection_pad3dr}  replication_pad3dr~  reflection_pad3d_backwardreplication_pad3d_backwardr  _pdist_forwardrC   r  _pdist_backwardr  baddbmmr  	bernoullir  
bernoulli_r  r  r  _fused_moving_avg_obs_fq_helperr  mmr  r6  r  r  r  convolutionr  r  _has_mkldnnr  r  _convolution_pointwiser  _linear_pointwiser  has_mklr  r  _mkl_linearr  r  r  qconv2d_pointwiser  qlinear_pointwiser   r  r  r  
max_pool2dr  r  
avg_pool2dr   r$  avg_pool2d_backwardr'  
avg_pool3dr:  avg_pool3d_backwardr?  _adaptive_avg_pool2drA  _adaptive_avg_pool3drB  _adaptive_avg_pool2d_backwardrG  _adaptive_avg_pool3d_backwardrK  rI  adaptive_max_pool2drW  rY  r[  adaptive_max_pool3dr^  r_  r`  repeat_interleaverb  rW   re  rg  rs   _unsafe_indexrv  convolution_backwardr  addbmmr  _fused_adam_r  _fused_adamr  _int_mmr  _convert_weight_to_int4packr  _weight_int4pack_mmr  _weight_int8pack_mmr  _cdist_forwardr  _cdist_backwardr  _embedding_bagr  _embedding_bag_forward_onlyr  r  nansumr  median	nanmedianr  
dim_valuesr  r   r  logical_not_r  repeatr  zero_r  mul_Scalardiv_logical_and_logical_or_logical_xor_r  add_sub_r  rounddecimalsr  r  
__rshift__r   
__lshift__r  zeror  r  r  fillr
  relu_r  	index_put_unsafe_index_putr  masked_fill_r  _masked_scaler  masked_scatter_r  masked_scatterr  masked_scatter_backwardr  
index_put_r  aliasr  r  bmmr   r#  r'  r  r  r/  r9  r;  r   max_pool2d_with_indices_backwardr;  max_pool2d_with_indicesr<  fractional_max_pool2drH  max_unpool2drR  rS  max_unpool3drV  max_pool3d_with_indicesr]   max_pool3d_with_indices_backwardr^  rb  rc  rj  grid_sampler_2d_backwardrq  rv  rw  ry  r  rl  r  select_scatterr  slice_scatterr  r   r  r  gatherr  r  r  r  r  r  scatter_addr  scatter_add_r  r  r   r  r  value_reducer  scatter_r  ,_scaled_dot_product_flash_attention_backwardr  +_scaled_dot_product_flash_attention_for_cpur  4_scaled_dot_product_flash_attention_for_cpu_backwardr  0_scaled_dot_product_efficient_attention_backwardr  _flash_attention_backwardr  _efficient_attention_backwardSymIntr  
_scaled_mmr  scatter_reducetwotwo_outr  scatter_reduce_r  multinomialr  r	  r  r  _upsample_nearest_exact1dr  _upsample_nearest_exact2dr  "_upsample_nearest_exact2d_backwardr   _upsample_nearest_exact3dr   r!  values_stabler$  argsortr%  r5  _thnn_fused_lstm_cellr:  rP  rY  r[  r\  r]  argminr^  r_  topkrd  r   rm  rk  rq  rs  pixel_shufflerx  r  	bucketize
Tensor_outr  histcr  _upsample_bilinear2d_aa_upsample_bicubic2d_aar  r  r  r  r  searchsortedr  r  _embedding_bag_dense_backwardr  *_embedding_bag_per_sample_weights_backwardr  isinr  	polygammar  channel_shuffler  _local_scalar_denser  r  r  special_airy_aispecial_bessel_y0special_bessel_y1special_modified_bessel_i0special_modified_bessel_i1special_modified_bessel_k0special_modified_bessel_k1!special_scaled_modified_bessel_k0!special_scaled_modified_bessel_k1special_chebyshev_polynomial_tspecial_chebyshev_polynomial_uspecial_chebyshev_polynomial_vspecial_chebyshev_polynomial_w&special_shifted_chebyshev_polynomial_t&special_shifted_chebyshev_polynomial_u&special_shifted_chebyshev_polynomial_v&special_shifted_chebyshev_polynomial_wspecial_hermite_polynomial_hspecial_hermite_polynomial_hespecial_laguerre_polynomial_lspecial_legendre_polynomial_ptorch._refs.nn.functionaltorch._refs.specialr  r*   r*   r*   r+   <module>   s
  (
	8	6





	
!"
$



#
	

	











	

	



)




"

2
&
*
7
(
"
%


	
;

/Z&5 ?'$,



e
	
,
"M,
H
TN



.


*(c$
#h	










!
T	
]>	
6G+!
T7
/


ge( 

	,$1	








	
	"	
*	
7	
	
76



"
7'
"





"	


C