o
    "i%                 !   @   s1  U d dl Z d dlZd dlZd dlZd dlmZ d dl mZmZ d dlm	Z	m
Z
 d dlmZmZmZmZmZmZmZmZ d dlZd dlmZ d dlmZ d dlm  mZ d dlmZm Z m!Z! d dl"m#Z# d dl$m%Z% d d	lm&Z&m'Z'm(Z(m)Z)m*Z* d d
l+m,Z,m-Z-m.Z.m/Z/ d dl0m1Z2 d dl3m4Z4 ej5j6Z6g Z7ee8 e9d< ej:j;j<Z<G dd deZ=	dkdedej>de?fddZ@ee@ej>jAddZBee@ej>jAdZCee@ej>jDdZEde!deFde!fddZGe#e<jHe/deCde!d e!fd!d"ZHe#e<jIe/deCde!d e!fd#d$ZIe#e<jJe/deCde!de!d%eKd&eKfd'd(ZJe#e<jLe/deCd)e!d*eKd+eKd,eKd-e?d.e!fd/d0ZLe#e<jMjNgd1d2 ZOe#e<jMj!gd3e!fd4d5ZPe#e<jQe/ eCd6e!de!fd7d8ZQe#e<jRe/deCd)e!d6e!fd9d:ZRe#e<jSe/dd)e!d6e!d;eKd<eKfd=d>ZSe#e<jTe/ eCd6e!de!fd?d@ZTe#e<jUe/ eCd)e!d6e!de!fdAdBZUe#e<jVe/dd)e!d6e!d&eKfdCdDZVe#e<jWe/deCd)e!d6e!dEeKdFe?fdGdHZWe#e<jXe/deCdldJe!d6e!dKe8fdLdMZXe#e<jYeCd)e!dNe!fdOdPZYe#e<jZe/ eCd6e!de!fdQdRZZe#e<j[e/deCd)e!d6e!de!fdSdTZ[e#e<j\d6e!dUe!de!fdVdWZ\e#e<j]d)e!d6e!dUe!dee!e!f fdXdYZ]e#e<j^e<j^j_`e6jae/ eC	Z	[		dmd6e!d\e!d]eKd^eKd_e?d`eejb de!fdadbZ^e#e<jce<jcj_`e6jaeC	Z	[		dmd6e!d\e!d]eKd^eKd_e?d`eejb de!fdcddZce#e<jde/ eCd)e!d6e!d\e!d]eKd^eKd_e?dFe?de!fdedfZde#e<jee/deCd)e!d6e!dge!de!fdhdiZedje!dkeFfdldmZfdnejgfdodpZhe#e<jie/ eCe=jjjkfd6e!dqe!dkeFde!fdrdsZie#e<jle/deCd)e!dNe!dqe!dkeFfdtduZle#e<jme/ eCe=jjjkdvfd6e!dqe!dkeFd%eKfdwdxZme#e<jnj_eCd)e!d6e!dqe!dkeFd%eKf
dydzZne#e<jnjoeCd)e!d6e!dqe!dkeFd%eKde!fd{d|Zpe#e<jqj_eCd)e!d6e!dqe!dkeFd}eKf
d~dZqe#e<jqjreCd)e!d6e!dqe!dkeFd}eKde!fddZsd)e!d6e!dqe!dUee! dkeFdeFde!de!fddZte#e<jue/deCd)e!d6e!deFde!fddZue#e<jve/dd)e!d6e!dqe!dUee! dkeFdeFde!de!fddZve#e<jwe/dd)e!d6e!dqe!dUee! dkeFdeFde!de!fddZwe#e<jxe/ eCde=jjjkfd6e!dqe!dUee! dkeFde!f
ddZxe#e<jye/deCde=jjjkfd)e!d6e!dqe!dUee! dkeFde!fddZye#e<jze/ eCe=jjjkfdNe!dqe!dkeFde!fddZze#e<j{e/deCe=jjjkfd)e!d6e!dqe!dkeFde!f
ddZ{e#e<j|e/ dndNe!de!deKfddZ|e#e<j}e/ de!de!de!fddZ}e#e<j~e/ d)e!deeF deFdeFdeFdeFfddZ~e#e<jj!	 			dod6e!deFdeeF deeF deFf
ddZe#e<je/ d)e!deeF deFdeFfddZe#e<je/ d)e!deeF deFdeFdeFf
ddZd)e!de!dejgfddZe#e<je/deBd)e!de!deFdejgfddZe#e<je/ eBd)e!de!deFdejgfddZdd Ze#e<je/ dNe!deeF deeF deeF deeF de!fddZe#e<je/ eCdNe!deeF deeF deeF deeF deeF de!fddZe#e<je/ d)e!de!d+eKfddÄZe#e<je/ dJe!deeF deFdeFdeFde!fddȄZe#e<jj_eC	dpd)e!d6e!deeK de!fdd˄Ze#e<je<jj_`e6je<jj_`e6jdNe!deKdee? fdd΄Ze#e<je/ddЃdNe!deKdee? fdd҄Ze#e<je/ de!deFde?fddՄZe#e<je/ de!deFde?fddׄZe#e<je/ 			dqdUe!de!deFde?de?de!fddބZe#e<je/ d)e!de!deFdeFde?f
ddZdeeF fddZdee! deFdeFdee! fddZdee! fddZdee! deFfddZdee! deFdeFfddZe#e<jj_e<jjrg	dpdee! deFdeFdee! de!f
ddZe#e<j	 drd6e!deeF deFdee! fddZe#e<jj_e<jjrg	 	dsd6e!deeF deFdeee!  deee!  f
ddZe#e<jj!drdNe!deFdeFdee!df fddZe#e<jj_	 drdNe!deeF deFdee!df fddZe#e<jj!drd6e!deFdeFdee!df fddZe<jj`e6j	 drd6e!de!deFdee!df fdd Ze#e<je/ eCdtd6e!de!de!d%eFd*eFf
ddZe#e<je/ eC			dud6e!de!de!d%eFd*eFde?fddZe#e<je/ eCdtd6e!de!de!d%eFd*eFf
d	d
Ze#e<jj_eCd)e!dNe!de!de!dee! deFdeFdeFdeFdee? deee! ee! ee! f fddZe#e<jjrd)e!dNe!de!de!dee! deFdeFdeFdeFdee? dej!dej!dej!deee! ee! ee! f fddZdee! dee! fddZe#e<jj_de!dNe!deeF de!de!dUee! dee! dee? deee! ee! ee! f fddZe#e<jjrde!dNe!deeF de!de!dUee! dee! dee? dej!dej!dej!deee! ee! ee! f fdd ZdNe!dUee! dee! d!ee! d"ee! d_e?d#eKdeKd$e?dee!e!e!ee! ee! f fd%d&Ze#e<je/dd'd(dNe!dUee! dee! d!ee! d"ee! d_e?d#eKdeKdee!e!e!f fd)d*Ze<jj_`e6je<jj_`e6jdNe!dUee! dee! d!ee! d"ee! d_e?d#eKdeKdee!e!e!f fd+d,Ze<jj_`e6jdrdee! fd-d.Ze#e<jj_dNe!dUee! dee! d!e!d"e!d#eKdeKdee!e!e!f fd/d0Ze#e<jj_dNe!dUee! dee! d!e!d"e!d_e?d#eKdeKdee!e!e!f fd1d2Ze#e<jjdNe!dUee! dee! d_e?d#eKdeKdee!e!e!f fd3d4Ze#e<jj_dNe!dUee! dee! d!e!d"e!d_e?d#eKdeKdee!e!e!e!e!f fd5d6ZdNe!dUee! dee! d!e!d"e!deKd_e?de!fd7d8Ze#e<jj_dNe!dUee! dee! d!e!d"e!d#eKdeKdee!e!e!e!f fd9d:Ze#e<jj_dNe!dUee! dee! d!e!d"e!d#eKdeKdee!e!e!e!e!e!f fd;d<Ze#e<jj_dNe!dUee! dee! d!e!d"e!d#eKdeKdee!e!e!e!f fd=d>Ze#e<je/ddЃeCdpd?d@Ze#e<je/ dddddddAde!dneejg dBeej dCe?dDe?dEeej fdFdGZe#e<je<je<jge/ dHdI Ze<jj_`e6je#e<je/ddАddJdNe!dUe!dee! d!ee! d"ee! d_e?dKeKdLeKfdMdNZdOdP Ze#e<jj_de!dNe!dUee! d!ee! d"ee! d'ee! d(ee! de?deKdee? dQe!dee!ee! ee! f fdRdSZe#e<jj_de!dNe!dUee! d!ee! d"ee! d'ee! d(ee! de?deKdee? dee!ee! ee! f fdTdUZe#e<jjrde!dNe!dUee! d!ee! d"ee! d'ee! d(ee! de?deKdee? dej!dej!dej!dee!ee! ee! f fdVdWZe#e<jƃe/ddАddNe!d)e!dUe!d!ee! d"ee! d'ee! dXee! dLeKfdYdZZe#e<jǃe/ddАddNe!d)e!dUe!d!ee! d"ee! d'ee! dXee! dLeKd[e!fd\d]Ze#e<jȃe/ eCdNe!deeFeFf fd^d_Ze#e<jʃdd`de)deFde)dae)d*e'f
dbdcZe#e<j˃e/ dd`de)deFde)dae)d*e'f
dddeZdd`de)deFde)dae)dfe?d*e'fdgdhZe#e<jj_e<jj_`e6jdvdjdkZe#e<j΃de)deFde)dae)fdldmZe#e<jσe/ de)deFde)dae)fdndoZde)deFde)dae)dfe?f
dpdqZe#e<jуe/ddgeCd6e!dee!e!f fdrdsZe#e<j҃e/ 	i	v	dwde!dtee?eFeKf duee?eFeKf d`eejb fdvdwZe#e<jӃdxdxdyZӐdzd{ ZԐd|d} Ze#e<jj׃e#e<jj׃e#e<jj׃e<jjנ`e6je<jjנ`e6je<jjנ`e6je<jjנ`e6je<jjנ`e6je<jjנ`e6jdNe!deeeF  d~eeeK  de!fddZe#e<jj׃e#e<jj׃e#e<jj׃e<jjנ`e6je<jjנ`e6je<jjנ`e6je<jjנ`e6je<jjנ`e6je<jjנ`e6jdNe!deeeF  d~eeeK  de!fddZސdkddZe#e<jj_e<jjrge<jj_`e6je<jj_`e6je/ddd	dpdNe!deeF deeK de!fddZe#e<jj_e<jjrge<jj_`e6je<jj_`e6je/ddd	dpdNe!deeF deeK de!fddZe#e<jj_e<jjrge<jj_`e6je<jj_`e6je/ddd		dydNe!deeF deeK deeK de!f
ddZe#e<jj_e<jjrge<jj_`e6je<jj_`e6je/ddd		dydNe!deeF deeK deeK de!f
ddZe#e<jj_e<jjrge<jj_`e6je<jj_`e6je/ddd			dzdNe!deeF deeK deeK deeK de!fddZe#e<jj_e<jjrge<jj_`e6je<jj_`e6je/ddd			dzdNe!deeF deeK deeK deeK de!fddZeC	dkdNe!deeF deeeK  de?de!f
ddZdd Zdd Zdd Zdd Z	dkddZdd Zdd ZdkddZdkddZdd Ze#e<jje<jj`e6je<jj`e6jdd Ze#e<jje<jj`e6je<jj`e6jdd Ze#e<jje<jj`e6je<jj`e6jdd Ze#e<jje<jj`e6je<jj`e6jdd Zdd ZdkddZdkddZdd Ze#e<jje<jj`e6je<jj`e6jdd Ze#e<jje<jj`e6je<jj`e6jdd Zdd ZdÐdĄ Ze#e<jje<jj`e6je<jj`e6jdŐdƄ Ze#e<jje<jj`e6je<jj`e6jdǐdȄ Ze#e<j j׃e<j jנ`e6je<j jנ`e6jdɐdʄ Ze#e<jj׃e<jjנ`e6je<jjנ`e6jdːd̄ Ze#e<jj׃e#e<jj׃e<jjנ`e6je<jjנ`e6je<jjנ`e6je<jjנ`e6je<jjנ`e6je<jjנ`e6jd͐d΄ Ze#e<jj_e<jjrge/ 	dpdNe!deeF de?deeK de!f
dАdфZe#e<jj_e<jjrge<jj_`e6je/ 		dydNe!deeF de?deeK deeK de!fdҐdӄZe#e<jj_e<jjrge/ 			dzdNe!deeF de?deeK deeK deeK de!fdԐdՄZdpd֐dׄZdؐdل Z	dee! dee! de!de!fdݐdބZ
de*de!fdߐdZeCdNe!deeF de?deeeK  de!f
ddZe#e<jj_de!de!de?fddZe#e<je<jge/ dd Ze#e<jgdd Zd6e!dqe!dUee! dkeFdeFdee!e!f fddZe#e<je/ddd6e!dqe!dUee! dkeFdeFdee!e!f fddZe#e<je/ddd6e!dqe!dUee! dkeFdeFdee!e!f fddZde!deKde!fddZde!deKde!fddZde!de*fddZde*de!de!fddZdee! de!fddZdeFde?dnejgdBejfd dZde!deFdeFde?fddZde!deFdeFdeFde?f
dd	Zde!deeF de?fd
dZde!deeF de?fddZe#e<je/ eCde!deeF de?fddZ	 	 		d{de!de!deFdeFde?de?de!fddZ e#e<j!e/ eC	 	 	d|de!de!deFdeFde?de!fddZ!e#e<j"e/ eCdd Z"e#e<j#e/ dde=jjjkfddZ#dej!dej!de?de?fdd Z$e<j%j_`e6je<j%jr`e6je/dd!dd"d#d$Z%e#e<j&j_e<j&jrge<j&j_`e6je/ eC		dydNe!deeFeFf de?d%eeK d&eeK de!fd'd(Z'e#e<j&j׃e<j&jנ`e6je<j&jנ`e6je/ eC	dpde!deeeFeFf  de?d~eeeKeKf  de!f
d)d*Z(e#e<j)e#e<j*e#e<j+eCe/ de!deeFdf de!fd+d,Z,e#e<j-e#e<j.e#e<j/eCe/ de!deeFdf de!fd-d.Z0de!deeFdf d/eeFeFeFge!f de!fd0d1Z1e#e<j2e/d2d3ddd4d5d6Z2e#e<j3e/ d}dd7d8d9Z3e#e<j4j_e<j4jrge/ dej5ddd:de'dneejg d;ej6dBeej dCe?f
d<d=Z7e#e<j4j8gdej5ddd:de'de'dneejg d;ej6dBeej dCe?fd>d?Z9e#e%d@dA Z:e#e<j;e<j;j_`e6je/ ddde=jjjkfdNe!dqe!de'dBe'dUee! dkeFde!fdCdDZ;e#e<j<e<j<j_`e6je/ddEdNe!dqe!dkeFdee!e!f fdFdGZ<e#e<j=j_	i	d~dddHdIe!dJe!d3e!dKeKdLe?dMee! d+eeK dee!e!f fdNdOZ>dPdQ Z?e#e<j@ge/ eCdtdRdSZ@e#e<jAe/ dTdU ZAe#e<jBdVdW ZBe#e<jCj_e<jCjrgdddXd6e!dneejg dee! de!fdYdZZDe#e<jEj_e<jEjFgdpd6e!deeF fd[d\ZGe#ej;j<jHdrd]d^ZHe#e<jIe/ ddd_d`daZIddbdcddZJddd_dedfZKe#e<jLe/ dgdh ZLe#e<jMdpdidjZMe?e<jNe<jO e?e<jPe<j e?e<jQe<j e?e<jRe<j@ e?e<jSe<jM e?e<jTe<jU e?e<jVe<jT e?e<jWe<jX e?e<jYe<jQ e?e<jZe<j[ e?e<j\e<j] e?e<j^e<j_ e?e<j`e<ja e?e<jbe<jc e?e<jde<je e?e<jfe<jg e?e<jhe<ji e?e<jje<jk e?e<jle<jm e?e<jne<jo e?e<jpe<jq e?e<jre<js e?e<jte<ju e?e<jve<jw e?e<jxe<jZ dS (      N)Enum)partialreduce)chainproduct)AnyCallablecastIterableListOptionalTupleUnion)	sym_floatsym_intTensorregister_decomposition)	out_dtype)IntLike
NumberTypesuggest_memory_format
TensorLikeTensorSequenceType)_maybe_convert_to_dtype_maybe_resize_out_safe_copy_outout_wrapper)_pytree)tree_map__all__c                   @   s   e Zd ZdZdZdZdS )	Reductionr         N)__name__
__module____qualname__NONEMEANSUM r*   r*   Z/home/ubuntu/SoloSpeech/.venv/lib/python3.10/site-packages/torch/_decomp/decompositions.pyr!   +   s    r!   Fftype_promotioncompute_dtype_onlyc                    s   t  fdd}|S )Nc                     sr   dd t j| i |D }tj|di\  fdd}fdd}t|| i t||}r4|S t||S )Nc                 S   s   g | ]	}t |tr|qS r*   )
isinstancer   .0xr*   r*   r+   
<listcomp>;   s
    
z-type_casts.<locals>.inner.<locals>.<listcomp>type_promotion_kindc                       t | tr
|  S | S Nr/   r   tor2   computation_dtyper*   r+   increase_precC      

z0type_casts.<locals>.inner.<locals>.increase_precc                    r5   r6   r7   r9   )result_dtyper*   r+   decrease_precI   r=   z0type_casts.<locals>.inner.<locals>.decrease_prec)pytreearg_tree_leavesutilselementwise_dtypesr   )argskwargs	flat_argsr<   r?   rr.   r,   r-   )r;   r>   r+   inner9   s   

ztype_casts.<locals>.inner)	functoolswraps)r,   r-   r.   rI   r*   rH   r+   
type_casts4   s   rL   T)r-   r.   )r-   r2   dimreturnc                 C   s$   t ||   D ]}| d} q| S N)rangerM   	unsqueeze)r2   rM   _r*   r*   r+   _unsqueeze_to_dimf   s   rT   
grad_inputout_gradyc                 C   s   | d||     S Nr"   conj_physicalrV   rW   r*   r*   r+   tanh_backwardl      r\   c                 C   s   | |d|     S rX   rY   r[   r*   r*   r+   sigmoid_backwards   r]   r^   beta	thresholdc                 C   s.   ||   }t|| |k| | | |d  S N      ?)exptorchwhere)rV   r2   r_   r`   zr*   r*   r+   softplus_backwardz   s   "rg   grad_outputalphascaleinput_scale	is_resultself_or_resultc           	      C   sb   || }|}|}|rt |dk| | ||  | | S t |dk| | | t ||  | | S Nr   )rd   re   rc   )	rh   ri   rj   rk   rl   rm   negcoefposcoef
negiptcoefr*   r*   r+   elu_backward   s   rr   c                 C      t | |S r6   )rd   	full_likeselfvaluer*   r*   r+   fill_scalar      rx   rw   c                    s(   t   dk fdd t|  S )Nr   c                      s   d    dS )Nz@fill only supports 0-dimension value tensor but got tensor with z dimensionsrM   r*   rw   r*   r+   <lambda>       zfill_tensor.<locals>.<lambda>)rd   _checkrM   atencopyru   r*   r{   r+   fill_tensor   s
   

r   rv   c                 C   s    t jt j| d ddddd S N   r   min   maxrd   clamprv   r*   r*   r+   hardsigmoid   s    r   c                 C   s   t |dk|dk @ | d dS )Ng      g      @gUUUUUU?        rd   re   rh   rv   r*   r*   r+   hardsigmoid_backward   s
   r   min_valmax_valc                 C   s   t ||k||kB d| S )Nr   r   )rh   rv   r   r   r*   r*   r+   hardtanh_backward   s   r   c                 C   s$   | t jt j| d dddd d S r   r   r   r*   r*   r+   	hardswish   s   $r   c              
   C   s,   t |dk dt |dk| |d d  | S )Nr   r         ?r   r   r*   r*   r+   hardswish_backward   s
   r   c                 C   s   t ||kd| S rn   r   )rh   rv   r`   r*   r*   r+   threshold_backward      r   negative_slopeself_is_resultc                 C   s   t |dk| | | S rn   r   )rh   rv   r   r   r*   r*   r+   leaky_relu_backward   s   r   nonegradapproximatec                 C   s   d}d}d}|dkrO|| d }d}|| }|| }	||||	   }
t |
}d| }d| }d| }d||  }|dd| |   }|| | }| ||  S |}|| d }ddt ||   }|t || d	  }| |||   S )
Ng;f?g;f?gmBP?tanhr   gHm?r"   r   g      )rd   r   erfrc   )r   rv   r   M_SQRT2	M_SQRT1_2
M_2_SQRTPIkBetakKappax_sqx_cuberI   
tanh_innerleftrightleft_derivativetanh_derivativeinner_derivativeright_derivativekAlphacdfpdfr*   r*   r+   gelu_backward   s,   
r   inputc                 C   s:   t t|}t |}|| d||   }| ||  S rX   )rd   r   Fsoftplussigmoid)rh   r   input_tanh_softplusinput_sigmoidoutr*   r*   r+   mish_backward  s   
r   c                 C   s   | t |  S r6   )rd   r   r   r*   r*   r+   silu  s   r   c                 C   s,   ddt |   }| | d|d|    S rX   )rd   rc   )rh   rv   r   r*   r*   r+   silu_backward  s   r   weightc                 C   s   t | dk| ||  S rn   r   )rv   r   r*   r*   r+   _prelu_kernel   s   r   c                 C   s4   t |dk| ||  }t |dkd||  }||fS )Nr   r   r   )rh   rv   r   
input_gradweight_gradr*   r*   r+   _prelu_kernel_backward%  s   r         ?UUUUUU?noiseloweruppertraining	generatorc           
      C   sh   |d u sJ |r(| dk}t | ||}t|| | | }|t||d |S || d }	t | |	S )Nr   r"   r#   )r   uniformrd   re   copy_
leaky_relu)
rv   r   r   r   r   r   not_positiverG   outputr   r*   r*   r+   rrelu_with_noise0  s   r   c              	   C   s   |  t| |||||S r6   )r   r   )rv   r   r   r   r   r   r*   r*   r+   rrelu_with_noise_H  s   r   c                 C   s6   |r|| dkr|  |S || d }t| |||S )Ngư>r#   )mulr   r   )rh   rv   r   r   r   r   r   r   r*   r*   r+   rrelu_with_noise_backwardV  s   
r   bufferc                 C   sN   |dk }t |dd}t |dd}t t | }| |||d|     S )Nr   r"   rP   )rd   re   rc   abs)rh   rv   r   in_negative	max_derivsignrf   r*   r*   r+   log_sigmoid_backwardk  s
   r   loss	reductionc                 C   s0   |t jjkrt| S |t jjkrt| S | S r6   )r!   r(   rw   rd   meanr)   sum)r   r   r*   r*   r+   apply_loss_reductionx  s
   

r   dtypec                 C   s4   | t jkrt jS | t jkrt jS | t jkrt jS d S r6   )rd   	complex32float16	complex64float32
complex128float64r   r*   r*   r+   to_real_dtype  s   


r   targetc                 C   s   | | d }t ||S )Nr#   )r   )rv   r   r   r   r*   r*   r+   mse_loss  s   
r   c                 C   s,   |t jjkrd|  nd}|||  |  S )N       @)r!   r(   rw   numel)rh   r   r   r   normr*   r*   r+   mse_loss_backward  s   r   rb   c                 C   s<   | |   }t||k d|d  | |d|  }t||S )Nr   r#   )r   rd   re   r   )rv   r   r   r_   r   r*   r*   r+   smooth_l1_loss  s   	&
r   c           	      C   sZ   |t jjkrd|  nd}|| }t|}||  }t||k || | |t| S ra   )r!   r(   rw   r   rd   r   re   r   )	rh   rv   r   r   r_   r   r2   abs_x	norm_gradr*   r*   r+   smooth_l1_loss_backward  s   

r   c                 C   *   t | ||||}t||j t||ddS NT	copy_fromcopy_toexact_dtype)r   r   shaper   )rh   rv   r   r   r_   rU   resultr*   r*   r+   smooth_l1_loss_backward_out     
r   deltac              
   C   s`   |t jjkrd|  nd}|| }t|| k | |  | t||k||  | || |  S ra   )r!   r(   rw   r   rd   re   )rh   rv   r   r   r   r   r2   r*   r*   r+   huber_loss_backward  s    r   c                 C   r   r   )r   r   r   r   )rh   rv   r   r   r   rU   r   r*   r*   r+   huber_loss_backward_out  r   r   ignore_indextotal_weightc                 C   s   |  dk rdnd}|tjjkr| | } ||}t||k|d}t|}	t|	||d}	|	  |     kr=dkrDn n| |} |d urcdd t	|  D }
|j
d |
|< ||
}| | } t||k| d} |	|  S )Nr#   r   r"   g      c                 S   s   g | ]}d qS r"   r*   r1   rS   r*   r*   r+   r3     r}   z&_nll_loss_backward.<locals>.<listcomp>)rM   r!   r(   rw   rR   rd   re   
zeros_likescatterrQ   r   reshape)rh   rv   r   r   r   r   r   channel_dimsafe_targetrU   	new_shaper*   r*   r+   _nll_loss_backward  s    	

 

r  c           
      C   s   |  dks
J dt|  |}||}|d dks'J d| d| |d }||d|}||||}t|}d| | | |  }	||  }tj||	g|dS )Nr   z*glu does not support 0-dimensional tensorsr#   z.Halving dimension must be even, but dimension z	 is size rb   rz   )rM   rB   canonicalize_dimsizenarrowrd   r   cat)
rh   rv   rM   wrap_dimnIn	inputSize	firstHalf
secondHalfgradInputFirstHalfgradInputSecondHalfr*   r*   r+   glu_backward  s   

r  c                 C   sr  d|    krdksJ d J d|  dksJ d|  dko)|  dk}|sC|jd |jd ksCJ d|j d|j d| dksXJ d	|j d
|  df|d u si| |jd ksiJ d|tjjkr|  dkr|   dkr| jd |jd ksJ d|jd  d|    d| jd  n|   dkr|  dksJ d| j t| ||||||S )Nr   r#   input tensor should be 1D or 2Dr"   ;0D or 1D target tensor expected, multi-target not supportedsize mismatch (got input: 
, target: ):expected total_weight to be a single element tensor, got: z (z
 elements)rP   z<weight tensor should be defined either for all or no classesz7Expected a tensor of dimension 1 and tensor.size[0] == z but got: dimension z and tensor.size[0] == z7Expected a single element grad_output tensor, but got: )rM   r   r   r!   r'   rw   r  )rh   rv   r   r   r   r   r   no_batch_dimr*   r*   r+   nll_loss_backward$  s<   ("
r  c                 C   s   |  dksJ d|   |  dksJ d|   |jd |jd kr<|jd |jd kr<|jd |jd ksHJ d|j d	|j | dks\J d
|j d|  dt| ||||||S )N   zSonly batches of spatial inputs supported (4D tensors), but got input of dimension: r   zUonly batches of spatial targets supported (3D tensors) but got targets of dimension: r   r#   r"   r  r  r  z ( z, elements))rM   r   r   r  )rh   rv   r   r   r   r   r   r*   r*   r+   nll_loss2d_backwardP  s*   r  c              	   C   s\   |d t t |  | dd |t t | | dd  }|d ur)|| }t||S )Nr"   r*   i)rd   maximumlog1pnew_fulllogr   )rv   r   r   r   r   r*   r*   r+   binary_cross_entropys  s   

r#  c                 C   sR   d}| ||  t j|d|  |d }|d ur|| }|tjjkr'||  }|S )Ng-q=r"   r   )rd   r   r!   r(   rw   r   )rh   rv   r   r   r   EPSILONr   r*   r*   r+   binary_cross_entropy_backward  s   
"r%  c                 C   s    t t |  | }t||S r6   )rd   r   rc   r   )r   r   r   r   r*   r*   r+   soft_margin_loss  s   
r&  c                 C   s6   ||  t || d  }|tjjkr||  }|S rX   )rd   r   r!   r(   rw   r   )rh   rv   r   r   rU   r*   r*   r+   soft_margin_loss_backward  s   	r'  r#   otherpc                 C   s   t j| | |dS )N)r)  )r   r   )r   r(  r)  r*   r*   r+   dist  r   r*  x1x2c           	      C   s   |  ddd}tj|tjd}| ddd}tj|tjd}t| d||gd}t|||gd}||j}|	d
 S )Nr#   rP   Tmemory_formatr   )powr   rd   	ones_likecontiguous_formatr  r   matmulmT	clamp_minsqrt)	r+  r,  x1_normx1_padx2_normx2_padx1_x2_r   r*   r*   r+   _euclidean_dist  s   r=  input_sizesstartendstepc                 C   s   |  |}t|| ||||S r6   )	new_zerosrd   slice_scatter)rh   r>  rM   r?  r@  rA  rU   r*   r*   r+   slice_backward  s   

rD  r"   c                 C   s:  |   }|dkrtdt|   |}t|  }t|  }|dkr(td|d ur.|nd}|d ur6|ntj}	|dk rC||| 7 }|	dk rM|	|| 7 }	|dk rTd}n
||| kr^|| }|	|k re|}	n
|	|| kro|| }	| 	 |||   }
|	| }|| d | ||< ||  |9  < | j
rtd| |||
S )Nr   z,slice() cannot be applied to a 0-dim tensor.zslice step must be positiver"   z<Slice decomposition for quantized tensors aren't implemented)rM   RuntimeErrorrB   r	  listr
  stridesysmaxsizestorage_offsetis_quantizedNotImplementedError
as_strided)rv   rM   r?  r@  rA  ndimsizesstrides	start_valend_valrJ  lenr*   r*   r+   slice_forward  s>   	rT  indexc                 C   s   |  |}t|| ||S r6   )rB  rd   select_scatter)rh   r>  rM   rU  rU   r*   r*   r+   select_backward  s   
rW  offsetdim1dim2c                 C   s   |  |}t|| |||S r6   )rB  rd   diagonal_scatter)rh   r>  rX  rY  rZ  rU   r*   r*   r+   diagonal_backward  s   
r\  input_dtypec                 C   s   | j |kr
||}|S r6   )r   r8   )rh   rU   r]  r*   r*   r+   _cast_grad_to_input_dtype  s   

r^  r   c                 C   s0   | | }||t j||dd  }t| || S NTrM   keepdim)rd   r   r^  
contiguous)rh   r   rM   r]  new_grad_outputrU   r*   r*   r+   _softmax_backward_data#  s
   
rd  c                 C   s*   | t |t j| |dd  }t| ||S r_  )rd   rc   r   r^  )rh   r   rM   r]  rU   r*   r*   r+   _log_softmax_backward_data5  s   
re  c           
      C   sZ   | |d  ||d   }t tjtj|d}|d||d}|d|| |d}	||	 S )z/Utility function to implement im2col and col2imr#   r"   r   devicer   rP   )r   rd   arangeint64rR   )
input_dkernel_d
dilation_d	padding_dstride_drg  blocks_d	arange_kwblocks_d_indiceskernel_gridr*   r*   r+    _im2col_col2im_indices_along_dimA  s
   rs  kernel_sizedilationpaddingrG  c              	      s&  t tdkdd  t t dkdd  t tdkdd  t tdkdd  ddd	}|d
 | d | ddd |d | jt}t |dv odtdd dd  D fdd tdd tdd   D t tdd D  fdd |dk}|s| d} | j\}}	}
}\}}\}} \}}\}}t|
||||| j	}t|||||| j	}t
| ||||f}|dd}|d d d d ||f }|dddddd}|d}|d}|||	| | || }|s|d}|S ) Nr#   c                   S      dS )Nz"im2col(): only 2D kernel supportedr*   r*   r*   r*   r+   r|   ]      zim2col.<locals>.<lambda>c                   S   rw  )Nz$im2col(): only 2D dilation supportedr*   r*   r*   r*   r+   r|   ^  rx  c                   S   rw  )Nz#im2col(): only 2D padding supportedr*   r*   r*   r*   r+   r|   _  rx  c                   S   rw  )Nz"im2col(): only 2D stride supportedr*   r*   r*   r*   r+   r|   `  rx  Tc                 S   <   |rt dd | D nt dd | D }t|dd  d S )Nc                 s       | ]}|d kV  qdS r   Nr*   r1   r)  r*   r*   r+   	<genexpr>c      z1im2col.<locals>.check_positive.<locals>.<genexpr>c                 s       | ]}|d kV  qdS r{  r*   r|  r*   r*   r+   r}  c  r~  c                   S   rw  )Nz<{param_name} should be greater {'than' zero, but got {param}r*   r*   r*   r*   r+   r|   e  rx  z0im2col.<locals>.check_positive.<locals>.<lambda>allrd   r~   param
param_namestrictcondr*   r*   r+   check_positiveb     (zim2col.<locals>.check_positivert  ru  rv  Fr  rG  r   r  c                 s       | ]}|d kV  qdS r{  r*   r1   dr*   r*   r+   r}  p  r~  zim2col.<locals>.<genexpr>r   c                         dt   S )NzmExpected 3D or 4D (batch mode) tensor for input with possible 0 batch size and non-zero dimensions, but got: tupler*   r   r*   r+   r|   q      c                 s   s>    | ]\}}}}}d |d|  ||d    d  |  V  qdS )r"   r#   Nr*   r1   r   paddilkerstr*   r*   r+   r}  t  s
    "
r/  c                 s   rz  r{  r*   )r1   cr*   r*   r+   r}  {  r~  c                      s6   dt dd   d d  d d d dS )	Nz!Given an input with spacial size r/  , kernel_size=, dilation=
, padding=	, stride=z9, the calculated shape of the array of sliding blocks is z*, but its components must be at least one.r  r*   ru  rt  output_sizerv  r   rG  r*   r+   r|   |  s    r  r   rP   r"   r      T)rd   r~   rS  r   r  r  ziprR   rs  rg  r   r  permuter
  r  squeeze)r   rt  ru  rv  rG  r  rN  batched_input	batch_dimr  input_hinput_wstride_hstride_w	padding_h	padding_w
dilation_h
dilation_wkernel_hkernel_wblocks_row_indicesblocks_col_indicespadded_inputr   num_blocks_rownum_blocks_colr*   r  r+   im2colT  sd   	



 




r  r  c              
      s  t tdkdd  t tdkdd  t tdkdd  t tdkdd  t tdkdd  d$d	d
}|d |d |ddd |d |d | jt}t |dv outdd dd  D fdd d d  }t d | dkfdd dd tD }	|	d |	d   t d  k fdd t  dk fdd |dk}
|
s| d} | j\}}\}}\}}\}}\}}| d d | g |	 } | dddd dd!} t	|||||| j
}t|d }t	|||||| j
}d"d tD }| d d t g| }d d ||f}tj||| dd#}t|| | | | f}|
sf|d}|S )%Nr#   c                   S   rw  )Nzonly 2D output_size supportedr*   r*   r*   r*   r+   r|     rx  zcol2im.<locals>.<lambda>c                   S   rw  )Nzonly 2D kernel supportedr*   r*   r*   r*   r+   r|     rx  c                   S   rw  )Nzonly 2D dilation supportedr*   r*   r*   r*   r+   r|     rx  c                   S   rw  )Nzonly 2D padding supportedr*   r*   r*   r*   r+   r|     rx  c                   S   rw  )Nzonly 2D stride supportedr*   r*   r*   r*   r+   r|     rx  Tc                 S   ry  )Nc                 s   rz  r{  r*   r|  r*   r*   r+   r}    r~  z1col2im.<locals>.check_positive.<locals>.<genexpr>c                 s   r  r{  r*   r|  r*   r*   r+   r}    r~  c                   S   rw  )Nz9{param_name} should be greater than zero, but got {param}r*   r*   r*   r*   r+   r|     rx  z0col2im.<locals>.check_positive.<locals>.<lambda>r  r  r*   r*   r+   r    r  zcol2im.<locals>.check_positivert  ru  rv  Fr  rG  r  )r#   r   c                 s   r  r{  r*   r  r*   r*   r+   r}    r~  zcol2im.<locals>.<genexpr>r/  c                      r  )NzmExpected 2D or 3D (batch mode) tensor for input with possible 0 batch size and non-zero dimensions, but got: r  r*   r  r*   r+   r|     r  r   r"   c                      s   dd  d  S )Nz|Expected size of input's first non-batch dimension to be divisible by the product of kernel_size, but got input.shape[-2] = r/  z and kernel_size=r*   r*   )rt  r   r*   r+   r|     s
    c                 S   s:   g | ]\}}}}}d |d|  ||d    d  |  qS r"   r#   r*   r  r*   r*   r+   r3     s    "zcol2im.<locals>.<listcomp>rP   c                      4   d d d d d d  dd  d	S 
NzGiven output_size=r  r  r  r  z , expected input.size(-1) to be 	 but got rP   .r*   r*   Lru  rt  r  rv  r   rG  r*   r+   r|         c                      r  r  r*   r*   r  r*   r+   r|     r  r   r  r  c                 S   s   g | ]
\}}|d |  qS r#   r*   )r1   or)  r*   r*   r+   r3     s    
accumulater  )rd   r~   rS  r   r  r  rR   r  r  rs  rg  rT   rB  prodr   _unsafe_index_putr   r  r  )r   r  rt  ru  rv  rG  r  rN  prod_kernel_sizecolr  out_hout_wr  r  r  r  r  r  r  r  indices_rowindices_coloutput_padded_sizer   idxr*   r  r+   col2im  s   




 



"

r  maskc                 C   s$   | | | |  jt| d}|S Nr-  )type_asclonerB   r   )rh   r  rj   rG   r*   r*   r+   native_dropout_backward	  s   	r  
input_size	dimensionr
  c           	      C   s   t |dkrt| dS tt ||}tj|| | jtjd}|d||	 }| 
d|d 	||d } | |}d| |f }tj||| dd S )Nr   rg  r   rP   r"   r6   Tr  )rS  rd   squeeze_copyrB   r	  rh  rg  int32unfoldflattenmovedimrB  r   r  rb  )	r   r  r  r
  rA  rM   r  rU   rU  r*   r*   r+   unfold_backward  s   
r  epsc              	   C   st   |d ur|}d| }t t ||k||k| |d|   dS t t |dk|dk| |d|   |dtdS )Nrb   r   r*   nan)rd   re   logical_andr!  float)rh   rv   r  lohir*   r*   r+   logit_backward+  s   r  trainc                 C   s&   |r|dkrt | ||d S |  S rn   )r   native_dropoutr  )r   r)  r  r*   r*   r+   dropout@  s   r  out0out1c                 C   s   |r6|dkr6|dkrt | t j| t jdfS | jjstdt | |k}||  tdd|   }||fS | t j| t jdfS )Nr   r"   r   z?result type Float can't be cast to the desired output type Longrb   )	rd   r  boolr   is_floating_pointrE  	rand_liker  r1  )r   r)  r  	bool_maskresr*   r*   r+   r  J  s   r  half_to_floatc                 C   s   |   } |r| jtjksJ tj| tjjd\}}| |} | 	 dkr*t
| }ntj| |dd}t
| | }|tj||dd }|sJ||}|S Nr4   r   Tra  )rb  r   rd   halfrB   rC   ELEMENTWISE_TYPE_PROMOTION_KINDDEFAULTr8   r   rc   amaxr   )r2   rM   r  r;   r>   unnormalizedx_maxr   r*   r*   r+   _softmax[  s   


r  c           	      C   s   |   } |r| jtjksJ tj| tjjd\}}| |} | 	 dkr'| }ntj
| |dd}| | }ttjt||dd}|| }|sL||}|S r  )rb  r   rd   r  rB   rC   r  r  r8   r   r  r"  r   rc   )	r2   rM   r  r;   r>   shiftedr  shifted_logsumexpr   r*   r*   r+   _log_softmaxr  s    


r  rP   indicespadding_idxscale_grad_by_freqsparsec                 C   sJ   |   dks
J d|jdkr!| d|}|jdkr|d}|S | | S )Nr#   z'weight' must be 2-Dr"   r   )rM   rN  index_selectr  )r   r  r  r  r  r   r*   r*   r+   	embedding  s   	


r  num_weightsc                 C   s   t j| t jjd\}}| |} t|tj}|r8||f}t	|}t
j||g|dd}|| }	| |	d } t||k| j}
| |
d}| |f| j|jd   }t
j||g|dd|S )Nr  Tr  rP   r   )rB   rC   r  r  r8   r   rd   longrB  r1  r   r  rR   rT   rN  masked_fillr   )rh   r  r   r  r  r;   r>   countsonesgrad_weights_scaler  r   grad_weightr*   r*   r+   embedding_dense_backward  s&   	


r  c                 C   s   d}| D ]}||9 }q|S rX   r*   )r2   rG   ir*   r*   r+   r    s   
r  tensors
num_chunksc           	      C   s   g }| D ]H}|  }|| | d | | }||| kr7dgd |j| d  d|||  g }t||d}|d | t|dg }||| q|S )Nr"   r   r#   rP   )r
  rN  r   constant_pad_ndrd   Sizeappendview)	r	  rM   r
  padded_tensorstensortensor_sizepad_along_dimr  	view_sizer*   r*   r+   
_pad_chunk  s   
r  c                 C   s(   | d j }| D ]
}|j |kr dS qdS )Nr   FTrN  )r	  rN  r  r*   r*   r+   have_same_ndims  s   

r  c                 C   sB   | d   d | }| D ]}t|  d | |kdd  qd S )Nr   c                   S   rw  )NzG_chunk_cat expects same sizes of 0,...,dim-1 dimensions for all tensorsr*   r*   r*   r*   r+   r|     rx  z+leading_dimension_matches.<locals>.<lambda>)r
  rd   r~   )r	  rM   leading_dim_sizesr  r*   r*   r+   leading_dimension_matches  s   r  c                 C   s   t |dkdd  t t| dkdd  | d j}| d j}| D ]$}t | dkdd  t |j|kdd  t |j|kdd  q"t| rVt| d 	 |}nt |dkd	d  | D ]}t ||j
k d
d  qbt| | |S )Nr"   c                   S   rw  )Nz&_chunk_cat expects positive num_chunksr*   r*   r*   r*   r+   r|     rx  z._preprocess_chunk_cat_inputs.<locals>.<lambda>r   c                   S   rw  )Nz0_chunk_cat expects a non-empty input tensor listr*   r*   r*   r*   r+   r|     rx  c                   S   rw  )Nz#_chunk_cat expects non-empty tensorr*   r*   r*   r*   r+   r|     rx  c                   S   rw  )Nz8_chunk_cat expects all input tensors with the same dtyper*   r*   r*   r*   r+   r|     rx  c                   S   rw  )Nz8_chunk_cat expects all inputs tensors on the same devicer*   r*   r*   r*   r+   r|     rx  c                   S   rw  )NzK_chunk_cat expects non-negative dim when input tensors have different ndimsr*   r*   r*   r*   r+   r|     rx  c                   S   rw  )Nz3_chunk_cat expects dim < ndim for all input tensorsr*   r*   r*   r*   r+   r|   
  rx  )rd   r~   rS  r   rg  r   r  rB   r	  rM   rN  r  )r	  rM   r
  expected_dtypeexpected_devicer  r*   r*   r+   _preprocess_chunk_cat_inputs  s:   


r  r   c                 C   sH   t | ||}t| ||}|d u rt||d S tj||d |d |S )Nr"   )r   )r  r  rd   r  )r	  rM   r
  r   r  r*   r*   r+   
_chunk_cat  s   r  split_sizesc           	         s   t tD ]}t| dd  qtttj  k fdd t}g }d}ddlm	} t |D ]}| }||| j  k |
 || ||7 }q9|S )Nc                   S   rw  )NzCsplit_with_sizes expects split_sizes have only non-negative entriesr*   r*   r*   r*   r+   r|   )  rx  z"split_with_sizes.<locals>.<lambda>c                      s   dt  dj   S )NzSplit sizes add up to z but got the tensor's size of )r   r   r*   rM   rv   r  r*   r+   r|   .      r   )expect_true)rQ   rS  rd   _check_is_size_check_with
ValueErrorr   r   %torch.fx.experimental.symbolic_shapesr   r  r  )	rv   r  rM   r  
num_splitssplits	start_idxr   lengthr*   r  r+   split_with_sizes   s(   
r)  c                 C   sV   t | ||d}|d u rdd |D S t||D ]\}}t||j t||dd qd S )Nrz   c                 S   s   g | ]	}|j tjd qS )r-  )r  rd   r2  )r1   sr*   r*   r+   r3   M  r  z)split_with_sizes_copy.<locals>.<listcomp>Tr   )r)  r  r   r   r   )rv   r  rM   r   r&  r   splitr*   r*   r+   split_with_sizes_copyB  s   	r,  
split_size.c                 C      t j| ||S r6   )r   r+  r   )r   r-  rM   r*   r*   r+   unsafe_splitU  s   r/  c                 C   r.  r6   )r   r)  default)r   r  rM   r*   r*   r+   unsafe_split_with_sizesZ  s   r1  c                    s   | j }|| } dkr|dksJ | fS |  d   }ddlm} ||} fddt|D }  | |  |d< t| ||S )Nr   r"   )	guard_intc                       g | ]} qS r*   r*   r1   r  r-  r*   r+   r3   n  r}   zsplit.<locals>.<listcomp>rP   )r   r$  r2  rQ   rd   r+  )rv   r-  rM   r>  dim_sizechunksr2  r  r*   r5  r+   r+  a  s   r+  tensor_indices_or_sectionsc                    s   |j jdksJ |jtjksJ |  t dkp dk fdd  dkr9| }t|t	s3J | 
||S dd |D }| 
||S )Ncpur"   r   c                      s   d  dS )Nz{tensor_split expected tensor_indices_or_sections to be a zero-dimensional or one-dimensional tensor, but got a tensor with z dimsr*   r*   	split_dimr*   r+   r|     s    zAtensor_split_tensor_indices_or_sections_py_impl.<locals>.<lambda>c                 S   s   g | ]}|  qS r*   )itemr4  r*   r*   r+   r3         zCtensor_split_tensor_indices_or_sections_py_impl.<locals>.<listcomp>)rg  typer   rd   ri  rM   r~   r<  r/   r   tensor_split)rv   r8  rM   sectionsr  r*   r:  r+   /tensor_split_tensor_indices_or_sections_py_impls  s   

rA  mat1mat2c                 C   H   |   s|  st|}t|}|t|| }|dkr|S |||   S rn   )r  
is_complexintrd   mm)rv   rB  rC  r_   ri   r   r*   r*   r+   addmm  s   rH  use_geluc                 C   s<   t | ||||}|r| jrtj|ddS t|S t|S )Nr   )r   )rH  is_cudar   gelurelu)rv   rB  rC  r_   ri   rI  r   r*   r*   r+   _addmm_activation  s   

rM  vecc                 C   rD  rn   )r  rE  rF  rd   mv)rv   rB  rN  r_   ri   r   r*   r*   r+   addmv  s   rP  r   rstdgammaNCHxWgroupoutput_maskc
              	      s  t j| ||dd t j|| dd t j|dd t|    k fdd tjfkfdd td u pJ  k fdd t \}
}t|dk fdd t| |	 j
d	gd
}| 	 j
d	gd
}d }d }d }|	d r:d|
  }d urt|d|

d	}t|d|

d	}t|dd|
}n&||

d	}||

d	}t|dtjd|
f|jd}| | | | | | }|  || |  }|d}t|d}t|d}t| |
|t||
| | }||j|j}|	d r_|	|
|	|
d  |d j
dgd
 }|	d	 rk|j
dgd
}|||fS )NF)allow_cpu_scalar_tensorsc                      s   d    dS )NzExpect input to have z	 elementsr*   r*   )rT  rU  rS  r*   r+   r|     r=  z,native_group_norm_backward.<locals>.<lambda>c                      s   d  d dj  S )NzExpect mean to have shape (, z
, but got r  r*   )rS  rV  r   r*   r+   r|         c                      s$   d  dd ur   S d S )NzExpect gamma to have z elements but got rP   )r   r*   )rT  rR  r*   r+   r|        $ r   c                      s   d  d S )NzExpect number of channels z, to be evenly-divisible by number of groups r*   r*   )rT  rV  r*   r+   r|     r}   r#   rz   rb   rP   r"   rg  r  )rB   check_same_devicecheck_same_shaperd   r~   r   r   divmodr   r  r   rR   r  r  rg  rT   r8   r   )rh   r   r   rQ  rR  rS  rT  rU  rV  rW  cpg_remdsdbd_inputd_gammad_biasr*  ds_valdb_valc1c2c3r*   )rT  rU  rS  rR  rV  r   r+   native_group_norm_backward  s   
 
""



$

rl  out2c
                C   d   t | |||||||||	
}|
||f}t|D ]\}}|d ur/t|| |j t||| dd q|S r   )rl  	enumerater   r   r   )rh   r   r   rQ  rR  rS  rT  rU  rV  rW  r  r  rm  r   rU   r  rG   r*   r*   r+   native_group_norm_backward_out$  s   
rp  c                 C   s   | d ur	|  |S | S r6   r8   )r2   r   r*   r*   r+   _maybe_castA  s   
rr  grad_outnormalized_shapebiasc           !         sL  |j }| }	t|j  fdd| |||fD \}
}}}|
d us$J |	t| }||d  }|d | }g }g }t|	D ]}||krJ|| q>|| q>t|}t|}|dks`|dkr|d ri|	|nd |d rw|	||d  nd |d r|	||d  fS d fS t
|| }t
|| }|| | }|d ur|
| }n|
}|| }t||d}t||}t||d}t||}|| | }d }d }d } |d r|| | }|d r|d urt|dkrt|
| |d}n|
| }|d r|d urt|dkrt|
|d} n|
 } t||jt||jt| |jfS )Nc                 3   s*    | ]}|d ur|   n|V  qd S r6   )r8   rb  r0   r:   r*   r+   r}  V  
    
z-native_layer_norm_backward.<locals>.<genexpr>r   r"   r#   TF)r   rM   rB   get_computation_dtyper   rS  rQ   r  r  rB  rT   rd   r   r   r  rr  )!rs  r   rt  r   rQ  r   ru  rW  input_shape
input_ndimgrad_out_cast
input_castweight_cast	bias_castaxis
inner_dims
outer_dimsinner_dim_indicesouter_dim_indicesr  rS  Mx_hat
grad_x_hatabri  rj  rk  rI   rd  d_weightrf  r*   r:   r+   native_layer_norm_backwardH  sl   





r  c             	   C   s`   t | |||||||}||	|
f}t|D ]\}}|d ur-t|| |j t||| dd q|S r   )r  ro  r   r   r   )rs  r   rt  r   rQ  r   ru  rW  r  r  rm  r   rU   r  rG   r*   r*   r+   native_layer_norm_backward_out  s   
r  running_meanrunning_varmomentum
functionalc	                 C   sT  dgt td|   }	t| j}
|}|}|rt| j}
| j|
d}tj||	ddd\}}t	|| }| | | }t
||	}t
||	}|d ur]|| d| |  }|s]|| |d ur|  | jd  }t
||	}|||d   }|| d| |  }|s|| nT|d ur|d usJ |j|
dd}|}|j|
dd}|}|}dt||  }| jjdkr|}|}n
| d	}| d	}t||  d }t||  d }| | | }|d ur| }t||  d }|| }|d ur	| }t||  d }|| }| jjdkr|j| jd}|j| jd}|j| jd||||fS )
Nr   r#   r   T)rM   
correctionra  r"   )r   r   r9  r   )rF  rQ   rM   rB   rw  r   r8   rd   var_meanrsqrtr  r   r   r   r6  rg  r>  rB  rT   r  )r   r   ru  r  r  r   r  r  r  reduction_dimsr;   new_running_meannew_running_var	input_acc
biased_varr   rQ  r   	save_mean	save_rstdnsqueezed_varunbiased_varinvstdr*   r*   r+   native_batch_norm_helper  st   





r  r  save_invstdc              
   C   ,   t | |||||||d	\}}	}
}}||	|
fS NFr  r   r   ru  r  r  r   r  r  r   r  r  rS   r*   r*   r+   native_batch_norm  s   
r  c              
   C   sv   |d u r|d u rt | |||||S |d u rtd|d u r"td|r0t | |||||||S t | ||||||S )Nz`running_mean is None, but running_var is provided. They should both be None or both be provided.z`running_var is None, but running_mean is provided. They should both be None or both be provided.)r   _native_batch_norm_legitrE  $_native_batch_norm_legit_no_training)r   r   ru  r  r  r   r  r  r*   r*   r+   native_batch_norm_decomposition   s&   r  c                    s|   |  |}|| d |   dkr4|dkr4 fdd|D }  | |  ||d < tjjj| ||S tjjj|  |S )Nr"   r   c                    r3  r*   r*   r  r5  r*   r+   r3   K  r}   z(unsafe_chunk_py_impl.<locals>.<listcomp>)r
  rd   opsr   r1  r0  r/  r   )r  r7  rM   r6  r  r*   r5  r+   unsafe_chunk_py_implE  s   
r  c              
   C   s   t j| ||||d||S r  )r   r  r0  )r   r   ru  r  r  r  r  r*   r*   r+   r  Q  s   
r  c              
   C   r  r  r  r  r*   r*   r+   r  g  s   
r  c           
   
   C   s,   t | ||d d |||d	\}}}}	}	|||fS r  r  )
r   r   ru  r   r  r  r   r  r  rS   r*   r*   r+   !_native_batch_norm_legit_no_statsx  s   	
r  c              
   C   sP   t | |||||||d	\}}	}
}}|d usJ d|d us!J d||	|
||fS )NT#new_running_mean should not be None"new_running_var should not be Noner  )r   r   ru  r  r  r   r  r  r   r  r  r  r  r*   r*   r+   #_native_batch_norm_legit_functional  s   r  c           	   	   C   sP   t j| ||||d|}d}|t jjjkrt j| |}t j|t j| j| j	dS )a  
    Return a reserve tensor for batch norm, used only by cudnn to pass forward state to the
    backward pass. This is needed for `_batch_norm_with_update` and `_batch_norm_no_update`,
    which support a variety of backends including cudnn. We create this tensor here to get
    the correct shape in the traced graph if we detect that will call the cudnn kernel,
    and rely on DCE to avoid materializing this tensor.
    Tr   )r   layoutrg  )
rd   _C_select_batch_norm_backend_BatchNormBackendCudnn(_get_cudnn_batch_norm_reserve_space_sizeemptyuint8r  rg  )	r   r   ru  r  r  r  r   backendreserve_sizer*   r*   r+   _get_batch_norm_reserve_tensor  s   r  c              
   C   sD   t | ||||d||d	\}}}	}
}
t| |||||dd}|||	|fS )NTFr   r  r  r   r   ru  r  r  r  r  r   r  r  rS   reserver*   r*   r+   _batch_norm_with_update     
r  c              
   C   sh   t | ||||d||d	\}}}	}
}t| |||||dd}|
d us$J d|d us,J d|||	||
|fS )NTr  r  r  r  )r   r   ru  r  r  r  r  r   r  r  new_rmnew_rvr  r*   r*   r+   "_batch_norm_with_update_functional  s   r  c              
   C   sD   t | ||||d||d	\}}}	}
}
t| |||||dd}|||	|fS )NFr  r  r  r*   r*   r+   _batch_norm_no_update  r  r  c                 C   sB   |d u sJ t | |k jt jd}|| |  d|  }||fS )Nr   rb   )rd   r  r8   r  r  )r   r)  r   r  r  r*   r*   r+   _fused_dropout_decomposition  s   r  )r   r  rg  
pin_memorynon_blockingr.  rg  r  r  r.  c                C   s   |r|t jksJ d|rJ d|d u r!|d u r!|d u r!|  S d}|d urE|| jkrE|d ur>|jdkr>t j| |} d}t j| |} |d urT|sTt j| |} d}|d ur_t j| |dS | S )NTODOFr9  Tr-  )rd   stridedr  rg  r>  _primsconvert_element_type
device_put)r2   r   r  rg  r  r  r.  dtype_convertedr*   r*   r+   _to_copy  s    r  c                 C   s
   t | S r6   )r   aliasr9   r*   r*   r+   nop_decomposition;  s   
r  out3exponential_average_factorepsilonc              
   C   s^   t | |||||||\}}	}
|r||	|
| jdtjdfS ||d|d| jdtjdfS )Nr  r   )r   r  rB  rd   r  )r   r   ru  r  r  r   r  r  r  r  r  r*   r*   r+   cudnn_batch_normC  s"   
r  c                 C   s@   t |D ]\}}|dkr|| jk r| j| |ks| |} q| S rX   )ro  rN  r   rR   )r2   broadcast_maskr~  r  r*   r*   r+   _broadcast_batch_norm_backwarde  s
    
r  r  c                 C   s   t | |||||||||	
S r6   )native_batch_norm_backward)rs  r   r   r  r  r  r  r  r  rW  r  r*   r*   r+   batch_norm_backwardl  s   r  c
           &         s  |j }
|d ur|j }n|
}t|j   fdd| ||||||fD \}}}}}}}|j}| }|dks9J dd}tt|||  }|}|}|rV|d urS|d usUJ n|d ur^|d us`J |}t|| }dg| }|| ||< g }t	|D ]}||kr|
| qzt||}d| }t||}t|||  |}t|| |}tt|| || |} |d u rt||d }!nt|| |}!|r|| |  }"||" | |! }#n||! }#|	d r|| }$nd }$|	d r|}%nd }%|#|
t|$|t|%|fS )Nc                 3   s&    | ]}|d ur|  n|V  qd S r6   rq  r0   r:   r*   r+   r}    s
    
z-native_batch_norm_backward.<locals>.<genexpr>r#   z$rank of the input must be at least 2r"   rb   )r   rB   rw  r   rM   r  rF  rd   r  rQ   r  r  r   r   r8   rr  )&rs  r   r   r  r  r  r  r  r  rW  r]  weight_dtyperz  r{  r|  running_mean_castrunning_var_castsave_mean_castsave_invstd_castrx  
input_rankr~  num_featuresr   r  r  reduction_axesr  r   grad_output_sumdot_p	grad_mean
proj_scale
grad_scaleprojrU   r  	grad_biasr*   r:   r+   r    s   
	



r  c
                C   rn  r   )r  ro  r   r   r   )rs  r   r   r  r  r  r  r  r  rW  r  r  rm  r   rU   r  rG   r*   r*   r+   native_batch_norm_backward_out  s&   
r  save_varc                 C       t || |||||d|g d
S NT)TTTr   r  )r   rh   r   r  r  r  r  r  r*   r*   r+   miopen_batch_norm_backward	  s   r  reserveSpacec	           	      C   r  r  r  )	r   rh   r   r  r  r  r  r  r  r*   r*   r+   cudnn_batch_norm_backward-	  s   r  c                    s  | j  | jttdv fdd | jdd  D ]}t|dkfdd qd |d  dkrjd |d  dkrjtdd	 tdd  |D }td
d	 tdd  ||D }tjj	| ||S dd dd  fdd}|d |d \}}}}	|d |d \}
}}}| dt
|d|
f }|	s|stj|ddS dd }|||||	dd\}}|||||dd\}}d }tt|jd t|jd D ]\}}|d u r|d|d d |f }q||d|d d |f  }q|||  S )Nr  c                      
   d  S )Nz9adaptive_avg_pool2d(): Expected 3D or 4D tensor, but got r*   r*   r  r*   r+   r|   R	     
 z%adaptive_avg_pool2d.<locals>.<lambda>r/  r   c                      s   dt   dS )Nzjadaptive_avg_pool2d(): Expected input to have non-zero size for non-batch dimensions, but input has shape r  r  r*   r  r*   r+   r|   W	  s    rP   c                 s   s    | ]	\}}|| V  qd S r6   r*   )r1   r  r  r*   r*   r+   r}  ]	      z&adaptive_avg_pool2d.<locals>.<genexpr>c                 s   s&    | ]\}}}||d  |  V  qdS )r"   Nr*   )r1   r  r  r*  r*   r*   r+   r}  ^	  s    
c                 S   s   t j| | |ddS )Ntruncrounding_moderd   divr  r  r  r*   r*   r+   start_indexc	  s   z(adaptive_avg_pool2d.<locals>.start_indexc                 S   s    t j| d | | d |ddS )Nr"   r  r  r  r  r*   r*   r+   	end_indexf	      z&adaptive_avg_pool2d.<locals>.end_indexc                    s   t j| t jd}||| }| | d }| | }|dkp"|| dk }|r+|d7 }n|dkr3|d8 }t j| t jd}|d| }|rbt j| d |j|jd}	t ||	}||| }
|
| }n|}||||fS )Nr  r"   r   rP   rf  )rd   rh  ri  rR   scalar_tensorr   rg  minimum)in_sizeout_sizeorangei0	maxlengthin_size_modadaptive	range_maxr  maxvali1r(  )rg  r  r  r*   r+   compute_idxi	  s(   

z(adaptive_avg_pool2d.<locals>.compute_idx.r  )r   rP   rz   c                 S   s`   t |tr	| |fS |dk sJ ||dk}|dkrt|d}t| |d} t|| }| |fS )Nr   rP   r/  r  r   )r/   r   rR   rT   rd   r  )valsr(  r  r  rM   r  r*   r*   r+   
maybe_mask	  s   

z'adaptive_avg_pool2d.<locals>.maybe_mask)r  rM   r   )rg  r   rS  rd   r~   r  r  nnr  
avg_pool2drT   r   r   rQ   )r   r  r  rG  kernelr  idxhlength_hrange_max_h
adaptive_hidxwlength_wrange_max_w
adaptive_wr  r  retr  jr*   )rg  r  rN  r   r  r+   adaptive_avg_pool2dH	  sN   

(  



&r  )ri   r  c                C      t | |||d|dS )NTinplaceri   
_index_addr2   rM   rU  r  ri   r*   r*   r+   
index_add_	  s   	r  c                C   r  )NFr  r  r  r*   r*   r+   	index_add	  s   
r  r  c                   s"  t | jtjdkfdd jdkrdnd|jdkr*|ndtkfdd  dkr]t | jttkpQt 	t
  fdd |  }| jdk}|ri| dn| }d f }|rwtjntj}	|	|||dd	}
|r| S |r|
dS |
 S )
Nr"   c                         d j  dS Nz(Index should have dimension 1 or 0 (got r  r  r*   rU  r*   r+   r|   	      z_index_add.<locals>.<lambda>r   c                      s   d d d S )NzNumber of indices (z') should be equal to tensor.size(dim) (z), for dim=r*   r*   )rM   
index_sizer  r*   r+   r|   	      c                      s   dt   d dS )Nzalpha argument of type z cannot be safely cast to type !)r>  r*   )ri   python_typer*   r+   r|   	  r$  r6   Tr  )rB   canonicalize_dimsrN  rd   r~   r
  dtype_to_typer   r  is_weakly_lesser_typer>  rR   r   
index_put_	index_putr  rb  )r2   rM   rU  r  r  ri   zero_dimr+  r  r+  r   r*   )ri   rM   rU  r#  r&  r  r+   r  	  s6   	

r  r   c              
   C   s   t t| dkdd  t| }| d  }|dd  }tdd | D }|r,||f}n||f}|| }| d ||}dt| }	t|D ]+}
| |
 }t||	d||d f |}|rhtj	||d|
d}qFtj	||d|
d}qF|S )	Nr   c                   S   rw  )Nz#received an empty list of sequencesr*   r*   r*   r*   r+   r|   	  rx  zpad_sequence.<locals>.<lambda>r"   c                 s   s    | ]}| d V  qdS r{  r
  r0   r*   r*   r+   r}  	      zpad_sequence.<locals>.<genexpr>)r   r   rM   rU  )
rd   r~   rS  r
  r   r!  rQ   r   r  rV  )	sequencesbatch_firstpadding_valuesequences_sizemax_sizetrailing_dimsmax_lenout_dimsr   dim_paddingsr  currseqrowr*   r*   r+   pad_sequence	  s(   
r;  c                 C      t | |||ddS )NTr  _index_copyr2   rM   rU  r  r*   r*   r+   index_copy_
     rA  c                 C   r<  )NFr=  r>  r@  r*   r*   r+   
index_copy
  r   rC  c          
         s   t | j|}t jdk fdd | jdk}|r | dn| } jdkr, dn  d|  f }|r:tjntj}||||}	|rG| S |rN|		dS |	
 S )Nr"   c                      r  r   r  r*   r!  r*   r+   r|   
  r"  z_index_copy.<locals>.<lambda>r   r6   )rB   r'  rN  rd   r~   rR   r   r*  r+  r  rb  )
r2   rM   rU  r  r  r,  r+  r  r+  r   r*   r!  r+   r?  
  s   

r?  c                 C   sL   t | d| }t t |  }| jr| d}n|}|t | |fS )Nr*   r  )rd   r  rB  rc   r   rJ  r   )rv   r   rf   r   r*   r*   r+   log_sigmoid_forward.
  s   rD  lowhighc                 C   s$   t j| jt|t|| j| j|dS )N)rE  rF  r   rg  r   )prims_uniform_helperr   r   r   rg  )r2   rE  rF  r   r*   r*   r+   r   ;
  s   r   c                 C   s   |  t| |||S r6   )r   r   )rv   rE  rF  r   r*   r*   r+   uniform_M
  s   rI  c                 C   s   t | d }|d ur"t|d u dd  tt ||kdd  |S |d urjt|d u dd  tt ||kdd  g }t|D ]%\}}t||krZ|| |d  t|  qB|t| |d  |  qB|S tddd  d S )	Nr#   c                   S   rw  Nz9Must specify exactly one of output_size and scale_factorsr*   r*   r*   r*   r+   r|   X
  rx  z.upsample_compute_output_size.<locals>.<lambda>c                   S   rw  N r*   r*   r*   r*   r+   r|   Z
  rx  c                   S   rw  rJ  r*   r*   r*   r*   r+   r|   `
  rx  c                   S   rw  rK  r*   r*   r*   r*   r+   r|   b
  rx  Fc                   S   rw  rJ  r*   r*   r*   r*   r+   r|   k
  rx  )rS  rd   r~   ro  rF  r  r   )r  r  scale_factorsspatial_dimensionsr  r*  r*   r*   r+   upsample_compute_output_sizeS
  s.   rO  c                 C   s   | d u rd S | | S r6   r*   )scalesr  r*   r*   r+   get_scale_valueo
  s   rQ  rM  c                 C   s2   t |  ||}|r|nd gt| }t| ||S r6   rO  r
  rS  _upsample_nearestr   r  rM  osizerP  r*   r*   r+   _upsample_nearest_vecu
  s   rV  c                 C   s6   t |  ||}|r|nd gt| }t| ||ddS NTexactrR  rT  r*   r*   r+   _upsample_nearest_exact_vec
  s   rZ  c                 C   s   g }t |}|r
dnd}t|D ]I}|| }| j| |  }	|| d ur,|	|	||   n|	| }
tj|tj| jd}|| |
 tj}t|d | D ]}|	d}qL|
| q|S )Nr   r   rf  r"   rP   )rS  rQ   r   rd   rh  r   rg  r8   ri  rR   r  )r   r  rP  rY  r  num_spatial_dimsrX  r  rU  isizerj   output_indicesinput_indicesrS   r*   r*   r+   !_compute_upsample_nearest_indices
  s   $r_  )preserve_memory_formatr   rP  c                 C   s   t | ||gS r6   rS  r   r  rP  r*   r*   r+   upsample_nearest1d
  s   	rc  c                 C   s   t | ||gddS rW  ra  rb  r*   r*   r+   upsample_nearest_exact1d
     rd  scales_hscales_wc                 C   s   t | |||gS r6   ra  r   r  rf  rg  r*   r*   r+   upsample_nearest2d
  s   
ri  c                 C   s   t | |||gddS rW  ra  rh  r*   r*   r+   _upsample_nearest_exact2d
  s   rj  scales_dc                 C   s   t | ||||gS r6   ra  r   r  rk  rf  rg  r*   r*   r+   upsample_nearest3d
  re  rm  c                 C   s   t | ||||gddS rW  ra  rl  r*   r*   r+   _upsample_nearest_exact3d  s   rn  rY  c           	      C   sp   t | |||d}d d g| }t| |}|jdkr6t| }| jd }| jjdkr0|dk r0t	j
}|j|d}|S )NrX  r  r"   cudar-  )r_  r   _unsafe_indexrN  rB   r   r   rg  r>  rd   r2  rb  )	r   r  rP  rY  spatial_indicesr  r   r.  
n_channelsr*   r*   r+   rS    s   


rS  c                    sb   |r|rd n|rd n|rd nd t   dks!J t  fddtdt  D S )Nr  r  r   r#   r   c                    s    g | ]}t ||   qS r*   r  r4  
group_sizeparamsr*   r+   r3   ;  s    z!gather_params.<locals>.<listcomp>)rS  rQ   )ru  
has_biaseshas_projectionsr*   rs  r+   gather_params0  s   rx  c                 C   sh   |r!| d|  |d|  }}| d| d  |d| d  }}n| | || }}d\}}||||fS )Nr#   r"   NNr*   )ru  hiddensr  bidirectional
cur_params
cur_hiddenbidir_paramsbidir_hiddenr*   r*   r+   params_hiddens@  s   $r  c                 C   s2   ||ksJ | | d|||  | dd|S rn   )r  r  )r}  last_batch_size
batch_sizerz  r*   r*   r+   update_hidden_for_packedK  s   r  c              	   C   s4   ||kr| S ||k sJ t | |d||| fS rn   )rd   concatr  )r}  r  r  
inp_hiddenr*   r*   r+    update_hidden_for_packed_reverseQ  s   r  c                 C   s$  |d }|d }|r|d nd }	|r|d nd }
g }g }|r"|d n|d }| dd|}t| t|}|r>|d d d }|D ]-} | jd }||krLn|rVt||||}nt||||}|| |||	||
}|}|| q@|ru|  n	|| |  t	|d}|st	|dn|}||fS )Nr   r"   r#   r   rP   )
r  rd   r+  rF  r   r  r  r  reverser  )inphiddenru  rv  	hidden_fnbatch_sizesr  	ih_weight	hh_weightih_biashh_biasstep_outputrz  r  r}  	split_inpr  r   
hidden_outr*   r*   r+   one_layer_rnn_data_  s@   


r  c                        fdd}|S )Nc                    s    t ||||  S r6   r   linearr  r}  r  r  r  r  nonlinearityr*   r+   rI     s   zrnn_cell.<locals>.innerr*   r  rI   r*   r  r+   rnn_cell  s   r  c                    r  )Nc                    s$   t | ||}  t ||||  S r6   r  r  r  r*   r+   rI     s   zrnn_cell_data.<locals>.innerr*   r  r*   r  r+   rnn_cell_data  s   r  c                 C   s   |d }|d }|r|d nd }|r|d nd }	t | ||}
|r&|
dn|
}
|d}g }|
D ]}|||||||	}|| q1|rH|  t|d}||dfS )Nr   r"   r#   r   )	r   r  fliprR   r  r  rd   r  r  )r  r  ru  rv  r  r  r  r  r  r  precomputed_inputr}  r  r  r   r*   r*   r+   one_layer_rnn  s   
r  c                 C   s   |d }|d }|r|d }|d }nt | }t | }|d d}	|d d}
g }d}|	d}d}d}d}d}|  } |	 }	|
 }
t jjj| |||||	|
|||||||||}|d |d |d }}}||	d|	dffS )Nr   r"   r#   r   F)
rd   zerosr
  rR   rb  r  r   mkldnn_rnn_layerr0  r  )r  r  ru  rv  r  w0w1w2w3hxcxr  modehidden_size
num_layersr{  r1  r  outputsrW   hycyr*   r*   r+   mkldnn_one_layer_lstm  sN   


r  c
                 C   s   |r|  ddn| } g }
t|D ]^}t||||\}}}}|r'||d k r'|nd}|	| |||\}}|
| |rI|	| |||dd\}}|
| |rXt||g| d } n|} |dkrn|rn||d k rntj| |dd} q|rw|  ddn| } | |
fS )Nr   r"   r   T)r  )r  )	transposerQ   r  r  rd   r  rM   r  )r   r  ru  rv  r  r  r  r{  r1  layer_fnfinal_hiddensr  r|  r}  r~  r  fwd_inp
fwd_hiddenbwd_inp
bwd_hiddenr*   r*   r+   _rnn_helper  s,   



r  c	                 C   R   | d}	t||d}t| |	|||||||ttttjd
\}
}|
t|dfS Nr   Fr  )	unbindrx  r  r   r  r  rd   r   stackr   r  ru  rv  r  r  r  r{  r1  r  r   r  r*   r*   r+   rnn_tanh_input     
r  c	                 C   r  r  )	r  rx  r  r   r  r  rd   rL  r  r  r*   r*   r+   rnn_relu_input,  r  r  c	                 C   T   | d}	t||d}t| |	||||||dtt|ttjd
\}
}|
t|dfS Nr   Fr  r  )	r  rx  r  r   r  r  rd   rL  r  datar  r  ru  rv  r  r  r  r{  r  r   r  r*   r*   r+   rnn_relu_dataK  &   
r  c	                 C   r  r  )	r  rx  r  r   r  r  rd   r   r  r  r*   r*   r+   rnn_tanh_datan  r  r  c                 C   s   t ||||  }|d|}|d  }	|d  }
|d  }|d  }|
| |	|  }||  }|d u r;|nt ||d }||fS )Nr  r   r"   r#   r   r   r  chunkr   r   )r  r  r  r  r  	hr_weight	chunk_dimgateschunked_gatesin_gateforget_gate	cell_gateout_gater  r  r*   r*   r+   	lstm_cell  s   r  c              
   C   s   |d }|d }|r|d nd }|r|d nd }t |dkr"|d nt |dkr,|d nd }	|d d}
|d d}t| ||}|rJ|dn|}g }|D ]} t| |
||||	dd\}
}||
 qP|rk|  t	|d}||

d|
dffS )Nr   r"   r#   r   r  r  r  )rS  rR   r   r  r  r  r  r  rd   r  r  )r  r  ru  rv  r  r  r  r  r  r  r  r  r  r  r   r*   r*   r+   one_layer_lstm  s$   *r  c              
   C   s
  |d }|d }|r|d nd }|r|d nd }	t |dkr"|d nt |dkr,|d nd }
g }g }|r8|d n|d }t| t|}|rM|d d d }|d }|d }|dd||dd|}}|D ]l} | jd }t| ||} ||k r||d||| |d||| f |dd||dd|}}||krt	||d||| fd}t	||d||| fd}t
| ||||	|
dd\}}|}|| qf|r|  ||f}n|||f |  t| \}}t|dt|df}t|d}||fS )	Nr   r"   r#   r   r  r  rP   r  )rS  rd   r+  rF  r  r   r   r  r  r  r  r  r  r  )r  r  ru  rv  r  r  r  r  r  r  r  r  rz  r  r  orig_hxorig_cxr  r  r  r  hidden0hidden1r   r*   r*   r+   one_layer_lstm_data  s\   *

r  c                 C   s   dd }|| ||rt S tS )a*  Check whether we could use decompose lstm with mkldnn_rnn_layer.
    All the below conditions need to be met:
        * ``torch._C._get_mkldnn_enabled()`` returns ``True``.
        * All the input args are on CPU.
        * The dtypes of args are either torch.float or torch.bfloat16.
        * Inference.
        * ``has_projections`` returns ``False``.

    Args:
        * input: the input sequence to LSTM
        * hx: a tuple of the input hidden state and cell state ``(h_0, c_0)`` to LSTM
        * params: the weight and bias tensors of LSTM
    c           	      S   s   t j sdS | gt| tt| }dd |D }t|dkr$dS | }|t dkr1dS dd |D }|D ]}|t j	t j
fvrG dS q:| jrMdS |d d|d dk}|r_dS d	S )
NFc                 S      h | ]}|j qS r*   r\  r1   tr*   r*   r+   	<setcomp>      zEselect_one_layer_lstm_function.<locals>.use_mkldnn.<locals>.<setcomp>r"   r9  c                 S   r  r*   r   r  r*   r*   r+   r    r  r   r#   T)rd   r  _get_mkldnn_enabledrF  r   from_iterablerS  poprg  r  bfloat16requires_gradr
  )	r   r  ru  r	  devicesrg  dtypesr   rw  r*   r*   r+   
use_mkldnn  s(   
z2select_one_layer_lstm_function.<locals>.use_mkldnn)r  r  )r   r  ru  r  r*   r*   r+   select_one_layer_lstm_function  s   r  c	                 C   s   t |dks
J dt|||d d|d dk}tt|d |d }	t| ||}
t| |	||||||||

\}}tt| }|t|d dt|d dfS )Nr#   lstm expects two hidden statesr   r"   )	rS  rx  r
  rF  r  r  r  rd   r  )r   r  ru  rv  r  r  r  r{  r1  r  r  r   r  r*   r*   r+   	lstm_impl+  s$   $"r  c	                 C   s   t |dks
J dt|||d d|d dk}tt|d |d }	t| |	||||||dtt|d
\}
}tt| }|
t	|d dt	|d dfS )Nr#   r  r   r"   F)r  )
rS  rx  r
  rF  r  r  r   r  rd   r  r  r*   r*   r+   lstm_data_implM  s"   $
"r  c                 C   sr   |  dd}t||| dd}|d |d   }|d |d   }	|d |d |   }
||
 |	 |
 S )Nr   r"   r#   r   )r  r   r  r   r   r  r}  r  r  r  r  chunked_igateschunked_hgates
reset_gate
input_gatenew_gater*   r*   r+   gru_celln  s   r  c                 C   s|   t | ||dd}t |||dd}|d |d   }|d |d   }	|d |d |   }
||
 |	 |
 S )Nr   r"   r   r#   r  r  r*   r*   r+   gru_cell_dataw  s   r  c	                 C   sJ   t ||d}t| |d||||||dtt|td
\}	}
|	t|
dfS )NFr   r  )rx  r  r  r   r  r  rd   r  )r  r  r  ru  rv  r  r  r  r{  r   r  r*   r*   r+   gru_impl_data  s   r  c	                 C   sH   t ||d}t| |d|||||||tttd
\}	}
|	t|
dfS )NFr   r  )rx  r  r  r   r  r  rd   r  )r   r  ru  rv  r  r  r  r{  r1  r   r  r*   r*   r+   gru_impl  s   
r  c                 C   :   t |  ||}t|d}t|d}tjj| ||||S Nr   r"   )rO  r
  rQ  rd   r  r   _upsample_bilinear2d_aar   r  align_cornersrM  rU  scale_hscale_wr*   r*   r+   upsample_bilinear2d_aa_vec     


r  c                 C   r  r  )rO  r
  rQ  rd   r  r   _upsample_bicubic2d_aar  r*   r*   r+   upsample_bicubic2d_aa_vec  r  r  c                 C   s4   t |  ||}|r|nd gt| }t| |||S r6   )rO  r
  rS  _upsample_linear)r   r  r  rM  rU  rP  r*   r*   r+   _upsample_linear_vec  s   	r  r  c                 C   s   t | |||gS r6   r   )r   r  r  rg  r*   r*   r+   upsample_linear1d  s   r  c                 C   s   t | ||||gS r6   r  )r   r  r  rf  rg  r*   r*   r+   upsample_bilinear2d  s   r  c                 C   s   t | |||||gS r6   r  )r   r  r  rk  rf  rg  r*   r*   r+   upsample_trilinear3d  s   r  c                 C   s@   |r|dkr| d |d  S dS |d ur|dkrd| S | | S )Nr"   rb   r   r*   )r  r  r  rj   r*   r*   r+   _compute_scale  s    r  c                 C   s   |r| | S | |d  d S Nr   r*   )rj   	dst_indexr  r*   r*   r+   _compute_source_index  s   r	  srcweightsweights_precisionc                 C   sB   t dd t| |D d|d >  }||? }t|ddtjS )Nc                 s   s,    | ]\}}| tj| tj V  qd S r6   )r8   rd   r  )r1   r*  r  r*   r*   r+   r}    s    
z%_sum_tensors_uint8.<locals>.<genexpr>r"   r      )_sum_tensorsr  rd   r   r8   r  )r
  r  r  r   r*   r*   r+   _sum_tensors_uint8  s   
r  c                 C   sJ   t |  }d}t j||jd}d|d|d >   }|dk}||  S )N   r\  r   r"   i   )rd   r  r   rh  rg  r   )r  
max_weightmax_weight_precision
precisionsvaluesr  r*   r*   r+   _compute_weight_precision%  s   r  c                    s  j d d \}}j dd  }t|tjtjjd\}fddfddtt|||D }tt| \}	g }
t	ddgg  D ]# d d g fd	dt
D  }t|}t|}|
| qKtt
D ]'}|	| |  d
dfddt|
d d d |
dd d D }
qut|
dksJ |
d }t}jjdkr|dk rtj}t|tjsJ |j|d} s| }|S )Nr#   r  c           	         s   t | | |}tj|jdjd}t|| jdd}|j|jd gdg| R  }|tj	}|d j| d d}|||fS )Nr\  r   r   r   r   r"   r   )
r  rd   rh  rg  r8   r	  r   r  r   ri  )	inp_sizer  rP  nsqueezescale_factorr  x_f32r2   xp1)r  r   r   r*   r+   
get_values?  s   
z$_upsample_linear.<locals>.get_valuesc                    s,   g | ]\}\}}} |||d  | qS r   r*   )r1   r  r  r  rP  )r  n_dimsr*   r+   r3   L  s    z$_upsample_linear.<locals>.<listcomp>r   r"   c                    s(   g | ]} | d kr| n| qS r  r*   )r1   k)r  xp1sxsr*   r+   r3   V  s   ( r   rb   c                    s$   g | ]\}}|t ||   qS r*   )rd   r   )r1   v1v2)xscaler*   r+   r3   ]  s    ro     r-  )r   rS  rB   rC   r  INT_TO_FLOATro  r  rF  r   rQ   r   rp  r   r  reversedr   r8   r   rg  r>  rd   r2  r/   r   rb  r  round)r   r  r  rP  n_batchrr  	inp_sizesrS   r  xs_f32vsr  vr  r   r.  r*   )	r  r  r   r  r   r  r  r  r"  r+   r   .  sF   

"


r   r  r  c                 C   s   | j |j kS r6   r  )r  r  r*   r*   r+   is_same_sizex  ry   r,  c                 G   rs   r6   )r   r  )r2   r   rD   r*   r*   r+   _reshape_alias}  s   r-  c                 C   rs   r6   )r   rU  )r2   r  r*   r*   r+   _index  ry   r.  c                 C   sV  |   }d}|dk rd}|d ur,|dkr&dg| }|jd ||< ||}n|}| | } t||k|d}	|	|}
t| ||
| }t||k|d}|tj	j
krb|dkrb| dd}||fS |d ur|| j}t|||
|}t||k|d}| }n	||k | }|tjj
kr| }||fS |tjj
kr| | }||fS )Nr"   r#   r   r*   r   )rM   r   r  rd   re   rR   gatherr  r!   r'   rw   r!  expandr   r8   r)   r(   )rv   r   r   r   r   r  r  r   wr  safe_target_r   r   wsumr*   r*   r+   _nll_loss_forward  sB   


r4  c                 C   s   |   dkr|   dksJ d|  dksJ d|   dko%|  dk}|s?| jd |jd ks?J d| j d|j d| jd	 }|d u s_|  dkrT| |ks_J d
| d|j t| ||||S )Nr   r#   r  r"   r  r  r  r  rP   z/weight tensor should be defined either for all z7 classes or no classes but got weight tensor of shape: )rM   r   r   r4  )rv   r   r   r   r   r  	n_classesr*   r*   r+   nll_loss_forward  s    	
r6  c                 C   s   t | ||||S r6   )r4  )rv   r   r   r   r   r*   r*   r+   nll_loss2d_forward  s   	r7  Ac                 C   s    |d |  |d  |  |  d S )Nr#   r   r"   r*   r2   r8  r*   r*   r+   _upsample_cubic_convolution1  r  r:  c                 C   s(   ||  d|  |  d|  |  d|  S )Nr     r  r*   r9  r*   r*   r+   _upsample_cubic_convolution2  s   (r<  r  c           
      C   s   d}| j t dkrDtj| d|  gdd}tj| d d|  gdd}t||}t||}tj|dd\}}tj|dd\}}	|||	|fS t| d |t| |td|  |td|  |fS )Ng      r9  rb   r   rz   r   )rg  rd   r  r<  r:  r  )
r  r8  tt1tt2w03w12r  r  r  r  r*   r*   r+    _upsample_get_cubic_coefficients  s   

rA  coeffstsc                 C   s    t |}tdd t| |D S )Nc                 s       | ]	\}}|| V  qd S r6   r*   r1   ri  rj  r*   r*   r+   r}    r  z+_upsample_cubic_interp1d.<locals>.<genexpr>)rA  r  r  )rB  rC  coeffs2r*   r*   r+   _upsample_cubic_interp1d  s   rG  c                 C   s   t tj| S r6   )r   rd   add)rC  r*   r*   r+   r  	  s   r  	num_stepsc                 C   sB   | dkrt jd||dS |s| d |  nd}t j| || ||dS )Nr"   r   r  )stepsrg  r   )rd   r  linspace)rI  r  r   rg  r  r*   r*   r+   _linspace_from_neg_one  s   rL  thetahr1  c           	      C   s   | j }| j}t||||d|d}t|||||dd}tjd||d}tjjj|dddd}tjjj|dddd}tjjj|d	ddd}|| | S )
Nr"   )r"   r"   r"   rf  )r   r#   constantr   r  r  rw   r"   r"   )r#   r   	r   rg  rL  r  rd   r  r	  r  r  )	rM  rN  r1  r  r   rg  grid_xgrid_ygrid_oner*   r*   r+   _make_base_grid_4d  s   rV  r  c                 C   s   | j }| j}t||||dd|d}t||||d|dd}t|||||ddd}	tjd||d}
tjjj|dddd}tjjj|dddd}tjjj|	d	ddd}	tjjj|
d
ddd}
|| |	 |
 S )Nr"   )r"   r"   r"   r"   rf  )r   r   rO  r   rP  r  r#   r"   )r   r   rR  )rM  r  rN  r1  r  r   rg  rS  rT  grid_zrU  r*   r*   r+   _make_base_grid_5d(  s   rY  c           	      C   sL   |\}}}}t | |||d}|ddd| jd d}||||dS )Nr  rP   r   r"   r/  r#   )rV  r  r4  rR   r   )	rM  r
  r  r  rS   rN  r1  	base_gridgridr*   r*   r+   _affine_grid_generator_4d9  s    r]  c           
      C   sR   |\}}}}}t | ||||d}|ddd| jd d}	|	||||dS )NrZ  rP   r  r"   r/  r   )rY  r  r4  rR   r   )
rM  r
  r  r  rS   r  rN  r1  r[  r\  r*   r*   r+   _affine_grid_generator_5dC  s    r^  c                 C   s@   t t|dv dd  t|dkrt| ||dS t| ||dS )N)r  r  c                   S   rw  )NzCaffine_grid_generator needs 4d (spatial) or 5d (volumetric) inputs.r*   r*   r*   r*   r+   r|   S  rx  z'affine_grid_generator.<locals>.<lambda>r  rZ  )rd   r~   rS  r]  r^  )rM  r
  r  r*   r*   r+   affine_grid_generatorM  s   
r_  r\  interpolation_modepadding_mode_expand_gridc                    sJ  t dv fdd t dv fdd dtdtdtffdd	dtd
tdtdtfdddtdtdtffdddtdtdtffdd}j\ |j\}}|dkscJ ru|d| d}dtdtdtffddt jjddddt j jdd dddtdtdtdt	f fdddtdtdtffdd
|d  }	|d! }
d"kr1||	}||
}|
 |
 d }}d }}||}}|| ||  }|| ||  }|| ||  }| |  }t
fd#d$|f|||f|||f|||ffD S dkrN||	}||
}| }| }
||dS |	}|
}|
 |
 | | }sud|d}dtdtdtf
fd%d&d'tdtffd(d)	t	fd*d$td+D }t||S ),N)r   r"   r#   c                      r  )NzInvalid interpolation mode r*   r*   )r`  r*   r+   r|   l  r  z"_grid_sampler_2d.<locals>.<lambda>c                      r  )NzInvalid padding mode r*   r*   )ra  r*   r+   r|   o  r  coordsr
  rN   c                    s0    r|d d n|d }|d d }| | | S r  r*   )rc  r
  r   ofsrZ  r*   r+   unnormalizer  s   z%_grid_sampler_2d.<locals>.unnormalize	twice_low
twice_highc                 S   sv   ||kr	t | S |d }|| d }| |  }t ||}||  jt jd}t |d@ dk|| || | S )Nr#   r   r"   r   )rd   r  r   fmodfloorr8   int8re   )rc  rf  rg  
coords_mincoords_spancoords2extraflipsr*   r*   r+   reflect_coordinates}  s   
z-_grid_sampler_2d.<locals>.reflect_coordinatesc                    sf   dkr| S dkrt | d|d S  r | dd|d  }n
| dd| d }t |d|d S )Nr   r"   r#   rP   r   )rc  r
  coords_reflected)r  ra  rp  r*   r+   compute_coordinates  s   z-_grid_sampler_2d.<locals>.compute_coordinatesc                    s   | |} ||S r6   r*   )rc  r
  	coords_un)rr  re  r*   r+   compute_source_index  s   

z._grid_sampler_2d.<locals>.compute_source_indexr#   r"   r  ysc                    s,   t d| kt | k t d|k| k S rn   )rd   r  )r  ru  )iHiWr*   r+   in_bounds_cond  s   $z(_grid_sampler_2d.<locals>.in_bounds_condr\  wsc                    sN   | |r	nd t  fdd| jtjd|jtjd|fD S )Nr"   c                 3   s*    | ]}t |d  V  qdS r{  )rd   re   r  r  )rS  r  r  oHoWr*   r+   r}    rv  z1_grid_sampler_2d.<locals>.clip.<locals>.<genexpr>r   )r  r8   rd   ri  )r  ru  ry  )rT  rS  rb  rx  rz  r{  )r  r  r+   clip  s
   
z_grid_sampler_2d.<locals>.clipixiyc                    s&   | ||\}}} ||f | S r6   r*   )r}  r~  r1  idx_xidx_yw_)C_idxN_idxr  r|  r*   r+   get_summand  s   z%_grid_sampler_2d.<locals>.get_summand).r   ).r"   r   c                 3   s"    | ]\}}} |||V  qd S r6   r*   )r1   r}  r~  r1  )r  r*   r+   r}    
    

z#_grid_sampler_2d.<locals>.<genexpr>c                    s     | } |}||dS rX   r*   )r}  r~  r2   rW   )rr  r  rv  rw  r*   r+   get_value_bounded  s   

z+_grid_sampler_2d.<locals>.get_value_boundedrd  c                    sF   | d  } d | | d | d |f}t |S )Nr"   r#   )rG  )rd  iy_ofscs)r  ix_nwiy_nwtxr*   r+   	get_coeff  s   
z#_grid_sampler_2d.<locals>.get_coeffc                 3       | ]} |V  qd S r6   r*   )r1   rd  )r  r*   r+   r}    r~  r  )rd   r~   r   rF  r   r  r0  rh  rg  r   ri  r  r&  rR   r  rQ   rG  )r  r\  r`  ra  r  rb  rt  rS   twor2   rW   r}  r~  ix_neiy_neix_swiy_swix_seiy_sew_nww_new_sww_se
ix_nearest
iy_nearesttyrB  r*   )rT  r  rS  r  rb  r  r  r|  rr  r  r  r  rv  rw  rx  r`  r  r  rz  r{  ra  rp  r  re  r+   _grid_sampler_2d[  sx   
 ( 




	





 

r  c                 C   s   t | ||||dS )N)r\  r`  ra  r  )r  )r  r\  r`  ra  r  r*   r*   r+   grid_sampler_2d  s   
r  c                    s`   t   dko dk fdd t  ddk fdd   jddS )Nr#   r"   c                      s   d    d   S )Nzmatrix @ vector expected, got rY  rz   r*   rv   rN  r*   r+   r|     rZ  zmv.<locals>.<lambda>r   c                      s*   d  d d  d d d dS )Nzsize mismatch, got input (r   r2   r"   z), vec (r  r-  r*   r  r*   r+   r|     s   * rz   )rd   r~   rM   r
  r   r  r*   r  r+   rO    s   rO  c                 C   sd   |d ur|d | d }d| |  |t |   }nd| |  t |  }|d ur-|| }t||S rX   )r   
logsigmoidr   )rv   r   r   
pos_weightr   
log_weightr   r*   r*   r+    binary_cross_entropy_with_logits#  s   
r  tensor1tensor2is_outc                 C   s   | j |j kr
| |fn|| f\}}ddlm} |j dkr |j dks"dS |jr)|s)dS | j dkr0dS || dkr:dS |j}| }tdd t|d d	 |d
d |d
d D S )Nr   )guard_size_obliviousr   r#   FTc                 s   s"    | ]\}}}||| kV  qd S r6   r*   )r1   st1st2s2r*   r*   r+   r}  F  r  zshould_fold.<locals>.<genexpr>r/  r"   rP   )	rN  r$  r  r  r   r   rG  r  r  )r  r  r  t1t2r  t1_shape	t1_strider*   r*   r+   should_fold4  s    

"r  )pass_is_out)r  c                C   sl  |   }|  }|dkr|dksJ |dkr |dkr t| |S |dkr.|dkr.t| |S |dkrD|dkrDttt| d|dS |dkrR|dkrRt| |S t| ||r||k}|ra|jn| }|sg|n	|dkro| 	 n| }|j
}t|d d }	ttj|	}
|  dk}|r|	|j
d  ||
|d }|r|||	}|r|j S |S |||	S |dkr|dkr|dkr| dnd}| d}| j
d d }|dkr|dn|d}|dkr|dnd}g }t|d D ]
}||| q|dkr;|dkr;|d |d kr;|d dkr(| jr(t| d|S |d dkr;|jr;t| |dS tt||}|||g }t|}| ||||}|dk}|rp||g }||||d}n|||g }|||||}|}	|dkr|	| |dkr|	| |r||d|	S |||	S tddd	  d S )
Nr   r"   r#   rP   r/  r   Fc                   S   rw  )Nz/both arguments to matmul need to be at least 1Dr*   r*   r*   r*   r+   r|     rx  zmatmul.<locals>.<lambda>)rM   rd   dotrO  r  rG  rR   r  r4  r  r   rF  r   operatorr   r  r  r  rb  r
  rQ   r  r3  broadcast_shapesr  r0  bmmr~   )r  r  r  dim_tensor1dim_tensor2r  r  r  sizes_1output_shapefolded_dim1t2_is_matrix	t1_foldedr   r  m1batch_tensor1m2r)  batch_tensor2r  expand_batch_portiontensor1_expand_sizeexpand_batch_producttensor1_expanded
vector_rhstensor2_expand_sizetensor2_expandedr*   r*   r+   r3  L  s   	










r3  r  r  c                    s  j \}}t|d ||}t|d ||}tjtjjd\}}tj|d jdj	|d}	tj|d jdj	|d}
t
||
|}t
||	|}|d}| }| }|| dd}|| dd}|	tj}|	tj}|d ||d |d	 f}|d ||d |d	 ft|t|}d
\jtjkrtt|fddD fdd|D }fddfdd t fdd|D }jtjkrd usJ t||}ntdd t||D }t}|j|d}|S )Nr   r"   r  r\  r   rP   r   rb   r#   ry  c                    .   g | ]}|d  >  t |d  t jqS r"   r   rd   r   r8   int16r1   r1  )weights_precision_xr*   r+   r3          z.upsample_bicubic2d_default.<locals>.<listcomp>c                    r  r  r  r  )weights_precision_yr*   r+   r3     r  c                    s<   t | d d }t |dd }td d ||g}|S r  )rd   r   r   rp  )ru  r  y_idxx_idxr+  )in_hin_wr   r*   r+   load_bounded  s   z0upsample_bicubic2d_default.<locals>.load_boundedc                    sT   t  fddD }jtjkrd usJ t|S tdd t|D S )Nc                 3   s    | ]} |V  qd S r6   r*   )r1   x_ofs)r  rW   r*   r+   r}  
  r.  zCupsample_bicubic2d_default.<locals>.get_x_interp.<locals>.<genexpr>c                 s   rD  r6   r*   rE  r*   r*   r+   r}    r  )r  r   rd   r  r  r  r  )rW   src_x)r   ixs_ofsr  r  	weights_x)rW   r+   get_x_interp	  s
   z0upsample_bicubic2d_default.<locals>.get_x_interpc                 3   r  r6   r*   )r1   y_ofs)r  r*   r+   r}    r~  z-upsample_bicubic2d_default.<locals>.<genexpr>c                 s   rD  r6   r*   rE  r*   r*   r+   r}    r  r-  )r   r  rB   rC   r  r$  rd   rh  rg  r8   r	  rR   ri  r   ri  rA  r   r  r  r  r  r  r  r   rb  )r   r  r  r  r  rS   h_scale_factorw_scale_factorr   r  r  x_floaty_floatr2   rW   yscaler"  iys_ofs	weights_ysrc_yr   r.  r*   )	r  r  r  r   r  r  r  r  r  r+   upsample_bicubic2d_default  sR   




r  c                 C   s   t t|t| dkdd  |d u r2|d usJ ttttf tdd t| jdd  |D }|r6|nd\}}t	| ||||S )Nr"   c                   S   rw  )Nz:Must specify exactly one of output_size and scale_factors.r*   r*   r*   r*   r+   r|   *  rx  z(upsample_bicubic2d_vec.<locals>.<lambda>c                 s   s$    | ]\}}t t|| V  qd S r6   )r   r   )r1   r1  rj   r*   r*   r+   r}  0  s
    
z)upsample_bicubic2d_vec.<locals>.<genexpr>r#   ry  )
rd   r~   r  r	   r   rF  r  r  r   r  )r  r  r  rM  r  r  r*   r*   r+   upsample_bicubic2d_vec  s   
r  c                        fdd}t  ||S )Nc                    s4   t j|  ||  jd}|d |d |    S )Nr\  r"   )rd   rh  rg  r   r   middler   dim_idxr  r*   r+   r  ?  s   z_reflection_pad.<locals>.idx_reflection_or_replication_padr  rv  r  r*   r  r+   _reflection_pad9     r  c                    r  )Nc                    s*   t j|  ||  jd}t |d|d S )Nr\  r   r"   )rd   rh  rg  r   r  r  r*   r+   r  P  s   z_replication_pad.<locals>.idxr  r  r*   r  r+   _replication_padJ  r  r  idx_fnc                    s   t d  t|   d  d fv  fdd | j  d  }|    } fddt D } fddt D }| }t D ]}d g|  }	||| || || |	|| < t||	}qFt	|}
|j
|
d}|S )	Nr#   r"   c                      s    d  d d  d d  dS )Nreflection_padzd requires r"   zD or r#   zD inputr*   r*   rz   r*   r+   r|   c       z0_reflection_or_replication_pad.<locals>.<lambda>c                    s    g | ]}d  d |   qS rW  r*   r4  rM   rv  r*   r+   r3   h  r  z2_reflection_or_replication_pad.<locals>.<listcomp>c                    s$   g | ]}d  d |  d  qS rW  r*   r4  r  r*   r+   r3   i  r[  r-  )rS  rd   r~   rM   r   rQ   r   rp  rB   r   rb  )r  rv  r  	inp_shapenc_dimpadding_leftpadding_rightr   r  r  r.  r*   r  r+   r  [  s"   
 
r  r   r   r`  c                C   s(   t j| ||d}t j| ||d}||fS )Nr`  )rd   aminr  )rv   rM   ra  r  r  r*   r*   r+   aminmaxw  s   r  r   c                C   s"   t jtt| d| |||dS )Nr   r   )r   r   rd   re   isnan)rv   rM   ra  r   r*   r*   r+   nansum  s   "r  r   r  rg  r  r  c             	   C   s   t jjd| d||||dS )Nr   r"   r  r   rh  
start_step)r@  r   r  rg  r  r*   r*   r+   arange_default     
r  c             	   C   s   t jj| |d||||dS )Nr"   r  r  )r?  r@  r   r  rg  r  r*   r*   r+   arange_start  r  r  c                  O   s   ddl m} || i |S )Nr   )out_dtype_dense)!torch._higher_order_ops.out_dtyper   )rD   rE   r   r*   r*   r+   out_dtype_decomp  s   r  marginc           	         s  t t jd jd  t |dkp|dkdd  t jdko, dkfdd t jdko? kfdd d urdt t jdko\  k fdd dt jdd	}||  }|	d}|dkr|n|| }d ur|  }t j
 jd
}t |k|d}|tjjkr| S |tjjkr| |jd  S |jddS )Nr   r"   r#   c                   S   rw  )Nz only p == 1 and p == 2 supportedr*   r*   r*   r*   r+   r|     rx  z#multi_margin_loss.<locals>.<lambda>c                         d j  S NzMExpected non-empty vector or matrix with optional 0-dim batch size, but got: r  r*   )r   r*   r+   r|         c                         d  dj  S )Nz#inconsistent target size, expected r  r  r*   )nframer   r*   r+   r|     r  c                      r  )Nz#inconsistent weight size, expected r  r  r*   )rM   r   r*   r+   r|     r  r/  r\  rz   )rd   
atleast_2d
atleast_1dr   r~   rN  r   rR   r/  r5  rh  rg  re   r!   r(   rw   r   r)   r   )	r   r   r)  r  r   r   urf   r  r*   )rM   r   r  r   r   r+   multi_margin_loss  sB   







r  	is_targetc                    s  | j  |j t| } t|}| j d }tt dko |dk fdd ttdko2 k fdd tj||jd}|dk}tjt|||dd	d
}||k }t||d}tj	| d|d}	t||d}
tj
||
jddkdd}d|	jjdd |  }|d}|| }t|d|}|tjjkr|jdd }n|tjjkr| }n|jdd}|| j}||fS )Nr"   r#   r   c                      r  r  r*   r*   )orig_input_shaper*   r+   r|     r  z0multilabel_margin_loss_forward.<locals>.<lambda>c                      s   d d  S )Nzinconsistent target size: z for input of size: r*   r*   r  orig_target_shaper*   r+   r|     r}   r\  rP   Tr`  r/  rz   rb   )r   rP   )r   rd   r	  r~   rS  rh  rg  r  re   r/  anyrR   Tr5  r!   r(   rw   r   r   r)   r8   r   r  )r   r   r   rM   r  is_endend_idxtarget_masktidx0r  tidx1r  rf   r*   r  r+   multilabel_margin_loss_forward  s@   





r  )	attn_maskrj   querykey	dropout_p	is_causalr  c          
   
      s   j }ttfdd t dko# dko# dkfdd t dk fdd tjd jd koMjd jd kdd  tjj| |d |d	\}}	|	d
dj
tjd}|	d
d|	fS )Nc                      r  )Nz-query must be FP32, FP64, BF16, FP16 but got r   r*   )r  r*   r+   r|   &  r  z<scaled_dot_product_flash_attention_for_cpu.<locals>.<lambda>r  c                      s"   d   d    d   S )Nz,q, k, v must be a 4 dimensional tensor, got rY  rz   r*   )r  r  rw   r*   r+   r|   *  s   " r   c                      r  )Nz&dropout probability must be zero, got r*   r*   )r  r*   r+   r|   -  r  r   c                   S   rw  )Nz&q, k, v should have the same head sizer*   r*   r*   r*   r+   r|   1  rx  )r  r  r  dropout_maskrj   r"   r#   r-  )r   rd   r~   r  rM   r   r   "_scaled_dot_product_attention_mathr0  r  rb  r2  )
r  r  rw   r  r  r  rj   r   r   attnr*   )r  r  r  rw   r+   *scaled_dot_product_flash_attention_for_cpu  s8   
"&
+r!  c                    s   t |  fdd}|S )Nc                     s    | i |}| d  |S rn   )r   )rD   rE   r   outplace_opr*   r+   
inplace_opd  s   z$register_inplace.<locals>.inplace_opr   )aten_opr#  r$  r*   r"  r+   register_inplacec  s   r&  c                 C   sx   |   s|  st|}t|}t||}t|tjr |dkr$|| }|dkr*|S t|tjr4|dkr8| | } | | S )Nr"   r   )r  rE  rF  rd   r  r/   numbersNumber)rv   batch1batch2r_   ri   r   r*   r*   r+   baddbmml  s   r+  c                 C   s   t j| |ddS )Nri  r  r  )rv   r(  r*   r*   r+   floor_divide}  s   r,  c                 C   s   t tj| jdS rX   )rJ   r   r  r   r   )r  r*   r*   r+   	sym_numel  rB  r-  r   r   c                C   s.   |d u rt jj| g |dS t jj| g ||dS )Nr   r.  )r   r   dim_IntListIntList_out)rv   r   r   r*   r*   r+   sum_default  s   r1  c                 C   s2   |d u rt j| tt|  S t j| |gS r6   )r   r  dimsrF  rQ   rM   )rv   rM   r*   r*   r+   squeeze_default  s   r3  c                    s@   t  fddtt| jD }| jd|dd}| ||  |fS )Nc                 3   s    | ]	}| kr|V  qd S r6   r*   r4  rz   r*   r+   r}    r  z)_weight_norm_interface.<locals>.<genexpr>r#   Tr  )r  rQ   rS  r   r   )r2   rW   rM   keep_dimr   r*   rz   r+   _weight_norm_interface  s    r5  assume_uniqueinvertc                C   sp   t | tjstj| |jd} t |tjstj|| jd}| dt|  d k r0t| ||dS t| |||dS )Nr\  g      $@g(\?r8  r6  )	r/   rd   r   r  rg  r   r0  isin_defaultisin_sorting)elementstest_elementsr7  r8  r*   r*   r+   isin  s   r>  r9  c                C   sr   |   dkrtj| tjdS | jg | jd|j R  }|s#||k}n||k}ttd|j d d}|j	|dS )Nr   r   r   rP   r"   rz   )
r   rd   
empty_liker  r  r   rN  r  rQ   r  )r<  r=  r8  r2   cmprM   r*   r*   r+   r:    s   
r:  c                C   s   |   }|  }|rIt||g}tj|dd\}}|dd  |d d k}	t|	ddgd}	|r5|	 }	t|	}
|
d||	}
|
d|   S t|\}}t	||}t
|| k |d}|| |k}|rm| n|}|| jS )NT)stabler"   rP   r   F)r  rd   r  sortr  logical_notr?  rC  r   searchsortedre   r  r   )r<  r=  r7  r8  elements_flattest_elements_flatall_elementssorted_elementssorted_orderduplicate_maskr  sorted_test_elementsrS   r  test_idxr@  r*   r*   r+   r;    s$   
r;  c                 C   s   |  d}|| S rO   )r  )rv   rU  	flattenedr*   r*   r+   take  s   
rN  c                 C   s2   |d u rt j}|t jkrt|}tj| |j|dS r  )rd   r2  preserve_formatr   r   resizer   )rv   r(  r.  r*   r*   r+   	resize_as  s
   
rQ  )F)r   )r   r   FNr  )r   NNr"   r6   )rP   FFr  r{  rQ  )r"   r"   F)Fr   )r   rb   N)r   r"   Nry  )NNN)r   r   FT)r   r   Fr  )r   F(y  rJ   r'  r  rH  enumr   r   r   	itertoolsr   r   typingr   r   r	   r
   r   r   r   r   rd   torch._primsr  rG  torch._prims_common_prims_commonrB   torch.nn.functionalr	  r  r   r   r   r   torch._decompr   r  r   r   r   r   r   r   torch._prims_common.wrappersr   r   r   r   torch.utilsr   r@   torch.utils._pytreer   r  DispatchKeyr    str__annotations___opsr  r   r!   r  r  rL   r  compute_only_pw_cast_for_opmathpw_cast_for_opmathr$  pw_cast_for_int_to_realrF  rT   r\   r^   rg   r  rr   fillScalarrx   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r0  py_implAutogradCUDA	Generatorr   r   r   r   r   r   r   r(   rw   r   r   r   rU   r   r   r   r   r  r  r  r  r#  r%  r&  r'  r*  r=  rD  slicerT  rW  r\  r^  rd  re  rs  r  r  r  r  r  r  CompositeImplicitAutogradAutogradr  r  r  r  r  r  r  r  r  r  r  r)  r,  r/  r1  r+  r?  r8  rA  rH  rM  rP  rl  rp  rr  r  r  r  r  r  unsafe_chunkr  r  r  no_statsr  r  r  r  r  r  _fused_dropoutr  r  rg  r.  detachlift
lift_freshr  r  r  r  r  r  r  r  _adaptive_avg_pool2dr  r  r  r  r;  rA  rC  r?  rD  r   rI  rO  rQ  rc  rN  ri  rm  rV  _upsample_nearest_exact1drj  rn  rZ  r_  rd  rS  rx  r  r  r  r  r  r  r  r  r  rnn_tanhr   r  rnn_relur  r  r  r  r  r  r  r  lstmr  r  r  r  grur  r  r  r  r  r  r  r  r  r  r  r	  r  r  r   r,  r-  _unsafe_viewrp  r.  r4  r6  r7  r:  r<  rA  rG  r  rL  rV  rY  r]  r^  r_  r  r  rO  r  r  r3  upsample_bicubic2dr  r  reflection_pad1dreflection_pad2dreflection_pad3dr  replication_pad1dreplication_pad2dreplication_pad3dr  r  r  r  rh  r  r  r  r?  r  r  r  r  +_scaled_dot_product_flash_attention_for_cpur!  r&  r+  r,  r-  r   r1  r  rM   r3  r5  r>  r:  r;  rN  rQ  addbmm_addbmmaddmm_addmv_baddbmm_fill_gelu_rK  
hardswish_	hardtanh_hardtanhhardsigmoid___iand____and____ilshift__
__lshift__r*  r+  index_reduce_index_reduce__ior____or____irshift__
__rshift____ixor____xor__leaky_relu_r   logit_logitrelu_rL  renorm_renormround_r&  scatter_r  scatter_add_scatter_addscatter_reduce_scatter_reducesilu_r*   r*   r*   r+   <module>   s<  
(

$ 
 

 
	




  *!	
2"
	P`
 
	
%!


(


(
 00

	

W	

	
N
	
R		#

	

	


#
	

	

d	
%	$f
("$$





 




  		

.2
)


  ?
2
	
	
			

	
I"

5


 (
.$$


* 
'


*
w
S


0


0




,

<

	J	

"


