o
    ߗib                     @   s  d dl Z d dlZd dlZd dlZd dlZd dlmZmZmZmZm	Z	m
Z
mZ d dlZd dlmZ d dlmZ d dlZd dlmZmZmZ d dlmZmZmZ d dlmZ d dlmZ d dl m!Z! d dl"m#Z# d d	l$m%Z% d d
lm&Z&m'Z'm(Z( d dl)m*Z*m+Z+ ddl,m-Z-m.Z. ddlm/Z/m0Z0m1Z1 e2e3Z4ej5j6Z6ej5j7Z7ej5j8Z8ej5j9Z9ej5j:Z:eg e6j;e6j<e6j=e6j>e6j?e6j@e6jAe6jBe6jCe6jDe6jEe6jFe6jGe6jHe6jIe6jJe6jKe6jLe6jMe6jNe6jOe6jPe6jQe6jRe6jSe6jTe6jUe6jVe6jWe6jXe6jYe6jZe6j[e#e6j\e6j]e6j^e6j_j`e8jae9jbZci e ecZde6jee6jfe6jge6jhjie6jje6jke6jle6jme6jne6joe6jpe6jqjre6jse6jte6jue6jvgZweedew deeejxjyejxjzf  dedef fddZ{e{e6j|j}gdejrde~ddfddZe{e6jj}gdejrde~ddfddZe{e6jjigddddejde	ejj de	ejj ddfddZe{e6jge		dd ejrde	ejj de	ejj dejrfd!d"Ze{e6jgd#eeeejf  d$ejjd%edejrfd&d'Ze{e6jmgdd(d ejrd)ed*ejrdejrd+ejjdejrfd,d-Zme{e6jjigd#eeeejf  d.ee d%edejrfd/d0Ze{e6jgd1ejrd2ejrd3ejrd4ee d5eeee f d6eeee f d7eeee f d8ed9ee d:ed;ee de
ejrejrejrf fd<d=Ze{e6jjgdd ejrd>edejrfd?d@Ze{e6jgedAejrdBejrdejrfdCdDZe{e6jge		ddAejrdEejrdFejrdGejjd+ejjdejrfdHdIZe{e6jgedAejrdJejrdejrfdKdLZe{e6jjig	 ddMeejr d)edejrfdNdOZe{e6jgd ejrdejrfdPdQZe{e6jgdd(d ejrdRejrd+e	ejj dejrfdSdTZe{e6jgdAejrdejrfdUdVZe{e6je6jgdAejrdejrfdWdXZe{e6je7jgdAejrdYejrdejrfdZd[Ze{e6je7jgdAejrdYejrdejrfd\d]Ze{e6j		^ddAejrd)e	e d_edejrfd`daZe{e6j		^ddAejrd)e	e d_edejrfdbdcZe{e6jgdAejrd)eddedeedejrf
dfdgZe{e6jjigdAejrd#eeeejf  dejrfdhdiZe{e6jjgdAejrdjejdejrfdkdlZ	ddejrdme	ej dejfdndoZe{e6jddddpdAejrdje	ej dqe	ej dme	ej d%edejrfdrdsZe{e6jddddpdAejrdje	ej dqe	ej dme	ej d%edejrfdtduZe{e6jdddd^d^ejdvdAejrd$eeef dje	ej dwe	ej dqe	ej dxedyedmejdejrfdzd{Ze{e6jjiddddpdAejrd|edje	ej dqe	ej dme	ej d%edejrfd}d~Ze{e6jjddddpdAejrded|edje	ej dqe	ej dme	ej d%edejrfddZe{e6jjid|ed#eeeejf  d%edejrfddZe{e8jajid2ejrd3ejrdejrdejrfddZae{e9jbjid2ejrdejrdejrd3ejrdejrdejrdejrdejrdejrdedejrfddZbe{ej5j8jdejrdejrfddZe{e6jge	 	 	^ddejrdejrdedededejrfddZe{e6jj	ddAeejr deejr deejr dedeejr f
ddZe{e6jj	ddAeejr deejr deejr dedeejr f
ddZe{e6jjdeejr deejr d3ejjdeejr fddZe{e6jjdeejr deejr deejj deejr fddZe6jjiejjje{e6jd2ejrd3ejrdej	ejr dej	ejr dej	ejr dededede
ejrejrejrf fddZe ddeeedef f fddZdeeedef f fddZe{e6jdAejrdejrdejrdejrfddZe{e:jjăd2ejrdedededjejde
ejrejrf fddZe{e6jƃ	^ddAejrd*ejrdejrdedejrf
ddZe{e6jǃ	^ddAejrd*ejrdejrdedejrf
ddZe{e6jjjied1ejrdejrd)edejdejrf
ddĄZje{e6jȃddƜdAejrd)ed*ejrdejrde~dedejrfdd˄Ze{e6jɃ		 		^dd ejrdee d5e	eeee f  d6eeee f d7eeee f dede
ejrejrf fddτZe{e6jʃd ejrdee de
ejrejrf fdd҄Ze{e6jjd^d^dddӜdejrdAejjdedede	e~ de	ejr dejrfddڄZe{e6j̓			^	ddAejrdejrdededede	ej de
ejrejrf fddZdS )    N)AnyCallableDictListOptionalTupleUnion)core_aten_decompositionsget_decompositionsremove_decompositions)_grid_sampler_2d
_index_addpw_cast_for_opmath)extra_random_decomps)counters)	is_fbcode)	out_dtype)pad_listlike)elementwise_dtypesELEMENTWISE_TYPE_PROMOTION_KINDtype_to_dtype)definitely_trueguard_size_oblivious   )configinductor_prims)is_gpu,needs_fallback_due_to_atomic_add_limitationsuse_scatter_fallbackopsreturn.c                 C   s8   t | r| gn| D ]}|tv rtd|  q	t| tS )Nzduplicate decomp: %s)callabledecompositionslogwarningdecompregister_decomposition)r   op r(   [/home/ubuntu/transcripts/venv/lib/python3.10/site-packages/torch/_inductor/decomposition.pyr&   x   s
   r&   tensormsgc                 C      d S Nr(   r*   r+   r(   r(   r)   assert_async_msg_decomp      r/   c                 C   r,   r-   r(   r.   r(   r(   r)   "functional_assert_async_msg_decomp   r0   r1   )minmaxsymbolr2   r3   c                C   r,   r-   r(   )r4   r2   r3   r(   r(   r)   sym_constrain_range_for_size   s   r5   xc                 C   s(   |d ur	|  |} |d ur| |} | S r-   )	clamp_min	clamp_max)r6   r2   r3   r(   r(   r)   clamp   s
   

r9   size
fill_valuekwargsc                 K   s:   | d}|d u rtt||d< tj| |fi |S tS )Ndtype)getr   typetorchfullNotImplemented)r:   r;   r<   r=   r(   r(   r)   rA      s
   
rA   alphadimindexrD   c                C   s*   t  s| jtjkrtS t| |||d|dS )NF)inplacerD   )r   r=   r@   bfloat16rB   r   )r6   rE   rF   r*   rD   r(   r(   r)   	index_add   s   rI   physical_layoutc                    sN   dgt   }t|D ]\}}|||< qtj fdd|D fi ||S )Nr   c                    s   g | ]} | qS r(   r(   ).0lr:   r(   r)   
<listcomp>   s    z"empty_permuted.<locals>.<listcomp>)len	enumerater@   emptypermute)r:   rJ   r<   permprL   r(   rM   r)   empty_permuted   s   
&rU   grad_outputinputweight
bias_sizesstridepaddingdilation
transposedoutput_paddinggroupsoutput_maskc                 C   st   |
d r
t | jjstS t| dgttd|   }t	| |||||||||	|
d |
d dg\}}}|||fS )N   r   r   F)
r   devicer?   rB   atensumlistrangerE   convolution_backward)rV   rW   rX   rY   rZ   r[   r\   r]   r^   r_   r`   	grad_biasgrad_inpgrad_weight_r(   r(   r)   rg      s"    

rg   decimalsc                 C   s   d| }t | | d|  S )Ng      $@g      ?)rc   round)r6   rl   ten_pow_decimalsr(   r(   r)   	round_dec   s   ro   selfbatch2c                 C   s   t jr+| jjdkr+t| jd dkst|jd dkr+| d|d jdd}|S | jjdkr`t| ddkr`t|ddkr`t	d d  d7  < t
j| d|d ddd	dS tS )
Ncpur   ra   rE   inductordecompose_bmmTrE   keepdim)r   coordinate_descent_tuningrb   r?   r   shape	unsqueezerd   r:   r   r@   squeezerB   )rp   rq   outr(   r(   r)   bmm   s"   r~   mat1mat2betac                 C   s   | j jdkrtt|ddkr=t|ddkr=td d  d7  < tj|d|d dddd}|| ||   S t|ddkrtt	|dd	krtt	|dd	krttd d  d7  < |j
| jddd}|| ||   S tS )
Nrr   r   r   rs   ru   decompose_addmmTrw      )rb   r?   r   r:   r   r@   rd   r|   r{   r   TrB   )rp   r   r   r   rD   r}   r(   r(   r)   addmm  s*   	r   input2c                    sV  t jr)jjdkr)tjd dkst jd dkr)d d jddS jjdkrtddkrztddkrzt ddkrzj	 j	krzt
tt  dkrztd d	  d7  < t fd
dtdD S tddkrt ddkrtd d	  d7  < tjd d ddddS tS )Nrr   r   r   ra   rt   rs       ru   decompose_mmc                    s    g | ]}|d d f   qS r-   r(   )rK   ir   rp   r(   r)   rN   F  s     zmm.<locals>.<listcomp>Trw   )r   ry   rb   r?   r   rz   r{   rd   r:   r=   r   r@   numelr   catrf   r|   rB   )rp   r   r(   r   r)   mm0  s6   $r   tensorsc                    s   ddl m dtjdtf fdd}tt|| tdkr&d  S dt  k r4t| k r=n nt	j
 S tdkr|tfdd	D r|d }t|j} dk rb t|j n  | t | j|   d  S tS )
Nr   )r   r6   r    c                    sL   t | jdkr| jd dkrdS  t | jk r$| j  dkr$dS dS )Nr   r   FT)rO   rz   )r6   )rE   r   r(   r)   non_empty_tensor\  s
     zcat.<locals>.non_empty_tensorr   c                 3   s    | ]	}| d  u V  qdS )r   Nr(   )rK   t)filtered_tensorsr(   r)   	<genexpr>~  s    
zcat.<locals>.<genexpr>)%torch.fx.experimental.symbolic_shapesr   r@   Tensorboolre   filterrO   clonerc   r   defaultallrz   insertr{   expandflattenrB   )r   rE   r   inprz   r(   )rE   r   r   r)   r   U  s     
 r   c                 C   s~   |   rtt| jtdt| j| jS t| t	j
d\}}tjtj|| jd}t| dk |d}tt| td|S )Nnan)type_promotion_kindr=   rb   r   g        )
is_complexr@   whereisnanrealfloatatan2imagr   r   INT_TO_FLOATscalar_tensormathpirb   )r6   rk   r=   r   retr(   r(   r)   angle  s   
r   yc                C   s   t | o|  }t |o| }|r|stS |}|d ur"|| }t | j|j}dt jdt jfdd}|| | jj}|||jj}	t j	||	 dd|}
|
S )Nr*   r    c                 S   sD   | j ^ }}|d dkrtdg ||d dR }| |}|S )zNReshape tensor from [*initial_dims, last_dim] to *initial_dims, last_dim/2, 2]ra   r   zQThe size of the last dimension must be even to reshape it to [..., last_dim/2, 2])rz   AssertionErrorview)r*   initial_dimslast_dim	new_shapereshaped_tensorr(   r(   r)   reshape_tensor_complex  s   
z#add.<locals>.reshape_tensor_complex)	start_dim)
r@   	is_tensorr   rB   promote_typesr=   r   r   r   r   )r6   r   rD   x_is_complex_tensory_is_complex_tensorzcomplex_typer   
x_reshaped
z_reshapedresultr(   r(   r)   add  s   r   c                 C   s   |   rJ d| S )NzTODO: implement this)r   rp   r(   r(   r)   conj_physical  s   r   c                 C   s   | S r-   r(   r   r(   r(   r)   lift  r0   r   otherc                 C   s   t t ||| kB | |S r-   r@   r   r   rp   r   r(   r(   r)   fmin     r   c                 C   s   t t ||| k B | |S r-   r   r   r(   r(   r)   fmax  r   r   Frx   c                 C       | j tjkrtj| ||dS tS Nrw   )r=   r@   r   anyrB   rp   rE   rx   r(   r(   r)   amax     r   c                 C   r   r   )r=   r@   r   r   rB   r   r(   r(   r)   amin  r   r   startlengthc                 C   s   t | ||| S r-   )r@   narrowr   )rp   rE   r   r   r(   r(   r)   narrow_copy  s   r   c                 C   s   t | | S r-   )rc   r   r   )rp   r:   r(   r(   r)   view_copy_default  s   r   r=   c                 C   s   |  | S r-   )tor   )rp   r=   r(   r(   r)   view_copy_dtype  s   r   memory_formatc                 C   s    |t ju s	|d u rt| S |S r-   )r@   preserve_formatutilssuggest_memory_format)r*   r   r(   r(   r)   get_like_layout  s   
r   )r=   rb   r   rb   c                K   :   t jg |  f|p| j|p| jd|jt| |dS Nr   r   )r@   randr:   r=   rb   r   r   rp   r=   rb   r   r<   r(   r(   r)   	rand_like     	

r   c                K   r   r   )r@   randnr:   r=   rb   r   r   r   r(   r(   r)   
randn_like*  r   r   )r=   layoutrb   
pin_memoryrequires_gradr   r   r   r   c                C   s>   t jg |  ||p| j|p| j|p| j|djt| |dS )N)r=   r   rb   r   r   )r@   rA   r:   r=   r   rb   r   r   )rp   r;   r=   r   rb   r   r   r   r(   r(   r)   	full_like;  s   

r   highc                K   s@   t jjd|g |  f|p| j|p| jd|jt| |dS )Nr   r   r   rc   randintlowr:   r=   rb   r   r   )rp   r   r=   rb   r   r<   r(   r(   r)   randint_likeQ  s   


r   r   c                K   s@   t jj||g |  f|p| j|p| jd|jt| |dS r   r   )rp   r   r   r=   rb   r   r<   r(   r(   r)   randint_like_lowe  s   

r   c                 K   s   t jjd| |fi |S Nr   )rc   r   r   )r   r:   r<   r(   r(   r)   r   z  s   r   biasc                 C   s*   t jj|}t jj| ||| d S r   )r@   r   
_quantized$wrapped_fbgemm_pack_gemm_matrix_fp16!wrapped_fbgemm_linear_fp16_weightr:   )rW   rX   r   packed_weightr(   r(   r)   #linear_dynamic_fp16_unpacked_weight  s   r   input_scaleinput_zero_pointweight_scaleweight_zero_point	out_scaleout_zero_pointout_channelc
              	   C   s.   t jj||||}
t jj| |||
|||	S r-   )r@   r   r   _wrapped_linear_prepack#_wrapped_quantized_linear_prepacked)rW   r   r   rX   r   r   r   r   r   r   r   r(   r(   r)   wrapped_quantized_linear  s   r  packedc                 C   s^   dt jdt jfdd}|| dddf }|| ddd f }| dd df t j| | S )Nu8r    c                    st    fdddD \}}}}t jdkr&||d>  |d>  |d>  tjd S |d> |d>  |d>  | tjd S )	Nc                 3   s$    | ]} d |f  tjV  qdS .N)r   r@   int32)rK   nr  r(   r)   r     s   " zPq_embedding_bag_byte_unpack_decomp.<locals>.bitcast_u8_to_f32.<locals>.<genexpr>)r   r   ra      little   r      r  )sys	byteorderr   r@   float32)r  r6   r   r   wr(   r  r)   bitcast_u8_to_f32  s   
((z=q_embedding_bag_byte_unpack_decomp.<locals>.bitcast_u8_to_f32.i)r@   r   r   r  )r  r  scalesoffsetsr(   r(   r)   "q_embedding_bag_byte_unpack_decomp  s    r  agridinterpolation_modepadding_modealign_cornersc                 C   s@   | j t dko|dko| jtjd }t| |||||d}|S )Nrr   r   r   )r  r  r  r  _expand_grid)rb   r@   is_contiguouscontiguous_formatdecomp_grid_sampler_2d)r  r  r  r  r  r  outputr(   r(   r)   grid_sampler_2d  s   r  left_tensorsright_tensorsscalarc                 C      t jj| t j|||dS NrC   )rc   _foreach_addr   _foreach_mulrp   r   r!  r"  r(   r(   r)   _foreach_addcmul_scalar     r(  c                 C   r#  r$  )rc   r%  r   _foreach_divr'  r(   r(   r)   _foreach_addcdiv_scalar  r)  r+  start_tensorsend_tensorsc              	   C   "   t j| t jt j|| |S r-   )rc   r%  r   r&  Scalar_foreach_sub)r,  r-  rX   r(   r(   r)   _foreach_lerp_scalar     r1  scalarsc              	   C   r.  r-   )rc   r%  r   r&  
ScalarListr0  )r,  r-  r3  r(   r(   r)   _foreach_lerp_scalarlist  r2  r5  running_meanrunning_vartrainingexponential_average_factorepsilonc              
   C   sB   t | |||||||\}}	}
|r||	|
fS ||d|dfS )Nr   )rc   native_batch_norm	new_zeros)rW   rX   r   r6  r7  r8  r9  r:  r  bcr(   r(   r)   miopen_batch_norm  s    

r@  c                   C   s   i t tS r-   )r"   r   r(   r(   r(   r)   fast_random_decomps+  s   rA  c                   C   s   t jrtS t S )z"decomps can change based on config)r   fallback_randomr"   rA  r(   r(   r(   r)   select_decomp_table2  s   rC  masksourcec           
      C   s   ddl m}m} || j|jrCt| |g\} }|ddd }dd | ||fD \}}}t	|||gd}	t
||	|| jS tS )Nr   )BackendFeaturehas_backend_featurers   r   c                 s   s    | ]}|  V  qd S r-   )r   )rK   r6   r(   r(   r)   r   F  s    z!masked_scatter.<locals>.<genexpr>)codegen.commonrF  rG  rb   MASKED_SCATTER_WITH_INDEXrc   broadcast_tensorsreshapecumsum_unsafe_masked_indexr@   r   r   rz   rB   )
rp   rD  rE  rF  rG  
source_idx	self_flat	mask_flatsource_flatr   r(   r(   r)   masked_scatter9  s   rR  	quant_min	quant_maxepsc           	      C   sv   t | \}}|| t||  }t |t |g}|t || t j }t |||}|t j	|t j
fS r-   )r@   aminmaxr   r3   r   rm   r   intr9   float64int64)	rW   rS  rT  rU  r=   min_valmax_valscale
zero_pointr(   r(   r)   choose_qparams_tensorL  s   r^  
accumulatec                 C   s.   |   }t||g||j|}|| jS r-   )r   r@   	index_putrK  rz   )rp   rF   rE  r_  	flattenedr(   r(   r)   put\  s
   rb  c                 C   s   t j| |||d}| |S )N)r_  )rc   rb  copy_)rp   rF   rE  r_  r}   r(   r(   r)   put_j  s   
rd  r  input_dtypec                 C   sD   | | }t j||dd}t| ||}| j|kr||}| S )NTrw   )r@   rd   r   fmar=   r   
contiguous)rV   r  rE   re  new_grad_outputsum_new_grad
grad_inputr(   r(   r)   _softmax_backward_datau  s   

rk  Tinclude_selfsrcreduction_typerm  c                C   sr  |dkrOt | jsO| jjp| jj}t|}|r%| }t| |||}	n| ||d}t| |||}	|		|	dk d}	||||}|rK||	 S ||	 S t
tjj|| j|j|jjdr`tS | j|d d   | jd |   }
| g| j|d d  | jd | R }g t| j| | jdtd| j| R }|tj|
||}| j|||||dS )Nmeanr   r   Trl  )r   r=   is_floating_pointr   r@   	ones_likerI   
index_fill
zeros_likemasked_fillr   rc   scatter_reduce_tworb   r?   rB   rz   r   rf   ndimr   rY  repeat_interleaverK  rR   scatter_reduce)rp   rE   rF   rn  ro  rm  true_divisiononesr}   countsrepeatsindex_shaperS   scatter_indexr(   r(   r)   index_reduce  sJ   


(,,
r  kernel_size	ceil_modec           
      C   s   |dkrddg}|dkrddg}|s|}t |d}t |d}t |d}t |d}|d |d  }tjj||sA|ttjjkrCtS t	
| |||||\}}t	||d | d||}	||	fS )Nr   r   ra   rs   )r   r@   	_inductorlowering'should_fallback_max_pool2d_with_indicesiinfoint8r3   rB   prims#_low_memory_max_pool2d_with_offsets)_low_memory_max_pool2d_offsets_to_indicesr:   )
r6   r  rZ   r[   r\   r  window_sizevalsr  indicesr(   r(   r)   max_pool2d_with_indices  sB   	



r  output_sizec           	      C   s   | j ^ }}}|\}}|dks|dkr(g |||}| || j|tjdfS || dkrB|| dkrB|| || g}t| |S tS )Nr   )r=   )rz   	new_emptyr@   rY  rc   r  rB   )	r6   r  batchh_inw_inh_outw_outo_sizer  r(   r(   r)   adaptive_max_pool2d  s   r  	out_int32rightsidesortersorted_sequencer  r  r  r  c                C   s(   t j| tj|g| jd||||dd S )N)rb   r  r   )rc   searchsortedr@   r*   rb   )r  rp   r  r  r  r  r(   r(   r)   searchsorted_scalar  s   
r        ?UUUUUU?noiselowerupper	generatorc                 C   sf   |r#| dk}t j| |||d}t|| | | }t||d}	||	fS || d }
t | |
t fS )Nr   )r  r   ra   )rc   uniformr@   r   
leaky_relur   )rp   r  r  r  r8  r  not_positiverr  	noise_outnegative_sloper(   r(   r)   rrelu_with_noise_functional  s   	r  )NNr;  )r   r   )NFr-   )r   r   F)r   )F)Nr   r   F)r  r  FN)	functoolsloggingr   r  typingr   r   r   r   r   r   r   r@   torch._decomp_decompr%   torch._prims_common_prims_commonr   $torch.ao.quantization.fx._decomposedr	   r
   r   torch._decomp.decompositionsr   r  r   r   $torch._decomp.decompositions_for_rngr   torch._dynamo.utilsr   torch._environmentr   !torch._higher_order_ops.out_dtyper   torch._inductor.utilsr   r   r   r   r   r   r    r   r   r   r   r   	getLogger__name__r#   r   rc   r  	quantizedr   quantized_decomposed_adaptive_avg_pool2d_backwardindex_selectaddmvarangebitwise_and_bitwise_or_
clamp_min_dist
empty_likeflipgeluhardtanhlcmr  linalg_vector_norm_log_softmax max_pool2d_with_indices_backward_native_batch_norm_legit#_native_batch_norm_legit_functional$_native_batch_norm_legit_no_training_batch_norm_with_update"_batch_norm_with_update_functional_batch_norm_no_updatebatch_norm_backwardr<  native_group_normnative_layer_normnll_loss2d_backwardpermute_copyrrelu_with_noise_backward_softmaxsin_sqrt__to_copytril_indicestriu_indicesupsample_bilinear2dvecr   r  inductor_decompositionsr"   _unsafe_indexrM  #_unsafe_masked_index_put_accumulate+_scaled_dot_product_flash_attention_for_cpur   rk  r8   r7   rI   gluselect_scatterslice_scattersplitr   r|   rd   unbindbaddbmmdecomps_to_exclude_opsOperatorBaseOpOverloadPacketr&   _assert_asyncr+   strr/   _functional_assert_asyncr1   r5   SymInttypesNumberr9   rA   rW  rU   rg   r   rm   rl   ro   r~   r   r   r   r   r   r   r   detach_r   r   r   r   r   	view_copyr   r=   r   r   r   r   rb   r   r   r   r   r   r   	low_dtyper   r   embedding_bag_byte_unpackr  r  _foreach_addcmulr/  r(  _foreach_addcdivr+  _foreach_lerpr1  r4  r5  r@  py_impl_CDispatchKeyAutograd	lru_cacherA  rC  rR  choose_qparamsr*   r^  rb  rd  r  r  r  r  r  r  	Generatorr(   r(   r(   r)   <module>   s  $
	
 !"#$%&'(,





	




	
 


#
5


)


		

	


	


	









	 


42
