o
    "i^                     @   sX  d dl Z d dlZd dlZd dlZd dlZd dlmZ d dlZd dlmZ	 d dl
mZ d dlZd dlmZmZmZ d dlmZmZ d dlmZ d dlmZ d dlmZ d dlmZ d d	l
mZmZmZ d
dl m!Z!m"Z" d
dlm#Z#m$Z$m%Z% e&e'Z(ej)j*Z*ej)j+Z+ej)j,Z,ej)j-Z-eg e*j.e*j/e*j0e*j1e*j2e*j3e*j4e*j5e*j6e*j7e*j8e*j9e*j:e*j;e*j<e*j=e*j>e*j?e*j@e*jAe*jBe*jCe*jDe*jEe*jFe*jGe*jHe*jIe*jJe*jKee*jLe*jMe*jNe*jOjPe,jQZRi e eRZSe*jTe*jUjVe*jWe*jXe*jYe*jZe*j[e*j\j]e*j^e*j_e*j`gZaeeSea dd Zbebe*jcjdgdd Zeebe*jfjdgdd Zgebe*jhjVgdddddZhebe*jigedddZiebe*jjgdd Zjebe*jkjVgdd Zkebe*jlgdd Zlebe*jmjngdddZoebe*jpged d! Zpebe*jqgedd"d#Zqebe*jrged$d% Zrebe*jsjVgdd&d'Zsebe*jtgd(d) Ztebe*jugdd*d+d,Zuebe*jvgd-d. Zvebe*jwe*jxgd/d0 Zwebe*jyjVgdd1d2d3Zyebe*jze+jzgd4d5 Zzebe*j{e+j{gd6d7 Z{ebe*j|dd9d:Z|ebe*j}dd;d<Z}ebe*j~gd=d> Z~ebe*jgd8d?d@dAZebe*jjVgdBdC Zebe*jjgdDdE ZdFej]dGeej dHejfdIdJZebe*jddddKdLdMZebe*jddddKdNdOZebe*jdddd8d8ejdPdQdRZebe*jjVddddKdSdTZebe*jjddddKdUdVZebe*jjVdWdX Zebe,jQjVdYej]dZej]d[ej]dHej]fd\d]ZQebej)j,jd^d_ Zebe*jge	 	 	8dd`ej]daej]dbedceddedHej]fdedfZebe*jjddgdhZebe*jjddidjZebe*jjdkdl Ze*jjVejjjebe*jdYej]dZej]d[ejej] dmejej] dnejej] doedpedqefdrdsZe ddtdu Zdvdw Zebe*jdxdy Zebe-jjdYej]dzed{ed|ed}ejf
d~dZebe*jdddZebe*jdddZebe*jWjVedd ZWebe*jdddededefddZebe*j	8dddZdS )    N)Optional)core_aten_decompositionsget_decompositionsremove_decompositions)_grid_sampler_2dpw_cast_for_opmath)extra_random_decomps)counters)	out_dtype)pad_listlike)elementwise_dtypesELEMENTWISE_TYPE_PROMOTION_KINDtype_to_dtype   )configinductor_prims)is_gpu,needs_fallback_due_to_atomic_add_limitationsuse_scatter_fallbackc                 C   s8   t | r| gn| D ]}|tv rtd|  q	t| tS )Nzduplicate decomp: %s)callabledecompositionslogwarningdecompregister_decomposition)opsop r   [/home/ubuntu/SoloSpeech/.venv/lib/python3.10/site-packages/torch/_inductor/decomposition.pyr   j   s
   r   c                 C      d S Nr   tensormsgr   r   r   assert_async_msg_decomps      r$   c                 C   r   r    r   r!   r   r   r   "functional_assert_async_msg_decompy   r%   r&   )minmaxc                C   r   r    r   )symbolr'   r(   r   r   r   sym_constrain_range_for_size~   r%   r*   c                 C   s(   |d ur	|  |} |d ur| |} | S r    )	clamp_min	clamp_max)xr'   r(   r   r   r   clamp   s
   

r.   c                 K   s:   | d}|d u rtt||d< tj| |fi |S tS )Ndtype)getr   typetorchfullNotImplemented)size
fill_valuekwargsr/   r   r   r   r3      s
   
r3   c                    sN   dgt   }t|D ]\}}|||< qtj fdd|D fi ||S )Nr   c                    s   g | ]} | qS r   r   ).0lr5   r   r   
<listcomp>   s    z"empty_permuted.<locals>.<listcomp>)len	enumerater2   emptypermute)r5   physical_layoutr7   permpr9   r   r:   r   empty_permuted   s   
&rC   c                 C   st   |
d r
t | jjstS t| dgttd|   }t	| |||||||||	|
d |
d dg\}}}|||fS )N   r   r   F)
r   devicer1   r4   atensumlistrangedimconvolution_backward)grad_outputinputweight
bias_sizesstridepaddingdilation
transposedoutput_paddinggroupsoutput_mask	grad_biasgrad_inpgrad_weight_r   r   r   rK      s"    

rK   c                 C   s   d| }t | | d|  S )Ng      $@g      ?)rF   round)r-   decimalsten_pow_decimalsr   r   r   	round_dec   s   r^   c                 C   s   t jr!| jd dks|jd dkr!| d|d jdd}|S | jjdkrR| ddkrR|ddkrRtd d  d7  < t	j| 
d|
d ddd	dS tS )
Nr   rD   rJ   cpuinductordecompose_bmmTrJ   keepdim)r   coordinate_descent_tuningshape	unsqueezerG   rE   r1   r5   r	   r2   squeezer4   )selfbatch2outr   r   r   bmm   s   rm   c                 C   s   | j jdkrj|ddkr9|ddkr9td d  d7  < tj|d|d dddd}|| ||   S |ddkrj|dd	krj|dd	krjtd d  d7  < |j| jddd}|| ||   S t	S )
Nra   r   r   r_   rb   decompose_addmmTrd      )
rE   r1   r5   r	   r2   rG   ri   rh   Tr4   )rj   mat1mat2betaalpharl   r   r   r   addmm   s   *ru   c                    sR  ddl m}m} tjr'jd dks jd dkr'd d jddS jj	dkr|
ddkrx|
ddkrx| 
ddkrxj jkrx|tt  dkrxtd	 d
  d7  < t fddt
dD S |
ddkr| 
ddkrtd	 d
  d7  < tjd d ddddS tS )Nr   )definitely_trueguard_size_obliviousr   rD   r`   ra   r_       rb   decompose_mmc                    s    g | ]}|d d f   qS r    r   )r8   iinput2rj   r   r   r;      s     zmm.<locals>.<listcomp>Trd   )%torch.fx.experimental.symbolic_shapesrv   rw   r   rf   rg   rh   rG   rE   r1   r5   r/   r2   numelr	   catrI   ri   r4   )rj   r|   rv   rw   r   r{   r   mm   s4   $r   c                    sr   ddl m   fdd}tt|| }t|dkr|d  S dt|  k r-t| k r7n t	S tj||S t	S )Nr   rw   c                    s    t | jdkp | jd dkS )Nr   r   )r<   rg   )r-   r   r   r   non_empty_tensor  s    zcat.<locals>.non_empty_tensorr   )
r}   rw   rH   filterr<   clonerF   r   defaultr4   )tensorsrJ   r   filtered_tensorsr   r   r   r     s   r   c                 C   s~   |   rtt| jtdt| j| jS t| t	j
d\}}tjtj|| jd}t| dk |d}tt| td|S )Nnan)type_promotion_kindr/   rE   r   g        )
is_complexr2   whereisnanrealfloatatan2imagr   r   INT_TO_FLOATscalar_tensormathpirE   )r-   rZ   r/   r   retr   r   r   angle1  s   
r   rt   c                C   sv   t | o|  }t |o| }|r|stS |}|d ur"|| }t | j|j}| | jj||jj |S r    )r2   	is_tensorr   r4   promote_typesr/   viewr   )r-   yrt   x_is_complex_tensory_is_complex_tensorzcomplex_typer   r   r   addE  s   "r   c                 C   s   |   rJ d| S )NzTODO: implement this)r   rj   r   r   r   conj_physicalR  s   r   c                 C   s   | S r    r   r   r   r   r   liftX  r%   r   )	generatorc                C   s(   |d u sJ t j| t jd| k | jS )N)r/   )r2   	rand_likefloat32tor/   )rj   r   r   r   r   	bernoulli]  s   r   c                 C   s   t t ||| kB | |S r    r2   r   r   rj   otherr   r   r   fminc     r   c                 C   s   t t ||| k B | |S r    r   r   r   r   r   fmaxh  r   r   Fc                 C       | j tjkrtj| ||dS tS Nrd   )r/   r2   boolanyr4   rj   rJ   re   r   r   r   amaxm     r   c                 C   r   r   )r/   r2   r   allr4   r   r   r   r   amint  r   r   c                 C   s   t | ||| S r    )r2   narrowr   )rj   rJ   startlengthr   r   r   narrow_copy{     r   implicitc                C   s   t j| ||d S )Nr   )rF   expandr   )rj   r5   r   r   r   r   expand_copy  r   r   c                 C   s   t | | S r    )rF   r   r   )rj   r5   r   r   r   view_copy_default  s   r   c                 C   s   |  | S r    )r   r   )rj   r/   r   r   r   view_copy_dtype  s   r   r"   memory_formatreturnc                 C   s    |t ju s	|d u rt| S |S r    )r2   preserve_formatutilssuggest_memory_format)r"   r   r   r   r   get_like_layout  s   
r   )r/   rE   r   c                K   :   t jg |  f|p| j|p| jd|jt| |dS Nr   r   )r2   randr5   r/   rE   r   r   rj   r/   rE   r   r7   r   r   r   r        

r   c                K   r   r   )r2   randnr5   r/   rE   r   r   r   r   r   r   
randn_like  r   r   )r/   layoutrE   
pin_memoryrequires_gradr   c                C   s>   t jg |  ||p| j|p| j|p| j|djt| |dS )N)r/   r   rE   r   r   )r2   r3   r5   r/   r   rE   r   r   )rj   r6   r/   r   rE   r   r   r   r   r   r   	full_like  s   

r   c                K   s@   t jjd|g |  f|p| j|p| jd|jt| |dS )Nr   r   r   rF   randintlowr5   r/   rE   r   r   )rj   highr/   rE   r   r7   r   r   r   randint_like  s   

r   c                K   s@   t jj||g |  f|p| j|p| jd|jt| |dS r   r   )rj   r   r   r/   rE   r   r7   r   r   r   randint_like_low  s   

r   c                 K   s   t jjd| |fi |S Nr   )rF   r   r   )r   r5   r7   r   r   r   r     s   r   rM   rN   biasc                 C   s*   t jj|}t jj| ||| d S r   )r2   r   
_quantized$wrapped_fbgemm_pack_gemm_matrix_fp16!wrapped_fbgemm_linear_fp16_weightr5   )rM   rN   r   packed_weightr   r   r   #linear_dynamic_fp16_unpacked_weight  s   r   c                 C   sP   dd }|| dddf }|| ddd f }| dd df  tj| | S )Nc                    st    fdddD \}}}}t jdkr&||d>  |d>  |d>  tjd S |d> |d>  |d>  | tjd S )	Nc                 3   s$    | ]} d |f  tjV  qdS .N)r   r2   int32)r8   nu8r   r   	<genexpr>  s   " zPq_embedding_bag_byte_unpack_decomp.<locals>.bitcast_u8_to_f32.<locals>.<genexpr>)r   r   rD      little   ro      r   )sys	byteorderr   r2   r   )r   r-   r   r   wr   r   r   bitcast_u8_to_f32  s   
((z=q_embedding_bag_byte_unpack_decomp.<locals>.bitcast_u8_to_f32.i)r   r2   r   )packedr   scalesoffsetsr   r   r   "q_embedding_bag_byte_unpack_decomp  s    r   agridinterpolation_modepadding_modealign_cornersc                 C   s@   | j t dko|dko| jtjd }t| |||||d}|S )Nra   r   r   )r   r   r   r   _expand_grid)rE   r2   is_contiguouscontiguous_formatdecomp_grid_sampler_2d)r   r   r   r   r   r   outputr   r   r   grid_sampler_2d  s   r   c                 C      t jj| t j|||dS Nr   )rF   _foreach_addList_foreach_mulrj   left_tensorsright_tensorsscalarr   r   r   _foreach_addcmul_scalar     r
  c                 C   r  r  )rF   r  r  _foreach_divr  r   r   r   _foreach_addcdiv_scalar   r  r  c              	   C   s"   t j| t jt j|| |S r    )rF   r  r  r  Scalar_foreach_sub)start_tensorsend_tensorsrN   r   r   r   _foreach_lerp_scalar'  s   r  running_meanrunning_vartrainingexponential_average_factorepsilonc              
   C   sB   t | |||||||\}}	}
|r||	|
fS ||d|dfS )Nr   )rF   native_batch_norm	new_zeros)rM   rN   r   r  r  r  r  r  r   bcr   r   r   miopen_batch_norm1  s    

r  c                   C   s   i t tS r    )r   r   r   r   r   r   fast_random_decompsQ  s   r  c                   C   s   t jrtS t S )z"decomps can change based on config)r   fallback_randomr   r  r   r   r   r   select_decomp_tableV  s   r   c                 C   sF   t | jjr!t| |g\} }|ddd }t| |||S t	S )Nr_   r   r   )
r   rE   r1   rF   broadcast_tensorsreshapecumsumr   masked_scatter_with_indexr4   )rj   masksource
source_idxr   r   r   masked_scatter]  s
   r(  	quant_min	quant_maxepsr/   c           	      C   sv   t | \}}|| t||  }t |t |g}|t || t j }t |||}|t j	|t j
fS r    )r2   aminmaxr   r(   Tensorr[   r   intr.   float64int64)	rM   r)  r*  r+  r/   min_valmax_valscale
zero_pointr   r   r   choose_qparams_tensorh  s   r5  c                 C   s.   |   }t||g||j|}|| jS r    )flattenr2   	index_putr"  rg   )rj   indexr&  
accumulate	flattenedr   r   r   putt  s
   r;  c                 C   s   t j| |||d}| |S )N)r9  )rF   r;  copy_)rj   r8  r&  r9  rl   r   r   r   put_}  s   
r=  c                 C   sD   | | }t j||dd}t| ||}| j|kr||}| S )NTrd   )r2   rG   r   fmar/   r   
contiguous)rL   r   rJ   input_dtypenew_grad_outputsum_new_grad
grad_inputr   r   r   _softmax_backward_data  s   

rD  Tinclude_selfrJ   reduction_typerF  c                C   sr  |dkrOt | jsO| jjp| jj}t|}|r%| }t| |||}	n| ||d}t| |||}	|		|	dk d}	||||}|rK||	 S ||	 S t
tjj|| j|j|jjdr`tS | j|d d   | jd |   }
| g| j|d d  | jd | R }g t| j| | jdtd| j| R }|tj|
||}| j|||||dS )Nmeanr   r   TrE  )r   r/   is_floating_pointr   r2   	ones_like	index_add
index_fill
zeros_likemasked_fillr   rF   scatter_reduce_tworE   r1   r4   rg   r~   rI   ndimr   r0  repeat_interleaver"  r?   scatter_reduce)rj   rJ   r8  srcrG  rF  true_divisiononesrl   countsrepeatsindex_shaperA   scatter_indexr   r   r   index_reduce  sJ   

(,,
r[  c           
      C   s   |dkrddg}|dkrddg}|d u r|}t |d}t |d}t |d}t |d}|d |d  }tjj||sC|ttjjkrEtS t	
| |||||\}}t	||d | d||}	||	fS )Nr   r   rD   r_   )r   r2   	_inductorlowering'should_fallback_max_pool2d_with_indicesiinfoint8r(   r4   prims#_low_memory_max_pool2d_with_offsets)_low_memory_max_pool2d_offsets_to_indicesr5   )
r-   kernel_sizerP   rQ   rR   	ceil_modewindow_sizevalsr   indicesr   r   r   max_pool2d_with_indices  sB   



ri  )NNr  )r   r   )NF)r   r   F)r   )F)Nr   r   F)	functoolsloggingr   r   typingr   r2   torch._decomp_decompr   torch._prims_common_prims_commonr   $torch.ao.quantization.fx._decomposedr   r   r   torch._decomp.decompositionsr   r   r   $torch._decomp.decompositions_for_rngr   torch._dynamo.utilsr	   !torch._higher_order_ops.out_dtyper
   torch._inductor.utilsr   r   r   r    r   r   r   r   r   	getLogger__name__r   r   rF   ra  	quantizedquantized_decomposed_adaptive_avg_pool2d_backwardarangebitwise_and_bitwise_or_
clamp_min_dist
empty_likeflipgeluhardtanhindex_selectlcm
leaky_relulinalg_vector_norm_log_softmax max_pool2d_with_indices_backward_native_batch_norm_legit#_native_batch_norm_legit_functional$_native_batch_norm_legit_no_training_batch_norm_with_update"_batch_norm_with_update_functional_batch_norm_no_updatebatch_norm_backwardr  native_group_normnative_layer_normnll_loss2d_backward_softmaxsin_sqrt__to_copytril_indicestriu_indicesupsample_bilinear2dvecr   inductor_decompositionsr   _unsafe_index+_scaled_dot_product_flash_attention_for_cpur   rD  r,   r+   gluselect_scattersplitr-  ri   rG   unbinddecomps_to_excluder   _assert_asyncr#   r$   _functional_assert_asyncr&   r*   r.   r3   rC   rK   r[   r\   r^   rm   ru   r   r   r   r   r   r   detach_r   r   r   r   r   r   r   	view_copyr   r/   r   r   r   r   r   r   r   r   	low_dtyper   r   embedding_bag_byte_unpackr   r   r.  r   _foreach_addcmulr  r
  _foreach_addcdivr  _foreach_lerpr  r  py_impl_CDispatchKeyAutogradr   	lru_cacher  r   r(  choose_qparamsr"   r5  r;  r=  r[  strri  r   r   r   r   <module>   s:  
	
 !"#$(
	







 


#
!













		




	





	







.