o
    "i                     @   sJ  d dl Z d dlZd dlZd dlZd dlmZmZ d dlZd dlZd dl	Zd dl
mZ d dlmZmZmZmZmZ d dlmZmZmZmZmZmZ d dlmZ d dlmZ ejjZg dZ i Z!g Z"ej#j$j%Z%d	d
 Z&dd Z'e&e%j(j)e%j(j*e%j+j)e%j+j*e%j,j)e%j,j*e%j-j)e%j-j*e%j.j)e%j.j*e%j/j)e%j/j*e%j/j0e%j/j1e%j2j)e%j2j*e%j3j)e%j3j*e%j4j)e%j4j*e%j5j)e%j5j*e%j6j)e%j6j*e%j7j)e%j7j*Z8e&e%j9j)e%j:j)e%j:j*e%j;j)e%j<j)e%j=j>e%j=j?e%j@j)e%j@j*e%jAj)e%jAj*ZBe%jCfZDdd ZEe FddefddZGdeeegeHf ef fddZIeIe!jJdd ZKeIeGeIg e8dd ZLeIe%j=j?eIe%j=j>dd ZMdd ZNeIeNdd ZOeIe%jPj)dd  ZPeIe%jQj)d!d" ZQeId#d$ d%d& ZR	(dld)d*ZSeIe%jTj)	(dld+d,ZUeIe%jVj)	(dld-d.ZVeIe%jWjXdmd/d0ZYeIej$j%jZj)d1d2 Z[eIej$j%j\j)d3d4 Z\eIej$j%j]j)d5d6 Z]eId7d$ d8d9 Z^d:d; Z_d<d= Z`e&d>d?d@ZadAdB ZbdCdD ZceIdEd$ dFdG ZdeIe%jejXdHdI ZfeIe%jgj)dJdK ZheIe%jij)eIe%jjj)eIe%jkj)eIe%jlj)dLdM ZmeIe%jjj*eIe%jlj*dNdO ZneIe%joj)eIe%jpj)dPdQ ZqeIe%j:j)eIe%j:j*eIe%jrj)eIe%jsj)dRdS ZteIdTdU eBD dVdW ZueIe%jvj)e%jwj)gdXdY ZxeIe%jyj)dZd[ ZzeIe%j{j)d\d] Z|eIe%j}j)d^d_ Z~eIe%jj)d`da ZeIej$j%jj)dbdc Zi ZdefdddeZdfdg Zdhdi Ze Fddjdk ZdS )n    N)CallableUnion)
OpOverload)elementwise_dtypesELEMENTWISE_TYPE_PROMOTION_KINDis_boolean_dtypeis_float_dtypeis_integer_dtype)DataDependentOutputExceptionDynamicOutputShapeException
FakeTensorin_kernel_invocation_managerrun_fallback_kernelUnsupportedOperatorException)normalize_functioncount_label)op_implementations_checksget_fast_op_implsstride_incorrect_ophas_metac                  G   s   t | dS )NT)dictfromkeys)items r   Z/home/ubuntu/SoloSpeech/.venv/lib/python3.10/site-packages/torch/_subclasses/fake_impls.pyordered_set2      r   c                 C   s   | j dkrdS dS )NhpuFT)typedevicer   r   r   is_noncontiguous_supported8   s   
r"   c                 C   s,   t jj }| |ptdd |  D S )Nc                 s   s    | ]}t |V  qd S N)contains_tensor_types).0er   r   r   	<genexpr>p       
z(contains_tensor_types.<locals>.<genexpr>)torch_C
TensorTypegetisSubtypeOfanycontainedTypes)r   tensor_typer   r   r   r$   n   s   r$   funcc                 C   sR   t | tsJ | j}tdd |jD rdS t|jdko(|jd jtj	j
 u S )Nc                 s   s    | ]}t |jV  qd S r#   )r$   r   )r%   argr   r   r   r'   y   s    z)_is_tensor_constructor.<locals>.<genexpr>F   r   )
isinstancer   _schemar.   	argumentslenreturnsr   r)   r*   r+   r,   )r1   schemar   r   r   _is_tensor_constructoru   s   $r:   run_impl_checkc                        fdd}|S )Nc                    sr   t  tr tvsJ d  | t < | S t  ttfr* D ]}t||  q| S t s0J t | f | S )Nzduplicate registration: )	r4   r   op_implementations_dictlisttupleregister_op_implcallabler   append)op_implopr;   r   r   impl_decorator   s   

z(register_op_impl.<locals>.impl_decoratorr   )r;   rF   r   rE   r   r@      s   r@   c                 O   s   t | | |g|R i |S r#   )r=   	fake_moder1   argskwargsr   r   r   #dispatch_to_op_implementations_dict   s   rK   c           	      O   s   |t vsJ t|||dd\}}d|v rtd|tv r(|d j}|df}ntd}d}|dd }|d ur;|n|}td	|d< t|  ||i |}W d    n1 sZw   Y  t| ||S )
NTrI   rJ   normalize_to_only_use_kwargsnamesz+torch.compile doesn't support named tensorsinputcpur   r!   meta)	_non_kwarg_device_constructorsr   r   _like_tensor_constructorsr!   popr)   r   r   )	rH   r1   rI   rJ   _
new_kwargsdefault_device
out_devicerr   r   r   constructors   s(   



rZ   c           
      O   s   t |||dd\}}|d }|r|n|d j}td|d< |d}t|  ||fi |}	W d    n1 s:w   Y  | j| |	|S )NT)rM   r!   rO   rQ   )r   r!   r)   rT   r   fake_tensor_converterfrom_meta_and_device)
rH   r1   rI   rJ   rU   rV   input_devicerX   inprY   r   r   r   non_kwarg_to   s   


r_   c                 C   s6   | j dvrdS | tjju rdS |  }d|v rdS dS )N)atenprimsFfftT)	namespacer`   _fft_c2cdefaultname)rD   op_namer   r   r   r      s   
r   c                    s^   dd  | j r+t fddt|| D }|s+t||f\}}t| |||d S t|)Nc                 S   s.   t | tr| jS t | tjtjtjfrdS dS )NTF)r4   r   _has_symbolic_sizes_stridesr)   SymIntSymFloatSymBool)xr   r   r   is_symbolic   s
   
z3wordaround_stride_incorrect_op.<locals>.is_symbolicc                 3   s    | ]} |V  qd S r#   r   r%   rl   rm   r   r   r'      r(   z1wordaround_stride_incorrect_op.<locals>.<genexpr>)	allow_fallback_kernelsr.   	itertoolschainvaluespytreetree_flattenr   r   )rH   r1   rI   rJ   require_dynamic	flat_args	args_specr   ro   r   wordaround_stride_incorrect_op   s   ry   c                 O   s:   t |  ||i |W  d    S 1 sw   Y  d S r#   )r   rG   r   r   r   
resize_as_   s   
$rz   c                 O   s   t | |g|R i |S r#   )rZ   rG   r   r   r   (_sparse_coo_tensor_with_dims_and_tensors   s   r{   c                 C   s(   t jj| jv o| tjjtjjtj	jfvS r#   )
r)   Tagdynamic_output_shapetagsr`   indexTensornonzerore   repeat_interleaver1   r   r   r   <lambda>   s    r   c                 O      t |r#   )r   rG   r   r   r   	dyn_shape   s   r   TFc                 C   s  | j d u s	| j jst||d us|j }d u r[ddlm}m}	 |	| s/| dkr/d}n%| j  }t	j
d }
|d u rA| n||}|	|sNt|}
|||
d |d u r[||_|d u rg||fg}n|jg |jd | ||j|d d  R  g}|d uo|jtdk}|s|r||d u r|jn|j| f}n|d}|| |s|r||d u r|d jn|d j| f}n|d}|| t|S )Nr   _constrain_range_for_sizehas_free_symbolsr3   maxrP   )	shape_envallow_dynamic_output_shape_opsr   unique_memo%torch.fx.experimental.symbolic_shapesr   r   numelcreate_unbacked_symintsysmaxsizesizeint	new_emptyshapefake_devicer)   r!   rB   r?   )rH   r1   r2   dimsortedreturn_inversereturn_countsnnzr   r   maxvalr   retreturn_if_dim_and_cpuinversecountsr   r   r   _unique  s:   
	

2"

*

r   c                 C   s   t | ||d |||S r#   )r   )rH   r1   r2   r   r   r   r   r   r   unique2D  s   r   c                 C   s,   t | |||dkr
|n|t|jd |||S )Nr   r3   )r   r   ndim)rH   r1   r2   r   r   r   r   r   r   r   
unique_dimK  s   r   c                 C   sJ   |d u r | j d u s| j jst|| j  }ddlm} || ||S )Nr   r   )r   r   r   r   r   r   r   )rH   r1   repeatsoutput_sizer   r   r   r   repeat_interleave_tensor[  s   


r   c                 C   s   |j  }d ur	|S | jd u s| jjs| jst|t|jr$| j }nt|jr/| j }nt	|jr:| j
 }ntd|j ||_ |S )Nz local_scalar_dense/item NYI for )	item_memor   allow_scalar_outputsr
   r   dtypecreate_unbacked_symfloatr	   r   r   create_unbacked_symboolNotImplementedError)rH   r1   r2   rY   r   r   r   local_scalar_densen  s"   



r   c                 C   s   | j d u s	| j jst||j }d u rJddlm}m} || s+| dkr+d}n| j  }t	j
d }|| sAt| }|||d ||_|j|| ftjdS )Nr   r   r3   r   )r   )r   r   r   nonzero_memor   r   r   r   r   r   r   r   r   r   r)   int64)rH   r1   r2   r   r   r   r   r   r   r   r     s   
	

r   c                 C   sz   | j d u s	| j jst|| j  }tjd }ddlm}m} ||	 s1|	 dkr1t
|	 }|||d ||fS )Nr3   r   r      r   )r   r   r   r   r   r   r   r   r   r   r   r   )rH   r1   selfmaskr   r   r   r   r   r   r   masked_select  s   


r   c                 C   s   t jj| jv S r#   )r)   r|   data_dependent_outputr~   r   r   r   r   r     s    c                 O   r   r#   )r
   rG   r   r   r   data_dep  s   r   c                 C   s0   |D ]}|d ur|j tjtjfv rt| qd S r#   )r   r)   booluint8r   )r1   r   indicesr   r   r   r   check_no_bool_index_tensors  s
   r   c                 C   s   t |||dd\}}|d j}t|  ||i |}t|s%||j}W d    n1 s/w   Y  ||d u r<|S t| ||S NTrL   rO   )r   r!   r   r"   r   r   r   )rH   r1   rI   rJ   rU   rV   rX   outr   r   r   )run_and_return_new_tensor_of_input_device  s   


r   r`   ra   primc                 C   s
   | j tv S r#   )rc   _is_builtin_namespaces)rD   r   r   r   
is_builtin  s   
r   c                 C   s   t j|  dS )NMeta)r)   r*   ._dispatch_has_computed_kernel_for_dispatch_keyrf   r   r   r   r   r     s   r   c                 C   s   t | od|  v ot| S )Nforeach)r   rf   r   r   r   r   r   r     s    c              
      s  g }t || D ]}t|ttfr$t|r$t|d tjr$|	| q
zt
|  ||i |}W d    n1 s<w   Y  W n tyV } ztW  Y d }~S d }~ww |s[|S |s_J g }t|D ]\ }	t| fdd|D \}
}|	| j| |	|
 qe|S )Nr   c                    s   g | ]}|  qS r   r   )r%   tlir   r   
<listcomp>  s    z4foreach_run_and_map_input_device.<locals>.<listcomp>)rq   rr   rs   r4   r>   r?   r7   r)   r   rB   r   r   NotImplemented	enumerater   _find_common_devicer[   r\   )rH   r1   rI   rJ   tensor_listsr2   out_metanot_implemented_errorout_fakemeta_tr!   rU   r   r   r    foreach_run_and_map_input_device  s>   

r   c           	      O   sj   ddl m} t|||dd\}}|d j}|  ||i |}||W  d    S 1 s.w   Y  d S )Nr   )meta_index_TensorTrL   rO   )torch._meta_registrationsr   r   r!   to)	rH   r1   rI   rJ   r   rU   rV   rX   r   r   r   r   index_tensor  s   

$r   c                 O   sB   ddl m} |  ||i |W  d    S 1 sw   Y  d S )Nr   )meta_embedding_bag)r   r   )rH   r1   rI   rJ   r   r   r   r   embedding_bag+  s   $r   c                 O   s   t | |||S r#   )r   rG   r   r   r   multi_device_op_default4  s   r   c                 O   sR   t |  ||i |}W d    n1 sw   Y  t|||dd\}}|d S r   )r   r   )rH   r1   rI   rJ   r   rU   rV   r   r   r   multi_device_op_out=  s   

r   c                    s   t  ||dd\}}|d |d jtjkp$jdko$ dk fdd t|  ||} tjj	u r>|d S |S )	NTrL   rs   rO   r   r3   c                      s   d  d dj  dS )NzMismatching z device between self (z) and values ()r    r   r1   self_devicers   r   r   r   U  s    z index_put_impl.<locals>.<lambda>)
r   r   r)   _checkr   r   r   r`   
index_put_re   )rH   r1   rI   rJ   rU   rV   r   r   r   r   index_put_implJ  s   

r   c                 O   s   t d)Nz3torch.compile does not support strided NestedTensor)r   rG   r   r   r   nested_tensors_unsupported_  s   r   c                 C   s0   g | ]}|t jjt jjt jjt jjfvr|qS r   )r`   r   r!   prim_Device_nested_tensor_from_tensor_listre   r   rn   r   r   r   r   j  s    r   c                 O   s   |t vsJ d| d S )NzNYI: )_device_not_kwarg_opsrG   r   r   r   nyii  s   r   c                    s  t |||dd\}}|d j p |d j}|d jd }ddlm} ||s+d }nO|dkr<|d js<|d js<d }n>|tj	j
u rLtjjdi |}	n"tjj|d |d d |d |d	 |d
 |d |d |d |d d
}	tj|d |d |	}W d    n1 sw   Y   fdd}
t5 |di |}|tj	j
u r|
||W  d    S |
|d ||
|d ||
|d d fW  d    S 1 sw   Y  d S )NTrL   rO   weightr   )has_hint   stridepaddingdilation
transposedoutput_paddinggroups
bias_sizes)biasr   r   r   r   r   r   r   c                    s,   | d u r| S |d ur| j |d} t|  S )Nmemory_format)r   r   )tmem_fmtr!   rH   r   r   convert  s
   zconv.<locals>.convertr3   r   r   )r   r   r   r   r   r   	is_mkldnnis_xpur`   convolutionre   r)   r*   _select_conv_backend%_conv_determine_backend_memory_formatr   )rH   r1   rI   rJ   rU   kbatchr   r   conv_backendr   r   r   r   r   conv{  sT   


"
$r   c                    sh  t |||dd\}}|d }|d }|d } fdd}|d}	|d	}
|d
}|d}|d
}|d	d
}t|d	d
}|tj|	|
|ftjdd|jd}|r|dkr]dnd}t	|| }|dkrmd}n|dkrsd}|tj|	|
||f|j
dd|jd}n|tjd|j
dd|j}||d d |||tjdtjdd|j|tjdtjdd|j|f	S )NTrL   querykeyreturn_debug_maskc                       t  | |S r#   r   r   r!   rH   r   r   convert_tensor  r   z6meta__scaled_dot_product_flash.<locals>.convert_tensorr   r3   r   r   rQ   r   r!   r    @         r   )r   r   	transposer)   
empty_likeemptyfloatr!   mathceilr   long)rH   r1   rI   rJ   rU   r  r  r  r  
batch_size	num_headsmax_seqlen_batch_qhead_dimmax_seqlen_batch_kquery_t	attention	logsumexpblocksize_cmax_seqlen_k
debug_maskr   r  r   meta__scaled_dot_product_flash  sf   





	
	r  c              	      sF  t |||dd\}}|d }|d }|d }|d } fdd}	|d	d
}|d	d
}|d	d
}|d}
|d	}|d	}|d}|d}|d}|	tj|
||||jdd|j}|rkt|d d nd}|	tj|
||ftj	dd|j}|d	d
}|	tjdtj
dd|j}|	tjdtj
dd|j}||||fS )NTrL   r  r  valuecompute_log_sumexpc                    r  r#   r  r  r  r   r   r    r   z:meta__scaled_dot_product_efficient.<locals>.convert_tensorr3   r   r   rQ   r	      r   )r   r  r   r)   r  r   r!   r  r  r  r  )rH   r1   rI   rJ   rU   r  r  r   r!  r  BMNr  KKvreslogsumexp_dim
logsum_expseedoffsetr   r  r   "meta__scaled_dot_product_efficient  sL   






	r/  c              	      s  t |||dd\}}|d }|d }|d }|d }|d }	|d }
|d	 } fd
d}|d u r5|dn| d }|d u rD|dn|	}|d u rO|dn|
}|d}|d}t|}|tj|||ftjdd|jd}|r|dkrydnd}t	|| }|dkrd}n|dkrd}|tj||||f|j
dd|j}n|tjd|j
dd|j}|||tjdtjdd|j|tjdtjdd|j|fS )NTrL   r  r  	cum_seq_q	cum_seq_kmax_qmax_kr  c                    r  r#   r  r  r  r   r   r  E  r   z5meta__flash_attention_forward.<locals>.convert_tensorr   r3   r"  r#  rQ   r	  r    r
  r  r  r   )r   r   r   r)   r  r  r  r!   r  r  r   r  )rH   r1   rI   rJ   rU   r  r  r0  r1  r2  r3  r  r  r  r  r  r  r  r  r  r  r  r  r   r  r   meta__flash_attention_forward5  sd   



	
	r4  c              	      sx  t |||dd\}}|d }|d }|d }|d }|d }	|d }
|d	 } fd
d}|d}|d}|d}|d}|d}|d}|tj|||||jdd|j}|d ure|dd n|}|}|d uru|	d ussJ |	}|
d ur{|
n|}|rt|d d nd}|tj|||ftjdd|j}|tjdtj	dd|j}|tjdtj	dd|j}||||||fS )NTrL   r  r  r   cu_seqlens_qmax_seqlen_qr  r!  c                    r  r#   r  r  r  r   r   r    r   z9meta__efficient_attention_forward.<locals>.convert_tensorr   r3   r"  r#  rQ   r	  r$  r   )
r   r   r)   r  r   r!   r  r  r  r  )rH   r1   rI   rJ   rU   r  r  r   r5  r6  r  r!  r  r%  r&  r'  r  r(  r)  r*  logsumexp_batch_dimactual_max_seqlen_qactual_max_seqlen_kr+  r,  r-  r.  r   r  r   !meta__efficient_attention_forward}  sX   







r:  c           
      C   st   | j d u s	| j jst|| j  }ddlm} || |s$|dd}|jdd  }||}||f}	||	fS )Nr   r   r3   )	r   r   r   r   r   r   r  r   r   )
rH   r1   inputslengthsbatch_firstnew_batch_sizer   res_sizepacked_datar  r   r   r   _pack_padded_sequence  s   


rA  c                    r<   )Nc                    s   | t  < | S r#   )FAST_OP_IMPLEMENTATIONS)rC   r   r   r   rF     s   z-register_fast_op_impl.<locals>.impl_decoratorr   )r1   rF   r   r   r   register_fast_op_impl  s   rC  c           
         s   ddl m} t| }t|}t||}dg| }t|d ddD ]N |d   }|d | }|d | }	|dkr<| | nd|	dkrF||	 ndt|dkpY|dkpYk fdd |dkrjn| < q t|S )Nr   )guard_size_obliviousr3   r#  c                      s   d d d  dS )NzThe size of tensor a (z#) must match the size of tensor b (z) at non-singleton dimension r   r   r   r   sizeAsizeBr   r   r     s
    zinfer_size.<locals>.<lambda>)r   rD  r7   r   ranger)   r   r?   )
abrD  dimsAdimsBr   expandedSizesr.  dimAdimBr   rE  r   
infer_size  s(   


rP  c                    r<   )Nc                    sh   fdd}t d  }d}d}d }|D ]#}t|tjr"|jnd}	t|	dkr-d}nd}|d u r5|	}t||	}q|d usAJ |D ]}t|tjr[t|jt|kr[|j|kr[ nqC|dS td	}
|
}d }d }d}|D ]'}t|tjszd}qo||
kr|jjd	ks|j}|d u r|j	}qo||j	krd}qo|rt
|d
tji\}}d}d}|D ]2}t|tjsq||
kr| dkr|j|
kr||kr|d  S |d7 }q|j|kr|d  S qd}d}t|r|D ]}t|tjsq|o|jtjd}|o|jtjd}q|rt d ttj||dtjd|dS |r0t d ttj||dtjd|dS |dS )Nc                    sD   t d|     i W  d    S 1 sw   Y  d S )Nzslow r   )msg)rI   rJ   modeslow_refr   r   slow  s   $z=make_fast_binary_impl.<locals>.fast_binary_impl.<locals>.slowzattempt fastFr   r   Tz#both tensors nontrivially broadcastrP   type_promotion_kindr3   errorr   zfast is_contiguousrQ   )r   r!   r   r    zfast channels_lastzno contiguity match)r   r4   r)   r   r   r7   rP  r!   r   r   r   r   DEFAULTr   r"   is_contiguouscontiguous_formatchannels_lastr   r  )rR  rI   rJ   rT  operandshas_scalarshas_tensorsfinal_shaperD   r   rP   common_devicecommon_dtypeoutput_dtypehas_different_input_dtypesrU   current_cpu_scalars_on_non_cpumax_cpu_scalars_on_non_cpurX  is_channels_lastrS  )rI   rJ   rR  r   fast_binary_impl  s   










z/make_fast_binary_impl.<locals>.fast_binary_implr   )rS  rg  r   rf  r   make_fast_binary_impl  s    rh  c                  C   s|   dd l } t| jjjjt| jj t| jjjjt| jj t| jjj	jt| jj	 t| jjj
jt| jj
 tS )Nr   )torch._refsrC  opsr`   addr   rh  _refssubmuldivrB  )r)   r   r   r   r     s   


r   )TFFr#   )	functoolsrq   r  r   typingr   r   r)   torch._custom_optorch._logging
torch._opsr   torch._prims_commonr   r   r   r   r	   torch._subclasses.fake_tensorr
   r   r   r   r   r   torch.fx.operator_schemasr   torch.utils._statsr   utils_pytreert   __all__r=   r   _opsrj  r`   r   r"   r  re   r   	full_like	ones_like	rand_like
randn_likerandint_like	low_dtypelow_dtype_out
zeros_liker   new_empty_stridednew_full	new_zerosnew_onesrS   _resize_output_r   
pin_memory	is_pinnedr   r!   r   _pin_memory_resize_outputr   _list_to_tensorrR   r$   	lru_cacher:   r   r@   __contains__rK   rZ   r_   r   ry   rz   r{   r   r   _unique2r   r   r   r   r   _local_scalar_denser   r   r   r   r   r   r   r   r   r   r   r   _embedding_bagr   _unsafe_index_putcopycopy_slice_scatterr   r   	index_putr   r   _nested_view_from_buffer_nested_view_from_buffer_copyr   r   r   convolution_backwardr   #_scaled_dot_product_flash_attentionr  '_scaled_dot_product_efficient_attentionr/  _flash_attention_forwardr4  _efficient_attention_forwardr:  rA  rB  rC  rP  rh  r   r   r   r   r   <module>   s:   
 











?



(




$

















<

F

5

G
:
	% 
