o
    "i$                     @   s  d dl mZmZ d dlZd dlm  mZ d dlmZ d dl	m
Z
 d dlmZ d dlmZ d dlmZmZ d dlmZ d d	lmZmZmZ d d
lmZmZ dd ZdddZdejfddZdejfddZdd Z dd Z!dd Z"dd Z#e" Z$e# Z%dd Z&dS )    )OptionalTupleN)_prims)DispatchKey)autograd_not_implemented)HigherOrderOperator)CUDARngStateHelpermake_contiguous_strides_for)FakeTensorMode)disable_proxy_modes_tracingProxyTorchDispatchModetrack_tensor_tree)_device_dtypec                 C   s"   t d| j d| j d| j d)Nz"You are trying to functionalize a z RNG operator but zE does not use Philox/counter-based RNG. Therefore, functionalizing a zo RNG operator is not supported. We are discussing the possibility of a Philox-based RNG implementation for CPU.)RuntimeErrortype)device r   T/home/ubuntu/SoloSpeech/.venv/lib/python3.10/site-packages/torch/_prims/rng_prims.pythrow_on_non_cuda   s
   r   c           
      C   s|   t jjd|  |d|d}|| tt jjj| }|j}|r!||_	||fD ]}	||	_
t jjj|	_| | |	_||	_||	_q%d S )Nz
rngprims::r   )mutates_argsschema)torchlibrary	custom_opregister_fakegetattr_opsopsrngprimsdefault_tags__doc___prims_commonRETURN_TYPENEWreturn_typer   	impl_atenprim_meta_impl)
namer   r'   	impl_metadoctagsrngprim_defprim_packetprimpr   r   r   register_rng_prim   s   

r1   shapec                 C   s   t tjdtjdS )Nr   dtype)r   
TensorLiker   tensorint64)r2   r   r   r   philox_rand_offset_meta3   s   r8   c                 C   s   d}| D ]}||9 }qt j|t jd}d}d}d}t jt j }|j| }|| d | }	t|	|j| }	|d ||	 |  d | }
|
S )N   r3         )	r   scalar_tensorr7   cudaget_device_propertiescurrent_devicemax_threads_per_multi_processorminmulti_processor_count)r2   numel_scalardim_sizenumel
block_sizeunrollcurand4_engine_callsdevice_propertyblocks_per_sm	grid_sizeoffsetr   r   r   philox_rand_offset9   s   

rM   c                  C   s   d} d}dt jdt jdt jdtttdf  dtd	tfd
d}dt jdt jdt jdtttdf  dtd	tfdd}t| |||dt j	j
fd d S )Nphilox_randz{(SymInt[] size, Tensor seed, Tensor offset, int[]? stride, Device? device=None, ScalarType? dtype=None) -> (Tensor, Tensor)r2   seedrL   stride.r   r4   c                 S   s6   |d u sJ t | }tj| |||d}t| }||fS )N)r2   stridesr4   r   )r	   r   
TensorMetar8   )r2   rO   rL   rP   r   r4   random_valuesr   r   r   _philox_rand_metaU   s   	z/register_philox_rand.<locals>._philox_rand_metac                 S   s   |d u sJ |j dkrg }n|g}|j dkrt|tj| t|| tj| ||d}W d    n1 s9w   Y  |t| fS )Ncpur=   )r   r4   )	r   r   r   randomfork_rngr   set_torch_state_tensorrandrM   )r2   rO   rL   rP   r   r4   devicesrS   r   r   r   _philox_randf   s   	

z*register_philox_rand.<locals>._philox_randz$Philox based stateless rand operator)r)   r   r'   r*   r+   r,   )r   SizeTensorr   r   intr   r   r1   Tagnondeterministic_seeded)r)   r   rT   r[   r   r   r   register_philox_randQ   sH   


ra   c                 C   sl   | dr| d}t|trt|}|jS dd | D }tdd |D r)dS tdd |D r4dS d S )	Nr   c                 S   s    h | ]}t |tjr|jjqS r   )
isinstancer   r]   r   r   ).0argr   r   r   	<setcomp>   s     zget_device.<locals>.<setcomp>c                 s       | ]}|d kV  qdS )r=   Nr   rc   devr   r   r   	<genexpr>       zget_device.<locals>.<genexpr>r=   c                 s   rf   )rU   Nr   rg   r   r   r   ri      rj   rU   )getrb   strr   r   r   any)argskwargsr   rZ   r   r   r   
get_device   s   



rp   c                     s   t dtjtdd tjdd tjdd tjfdd	 t fd
d} t	 fdd}S )Nrun_and_save_rng_stateTdeferred_errorc                 _   s   t j | |i |fS N)r   r=   get_rng_stateoprn   ro   r   r   r   	impl_cuda   s   z5register_run_and_save_rng_state_op.<locals>.impl_cudac                 _   s   t  | |i |fS rt   )r   ru   rv   r   r   r   impl_cpu   s   z4register_run_and_save_rng_state_op.<locals>.impl_cpuc                    sH    d}t ||}||v sJ d| || }|| g|R i |S N)r=   rU   zBackend not supported for rp   )rw   rn   ro   impl_mapr   implry   rx   r   r   impl_backend_select   s
   

z?register_run_and_save_rng_state_op.<locals>.impl_backend_selectc                    s>   |   |g|R i |W  d    S 1 sw   Y  d S rt   r   )moderw   rn   ro   )r   r   r   impl_fake_tensor_mode   s   $zAregister_run_and_save_rng_state_op.<locals>.impl_fake_tensor_modec                    s~   | j r4 |g|R i |}t| jj|g|R }t| jj|}| jd||}t||d | jdS |g|R i |S Ncall_function)constanttracer)enable_tracingpytreetree_mapr   unwrap_proxycreate_proxyr   )r   rw   rn   ro   out
proxy_argsproxy_kwargs	out_proxy)r   rq   r   r   impl_proxy_dispatch_mode   s   zDregister_run_and_save_rng_state_op.<locals>.impl_proxy_dispatch_mode)
r   py_implr   Autogradr   CUDACPUBackendSelectr
   r   )r   r   r   )r   ry   rx   rq   r   "register_run_and_save_rng_state_op   s   






r   c                     s   t dtjtdd tjdd tjdd  tfdd	} tj fd
d}t	dd }S )Nrun_with_rng_stateTrr   c                 _   s8   t j }t j|   ||i |}t j| |S rt   )r   r=   ru   set_rng_staterU   	rng_staterw   rn   ro   current_stater   r   r   r   rx      s
   
z1register_run_with_rng_state_op.<locals>.impl_cudac                 _   s.   t  }t |  ||i |}t | |S rt   )r   ru   r   r   r   r   r   ry      s
   

z0register_run_with_rng_state_op.<locals>.impl_cpuc           	         s   | j rIt   ||g|R i |}W d    n1 sw   Y  t| jj||g|R }t| jj|}| jd ||}t||d | jdS  ||g|R i |S r   )r   r   r   r   r   r   r   r   )	r   r   rw   rn   ro   r   r   r   r   )r   r   r   r      s   z@register_run_with_rng_state_op.<locals>.impl_proxy_dispatch_modec                    sJ    d}t ||}||v sJ d| || }|| |g|R i |S rz   r{   )r   rw   rn   ro   r|   r   r}   r~   r   r   r      s
   

z;register_run_with_rng_state_op.<locals>.impl_backend_selectc                 _   s6   |  ||i |W  d    S 1 sw   Y  d S rt   r   )r   r   rw   rn   ro   r   r   r   r      s   $z=register_run_with_rng_state_op.<locals>.impl_fake_tensor_mode)
r   r   r   r   r   r   r   r   r   r
   )r   r   r   r   )ry   rx   r   r   register_run_with_rng_state_op   s   







r   c                   C   s
   t   d S rt   )ra   r   r   r   r   register_rng_prims  s   
r   rt   )'typingr   r   r   torch.utils._pytreeutils_pytreer   r   torch._Cr   torch._higher_order_ops.utilsr   
torch._opsr   torch._prims_commonr   r	   torch._subclasses.fake_tensorr
   "torch.fx.experimental.proxy_tensorr   r   r   torch.typesr   r   r   r1   r\   r8   rM   ra   rp   r   r   rq   r   r   r   r   r   r   <module>   s4   


7-9