o
    "i                      @   sT  d dl Z d dlZd dlmZ d dlmZmZmZ d dlZd dl	m
Z
 d dlmZ d dlmZ d dlmZmZmZmZmZmZ d dlmZmZmZmZmZ d d	lmZ d
dlmZ dd Z dej!j"fddZ#dej!j"dee$ fddZ%dej!j"dee$ fddZ&de'fddZ(deee$  fddZ)dd Z*G dd dZ+ede+ d d#d!d"Z,dS )$    N)defaultdict)DictListOptional)config)aot_autograd)	boxed_nop)BoxedDeviceIndex'check_multiple_devices_or_any_cpu_nodesformat_default_skip_messageget_mutation_stack_traceget_placeholders#log_cudagraph_skip_and_bump_counter)	BoxedBoolcount_tangents%get_first_incompatible_cudagraph_nodenum_fw_fixed_argumentsoutput_node)StorageWeakRef   )register_backendc                 C   s  dd }t t}d}t }| jD ]q}|jdkr3t||jtjr.|t||j	  
| |d7 }q|jdkr|jtju r?q|jj}t|jD ]8\}}|t|jk rY|j| }	n|j|jvr`qH|j|j }	d}
|jrq|jjrqd}
|
r||t||	j	  O }qHq|S )	Nc                 S   s   d| v r| d S | d S )Nvalfake_result )metar   r   _/home/ubuntu/SoloSpeech/.venv/lib/python3.10/site-packages/torch/_dynamo/backends/cudagraphs.pymeta_fk!   s   z%find_input_mutations.<locals>.meta_fkr   placeholderr   call_functionFT)r   setnodesop
isinstancer   torchTensorr   _typed_storageaddtargetoperatorgetitem_schema	enumerate	argumentslenargsnamekwargs
alias_infois_write)gr   inputs	input_idxmutated_inputsnschemaiargargumentmut_argr   r   r   find_input_mutations    s:   



r=   gmc                 C   sD   i }| j jD ]}|jdd }t|tjr|j|vr|||j< q|S )Nr   )graphr    r   getr"   r#   r$   device)r>   device_node_mappingr7   tr   r   r   get_device_node_mappingF   s   
rD   	aot_modelreturnc                 C   s:   t | jtt| }|sd S dd | jjD }t||S )Nc                 S   s   g | ]	}|j d kr|qS )r   )r!   ).0noder   r   r   
<listcomp>V   s    zGcheck_for_mutation_ignore_cuda_graph_managed_tensor.<locals>.<listcomp>)r=   r?   r   ranger    r   )rE   	num_fixedmutation_indicesplaceholdersr   r   r   3check_for_mutation_ignore_cuda_graph_managed_tensorO   s
   
rN   c                 C   sN   t jst| | }r|S tt|  }r|S t|  }r%td|j dS d S )Nzincompatible op ())r   (cudagraph_backend_support_input_mutationrN   r
   rD   r   r   r/   )rE   rK   mut_skipskiprH   r   r   r   check_for_skipZ   s   rS   c                 C   s$   t tt| }|jdksJ |jS )Ncuda)nextiterrD   typeindex)r>   rA   r   r   r   get_device_indexl   s   rY   c                 C   s.   t | }t|jdksJ dd |jd D S )Nr   c                 S   s&   g | ]}t |tjjjr|jnd qS N)r"   r#   fxrH   Nodestack_trace)rG   r:   r   r   r   rI   u   s    z$get_stack_traces.<locals>.<listcomp>r   )r   r-   r.   )r>   outputr   r   r   get_stack_tracesr   s
   r_   c                    sj   ddl m tdtd  d fdd	} fdd}t||tj|dd	tjj	j
d
}|| S )Nr   )cudagraphify_implTFc                    s   t | |}ttt|}t| | }r#t td|  |S  t|  ||t	| j
ddt| t| jt| jd	}d|_|S )Nzskipping cudagraphs due to Fdevice_indexis_backwardis_inferencestack_tracesrM   mutated_input_idxsT)r   r   r-   rS   r   disabler   r   rY   rJ   valuer_   r   r?   r=   _boxed_call)rE   
aot_inputsrd   interpfixedskip_msgoutboxed_device_indexr`   do_cudagraphsdynamo_inputsr   r   forward_cudagraphs   s,   

z&cudagraphs.<locals>.forward_cudagraphsc                    s   t  |}s	 S t }t | }r5td| tjjjjddd us)J  fdd}d|_	|S ||t
|t ddt t jt jd	}d|_	|S )Nzskipping cudagraphs due to %sF)create_if_none_existsc                    s       | S rZ   )set_to_running_backward)r4   rE   managerr   r   fn   s   z3cudagraphs.<locals>.backward_cudagraphs.<locals>.fnTra   )r   r   rS   r   r#   	_inductorcudagraph_treesget_managerrh   ri   rJ   rY   r_   r   r?   r=   )rE   rj   rk   rl   rm   rx   rn   )rp   r`   rq   rv   r   backward_cudagraphs   s8   
z'cudagraphs.<locals>.backward_cudagraphs)rd   )fw_compilerbw_compilerinference_compilerkeep_inference_input_mutations)F)torch._inductor.cudagraph_treesr`   r   r	   r   	functoolspartialr#   _dynamor   %cudagraph_backend_keep_input_mutation)dynamo_modelrr   rs   r|   aot_cudagraphsr   ro   r   
cudagraphs{   s   &
r   c                   @   s(   e Zd ZdZedd Zedd ZdS )CudagraphsBackendr   c                  C   s   ddl m}  |   d S )Nr   reset_cudagraph_trees)r   r   r   r   r   r   reset   s   
zCudagraphsBackend.resetc                 C   s
   t | |S rZ   )r   )modelr4   r   r   r   __call__   s   
zCudagraphsBackend.__call__N)__name__
__module____qualname__compiler_namestaticmethodr   r   r   r   r   r   r      s    
r   )r/   compiler_fnTc                    s  t |ttfs	J  rdd |D nt|tj  tj }|tj  tj	| | |  W d   n1 s>w   Y  |  tj | tj  tj
 tjj|d |  W d   n1 spw   Y  t ttfsf fdd}|S )zBThis isn't registered as a backend, but is used in some benchmarksc                 S   s   g | ]}t |qS r   )r#   
zeros_likerG   xr   r   r   rI      s    z$cudagraphs_inner.<locals>.<listcomp>N)streamc                     sT   t t | ks
J  rt| D ]	\}}|| q  r(dd D S S )Nc                 S   s   g | ]}|  qS r   )cloner   r   r   r   rI      s    z1cudagraphs_inner.<locals>.run.<locals>.<listcomp>)r-   zipcopy_replay)
new_inputsdstsrccopy_inputscopy_outputsr?   static_inputsstatic_outputsr   r   run   s   zcudagraphs_inner.<locals>.run)r"   listtupler#   rT   synchronizeStreamwait_streamcurrent_streamr   	CUDAGraphr?   )r   r4   r   r   r   r   r   r   r   cudagraphs_inner   s*   





r   )TT)-r   r(   collectionsr   typingr   r   r   r#   torch._dynamor   torch._dynamo.backends.commonr    torch._dynamo.backends.debuggingr   torch._inductor.cudagraph_utilsr	   r
   r   r   r   r   torch._inductor.utilsr   r   r   r   r    torch.multiprocessing.reductionsr   registryr   r=   r[   GraphModulerD   strrN   rS   intrY   r_   r   r   r   r   r   r   r   <module>   s4    &	
	N