o
    ߗi                      @   sL  d dl Z d dlmZ d dlmZmZmZ d dlZd dlm	Z	 d dl
mZ d dlmZ d dlmZmZmZmZmZmZ d dlmZmZmZmZmZ d d	lmZ d
dlmZ dd Zdej j!fddZ"dej j!dee# fddZ$dej j!dee# fddZ%de&fddZ'deee#  fddZ(dd Z)G dd dZ*ede* d d#d!d"Z+dS )$    N)defaultdict)DictListOptional)config)aot_autograd)	boxed_nop)BoxedDeviceIndex'check_multiple_devices_or_any_cpu_nodesformat_default_skip_messageget_mutation_stack_traceget_placeholder_info#log_cudagraph_skip_and_bump_counter)	BoxedBoolcount_tangents%get_first_incompatible_cudagraph_nodenum_fw_fixed_argumentsoutput_node)StorageWeakRef   )register_backendc                 C   s  dd }t t}d}t }| jD ]q}|jdkr3t||jtjr.|t||j	  
| |d7 }q|jdkrt|jds?q|jj}t|jD ]8\}}|t|jk rY|j| }	n|j|jvr`qH|j|j }	d}
|jrq|jjrqd	}
|
r||t||	j	  O }qHq|S )
Nc                 S   s   d| v r| d S | d S )Nvalfake_result )metar   r   _/home/ubuntu/transcripts/venv/lib/python3.10/site-packages/torch/_dynamo/backends/cudagraphs.pymeta_fk    s   z%find_input_mutations.<locals>.meta_fkr   placeholderr   call_function_schemaFT)r   setnodesop
isinstancer   torchTensorr   _typed_storageaddhasattrtargetr   	enumerate	argumentslenargsnamekwargs
alias_infois_write)gr   inputs	input_idxmutated_inputsnschemaiargargumentmut_argr   r   r   find_input_mutations   s:   



r<   gmc                 C   sD   i }| j jD ]}|jdd }t|tjr|j|vr|||j< q|S )Nr   )graphr!   r   getr#   r$   r%   device)r=   device_node_mappingr6   tr   r   r   get_device_node_mappingF   s   
rC   	aot_modelreturnc                 C   s2   t | jtt| }|sd S t| j}t||S N)r<   r>   r    ranger   r   )rD   	num_fixedmutation_indicesplaceholdersr   r   r   3check_for_mutation_ignore_cuda_graph_managed_tensorO   s
   

rK   c                 C   sN   t jst| | }r|S tt|  }r|S t|  }r%td|j dS d S )Nzincompatible op ())r   (cudagraph_backend_support_input_mutationrK   r
   rC   r   r   r.   )rD   rH   mut_skipskipnoder   r   r   check_for_skipZ   s   rQ   c                 C   s$   t tt| }|jdksJ |jS )Ncuda)nextiterrC   typeindex)r=   r@   r   r   r   get_device_indexl   s   rW   c                 C   s.   t | }t|jdksJ dd |jd D S )Nr   c                 S   s&   g | ]}t |tjjjr|jnd qS rF   )r#   r$   fxrP   Nodestack_trace).0r9   r   r   r   
<listcomp>u   s    z$get_stack_traces.<locals>.<listcomp>r   )r   r,   r-   )r=   outputr   r   r   get_stack_tracesr   s
   r^   c                    sj   ddl m tdtd  d fdd	} fdd}t||tj|dd	tjj	j
d
}|| S )Nr   )cudagraphify_implTFc                    s   t | |}ttt|}t| | }r#t td|  |S  t|  ||t	| j
ddt| t| jt| jd	}d|_|S )Nzskipping cudagraphs due to Fdevice_indexis_backwardis_inferencestack_tracesrJ   mutated_input_idxsT)r   r   r,   rQ   r   disabler   r    rW   rG   valuer^   r   r>   r<   _boxed_call)rD   
aot_inputsrc   interpfixedskip_msgoutboxed_device_indexr_   do_cudagraphsdynamo_inputsr   r   forward_cudagraphs   s,   

z&cudagraphs.<locals>.forward_cudagraphsc                    s   t  |}s	 S t }t | }r5td| tjjjjddd us)J  fdd}d|_	|S ||t
|t ddt t jt jd	}d|_	|S )Nzskipping cudagraphs due to %sF)create_if_none_existsc                    s       | S rF   )set_to_running_backward)r3   rD   managerr   r   fn   s   z3cudagraphs.<locals>.backward_cudagraphs.<locals>.fnTr`   )r   r   rQ   r   r$   	_inductorcudagraph_treesget_managerrg   rh   rG   rW   r^   r   r>   r<   )rD   ri   rj   rk   rl   rw   rm   )ro   r_   rp   ru   r   backward_cudagraphs   s8   
z'cudagraphs.<locals>.backward_cudagraphs)rc   )fw_compilerbw_compilerinference_compilerkeep_inference_input_mutations)F)torch._inductor.cudagraph_treesr_   r   r	   r   	functoolspartialr$   _dynamor   %cudagraph_backend_keep_input_mutation)dynamo_modelrq   rr   r{   aot_cudagraphsr   rn   r   
cudagraphs{   s   &
r   c                   @   s(   e Zd ZdZedd Zedd ZdS )CudagraphsBackendr   c                  C   s   ddl m}  |   d S )Nr   reset_cudagraph_trees)r   r   r   r   r   r   reset   s   
zCudagraphsBackend.resetc                 C   s
   t | |S rF   )r   )modelr3   r   r   r   __call__   s   
zCudagraphsBackend.__call__N)__name__
__module____qualname__compiler_namestaticmethodr   r   r   r   r   r   r      s    
r   )r.   compiler_fnTc                    s  t |ttfs	J  rdd |D nt|tj  tj }|tj  tj	| | |  W d   n1 s>w   Y  |  tj | tj  tj
 tjj|d |  W d   n1 spw   Y  t ttfsf fdd}|S )zBThis isn't registered as a backend, but is used in some benchmarksc                 S   s   g | ]}t |qS r   )r$   
zeros_liker[   xr   r   r   r\      s    z$cudagraphs_inner.<locals>.<listcomp>N)streamc                     sT   t t | ks
J  rt| D ]	\}}|| q  r(dd D S S )Nc                 S   s   g | ]}|  qS r   )cloner   r   r   r   r\      s    z1cudagraphs_inner.<locals>.run.<locals>.<listcomp>)r,   zipcopy_replay)
new_inputsdstsrccopy_inputscopy_outputsr>   static_inputsstatic_outputsr   r   run   s   zcudagraphs_inner.<locals>.run)r#   listtupler$   rR   synchronizeStreamwait_streamcurrent_streamr   	CUDAGraphr>   )r   r3   r   r   r   r   r   r   r   cudagraphs_inner   s*   





r   )TT),r   collectionsr   typingr   r   r   r$   torch._dynamor   torch._dynamo.backends.commonr    torch._dynamo.backends.debuggingr   torch._inductor.cudagraph_utilsr	   r
   r   r   r   r   torch._inductor.utilsr   r   r   r   r    torch.multiprocessing.reductionsr   registryr   r<   rX   GraphModulerC   strrK   rQ   intrW   r^   r   r   r   r   r   r   r   <module>   s2    '	
	N