o
    ߗiB                     @   s  d dl Z d dlZd dlZd dlmZmZmZmZmZm	Z	m
Z
 d dlZd dlmZmZmZ d dlmZmZ d dlmZmZmZ d dlmZmZ d dlmZ d dlmZ d d	lmZ d d
l m!Z! d dl"m#Z#m$Z$m%Z%m&Z&m'Z'm(Z(m)Z) d dl*m+Z+m,Z, d dl-m.Z.m/Z/ d dl0m1Z1 d dl2m3Z3 e	rd dl4m5Z5 ee6dZ7ee6dZ8dd Z9dd Z:dd Z;g dZ<e1eeej=ej>j?j@jAgZBG dd dZCdaDdaEdaFe jGd$ddZHe jGdd  ZId%d"d#ZJdS )&    N)AnyDictListOptionalTupleTYPE_CHECKINGUnion)call_backward	call_hookFakeCompiledAutogradEngineGetItemSourceLocalSource)counterslazy_format_graph_codeset_locals_to_steal)getArtifactLoggertrace_structuredclone_preserve_strides)FakeTensorMode)GraphModule)BackwardState)	decomposedisable_autocast_cachedisable_proxy_modes_tracingfetch_object_proxyProxyTorchDispatchModePythonKeyTracertrack_tensor_tree)
DimDynamicShapeEnv)preserve_node_metaset_stack_trace)
OrderedSet)CapturedTraceback)Proxycompiled_autogradcompiled_autograd_verbosec                   C   s   t jjjdS )Nr(   )torch_logging	_internal	log_stateis_artifact_enabled r.   r.   ]/home/ubuntu/transcripts/venv/lib/python3.10/site-packages/torch/_dynamo/compiled_autograd.py snapshot_verbose_logging_enabled+   s   
r0   c                   C   s   t jjjjS N)r)   	_inductorconfigtriton
cudagraphsr.   r.   r.   r/   snapshot_cudagraph_enabled1   s   r6   c                 C   s   | d urt | S | S r1   r   )xr.   r.   r/   maybe_clone5   s   r8   )inputssizesscalarshooksc                   @   s|  e Zd ZdCddZdd ZedefddZd	ee	j
 d
ee deeeef  deeeeef   fddZdefddZdd ZdefddZdd Zdd Zdd Zdee fddZdd  Zd!d" Zd#d$ Zd%d& Zed'd( Zed)d* Zd+d, Zd-d. Zd/d0 Z d1d2 Z!d3d4 Z"d5d6 Z#d7d8 Z$	dDde%eeeef   fd9d:Z&d;efd<d=Z'd>ed?ed@e%e	j(j) fdAdBZ*dS )EAutogradCompilerInstancereturnNc                 C   sT   || _ t | _| jj| _t | _tdd| jd| _t	 | _
t| j
d| _d | _d S )NT)allow_fallback_kernelsallow_non_fake_inputs	shape_envsymbolic)compiler_fn
contextlib	ExitStackstackcloser!   rA   r   fake_tensor_moder   	fx_tracerr   
proxy_modehooks_proxy)selfrC   r.   r.   r/   __init__G   s   


z!AutogradCompilerInstance.__init__c                 C   s    t |tjsJ | jj||dS )N)source)
isinstancer)   TensorrH   from_tensor)rL   r7   rN   r.   r.   r/   	wrap_fakeU   s   z"AutogradCompilerInstance.wrap_fakec                 C   s   t t| |S r1   r   )nameidxr.   r.   r/   rN   Y   s   zAutogradCompilerInstance.sourcer9   r:   r;   originsc                    s  t d d  d7  < d  _i  _tj  j_tjj	t
d j_i  j_ fddtD \}}} _ jt  |\}}	}
 fddt|D } |||  fd	dt|D } |||	 t|D ]:\}} d
|}t|tr j||tj||< qft|tr jj jj||tjd||d||< qftdt| |||
  jt i   j j!  j j"  jt#   j!jd usJ  j!j} jtjj$j%&| |||fS )Nr'   captures   )
tracer_clsc                 3   s"    | ]} j d |di V  qdS )placeholderr.   N)rI   create_proxy).0rS   rL   r.   r/   	<genexpr>j   s
    
z9AutogradCompilerInstance.begin_capture.<locals>.<genexpr>c              	      s$   g | ]\}}  | d |qS )r9   )rR   rN   )r[   rT   r7   r\   r.   r/   
<listcomp>r   s    z:AutogradCompilerInstance.begin_capture.<locals>.<listcomp>c              	      s*   g | ]\}} j | d |tjqS )r:   )rA   $create_unspecified_symint_and_symbolrN   r    DYNAMIC)r[   rT   valr\   r.   r/   r^   y   s    
r;   )rN   dynamic_dim)hintrN   zUnexpected scalar type: )'r   aot_graph_cls_nameaot_graph_infosr)   nnModulerI   rootfxGraphr   graphtensor_attrs_graph_placeholdersrK   rF   enter_contextr"   	enumeratebind_tensors_to_proxiesrN   rO   intrA   r_   r    r`   floatcreate_symfloatnodecreate_unspecified_symbolAssertionErrortyper   rH   rJ   r   experimentalsymbolic_shapes_suppress_guards)rL   r9   r:   r;   rU   
args_proxysizes_proxyscalars_proxyinputs_originssizes_originsscalars_originsrT   ra   rN   envr.   r\   r/   begin_capture]   sb   








z&AutogradCompilerInstance.begin_capturebackward_idxc              
   C   s   | j d usJ | j | }| jjdt|| |g| |R i d}t 4 g }|D ]}|d u r5|d  q)|\}	}
}}|tj|||	|
d q)| 	|| W d    t
|S 1 s[w   Y  t
|S )Ncall_function)kindtargetargskwargs)sizedtypelayoutdevice)rK   rI   rZ   r	   to_proxyr   appendr)   emptyrp   tuple)rL   r9   output_metadatassaved_tensorsr   backward_c_functionproxiesgrad_insoutput_metadatar   r   r   r   r.   r.   r/   proxy_call_backward   s6   


z,AutogradCompilerInstance.proxy_call_backwardc                    s(    j dt|g fdd|D R |S )Nr   c                       g | ]}  |qS r.   r   r[   r7   r\   r.   r/   r^          z<AutogradCompilerInstance.proxy_call_hook.<locals>.<listcomp>)rI   rZ   r
   )rL   hookr   r   r.   r\   r/   proxy_call_hook   s   z(AutogradCompilerInstance.proxy_call_hookic                 C   s|   | j d usJ | j | }| j||| dd}t  t|| ||< | || g|g W d    |S 1 s7w   Y  |S )Ntensor_pre_hook	hook_type)rK   r   r   r8   rp   )rL   r9   hook_idr   r   proxyr.   r.   r/   r      s   

z(AutogradCompilerInstance.tensor_pre_hookc                 C   sn   | j d usJ | j | }| j||dd}t  dd |D }| || W d    |S 1 s0w   Y  |S )Npre_hookr   c                 S      g | ]}t |qS r.   r8   r   r.   r.   r/   r^          z5AutogradCompilerInstance.pre_hook.<locals>.<listcomp>rK   r   r   rp   )rL   r9   r   r   r   r.   r.   r/   r      s   

z!AutogradCompilerInstance.pre_hookc                 C   sp   | j d usJ | j | }| j|||dd}t  dd |D }| || W d    |S 1 s1w   Y  |S )N	post_hookr   c                 S   r   r.   r   r   r.   r.   r/   r^      r   z6AutogradCompilerInstance.post_hook.<locals>.<listcomp>r   )rL   outputsr9   r   r   r   r.   r.   r/   r      s   

z"AutogradCompilerInstance.post_hookc                 C   s|   t |tjsJ | jd usJ | j| }| j||dd}t  t|g}| ||g W d    |S 1 s7w   Y  |S )Npost_acc_grad_hookr   )rO   r)   rP   rK   r   r   r8   rp   )rL   inputr   r   r   r.   r.   r/   r      s   


z+AutogradCompilerInstance.post_acc_grad_hookc                 C   s6  i }d}t |j}|d jdksJ |d }t |j }tt}|| |d ks+J |t| d }|| |d ks=J t|D ]>\}	}
|sS|
jd j	j
dkrSd}qA|
jd j	j
d	k}t|
jd  dk}|r|rt |
j }td
d |D r|
||	< qA|r| D ]}
|
jd  |
jd< qt | S g S )NFr   r9   rW   ra   cudaTcpuc                 s   s,    | ]}t |jtjjo|jjd v V  qdS ))primsatenN)rO   r   r)   _ops
OpOverload	namespacer[   userr.   r.   r/   r]   $  s    

zDAutogradCompilerInstance.move_graph_nodes_to_cuda.<locals>.<genexpr>)listnodesr   userskeyslenrm   ro   metar   rv   r   allvaluesr   )rL   rk   to_movehas_cuda_inputsr   r9   inputs_usersfirst_getitem_idxlast_getitem_idxr   nodeis_cpu	is_scalar
node_usersr.   r.   r/   move_graph_nodes_to_cuda  s8   
z1AutogradCompilerInstance.move_graph_nodes_to_cudac                 C   s6   t |tjjo|jdko|jtjjjj	tjjj
jfv S )Nr   )rO   r)   ri   Nodeopr   opsr   sym_sizerq   	sym_numeldefault)rL   r   r.   r.   r/   is_sym_node7  s   z$AutogradCompilerInstance.is_sym_nodec                    sf   t   t| jjjddD ]\}} |j  q|tt	d ks$J  fdd}| jj
| d S )NrY   )r   rW   c                    s0   |  v p| j dkp| j dkp| j dko| jtv S )NrY   outputr   )r   r   _impure_targetsr   unpack_nodesr.   r/   	is_impureI  s   z/AutogradCompilerInstance.dce.<locals>.is_impure)r$   ro   rI   rk   
find_nodesupdater   r   r   rm   eliminate_dead_code)rL   r   r   r   r.   r   r/   dce?  s   zAutogradCompilerInstance.dcec              	      s  | j dtjdi  | j  | j dd| j | |fi  | 	  | 
  |   |   |   |   |   |   g t rL| | j jt| j j| j jd t dg td dddd}td	| td	| td
 fddd fdd}||  fS )Nr   r.   r   CompiledAutogradr9   zCompiled autograd graphT)include_deviceinclude_stridecoloredz%scompiled_autograd_graphc                      s    j ddS )NF)print_output)print_readabler.   )rk   r.   r/   <lambda>  s    z6AutogradCompilerInstance.end_capture.<locals>.<lambda>)
payload_fnc                    sr   z5da  D ]}||  jdd||< qt  | ||||W  d    W da S 1 s,w   Y  W da d S da w )NT)non_blockingF)in_compiled_autograd_region
pin_memoryr   _disable)compiled_fnr9   r:   r;   r<   r   )runtime_inputs_to_mover.   r/   runtime_wrapper  s   z=AutogradCompilerInstance.end_capture.<locals>.runtime_wrapper)rI   rZ   r   _exec_final_callbacks_stubrF   rG   create_node
create_argr   rename_aot_dispatcher_nodesreorder_tensor_pre_hook_nodes'reorder_pre_hook_nodes_to_schedule_asapreorder_accumulate_grad_nodes%reorder_pre_hook_nodes_to_mimic_eager reorder_post_acc_grad_hook_nodesreorder_post_hook_nodesr   r6   r   rk   r   rh   r   r   compiled_autograd_loginfoverbose_logdebugr   rC   )rL   r   lazy_graph_coder   r.   )rk   r   r/   end_captureS  sV   

z$AutogradCompilerInstance.end_capturec                 C   s  | j du rdS dtjjjdtjjjfdd}| j D ]\}}|d }|d }|d j}t|j	}t
|}|dus;J z|jd	krJt
|}|jd	ksAW n	 tyT   Y qw z|t| jjj	}	t|D ]}
t
|	 qat
|	}|jd
kr|||st
|	}|jd
kr|||rv|jd
kr|jd
kr|jst
|	}q|||std| d|j |_t|jD ]\}}d| d|j |j| _qt
|}t
|	}|jd
kr|jd
ksW q ty   td| j ||| Y qw dS )z
        Renames nodes as they appear in the AOTDispatcher backward graphs, prefixed by AOT id
        e.g. AOTDispatcher backward graph X's `sin_Y` -> `aotX_sin_Y`
        Ncaaotc                 S   s   | j |j k}|st| j dot|j do| j j|j jk}|sFt| j drFt|j drF|j  dkrFt|jddrF| j  |jd  k}|o]| j|jko]| j|jko]t| j	t|j	kS )N__name__rS   zaten::reshapeoriginal_aten)
r   hasattrr   rS   r   getr   rv   r   all_input_nodes)r   r   target_matchr.   r.   r/   
is_similar  s0   




zHAutogradCompilerInstance.rename_aot_dispatcher_nodes.<locals>.is_similarca_node_start_idxaot_idaot_gmr   r   _zIFailed to match %s%s (NodeCall %s) nodes with AOT backward graph %s nodes)rd   r)   ri   r   r   re   itemsrk   iterr   nextr   StopIterationrI   ranger   rS   ro   r   r   r   )rL   r  nodecall_indexr   r  r  	aot_graphaot_itaot_nodeca_itr  ca_noder   inpr.   r.   r/   r     sb   






z4AutogradCompilerInstance.rename_aot_dispatcher_nodesc                 C   s   dd | D }|S )Nc                 S   s    g | ]}t |tjju r|qS r.   )rv   r)   ri   r   )r[   nr.   r.   r/   r^     s     z:AutogradCompilerInstance.get_all_nodes.<locals>.<listcomp>r.   )r   r   r.   r.   r/   get_all_nodes  s   z&AutogradCompilerInstance.get_all_nodesc                 C   s8   | j dks| j dkr| jtjkr| jd j dkrdS dS )NrY   r   r   TF)r   r   operatorgetitemr   r   r.   r.   r/   is_placeholder  s   

z'AutogradCompilerInstance.is_placeholderc                 C   s   | j jjdtjjjjdD ]:}|jd |jd }}d}|j	t
jkr)|}|jd }t||g}||jurG| |sG|| |durG|| qdS )a  
        Usage of AOTAutograd causes all the accumulate_grad_ nodes to get pushed to the end of
        the graph.  This differs from eager mode, which schedules them as soon as possible. This
        pass attempts to reorder the graph to mimic eager behavior.
        r   r   r   r   rW   N)rI   rk   r   r)   r   inductoraccumulate_grad_r   r   r   r  r  maxprevr  r   )rL   r   
param_node	grad_nodegetitem_nodeargr.   r.   r/   r     s   



z6AutogradCompilerInstance.reorder_accumulate_grad_nodesc                 C   sl   | j jjdtdD ]*}|jdddkrq	|jd }|jd }||jur3| |s3|	| |	| q	dS )a  
        Usage of AOTAutograd causes all the tensor_pre_hook nodes to get pushed
        to the end of the graph. This differs from eager mode, which schedules
        them as soon as possible. This pass attempts to reorder the graph to
        mimic eager behavior.
        r   r  r   Nr   r   rW   )
rI   rk   r   r
   r   r   r   r  r  r   )rL   r   r  
input_noder.   r.   r/   r     s   




z6AutogradCompilerInstance.reorder_tensor_pre_hook_nodesc                 C   s   | j jjdtdD ]s}|jdddkrq	|jd }| |jd }g }g }|g}|D ]}|jdkrJ|j	t
jkrJ||jd  || || q+t||D ]\}}	|| ||	 qPt|}
|
|jur|| |
s||
| |D ]}|| qtq	dS )a  
        In this function, we schedule the pre hooks as soon as possible. This
        does not match eager behavior (schedule pre hook right before its
        registered node), but it can make acc grad be scheduled properly when
        the pre hooks are registered to them. After reordering acc grad node, we
        will reorder the pre hooks again to mimic eager behavior.
        r   r  r   Nr   r   rW   )rI   rk   r   r
   r   r   r   r  r   r   r  r  r   zipremover  r  r  )rL   r   r  input_nodes	to_remove	to_append
hook_blockr  abr  r.   r.   r/   r   *  s4   





z@AutogradCompilerInstance.reorder_pre_hook_nodes_to_schedule_asapc                 C   s   g }| j jjdtdD ]}|jdddkrq|| qt|D ]D}|jd }t	|j
 }t|dkr6q!tdd |D sAJ tt|d j
 }||jure|| || |D ]}|| q]q!dS )	a%  
        Usage of AOTAutograd causes all the pre_hook nodes to get pushed to the
        end of the graph. This differs from eager mode, which schedules them
        right before their registered node execution. This pass attempts to
        reorder the graph to mimic eager behavior.
        r   r  r   Nr   r   c                 s   s&    | ]}|j d ko|jtjkV  qdS )r   N)r   r   r  r  r   r.   r.   r/   r]   e  s
    
zQAutogradCompilerInstance.reorder_pre_hook_nodes_to_mimic_eager.<locals>.<genexpr>)rI   rk   r   r
   r   r   r   reversedr   r   r   r   r   r   r  r  prepend)rL   	pre_hooksr   hook_getitem_noder   registered_noder  r.   r.   r/   r   N  s.   




z>AutogradCompilerInstance.reorder_pre_hook_nodes_to_mimic_eagerc                 C   s   g }| j jjdtdD ]}|jdddkrq|| qt|D ]<}|jd }|jd }d}t	|j
 D ]}|jdkrJ|jtjjjjkrJ|} nq6|dusSJ d|| || q!dS )	a  
        Usage of AOTAutograd causes all the post_acc_grad_hook nodes to get
        pushed to the end of the graph. This differs from eager mode, which
        schedules them as soon as possible. This pass attempts to reorder the
        graph to mimic eager behavior.
        r   r  r   Nr   r   rW   z8post_acc_grad_hook must have corresponding acc grad node)rI   rk   r   r
   r   r   r   r)  r   r   r   r   r   r   r)   r   r  r  r   )rL   post_acc_grad_hooksr   r  r  acc_grad_noder  r.   r.   r/   r   q  s.   





z9AutogradCompilerInstance.reorder_post_acc_grad_hook_nodesc                    sl  g }| j jjdtdD ]  jdddkrq|  qt|D ]  jd } jd } jd }t	|dkr9q!g }|
t| |D ]}|
 fd	d
t|j D  qDt|}|jdkr|jtjjjjkr|jd }d}	t|j D ]}
|
jdkr|
jtkr|
jdddkr|
}	qx|	dur|	| |  q!| jur| |s|| |  q!dS )a  
        Usage of AOTAutograd causes all the post_hook nodes to get pushed to the
        end of the graph. This differs from eager mode, which schedules them as
        soon as possible. This pass attempts to reorder the graph to mimic eager
        behavior.
        r   r  r   Nr   r   rW      c                 3   s:    | ]}|j d kr|jtkr jdddks|V  qdS )r   r   Nr   )r   r   r
   r   r   r   r   r.   r/   r]     s    

zCAutogradCompilerInstance.reorder_post_hook_nodes.<locals>.<genexpr>r   )rI   rk   r   r
   r   r   r   r)  r   r   extendr   r   r   r  r   r   r)   r   r  r  r   r  r  )rL   
post_hooksr  output_nodesr#  input_nodes_and_usersr   r  r  post_acc_grad_hook_noder  r.   r   r/   r     sN   













z0AutogradCompilerInstance.reorder_post_hook_nodesc                    s   |d u rd S t |tr fdd|D S t |tr$t fdd|D S t |tjs,J t j|}t |tjjj	j
s=J |jS )Nc                    r   r.   r   r   r\   r.   r/   r^     r   z5AutogradCompilerInstance.to_proxy.<locals>.<listcomp>c                 3   s    | ]}  |V  qd S r1   r   r   r\   r.   r/   r]     s    z4AutogradCompilerInstance.to_proxy.<locals>.<genexpr>)rO   r   r   r)   rP   r   rI   ri   rw   proxy_tensor_ProxyTensorr   )rL   tr6  r.   r\   r/   r     s   

z!AutogradCompilerInstance.to_proxyc                    s   t  tjjrB|r5t|t|ksJ g }tt|D ]}|| \}}| ||d  | |  q| n fddtt|D  t|t ksLJ t| d | j	d d S )Nc                    s   g | ]} | qS r.   r.   )r[   r   r   r.   r/   r^     r   zDAutogradCompilerInstance.bind_tensors_to_proxies.<locals>.<listcomp>constanttracer)
rO   r)   ri   r&   r   r
  set_node_originr   r   rI   )rL   tensorsr   rU   bound_proxiesr   r  	node_namer.   r9  r/   rp     s   z0AutogradCompilerInstance.bind_tensors_to_proxiesindexc                 C   s4   | j d usJ | j | }t }t||d | jd |S )Nr:  )rK   r   r   rI   )rL   rA  r   bw_stater.   r.   r/   bind_backward_state  s
   
z,AutogradCompilerInstance.bind_backward_stater@  r  pyobjc           	      C   s   d}|d ur$|j }t|dr$|| _|j}t| jjj||jj	d| j
|< | | d| d}t  d }|d|}t| d S )N _aot_id)r  r  r  z (NodeCall )r   z:raw_stack_trace = CapturedTraceback.extract().format()[-1])_forward_clsr   rd   rF  r   rI   rk   r   _lazy_backward_info	bw_modulere   r%   extractformatreplacer#   )	rL   r@  r  rD  maybe_aot_idforward_clsnew_coderaw_stack_tracenew_stack_tracer.   r.   r/   r=    s    
z(AutogradCompilerInstance.set_node_originr>   Nr1   )+r   
__module____qualname__rM   rR   staticmethodr   rN   r   r)   rP   rq   r   rr   r   strr   r   r   r   r   r   r   r   r   r   r   r   r  r  r   r   r   r   r   r   r   r   rp   rC  autogradFunctionr=  r.   r.   r.   r/   r=   F   sd    

I
#(DZ

	$#'<

r=   Fc              
   c   s    |rt |tu sJ ddlm} |jjdkr%dazd V  W dad S daw dd l}|jj	j
tt| |\}}t rC|jj	j
t daz)|jd d V  W d    n1 sZw   Y  W |sdda|jj	j
|| d S |ssda|jj	j
|| w )Nr   )
eval_frameforce_eagerTF)rv   booltorch._dynamorZ  _stancestance%compiled_autograd_enabled_force_eagertorch._inductor.cudagraph_trees_C_dynamor'   set_autograd_compiler	functoolspartialr=   r0   set_verbose_loggerr   compiled_autograd_enabledrX  set_multithreading_enabled)rC   dynamicrZ  r)   prior_compilerprior_dynamicr.   r.   r/   _enable  sD   


rm  c               
   c   s`    t jjjd d\} }dazd V  W | rdat jjj| | d S | r&dat jjj| | w )NFT)r)   rb  rc  r'   rd  rh  )rk  rl  r.   r.   r/   r   I  s$   

r   r>   c                   C   s2   da trJ tjjjd d tjjjd  d S )NF)rh  r   r)   rb  rc  r'   rd  rg  r.   r.   r.   r/   reset\  s   rn  )FrS  )KrD   re  r  typingr   r   r   r   r   r   r   r)   torch._dynamo.external_utilsr	   r
   r   torch._dynamo.sourcer   r   torch._dynamo.utilsr   r   r   torch._loggingr   r   torch._prims_commonr   torch._subclassesr   torch.fxr   %torch.fx.experimental._backward_stater   "torch.fx.experimental.proxy_tensorr   r   r   r   r   r   r   %torch.fx.experimental.symbolic_shapesr    r!   torch.fx.tracebackr"   r#   torch.utils._ordered_setr$   torch.utils._tracebackr%   torch.fx.proxyr&   r   r   r   r0   r6   r8   rm   r   r   r  r  r   r   r=   rh  r`  r   contextmanagerrm  r   rn  r.   r.   r.   r/   <module>   s^   $$	



     U)
