o
    ߗiD                     @   s  d dl Z d dlZd dlZd dlZd dlmZ d dlmZ d dlm	Z	m
Z
mZmZmZmZmZmZ d dlZd dlmZ d dlmZ d dlm  mZ d dlm  mZ d dlmZmZ d dl m!Z! d dl"m#Z#m$Z$ g dZ%d	e&d
ee&e&f fddZ'dee dej(dee&e	f fddZ)dej(dee&e	f dejj*fddZ+d7dejj*d
ejj*fddZ,dej*d
ej*fddZ-dej*deej( deej( deej( fdd Z.ej/ej0ej1ej2ej3ej4ej5ej6ej7ej8ej6ej9ej:gZ;ej<ej=gZ>ej/ej?ej0ej@ej1d!d" iZAdeej( dee&ej*f fd#d$ZBdeej( dee&ej*f d%eej*ej*f fd&d'ZCG d(d) d)ZDd8d,d-ZEd.eDd
eFfd/d0ZGG d1d2 d2ZHdejIfdejj*d3eee&e	f  d4eejI d
ejj*fd5d6ZJdS )9    N)defaultdict)Enum)AnycastDictIterableListOptionalTupleType)ArgumentTarget)	ShapeProp)fuse_conv_bn_evalfuse_linear_bn_eval)matches_module_patternreplace_node_modulefuseremove_dropoutextract_subgraphmodules_to_mkldnnreset_modulesMklSubgraphgen_mkl_autotuneruse_mkl_length	UnionFindoptimize_for_inferencetargetreturnc                 C   s*   |  dd^ }}|r|d |fS d|fS )zp
    Splits a qualname into parent path and last atom.
    For example, `foo.bar.baz` -> (`foo.bar`, `baz`)
    .   r    )rsplit)r   parentname r%   `/home/ubuntu/transcripts/venv/lib/python3.10/site-packages/torch/fx/experimental/optimization.py_parent_name$   s   r'   patternnodemodulesc                 C   s   t |jdkr	dS |jd |f}t| |D ]2\}}t|tjs" dS |jdkr* dS t|jts3 dS |j|vr; dS t	||j |urG dS qdS )Nr   Fcall_moduleT)
lenargszip
isinstancefxNodeopr   strtype)r(   r)   r*   nodesexpected_typecurrent_noder%   r%   r&   r   .   s    

r   
new_modulec                 C   s<   t | jtsJ t| j\}}||| j< t|| || d S N)r/   r   r3   r'   setattr)r)   r*   r8   parent_namer$   r%   r%   r&   r   B   s   
r   Fmodelc                 C   s4  t jt jft jt jft jt jft jt jfg}|st	| } |r&t
| tjjs,t| }n| }t| }t	|j}|D ]W}|jD ]Q}t|||rt|jd jdkrTqA||jd j }	||j }
|
jseqA|d t jt jt jfv rwt|	|
}nt|	|
}t|jd || ||jd  || qAq<t||S )z
    Fuses convolution/BN and linear/BN layers for inference purposes.
    Will deepcopy your model by default, but can modify the model inplace as well.
    r   r    )nnConv1dBatchNorm1dConv2dBatchNorm2dConv3dBatchNorm3dLinearcopydeepcopyr/   torchr0   GraphModulesymbolic_tracedictnamed_modulesgraphr5   r   r,   r-   usersr   track_running_statsr   r   r   replace_all_uses_with
erase_node)r<   inplaceno_tracepatternsfx_modelr*   	new_graphr(   r)   first_layerbnfused_layerr%   r%   r&   r   K   s<   








r   c                 C   s*   t | }G dd dtj j}|| S )z5
    Removes all dropout layers from the module.
    c                       s>   e Zd Zdedeedf deeef def fddZ	  Z
S )z&remove_dropout.<locals>.DropoutRemoverr   r-   .kwargsr   c                    s:   t | j| tjrt|dksJ |d S t |||S )Nr    r   )r/   
submodulesr=   Dropoutr,   superr+   )selfr   r-   rY   	__class__r%   r&   r+   z   s   z2remove_dropout.<locals>.DropoutRemover.call_module)__name__
__module____qualname__r   r
   r   r   r3   r   r+   __classcell__r%   r%   r^   r&   DropoutRemovery   s    

rd   )r0   rI   rG   Transformer	transform)r<   rT   rd   r%   r%   r&   r   s   s   

r   orig_moduler5   inputsoutputsc                    s|   t  }i  |D ]}||j}| |< q|D ]}|| fdd}| |< q| fdd|D  |  t | |S )z
    Given lists of nodes from an existing graph that represent a subgraph, returns a submodule that executes that subgraph.
    c                    s    |  S r9   r%   )xenvr%   r&   <lambda>       z"extract_subgraph.<locals>.<lambda>c                    s   g | ]} | qS r%   r%   ).0outputrk   r%   r&   
<listcomp>       z$extract_subgraph.<locals>.<listcomp>)r0   Graphplaceholderr$   	node_copyrp   lintrH   )rg   r5   rh   ri   rU   inputnew_noder)   r%   rk   r&   r      s   	

r   c                 C   s
   t | S r9   )	th_mkldnnMkldnnBatchNorm)a_r%   r%   r&   rm      s   
 rm   c                 C   s   i }| D ]9}|j dkr=t|jtsJ ||j }t|tv r=tt| |tj}t|tj	s0J t
|||< t||| q|S )z
    For each node, if it's a module that can be preconverted into MKLDNN,
    then we do so and create a mapping to allow us to convert from the MKLDNN
    version of the module to the original.
    r+   )r2   r/   r   r3   r4   
mkldnn_maprG   floatr=   ModulerE   rF   r   )r5   r*   old_modulesr)   
cur_moduler8   r%   r%   r&   r      s   

r   r   c                 C   sJ   | D ] }|j dkr"t|jtsJ ||j }||v r"t||||  qdS )za
    Maps each module that's been changed with `modules_to_mkldnn` back to its
    original.
    r+   N)r2   r/   r   r3   r   )r5   r*   r   r)   r   r%   r%   r&   r      s   	

r   c                   @   s   e Zd ZdejfddZdS )r   fx_graphc                 C   s   || _ g | _g | _g | _d S r9   )r   r5   start_nodes	end_nodes)r]   r   r%   r%   r&   __init__   s   
zMklSubgraph.__init__N)r`   ra   rb   r0   rs   r   r%   r%   r%   r&   r      s    r   
   r    c                    s*   dddt dtf fdd}|S )aW  
    This generates a heuristic that can be passed into `optimize_for_inference` that
    determines whether a subgraph should be run in MKL by running it with the example_inputs.

    Example usage:
        heuristic = gen_mkl_autotuner(example_inputs, iters=10)
        fast_model = optimization.optimize_for_inference(model, heuristic)
    NrL   r   c                    s   | j }d u r| jj| jjt dd |D  tttj	 dd | j
D }t| j||fdd}| fdd}tjjt  | fdd}||k S )	Nc                 S   s   g | ]}t |jqS r%   )rG   randnshapero   r)   r%   r%   r&   rq      s    z@gen_mkl_autotuner.<locals>.use_mkl_heuristic.<locals>.<listcomp>c                 S   s   g | ]}|j d  qS )r   )r-   r   r%   r%   r&   rq      s    c                    s<   t D ]}|   qt }t  D ]}|   qt | S r9   )rangetime)fr|   begin)iterswarmupr%   r&   	benchmark   s   z?gen_mkl_autotuner.<locals>.use_mkl_heuristic.<locals>.benchmarkc                      s   dd dd  D  D S )Nc                 S      g | ]}|  qS r%   )to_densero   ir%   r%   r&   rq     s    zRgen_mkl_autotuner.<locals>.use_mkl_heuristic.<locals>.<lambda>.<locals>.<listcomp>c                 S   r   r%   )	to_mkldnnr   r%   r%   r&   rq     rr   r%   r%   sample_inputs	submoduler%   r&   rm     s    z>gen_mkl_autotuner.<locals>.use_mkl_heuristic.<locals>.<lambda>c                      s     S r9   r%   r%   r   r%   r&   rm     rn   )r   r   owning_moduler   r   	propagater   r   r0   r1   r   r   r5   r   rL   rJ   rK   )rL   input_nodesoutput_argsr   mkl_timeno_mkl_timeexample_inputsrT   r   r   r   r   r&   use_mkl_heuristic   s"   z,gen_mkl_autotuner.<locals>.use_mkl_heuristic)r   bool)r   r   r   r   r%   r   r&   r      s   	r   rL   c                 C   s   t | jdkS )z
    This is a heuristic that can be passed into `optimize_for_inference` that
    determines whether a subgraph should be run in MKL by checking if there
    are more than 2 nodes in it
       )r,   r5   )rL   r%   r%   r&   r     s   r   c                   @   sF   e Zd Zdd ZdefddZdedefddZd	ed
efddZdS )r   c                 C   s   d g| | _ dg| | _d S )Nr   r#   size)r]   nr%   r%   r&   r     s   zUnionFind.__init__vc                 C   s   || j |< d| j|< d S )Nr    r   )r]   r   r%   r%   r&   make_set  s   
zUnionFind.make_setr   c                 C   sB   | j | }||kr|S |d usJ | || j |< tt| j | S r9   )r#   findr   int)r]   r   parr%   r%   r&   r   #  s   
zUnionFind.findr{   bc                 C   sf   |  ||  |}}||kr|S | j| | j| k r ||}}|| j|< | j|  | j| 7  < d S r9   )r   r   r#   )r]   r{   r   r%   r%   r&   join+  s   

zUnionFind.joinN)r`   ra   rb   r   r   r   r   r   r%   r%   r%   r&   r     s
    r   pass_configtracerc              	      s@  dddt id}|du ri }|| |d rt| } |d r#t| } |d du r+| S t|d ts6td	d|d vr@td
|d d }| }|t	|  t
|j  t|  }G dd dt}t jD ]}|j}	|jdkr||j }
t|
tv r|j}	t|
 d}|dur|jtjksJ d|jtdksJ dn|jdkr|jtv r|j}	n|jtv r|j}	|	|jkr"|	|jkrtdd |j D sqk !| t
"|j  fdd}W d   n1 sw   Y  t#t$t
j%j& ||_  '|  (dd|f}|)| |f|_ W d   n	1 sw   Y  qkt*t j|}| _+ jD ]B}|jdkrr|jdkrr|j d }t|j,}|D ]}|jdkrc|jdkrc|)|  -| qKt.|j,dkrr -| q1t. j}t/|fddt0 jD ]w\}}|jdkr|jdkr||_12| q|jdkr|jdkrĈ|j d dusJ |j d |_3qfdd|j4D }t.|dkrאqtdd |D rJ t5|}|d |_6|dd D ]}7|d | qqt8 fd d} jD ]9}t9|d!r|:|j6 j;| t9|d"r1|:|j1 j<;| t9|d#rC|:|j3 j=;| q|> D ](}||sp|j<|j= D ]}|j d }|)|  -| qVt?|j|| qId} jD ]}|jdks|jdkr|d7 }qwt@AtBCd$|  D  t
|  }|S )%a  
    Performs a set of optimization passes to optimize a model for the
    purposes of inference. Specifically, the passes that are run are:
    1. Conv/BN fusion
    2. Dropout removal
    3. MKL layout optimizations

    The third optimization takes a function `use_mkl_heuristic` that's used
    to determine whether a subgraph should be explicitly run in MKL layout.

    Note: As FX does not currently handle aliasing, this pass currently
    assumes nothing aliases. If that isn't true, use at your own risk.
    T	heuristic)conv_bn_fuser   mkldnn_layout_optimizeNr   r   r   Fz+mkldnn_layout_optimize config is not a dictz4Heuristic not found in mkldnn_layout_optimize configc                   @   s   e Zd ZdZdZdZdS )z*optimize_for_inference.<locals>.MklSupportr    r      N)r`   ra   rb   NOYESUNKNOWNr%   r%   r%   r&   
MklSupporta  s    r   r+   z)this pass is only for torch.float modulescpuz!this pass is only for CPU modulescall_functionc                 s   s    | ]}|j d kV  qdS )r   N)r   )ro   argr%   r%   r&   	<genexpr>  s    z)optimize_for_inference.<locals>.<genexpr>c                    s     d| fS )Nr   )call_methodr   r   r%   r&   rm     s    z(optimize_for_inference.<locals>.<lambda>r   r   r   r   c                    s0   t | dr | jS t | dr | jS d S )Ncolorstart_color)hasattrr   r   r   r   )ufr%   r&   	get_color  s
   

z)optimize_for_inference.<locals>.get_colorc                    s,   g | ]}t |tjr |d ur |qS r9   )r/   r0   r1   r   )r   r%   r&   rq     s    
z*optimize_for_inference.<locals>.<listcomp>c                 s   s    | ]}|d u V  qd S r9   r%   r   r%   r%   r&   r     s    r    c                      s   t  S r9   )r   r%   r   r%   r&   rm     rn   r   r   	end_colorzmkldnn conversions: %s)Er   updater   r   r/   rJ   RuntimeErrortracerE   rF   r0   rH   rootrK   r   listr5   r   r2   r   r4   mkldnn_supportedr   next
parametersdtyperG   r~   devicemkldnn_supported_unknownr   anyr-   inserting_beforemap_argr   r
   r)   r   inserting_aftercreate_noderO   r   r   rM   rP   r,   r   	enumerater   r   r   all_input_nodessortedr   r   r   r   r   appendr   r   valuesr   logging	getLoggerr`   inforv   )r<   r   r   default_pass_configr   
cur_tracerr*   r   r)   supports_mkldnnr   sample_parametermkldnn_argsdense_xr   prv_noderM   user	num_nodescur_idx
cur_colorsother_colormkldnn_graphsrL   prvmkldnn_conversionsresultr%   )r   r   r   r&   r   5  s   
	





















r   )FF)r   r    )KrE   r   operatorr   collectionsr   enumr   typingr   r   r   r   r   r	   r
   r   rG   torch.fxr0   torch.nnr=   torch.nn.functional
functionalFtorch.utils.mkldnnutilsmkldnnry   torch.fx.noder   r   torch.fx.passes.shape_propr   torch.nn.utils.fusionr   r   __all__r3   r'   r1   r   r   r   r   r   r   r@   rD   rA   ReLU	MaxPool2d	AvgPool2dAdaptiveAvgPool2drelu	transposesigmoid
avg_pool2dadaptive_avg_pool2dr   addmulr   MkldnnConv2dMkldnnLinearr}   r   r   r   r   r   r   r   Tracerr   r%   r%   r%   r&   <module>   s   (




	(

"

.	