o
    Ơi9                     @   s(  U d dl Z d dlZd dlZd dlmZ d dlmZmZmZmZm	Z	m
Z
mZ d dlZd dlZd dlZd dlZd dlmZ d dlmZ d dlmZ ddlmZ dd	lmZ dd
lmZ ddlmZmZm Z m!Z! e "e#Z$e j%e&d< edZ'eeej(j)gdf  e&d< G dd de	Z*G dd dej(j+Z,dS )    N)datetime)AnyCallableDictList
NamedTupleOptionalSequence)
OpOverload)_get_qualified_name)TensorMetadata   )
CONVERTERS)InputTensorSpec)Observer)
FrameworksLowerPrecisionget_dynamic_dimsunified_dtype_converter_LOGGER!TRT_INTERPRETER_CALL_PRE_OBSERVERc                   @   s6   e Zd ZU eed< ee ed< ee ed< eed< dS )TRTInterpreterResultengineinput_namesoutput_namesserialized_cacheN)__name__
__module____qualname__r   __annotations__r	   str	bytearray r"   r"   L/home/ubuntu/.local/lib/python3.10/site-packages/torch_tensorrt/fx/fx2trt.pyr      s
   
 r   c                
       s   e Zd Z			ddejjdee dedef fddZ	d	d
 Z
dd Zddejdddddddf
def fddZ fddZdd Zdd Zdd Zdd Zdd Z  ZS )TRTInterpreterFNmoduleinput_specsexplicit_batch_dimensionexplicit_precisionc           
         s   t  | t|ptjj| _t| j| _d}|r'dttj	j
> }||O }|r5dttj	j> }||O }| j|| _|  }	|	rQtdddd |	D   d | _|| _d| _|   d | _g | _g | _t | _d S )Nr   r   z4Interpretation will fail due to missing operations 

c                 s   s    | ]}| V  qd S Nr"   ).0ir"   r"   r#   	<genexpr>D   s    z*TRTInterpreter.__init__.<locals>.<genexpr>)super__init__trtLoggerWARNINGloggerBuilderbuilderintNetworkDefinitionCreationFlagEXPLICIT_BATCHEXPLICIT_PRECISIONcreate_networknetworkvalidate_conversionwarningswarnjoinoptimization_profilesr&   input_specs_itervalidate_input_specs_cur_node_name_input_names_output_namesdict_itensor_to_tensor_meta)
selfr%   r&   r'   r(   logger_levelflagr8   r9   missing_ops	__class__r"   r#   r/   %   s>   zTRTInterpreter.__init__c              	      s  j D ]\}}}}jjs|sJ dt}t|rňjjr(J d dt|s0J djrMt|tjksLJ dtj dt| dnfdd	tt|D _|D ]ftd
ksnJ dt tfddD sJ d dt ttD ]7  |v rt fddtdD sJ d  d dqt fddD sJ d  d dqq]qt|dksJ dqd S )NzPIt's required to specify batch dimension when it's explicit in TensorRT network.z7Can't have dynamic dim when batch dim is implicit, got .z9shape_ranges must be provided when shape has dynamic dim.z Number of optimization profiles z. doesn't match with the number of shape_range z
 provided.c                    s   g | ]} j  qS r"   )r5   create_optimization_profile)r+   _)rH   r"   r#   
<listcomp>j   s    z7TRTInterpreter.validate_input_specs.<locals>.<listcomp>   z*Expect three elements in shape_range, got c                 3   s     | ]}t |t  kV  qd S r*   )lenr+   s)shaper"   r#   r-   s       z6TRTInterpreter.validate_input_specs.<locals>.<genexpr>zExpect elements in shape_range z9 have the same number of dimension as the provided shape c                 3   s,    | ]}|   |d     kV  qdS )r   Nr"   )r+   j)r,   shape_ranger"   r#   r-   z   s
    
   zExpect dynamic dim z5 to have incremental value for shapes in shape_range c                 3   s     | ]}|    kV  qd S r*   r"   rT   )r,   rV   r"   r#   r-      rW   zExpect non dynamic dim z. to be the same for all shapes in shape_range r   zBshape_ranges are provided for input that doesn't have dynamic dim.)r&   r;   has_implicit_batch_dimensionr   rS   r@   rangeall)rH   rP   shape_rangeshas_batch_dimdynamic_dimsr"   )r,   rH   rV   rY   r#   rB   R   s|   



z#TRTInterpreter.validate_input_specsc                 C   s   t  }| jjjD ]Z}|jdkr$t|js$||j dt	|j  q|jdkr<t|js<||j d|j  q|jdkrb| 
|j}t|dt|}t|sb||j dt|  q|S )Ncall_function call_methodz torch.Tensor.call_module_base_class_origin)setr%   graphnodesopr   gettargetaddr   
fetch_attrgetattrtypetorchtypename)rH   missing_converternodesubmodsubmod_typer"   r"   r#   r<      s   

z"TRTInterpreter.validate_conversion@   i   returnc                    s  t | j | o|tjk| _|tjkr| jjst	d|tjkr*| jj
s*td d| _t }t   tdt |   t }|| j_| j }||_tjdkrhtjdddkrhtd	 d|_d
}|ryt|}||  }n|d}|!|d tjdkr|	r|	ntj"j#|_$|tjkr|%tj&j |tjkr|%tj&j |r|%tj&j' |r|%tj&j( | j)r| j)D ]}|*| q|r|%tj&j+ ||_,|
d
ur|j-|
d | j.| j/|}|sJ |0 rt1|2 nt1 }tdt |   t3|| j4| j5|S )a  
        Build TensorRT engine with some configs.
        Args:
            max_batch_size: set accordingly for maximum batch size you will use.
            max_workspace_size: set to the maximum size we can afford for temporary buffer
            lower_precision: the precision model layers are running on (TensorRT will choose the best perforamnce precision).
            sparse_weights: allow the builder to examine weights and use optimized functions when weights have suitable sparsity
            force_fp32_output: force output to be fp32
            strict_type_constraints: Usually we should set it to False unless we want to control the precision of certain layer for numeric reasons.
            algorithm_selector: set up algorithm selection for certain layer
            timing_cache: enable timing cache for TensorRT
            profiling_verbosity: TensorRT logging level
        Return:
            TRTInterpreterResult
        z2Current platform doesn't support fast native int8!z2Current platform doesn't support fast native fp16!r   z(TRT INetwork construction elapsed time: z8.6TRT_TEST_ENV01zSet TRT optimization level to 0N    Fz8.2)tactic_sourceszBuild TRT engine elapsed time: )6r   observer%   r   FP16output_fp16INT8r5   platform_has_fast_int8RuntimeErrorplatform_has_fast_fp16r=   r>   rA   r   nowr.   runr   infomax_batch_sizecreate_builder_configmax_workspace_sizer0   __version__osenvironrj   builder_optimization_levelnumpyarraycreate_timing_cachetobytesset_timing_cacheProfilingVerbosityLAYER_NAMES_ONLYprofiling_verbosityset_flagBuilderFlagSPARSE_WEIGHTSSTRICT_TYPESr@   add_optimization_profileDISABLE_TIMING_CACHEalgorithm_selectorset_tactic_sourcesbuild_enginer;   get_timing_cacher!   	serializer   rD   rE   )rH   r   r   lower_precisionsparse_weightsforce_fp32_outputstrict_type_constraintsr   timing_cacher   r|   run_module_start_timebuild_engine_start_timebuilder_configcache
cache_fileoptimization_profiler   r   rL   r"   r#   r      s~   











zTRTInterpreter.runc                    sj   t || _t|j}| j|d< ||_t |}t|j}|d= ||_t|tj	j
r3|jd| j|< |S )NrG   tensor_meta)r    rC   rF   kwargsrG   r.   run_node
isinstancer0   tensorrtITensormetarj   )rH   nr   trt_noderL   r"   r#   r     s   



zTRTInterpreter.run_nodec                 C   s   | j | | j| j \}}}}}|  jd7  _| jjr%|r$|dd  }nt|D ]\}	}
| js2J | j|	 j|g|
R   q)| jj	|t
|t|tjdS )Nr   )namerV   dtype)rD   appendr&   rA   r;   r[   	enumerater@   	set_shape	add_inputtupler   r   TRT)rH   rk   argsr   rV   r   rP   r^   r_   r,   rY   r"   r"   r#   placeholder%  s"   

zTRTInterpreter.placeholderc                 C   sh   t |tsJ | |}t|dt|}t|}|s#td| d| jd us*J || j	|||| jS )Nre   zConversion of module of type  not currently supported!)
r   r    rm   rn   ro   r   rj   r   rC   r;   )rH   rk   r   r   rt   ru   	converterr"   r"   r#   rd   :  s   


zTRTInterpreter.call_modulec                 C   sF   t |}|stdt| d| jd usJ || j|||| jS )NzConversion of function r   )r   rj   r   rp   rq   rC   r;   rH   rk   r   r   r   r"   r"   r#   ra   H  s   
zTRTInterpreter.call_functionc                 C   sN   t |tsJ t|}|std| d| jd usJ || j|||| jS )NzConversion of method r   )r   r    r   rj   r   rC   r;   r   r"   r"   r#   rc   R  s   

zTRTInterpreter.call_methodc                    s   t |dksJ t|d tr|d }nt|d tr"t|d }n|d f}tdd |D s4tdt|D ]<\} t fdddD rJd}nd	}d
| }| _| j	
  |ratj _n| jrn jtjkrntj _| j| q8d S )Nr   r   c                 s   s    | ]
}t |tjjV  qd S r*   )r   r0   r   r   )r+   outputr"   r"   r#   r-   g  s    z(TRTInterpreter.output.<locals>.<genexpr>z+TensorRT requires all outputs to be Tensor!c                 3   s     | ]}| j d v V  qdS )rP   N)r   split)r+   op_namer   r"   r#   r-   k  s
    
)
eqgtltorxorandnotneisinfanyTFr   )rS   r   r   listr]   r   r   r   r   r;   mark_outputr0   boolr   r   float32float16rE   r   )rH   rk   r   r   outputsr,   output_boolr   r"   r   r#   r   ^  s.   



zTRTInterpreter.output)FFN)r   r   r   rp   fxGraphModuler   r   r   r/   rB   r<   r   r~   r   r   r   r   rd   ra   rc   r   __classcell__r"   r"   rL   r#   r$   $   sD    -9v
r$   )-loggingr   r=   r   typingr   r   r   r   r   r   r	   r   r   r0   rp   torch.fx
torch._opsr
   torch.fx.noder   torch.fx.passes.shape_propr   converter_registryr   input_tensor_specr   observerr   utilsr   r   r   r   	getLoggerr   r   r1   r   r   r   r   r   Interpreterr$   r"   r"   r"   r#   <module>   s*   
 $