o
    Ơi                     @   s   d dl Zd dlmZmZmZmZ d dlmZ d dl	m
Z
 ddlmZ ddlmZmZ ddlmZ ejG d	d
 d
ZejG dd deZdS )    N)ListOptionalSetType)nn)PassManager   )InputTensorSpec)fuse_permute_linearfuse_permute_matmul)LowerPrecisionc                   @   s   e Zd ZU dZdZeed< ejZ	eed< dZ
eed< dZeeeej   ed< dZeeeej   ed	< d
Zeed< d
Zeed< dS )LowerSettingBasica  
    Basic class for lowering.
    max_batch_size: The maximum batch size for lowering job.
                    If run with TensorRT lowering, this is the maximum
                    batch size which can be used at execution time,
                    and also the batch size for which the ICudaEngine
                    will be optimized.
                    If run with AITemplate lowering, this the max batch_size
                    for the model.
    lower_precision: lower precision dtype during lowering.
    min_acc_module_size(int): minimal number of nodes for an accelerate submodule.
    ast_rewriter_allow_list (Optional[Set[nn.Module]]): Optional allow list of
    modules that need AST rewriting. This is aiming to eliminate input variable involve in
    exception checking control flow.
    leaf_module_list (Optional[Set[nn.Module]]): Optional leaf module list where
    modules will not be traced into.
    verbose_profile (bool): verbosity of profiler, default to False.
    i   max_batch_sizelower_precision
   min_acc_module_sizeNast_rewriter_allow_listleaf_module_listFverbose_profileis_aten)__name__
__module____qualname____doc__r   int__annotations__r   FP32r   r   r   r   r   r   r   Moduler   r   boolr    r   r   S/home/ubuntu/.local/lib/python3.10/site-packages/torch_tensorrt/fx/lower_setting.pyr      s   
 r   c                   @   s  e Zd ZU dZejedZee	 e
d< dZee
d< dZee
d< dZee
d	< dZee
d
< ejdd dZee
d< ejdd dZee
d< dZee
d< dZdZee
d< dZee
d< dZee
d< dZee
d< dZee
d< dZee
d< dZee e
d< dZe e
d< dZ!e e
d< dZ"ee
d< dS ) LowerSettinga'  
    Basic configuration for lowering stack.
    Args:
    input_specs: Specs for inputs to engine, can either be a single size or a
    range defined by Min, Optimal, Max sizes.
    explicit_batch_dimension: Use explicit batch dimension during lowering.
    explicit_precision: Use explicit precision during lowering.
    max_workspace_size: The maximum workspace size. The maximum GPU temporary
    memory which the TensorRT engine can use at execution time.
    strict_type_constraints: Require TensorRT engine to strictly follow data type
    setting at execution time.
    customized_fuse_pass: List of custmozied pass to apply during lowering process.
    lower_basic_fuse_pass: Enable basic pass fuse duirng lowering, i.e. fuse multiple operations
    as (a->b->c->d)=>(e). Current basic fuse patterns are:
    permute->linear
    permute->matmul
    verbose_log: Enable TensorRT engine verbose log mode.
    algo_selector: Enable TensorRT algorithm selector at execution time.
    timing_cache_prefix: TensorRT timing cache file path. TensorRT engine will use timing
    cache file at execution time if valid timing cache file is provided.
    save_timing_cache: Save updated timing cache data into timing cache file if the timing
    cache file is provided.
    cuda_graph_batch_size (int): Cuda graph batch size, default to be -1.
    preset_lowerer (str): when specified, use a preset logic to build the
    instance of Lowerer.
    only used by explicit batch dim with dynamic shape mode. In general, we use 2 GPU setting with
    2 stream on each. Set total number to 8 as a safe default value.
    dynamic_batch: enable the dynamic shape in TRT with dim=-1 for the 1st dimension.
    tactic_sources: tactic sources for TensorRT kernel selection. Default to None,
    meaning all possible tactic sources.
    correctness_atol: absolute tolerance for correctness check
    correctness_rtol: relative tolerance for correctness check
    use_experimental_rt: Uses the next generation TRTModule which supports both Python and TorchScript based execution (including in C++).
    )default_factoryinput_specsTexplicit_batch_dimensionFexplicit_precisioni   @max_workspace_sizestrict_type_constraintsc                   C   s
   t g S N)r   build_from_passlistr   r   r   r    <lambda>U   s   
 zLowerSetting.<lambda>customized_fuse_passc                   C   s   t ttgS r(   )r   r)   r   r
   r   r   r   r    r*   X   s    lower_basic_fuse_passverbose_logN timing_cache_prefixsave_timing_cachecuda_graph_batch_sizepreset_lowerer   opt_profile_replicadynamic_batchtactic_sourcesg?correctness_atolcorrectness_rtoluse_experimental_rt)#r   r   r   r   dcfieldlistr#   r   r	   r   r$   r   r%   r&   r   r'   r+   r   r,   r-   algo_selectorr/   strr0   r2   r3   r5   r6   r7   r   r8   floatr9   r:   r   r   r   r    r!   *   s2   
 #r!   )dataclassesr;   typingr   r   r   r   torchr   torch.fx.passes.pass_managerr   input_tensor_specr	   passes.lower_basic_passr
   r   utilsr   	dataclassr   r!   r   r   r   r    <module>   s    