o
    Ơik                     @  s  d dl mZ d dlZd dlZd dlZd dlmZmZmZ d dl	Z	d dl
mZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZmZmZmZmZ d dlm Z m!Z!m"Z" e#e$Z%ej&ddej&ddd"ddZ'ej&ddd"ddZ(e dfd#d d!Z)dS )$    )annotationsN)AnyCallableSequence)aot_autograd)detect_fake_mode)aot_export_joint_simple)is_tegra_platform)CompilationSettings)compile_module)get_decompositionspost_loweringremove_detachremove_sym_nodesrepair_input_aliasing)parse_dynamo_kwargsprepare_inputsset_log_leveltensorrt)nametorch_tensorrtgmtorch.fx.GraphModulesample_inputsSequence[Any]kwargsr   returntorch.nn.Modulec                 K  sT   d|v rd|d v r|d d sd|v r|d rt tjtj t}|| |fi |S )Noptionsdebug)r   loggerparentloggingDEBUGaot_torch_tensorrt_aten_backend)r   r   r   DEFAULT_BACKEND r&   Z/home/ubuntu/.local/lib/python3.10/site-packages/torch_tensorrt/dynamo/backend/backends.pytorch_tensorrt_backend    s   
r(   aot_torch_tensorrt_atenc           	        s   t |\}}|jr=td tjt||d}i }t|j|d< dd |d D }|D ]}|d |= q*t	||d d| |S t
 sVddlm  t fd	d
|D rVtd |jr^td t| |||S )Nz@Wrapping the backend with aot_autograd for Distributed examples
settingsengine_cachedecompositionsc                 S  s   h | ]	}d |j v r|qS )detach)_name).0keyr&   r&   r'   	<setcomp>G   s
    
z2aot_torch_tensorrt_aten_backend.<locals>.<setcomp>)fw_compilerr-   r   DTensorc                 3  s    | ]}t | V  qd S )N)
isinstance)r0   tensorr4   r&   r'   	<genexpr>W   s    z2aot_torch_tensorrt_aten_backend.<locals>.<genexpr>zIt is recommended to run the model with use_distributed_mode_trace = True since there are distributed tensors in the input which is not supported in aot_export_joint_simplez~The offload_module_to_cpu option is set, but it is being ignored since the torch_compile backend does not support this feature)r   use_distributed_mode_tracer    r   	functoolspartial_pretraced_backendr   "enable_experimental_decompositionsr   r	   torch.distributed.tensorr5   anywarningoffload_module_to_cpu)	r   r   r   r+   r,   _pretraced_backend_autogradsettings_aot_autograd	to_deleter1   r&   r4   r'   r$   2   sD   r$   r+   r
   r,   )torch.fx.GraphModule | Callable[..., Any]c              	   C  s  zt dt| j  t|}tjj|dd |q t	| | t
| || dd |D }t| | |jsAt| |dt|jd} t dt| j  t| |} t d	t| j  t|dd
}|jrht d |jrst d d|_t| |||d}|W  d   W  d   W S 1 sw   Y  W d   W dS 1 sw   Y  W dS  ttfy   |jst jddd |  Y S t d  w )a'  Helper function to manage translation of traced FX module to TRT engines

    Args:
        module: FX GraphModule to convert
        inputs: Inputs to the module
        settings: Compilation settings
        engine_cache: Engine cache instance
    Returns:
        Compiled FX GraphModule
    zPre-AOT Autograd graph:
allow_non_fake_inputsTc                 S  s   g | ]
}t |tjr|qS r&   )r6   torchTensor)r0   inputr&   r&   r'   
<listcomp>   s
    z&_pretraced_backend.<locals>.<listcomp>F)trace_jointr-   zPost-AOT Autograd graph:
zLowered Input graph:
 )disable_memory_format_checkz]require_full_compilation arg is not applicable for torch.compile with backend='torch_tensorrtzfstrip_engine_weights=True is not supported for torch.compile(). It will be set to False automatically.r*   Nz^TRT conversion failed on the subgraph. See trace above. Returning GraphModule forward instead.)exc_infozHalting compilation on build failure since pass_through_build_failures was specified as True. To return the default Torch implementation and avoid halting compilation on engine build failures, specify pass_through_build_failures=False.)r    r   strgraphr   unittestmockpatchobjectr   r   r   r9   r   r   r=   r   r   require_full_compilationr@   strip_engine_weightsr   AssertionErrorRuntimeErrorpass_through_build_failurescritical)r   r   r+   r,   	fake_modetorch_inputstorchtrt_inputstrt_compiledr&   r&   r'   r<   c   st   

	
X4r<   )r   r   r   r   r   r   r   r   )
r   r   r   r   r+   r
   r,   r   r   rE   )*
__future__r   r:   r"   rP   typingr   r   r   rG   torch._dynamo_dynamotdtorch._dynamo.backends.commonr   torch._dynamo.utilsr   torch._functorch.aot_autogradr   torch_tensorrt._utilsr	   torch_tensorrt.dynamor
   torch_tensorrt.dynamo._compilerr   torch_tensorrt.dynamo.loweringr   r   r   r   r   torch_tensorrt.dynamo.utilsr   r   r   	getLogger__name__r    register_backendr(   r$   r<   r&   r&   r&   r'   <module>   s0    



3