o
    Ơi'                     @   sL   d dl mZmZmZ d dlZd dlZddlmZm	Z	 G dd dej
jZdS )    )AnyListSequenceN   )
Frameworksunified_dtype_converterc                       sz   e Zd Z	d fdd	Zdd Zdd Zd	d
 Zdd Zdd Zdd Z	dd Z
ddddZdd ZdefddZ  ZS )	TRTModuleNc                    sL   t t|   | tj || _|| _|| _|| _d| _	|r$| 
  d S d S )NF)superr   __init___register_state_dict_hook_on_state_dictengineinput_namesoutput_namescuda_graph_batch_sizeinitialized_initialize)selfr   r   r   r   	__class__ P/home/ubuntu/.local/lib/python3.10/site-packages/torch_tensorrt/fx/trt_module.pyr      s   zTRTModule.__init__c                    sd  d _  j  _ fdd jD  _ fdd jD  _t }|	 j |	 j g  _
g  _t jj jj D ]}||vrT j
|  j j| q> jj jj t jt j t j ksnJ  fdd jD  _ fdd jD  _ fdd jD  _ fdd jD  _ fd	d j
D  _ fd
d j
D  _d S )NTc                       g | ]} j |qS r   r   get_binding_index.0namer   r   r   
<listcomp>       z)TRTModule._initialize.<locals>.<listcomp>c                    r   r   r   r   r   r   r   r    "   r!   c                        g | ]}t  j|tjqS r   r   r   get_binding_dtyper   TORCHr   idxr   r   r   r    7       c                    s   g | ]
}t  j|qS r   )tupler   get_binding_shaper&   r   r   r   r    =   s    c                    r"   r   r#   r&   r   r   r   r    A   r(   c                    *   g | ]} j jrt j |nt qS r   r   has_implicit_batch_dimensionr)   r*   r&   r   r   r   r    G       c                    r"   r   r#   r&   r   r   r   r    O   r(   c                    r+   r   r,   r&   r   r   r   r    U   r.   )r   r   create_execution_contextcontextr   input_binding_indices_in_orderr   output_binding_indices_in_ordersetupdate&hidden_output_binding_indices_in_orderhidden_output_namesrangenum_bindingsnum_optimization_profilesappendget_binding_nameleninput_dtypesinput_shapesoutput_dtypesoutput_shapeshidden_output_dtypeshidden_output_shapes)r   primary_input_outputsir   r   r   r      sZ   







zTRTModule._initializec                 C   s   | j stdd S )NzTRTModule is not initialized.)r   RuntimeErrorr   r   r   r   _check_initialized^   s   zTRTModule._check_initializedc                 C   sL   |    t| j ||d < | j||d < | j||d < | j||d < d S )Nr   r   r   r   )rF   	bytearrayr   	serializer   r   r   )r   
state_dictprefixlocal_metadatar   r   r   r   b   s
   zTRTModule._on_state_dictc                 C   sR   ||d  }t  }	t |	}
|
|| _||d  | _||d  | _|   d S )Nr   r   r   )trtLoggerRuntimedeserialize_cuda_enginer   r   r   r   )r   rI   rJ   rK   strictmissing_keysunexpected_keys
error_msgsengine_bytesloggerruntimer   r   r   _load_from_state_dicti   s   

zTRTModule._load_from_state_dictc                 C   s,   | j  }t| j |d< |dd  |S )Nr   r0   )__dict__copyrG   r   rH   pop)r   stater   r   r   __getstate__}   s   
zTRTModule.__getstate__c                 C   sJ   t  }t |}||d |d< | j| | jr#| j | _d S d S )Nr   )	rL   rM   rN   rO   rX   r4   r   r/   r0   )r   r[   rU   rV   r   r   r   __setstate__   s   
zTRTModule.__setstate__c                 G   s  t jjd |   t jjd t|t| jks.J dt| j dt| d|d jd }dd |D }d gt| jt| j t| j	  }t
| jD ]}\}}|| jsfJ | d	| d
|| j| j| ksJ d| d	| d| j|  d|| j d	| j| }||  ||< | jjs| j|t|| j qS||  dd  | j| ksJ d| d	| d| j|  d||  dd   d	qSW d    n1 sw   Y  t jjdx g }t
| jD ]3\}}| jjr|f| j|  }	nt| j|}	t j|	| j| t j d}
||
 |
 ||< qt
| jD ]/\}}| jjr:|f| j |  }	nt| j|}	t j|	| j!| t j d}
|
 ||< q(W d    n	1 scw   Y  t jjd% | jjr| j"||t j# j$ n| j%|t j# j$ W d    n	1 sw   Y  t|dkr|d W  d    S t|W  d    S 1 sw   Y  d S )NzTRTModule:ForwardzTRTModule:ProcessInputszWrong number of inputs, expect z get .r   c                 S   s   g | ]}|  qS r   )
contiguous)r   rD   r   r   r   r       s    z%TRTModule.forward.<locals>.<listcomp>z	th input(z) is not on cuda device.zDtype mismatch for z
). Expect z, got r   zShape mismatch for zTRTModule:ProcessOutputs)sizedtypedevicezTRTModule:TensorRTRuntime)&torchautogradprofilerrecord_functionrF   r<   r   shaper   r6   	enumerateis_cudara   r=   r1   data_ptrr   r-   r0   set_binding_shaper)   r`   r>   r2   r@   r*   emptyr?   cudacurrent_devicer:   r5   rB   rA   execute_asynccurrent_streamcuda_streamexecute_async_v2)r   inputs
batch_sizecontiguous_inputsbindingsrD   
input_namer'   outputsrg   outputr   r   r   forward   s   (
 #




R&zTRTModule.forwardre   trt.IProfilerc                 C   s0   |    | jjs|du rt n|| j_dS dS )z
        Enable TensorRT profiling. After calling this function, TensorRT will report
        time spent on each layer in stdout for each forward run.
        N)rF   r0   re   rL   Profiler)r   re   r   r   r   enable_profiling   s   zTRTModule.enable_profilingc                 C   s&   |    tj  | `| j | _dS )z-
        Disable TensorRT profiling.
        N)rF   rc   rm   synchronizer0   r   r/   r   r   r   r   disable_profiling   s   
zTRTModule.disable_profilingreturnc                 C   s   | j  }|tjjS )zK
        Get layer info of the engine. Only support for TRT > 8.2.
        )r   create_engine_inspectorget_engine_informationrL   LayerInformationFormatJSON)r   	inspectorr   r   r   get_layer_info   s   
zTRTModule.get_layer_info)NNNr	   )N)re   r{   )__name__
__module____qualname__r   r   rF   r   rW   r\   r]   rz   r}   r   strr   __classcell__r   r   r   r   r   
   s    EU

r   )typingr   r   r   tensorrtrL   rc   utilsr   r   nnModuler   r   r   r   r   <module>   s
    