o
    Ơie                     @  s  d dl mZ d dlZd dlZd dlZd dlZd dlmZm	Z	m
Z
mZmZ d dlZd dlZd dlZd dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dl m!Z! d dl"m#Z# d dl$m%Z& d dl'm(Z( d dl)m*Z* d dl+m,Z, d dl-m.Z.m/Z/m0Z0m1Z1 d dl2m3Z3 d dl4m5Z5m6Z6m7Z7 d dl8m9Z9m:Z:m;Z;m<Z<m=Z=m>Z> d dl?m@Z@ eAeBZCee! fdDddZDedEd#d$ZEee! dfdFd,d-ZFe			.	/	.dGdHd<d=ZGd dlHZHdIdBdCZIdS )J    )annotationsN)AnyListOptionalSequenceTuple)ExportedProgram)unset_fake_temporarily)dtype)needs_refit)Input)partitioning)inline_torch_modules)CompilationSettings)infer_module_output_dtypes)DYNAMO_CONVERTERS)TRTInterpreter)batch_norm_constant_folding)repair_double_inputs)"clean_up_graph_after_modificationsget_decompositionspost_loweringpre_export_lowering)PythonTorchTensorRTModule)
ENGINE_IDXSERIALIZED_METADATA_IDXTorchTensorRTModule)
CPU_DEVICEcheck_module_outputget_model_deviceget_torch_inputsto_torch_deviceto_torch_tensorrt_device)
TRT_LOGGERmoduletorch.fx.GraphModuleinputsSequence[Input]settingsr   returndict[str, np.ndarray]c                 C  s2   t | |jd}t| |||d}|  |jj}|S )aY  Find out the weight mapping between weight in exported program and TensorRT engine
    Args:
        module: FX GraphModule to interpret
        inputs: Sequence of Tensors representing inputs to the module
        settings: Compilation settings
    Returns:
        Mapping from weight name in TensorRT to actual weight value in np.ndarray
    )truncate_double)output_dtypescompilation_settings)r   r+   r   _construct_trt_network_defctxweight_refit_map)r$   r&   r(   r,   interpreterr0    r2   P/home/ubuntu/.local/lib/python3.10/site-packages/torch_tensorrt/dynamo/_refit.pyconstruct_refit_mapping8   s   r4   weight_name_mapdict[Any, Any]
state_dictc                 C  s   i }|   D ]s\}\}}t|tj}t|tj}|dd dv rVi }	|D ]}
||
  |	|
dd < q+t	di |	ddi\}}t
|dd  ||< n||vr[q|| t|j||< ||  d ||f||< q|S )N )SCALESHIFT.epsgh㈵>r2   )itemsr
   _fromtotrtDataTypetorchsplitcudar   evallowerr!   deviceclonereshape
contiguous)r5   r7   r(   engine_weight_mapengine_weight_namesd_weight_namenp_weight_type	trt_dtypetorch_dtypeparamswscaleshiftr2   r2   r3   ,construct_refit_mapping_from_weight_name_mapY   s2   

	rV   new_gm
old_enginetrt.ICudaEngine
input_listSequence[Any]Optional[dict[str, List[str]]]Nonec              	   C  s*  t   t }t| }t|t}| }|r|jdkr!tjj	ntjj
}	|di }
t||  |}i }|
 D ]0\}}|j}t| }t|tj}t|tj}| d ||f||< q9|| |D ](}||vrt| d qq|| \}}t|| t|}||||	 qqt |! dksJ dnMt"| ||}tjj
}	|D ]3}||vrt#| d|| }t|jtj}t|| t|}||||	 |$| qt |t |krtd |% st&d	 t#d
W d   dS 1 sw   Y  dS )z*
    Refit a TensorRT Engine in place
    rE   constant_mappingr9   z  is not found in weight mapping.r   z/Fast refitting failed due to incomplete mappingz is not found in weight mappingz%Not all weights have been refitted!!!z#Error: failed to refit new weights.zRefitting failed.N)'r	   setr   rA   Refitterr#   get_all_weightstypeTensorLocationDEVICEHOSTpoprV   r7   r>   r
   rC   
from_numpyrE   r?   r@   rB   rI   rJ   rK   updateloggerwarningWeightsdata_ptrnumelset_named_weightslenget_missing_weightsr4   AssertionErroraddrefit_cuda_engineerror)rW   rX   rZ   r(   r5   refittedtorch_devicerefitterweight_listtrt_wt_locationr^   mappingconstant_mapping_with_typeconstant_namevalrO   
val_tensorrP   rQ   
layer_nameweightweight_dtypetrt_wt_tensorr2   r2   r3    _refit_single_trt_engine_with_gm   sr   







$r   FTcompiled_module&torch.fx.GraphModule | ExportedProgramnew_weight_moduler   
arg_inputsOptional[Tuple[Any, ...]]kwarg_inputsOptional[dict[str, Any]]verify_outputbooluse_weight_map_cachein_placec           %      C  s0  d}t | tr|  } tt|  dkrd}|st| } n|r%tdd}|r`dd | j	
 D }	|	d d  d t }
|
d	ksGJ d
t|
d }t|	}|  D ]\}}|||< qVni }g }|  D ]`\}}t |tttjjjjfr|d|v r}qht |tjjjr|| qhdd |j	
 D }	d}z|j}W n' ty   dd |	D d  d t }
|
d	ksJ d
t|
d }Y nw |||< qht| jjD ]}|jdkr|j|v r| j| q|D ]}|  | qt!|  |dusJ |j"rJ dt#|j$}|rt |t%j&j's|g}t(||}i }|rt(||}t)*t+}t |ts3tdt,| t-||}|.t/|j0}| }t12dt3|j  t4||}t12dt3|j  t56| t78||j9\}}|dksz||j:k r|j;st1<| d| d|j:  |S t12d| d| d |j=rzt1>d t7j?||j:|j9|j@||kd\}}W n tjAjBjCjDy   t1jEddd d|_=Y nw |j=st1>d t7jF||j:|j9|j@d\}}|rtG| |H  dd  | D tI|J ksJ d!nd"d  | D d"d  |  D ksJ d!| D ]\}}z|red}|| }d#|vr;|K|L  W q| d }tM|tN |}|rd| d t }
t|
d$ }|sdd}t1<d% n}tO| |}d#|vry|K|L  W qd}|rz|jP}W n> ty   t |tjjr|jQrJ d&d'd |	D d  d t }
t|
d$ }t |tjAjRjSst1<d( Y nw |sd}t1<d% |jQr|jT}n|jT d }tM|tN |}W n ty   td)w t7U|}t12d*t3| |dusJ |jVrtW|||tX|j$|}ztY|||||d+ W n' tyE }  zt1<|  |r;|r;tY||||dd+ W Y d} ~ nd} ~ ww |Z }!|![t)j\j] t^t)j\d,r_|!_t)j\j` |a|!}"t |trptb|"|_cnQt |trd|_Ttb|"|_c|d  n>|rt|}#tb|"|#tN< tjejfgth|#}$ti| | d-|$ nt |tjjrt|}#tb|"|#tN< tjejfgth|#}$|$|_T~tjk  tjlm  q|r|dur|ntX|j$ to|| ||d.rt1>d/ |ntp | S |rt1<d0 tq| ||||d|d1S t1Ed2 |ntp | S t1>d3 | S )4a<  
    Refit a compiled graph module with ExportedProgram. This performs weight updates in compiled_module without recompiling the engine.

    Args:
        compiled_module: compiled TensorRT module that needs to be refitted.
                        This compiled_module should be compmiled by torch_tensorrt.dynamo.compile
                        or load it from disk using trt.load.
        new_weight_module: exported program with the updated weights. This one should have the same model architecture as the compiled module.
        arg_inputs: sample arg inputs. Optional, needed if output check
        kwarg_inputs: sample kwarg inputs. Optional, needed if output check
        verify_output: whether to verify output of refitted module
    Returns:
        A new compiled TensorRT module that has the updated weights.
    Fr   TzuExported program does not support modifying in place. Please set in_place to false and use the returned graph module.Nc                 S  (   g | ]\}}d |v r| dd|fqS engine_engine replace.0namer   r2   r2   r3   
<listcomp>  
    z(refit_module_weights.<locals>.<listcomp>   r   zThe engine provided is either not refittable or was built with a version of Torch-TensorRT that is too old, please recompile using the latest versionr(   _run_on_gpuc                 S  r   r   r   r   r2   r2   r3   r   -  r   c                 S     g | ]
\}}|d kr|qS r   r2   r   r2   r2   r3   r   9  s    call_modulezSRefitting is not enabled. Please recompile the engine with immutable_weights=False.z6Input graph should be an ExportedProgram but got type zInput graph: zLowered Input graph: z6 supported operations detected in subgraph containing zV computational nodes. Skipping this subgraph, since min_block_size was detected to be zDetected support for z operators out of z in subgraph.z/Partitioning the graph via the fast partitioner)min_block_sizetorch_executed_opsrequire_full_compilationskip_fusionziPartitioning failed on the subgraph with fast partition. See trace above. Retrying with global partition.)exc_infoz1Partitioning the graph via the global partitioner)r   r   r   c                 S     h | ]}|d  qS r   r2   r   smr2   r2   r3   	<setcomp>      z'refit_module_weights.<locals>.<setcomp>zSNew weights module is not compatible with previously compiled Torch-TensorRT modulec                 S  r   r   r2   r   r2   r2   r3   r     r   _run_on_accr5   zGThis engine does not have a weight map cache. Rebuilding the weight mapzQRefitting a torch retraced module is only supported with use_python_runtime=Falsec                 S  r   r   r2   r   r2   r2   r3   r     s
    zYThe module was compiled with an old version of Torch-TensorRT. Rebuilding the weight map.z]The type of graph module is not supported for refitting or two compiled modules do not match.zRefitting Submodule name: %s
)rW   rX   rZ   r(   r5   INCLUDE_REFITr   )
new_modulerefitted_moduler   r   zRefitting Succeed!zRRefitting with weight_name_map yielded incorrect result! The outputs do not match.)r   r   z+Refitting Failed! The outputs do not match.z1Refitting Completed! Output verification skipped.)r
isinstancer   r$   ro   listnamed_childrencopydeepcopyrq   __dict__r>   __getstate__r   r   decode_metadatadictr   rC   nnmodulesModuleexport_unliftGuardsFnappendr(   AttributeErrorgraphnodesoptarget
erase_nodedelete_submoduler   immutable_weightsr"   rH   collectionsabcr   r    rA   Runtimer#   rb   r   run_decompositionsr   "enable_experimental_decompositionsri   debugstrr   
CONVERTERSset_compilation_settingsr   get_graph_converter_supportr   r   dryrunrj   use_fast_partitionerinfofast_partitionr   fxpassessplitter_baseFxNetSplitterInternalErrorrt   global_partitionr   delete_all_unused_submodulesr_   keysload_state_dictr7   get_engine_from_encoded_enginer   getattrr5   use_python_runtimegraph_moduleGraphModuler   construct_submodule_inputsr+   r   r!   r   create_serialization_config
clear_flagSerializationFlagEXCLUDE_WEIGHTShasattrset_flagr   serialize_with_configbytesserialized_enginesetup_engineclassestensorrtEnginetuplesetattrgccollectrE   empty_cacher@   r   r   refit_module_weights)%r   r   r   r   r   r   r   inline_moduler(   compiled_submodulesencoded_metadatacompiled_submodules_mapr   	submoduleguard_fn_modulesnodeguard_fn_module_namerH   torch_inputstorch_kwarg_inputsruntimerW   num_supported_ops	total_opsnew_partitioned_modulesupported_opsnew_submoduler5   compiled_submoduleengine_infor   submodule_inputseserialization_configr   new_engine_inforefitted_enginer2   r2   r3   r      s  



	













	

















	

r   encoded_enginer   r   trt.Runtimec                 C  s   t | }||}|S )N)base64	b64decodedeserialize_cuda_engine)r  r   r   r   r2   r2   r3   r   n  s   

r   )r$   r%   r&   r'   r(   r   r)   r*   )r5   r6   r7   r6   r(   r   r)   r6   )rW   r%   rX   rY   rZ   r[   r(   r   r5   r\   r)   r]   )NNFTF)r   r   r   r   r   r   r   r   r   r   r   r   r   r   r)   r%   )r  r   r   r  r)   rY   )J
__future__r   collections.abcr   r   r   loggingtypingr   r   r   r   r   numpynpr   rA   rC   torch.exportr   "torch.fx.experimental.proxy_tensorr	   torch_tensorrt._enumsr
   torch_tensorrt._featuresr   torch_tensorrt._Inputr   torch_tensorrt.dynamor   torch_tensorrt.dynamo._exporterr   torch_tensorrt.dynamo._settingsr   ,torch_tensorrt.dynamo.conversion._conversionr   3torch_tensorrt.dynamo.conversion._ConverterRegistryr   r   0torch_tensorrt.dynamo.conversion._TRTInterpreterr   7torch_tensorrt.dynamo.conversion.impl.normalization.opsr   0torch_tensorrt.dynamo.conversion.truncate_doubler   torch_tensorrt.dynamo.loweringr   r   r   r   8torch_tensorrt.dynamo.runtime._PythonTorchTensorRTModuler   2torch_tensorrt.dynamo.runtime._TorchTensorRTModuler   r   r   torch_tensorrt.dynamo.utilsr   r   r   r    r!   r"   torch_tensorrt.loggingr#   	getLogger__name__ri   r4   rV   r   r   r  r   r2   r2   r2   r3   <module>   sb     
 ,R   