o
    :ýÆiˆ  ã                   @   sp   d Z ddlZddlZddlZddlmZmZmZ ddlZddl	m
Z
 G dd„ dƒZdde
jded	efd
d„ZdS )zO
This script provides utility functions for working with TensorRT in ZipVoice.
é    N)ÚAnyÚTupleÚOptionalc                   @   s‚   e Zd ZdZ	ddededefdd„Zd	ee	ef fd
d„Z
dedefdd„Z	ddejdejdejdeej d	ejf
dd„ZdS )ÚTrtContextWrapperz9A wrapper class for managing TensorRT execution contexts.é   úcuda:0Ú
trt_engineÚtrt_concurrentÚdevicec                 C   s   t j|d| _|| _|| _t|ƒD ]&}| ¡ }tj 	tj 
t |¡¡¡}|dus/J d |¡ƒ‚| j ||g¡ q| j ¡ du sCJ dƒ‚d| _dS )a!  
        Initializes the TrtContextWrapper.

        Args:
            trt_engine (Any): The TensorRT engine.
            trt_concurrent (int, optional): The number of concurrent contexts. Defaults to 1.
            device (str, optional): The device to use. Defaults to 'cuda:0'.
        )ÚmaxsizeNz`failed to create trt context, maybe not enough CUDA memory, try reduce current trt concurrent {}Fzno avaialbe estimator contextéd   )ÚqueueÚQueueÚtrt_context_poolr   r
   ÚrangeÚcreate_execution_contextÚtorchÚcudaÚstreamÚStreamÚformatÚputÚemptyÚfeat_dim)Úselfr   r	   r
   Ú_Útrt_contextÚ
trt_stream© r   ú./home/ubuntu/LuxTTS/zipvoice/utils/tensorrt.pyÚ__init__!   s   
zTrtContextWrapper.__init__Úreturnc                 C   s   | j  ¡ | jfS )z*Acquires a TensorRT context from the pool.)r   Úgetr   )r   r   r   r   Úacquire_estimator7   s   z#TrtContextWrapper.acquire_estimatorÚcontextr   c                 C   s   | j  ||g¡ dS )zª
        Releases a TensorRT context back to the pool.

        Args:
            context (Any): The TensorRT context.
            stream (Any): The CUDA stream.
        N)r   r   )r   r$   r   r   r   r   Úrelease_estimator;   s   z#TrtContextWrapper.release_estimatorNÚxÚtÚpadding_maskÚguidance_scalec              	   C   sÈ  |  tj¡}|  tj¡}|  tj¡}|dur|  tj¡}|  ¡ \\}}}tj ¡  ¡  | d¡}| d¡}	tj||	| j	|j
|jd}
|Œ | d|| d¡| d¡f¡ | d|f¡ | d|| d¡f¡ |durq| d	|f¡ | ¡  ¡ | ¡  ¡ | ¡  ¡ g}|dur| | ¡  ¡ ¡ t|ƒD ]\}}| | |¡|¡ q“|j}| |d ¡}| ||
 ¡  ¡ ¡ | tj ¡ j¡d
u sÂJ ‚tj ¡  ¡  W d  ƒ n1 sÓw   Y  |  ||¡ |
  tj¡S )ad  
        Executes the TensorRT engine.

        Args:
            x (torch.Tensor): The input tensor.
            t (torch.Tensor): The time tensor.
            padding_mask (torch.Tensor): The padding mask tensor.
            guidance_scale (torch.Tensor): The guidance scale tensor.

        Returns:
            torch.Tensor: The output tensor.
        Nr   r   )Údtyper
   r&   é   r'   r(   r)   T)Útor   Úfloat16r#   r   Úcurrent_streamÚsynchronizeÚsizer   r   r*   r
   Úset_input_shapeÚ
contiguousÚdata_ptrÚappendÚ	enumerateÚset_tensor_addressÚget_tensor_nameÚnum_io_tensorsÚexecute_async_v3Úcuda_streamr%   Úfloat32)r   r&   r'   r(   r)   Ú	estimatorr   r   Ú
batch_sizeÚseq_lenÚoutputÚinput_data_ptrsÚiÚjÚnum_tensorsÚoutput_tensor_namer   r   r   Ú__call__E   s:   

"êzTrtContextWrapper.__call__)r   r   )N)Ú__name__Ú
__module__Ú__qualname__Ú__doc__r   ÚintÚstrr    r   Úlistr#   r%   r   ÚTensorr   rE   r   r   r   r   r      s0    ÿÿÿ
ÿûþýüûúr   r   ÚmodelÚ	trt_modelr	   c                 C   s’   t j |¡s
J dƒ‚ddl}t|dƒ}| | |jj¡¡ | 	¡ ¡}W d  ƒ n1 s-w   Y  |dus=J d 
|¡ƒ‚| `t||dd| _dS )a3  
    Loads a TensorRT engine and replaces the model's fm_decoder with a TrtContextWrapper.

    Args:
        model (nn.Module): The model to modify.
        trt_model (str): The path to the TensorRT engine file.
        trt_concurrent (int, optional): The number of concurrent contexts. Defaults to 1.
    zPlease export trt model first.r   NÚrbzfailed to load trt {}r   )r	   r
   )ÚosÚpathÚexistsÚtensorrtÚopenÚRuntimeÚLoggerÚINFOÚdeserialize_cuda_engineÚreadr   Ú
fm_decoderr   )rN   rO   r	   ÚtrtÚfÚestimator_enginer   r   r   Úload_trt€   s   	 ÿr_   )r   )rI   ÚloggingrQ   r   Útypingr   r   r   r   Útorch.nnÚnnr   ÚModulerK   rJ   r_   r   r   r   r   Ú<module>   s   b