o
    ڷi                     @   sn   d dl Z d dlmZ d dlmZ d dlmZ d dlmZ d dl	m
Z
 d dlmZ e eZG dd	 d	eZdS )
    N)FusionLayerNormalization)FusionMultiHeadAttentionMMDit)FusionOptions)is_installed)
ModelProto)BertOnnxModelc                       sx   e Zd Zddededef fddZdd Zd	d
 Zdd Zdde	dB de
fddZdde	dB fddZdd Z  ZS )MmditOnnxModelr   model	num_headshidden_sizec                    s>   |dkr|dks|dkr|| dksJ t  j|||d dS )ak  Initialize Multimodal Diffusion Transformer (MMDiT) ONNX Model.

        Args:
            model (ModelProto): the ONNX model
            num_heads (int, optional): number of attention heads. Defaults to 0 (detect the parameter automatically).
            hidden_size (int, optional): hidden dimension. Defaults to 0 (detect the parameter automatically).
        r   )r
   r   N)super__init__)selfr	   r
   r   	__class__ _/home/ubuntu/vllm_env/lib/python3.10/site-packages/onnxruntime/transformers/onnx_model_mmdit.pyr      s   (zMmditOnnxModel.__init__c                 C   s   |    |   d S N)prune_graphremove_unused_constant)r   r   r   r   postprocess      zMmditOnnxModel.postprocessc                 C   s*   d}t d t| | dd}|  d S )NTzwThe optimized model requires LayerNormalization with broadcast support. Please use onnxruntime-gpu>=1.21 for inference.)check_constant_and_dimensionforce)loggerwarningr   apply)r   layernorm_support_broadcastfusionr   r   r   fuse_layer_norm"   s   zMmditOnnxModel.fuse_layer_normc                 C   s   t | }|  d S r   )r   r   )r   r   r   r   r   fuse_multi_head_attention-   r   z(MmditOnnxModel.fuse_multi_head_attentionNFoptionsadd_dynamic_axesc                 C   s   |rJ t dr:dd l}ddlm} |  d}|jt|ddd}| || W d    d S 1 s3w   Y  d S td | |d  d S )Ntqdmr   )logging_redirect_tqdm   r   )initialdescz<tqdm is not installed. Run optimization without progress bar)r   r#   tqdm.contrib.loggingr$   range	_optimizer   info)r   r!   r"   r#   r$   stepsprogress_barr   r   r   optimize1   s   "
zMmditOnnxModel.optimizec                 C   s   |d ur|j s|   | j  |r|d |d u s|jr&|   |   |r-|d |d u s4|jr8| 	  |r?|d |d u sF|j
rJ|   |rQ|d |   |r\|d td|    d S )N   zopset version: )enable_shape_inferencedisable_shape_inferenceutilsremove_useless_cast_nodesupdateenable_layer_normr   fuse_simplified_layer_normenable_gelu	fuse_geluenable_attentionr    r   r   r+   get_opset_version)r   r!   r-   r   r   r   r*   @   s,   





zMmditOnnxModel._optimizec                 C   s@   i }g d}|D ]}|  |}t|||< qtd|  |S )z8
        Returns node count of fused operators.
        )FastGeluMultiHeadAttentionLayerNormalizationSimplifiedLayerNormalizationzOptimized operators:)get_nodes_by_op_typelenr   r+   )r   op_countopsopnodesr   r   r   get_fused_operator_statistics_   s   
z,MmditOnnxModel.get_fused_operator_statistics)r   r   )NF)NN)__name__
__module____qualname__r   intr   r   r   r    r   boolr.   r*   rE   __classcell__r   r   r   r   r      s    r   )loggingfusion_layernormr   fusion_mha_mmditr   fusion_optionsr   import_utilsr   onnxr   onnx_model_bertr   	getLoggerrF   r   r   r   r   r   r   <module>   s   
