o
    ٷioA                     @   s   d dl Z d dlZd dlZd dlmZ d dlZd dlZd dlZd dlm	Z	 d dl
mZ d dlmZ d dlmZ d dlmZ d dlmZmZ d d	lmZ e eZG d
d dejjZG dd dejjZG dd dZG dd dZdS )    N)Path)
TypeHelper)	OnnxModel)PastKeyValuesHelper)T5EncoderInputs)torch_onnx_export)	MT5ConfigT5Config)InferenceSessionc                
       sb   e Zd ZdZ	ddejjdejjdeeB de	dB f fddZ
d	ejd
ejdejfddZ  ZS )T5DecoderInitz~A T5 decoder with LM head to create initial past key values.
    This model is only called once during starting decoding.
    Ndecoderlm_headconfigdecoder_start_token_idc                    sV   t    || _|| _|| _|d ur|n| jj| _t| jdr&| jj| _d S d| _d S Ntie_word_embeddingsT)super__init__r   r   r   r   hasattrr   )selfr   r   r   r   	__class__ a/home/ubuntu/.local/lib/python3.10/site-packages/onnxruntime/transformers/models/t5/t5_decoder.pyr       s   
zT5DecoderInit.__init__decoder_input_idsencoder_attention_maskencoder_hidden_statesc                 C   s   |d u r|j d }tj|dftj|jd| j }| j|||ddd}|j}|j}| j	r3|| j
jd  }| |}t|\}	}
||	|
fS )Nr      dtypedeviceT)	input_idsr   r   	use_cachereturn_dict      )shapetorchoneslongr    r   r   last_hidden_statepast_key_valuesr   r   d_modelr   r   group_by_self_or_cross)r   r   r   r   
batch_sizedecoder_outputssequence_outputpresent_key_values	lm_logits	past_self
past_crossr   r   r   forward2   s0   
	

zT5DecoderInit.forwardN)__name__
__module____qualname____doc__r&   nnModuler	   r   intr   TensorFloatTensorr4   __classcell__r   r   r   r   r      s&    	r   c                       s(   e Zd ZdZ fddZdd Z  ZS )	T5Decoderz-A T5 decoder with LM head and past key valuesc                    s@   t    || _|| _|| _t| jdr| jj| _d S d| _d S r   )r   r   r   r   r   r   r   )r   r   r   r   r   r   r   r   Y   s   
zT5Decoder.__init__c                 G   sv   | j j}t||}|d}| j||||ddd}|j}|j}	| jr+|| j j	d  }| 
|}
t|	\}}|
|fS )N   T)r!   r*   r   r   r"   r#   r$   )r   num_decoder_layersr   group_by_layer	unsqueezer   r)   r*   r   r+   r   r,   )r   r   r   pastrB   r*   dummy_encoder_hidden_statesr.   r/   r0   r1   present_self_r   r   r   r4   b   s$   
	
zT5Decoder.forward)r6   r7   r8   r9   r   r4   r?   r   r   r   r   r@   V   s    	r@   c                   @   sd   e Zd Z	dddZe		ddeeB dededed	ej	d
e
de
fddZdefddZdd ZdS )T5DecoderInputsNc                 C   s   || _ || _|| _d S r5   )r   r   r*   )r   r   r   r*   r   r   r   r      s   
zT5DecoderInputs.__init__Fr   r-   encode_sequence_lengthpast_decode_sequence_lengthr    float16use_int32_inputsc                 C   s   | j }| j}| j}	| j}
d}tjd|	d ||f|rtjntj|d}tj	|||	||d}|r1tj
ntj}|dkro||||
g}||||
g}g }td| D ]}|tj|||d qLtd| D ]}|tj|||d q`nd}t||j|S )aZ  Create dummy inputs for T5Decoder.

        Args:
            decoder: decoder
            batch_size (int): batch size
            encode_sequence_length (int): sequence length of input_ids for encoder
            past_decode_sequence_length (int): past sequence length of input_ids for decoder
            device (torch.device): device of output tensors
            float16 (bool): whether the model uses float32 or float16 in input
            use_int32_inputs(bool): whether use int32 instead of int64 for some inputs

        Returns:
            T5DecoderInputs: dummy inputs for decoder
        r   r   )lowhighsizer   r    )rM   rA   r   N)	num_headsrB   
vocab_sized_kvr&   randintint32int64r   create_dummyrL   float32rangeappendrandrI   attention_mask)r   r-   rJ   rK   r    rL   rM   num_attention_heads
num_layersrR   	head_sizesequence_lengthr   encoder_inputs
float_typeself_attention_past_shapecross_attention_past_shaperE   rH   r   r   r   rW      sN   zT5DecoderInputs.create_dummyreturnc                 C   s"   | j | jg}| jr|| j |S r5   )r   r   r*   extend)r   
input_listr   r   r   to_list   s   zT5DecoderInputs.to_listc                 C   s2   | j rdd | j D nd }t| j | j |S )Nc                 S   s   g | ]	}|j tjd qS ))r   )tor&   rX   ).0pr   r   r   
<listcomp>   s    z+T5DecoderInputs.to_fp32.<locals>.<listcomp>)r*   rI   r   cloner   )r   rE   r   r   r   to_fp32   s   zT5DecoderInputs.to_fp32r5   )FF)r6   r7   r8   r   staticmethodr	   r   r<   r&   r    boolrW   listrh   rn   r   r   r   r   rI   ~   s.    

K	rI   c                   @   s~   e Zd Ze			ddeeB dejdede	de	de	fd	d
Z
edefddZe	ddeeB dedejde	def
ddZdS )T5DecoderHelperTFr   r    onnx_model_pathverboseuse_external_data_formatrM   c                 C   s  t | ttfs	J tj| jddt | trdnd||d}| }| jj}tj	|dd}	tj	|dd}
|
d	d|  }t | trA|	ng }t | trJ|n|
}d
g|}dg}|
d || ddiddddddddid}|D ]}dd|v rzdndd||< qq|D ]!}d|v rddd||< qt | trddd||< qddi||< qt|jjddd t I}tj|d}t|jjddd t| t||r|n|d|||dd||d |rtj|dd}tj||ddd W d	   d	S W d	   d	S 1 sw   Y  d	S )a  Export decoder to ONNX

        Args:
            decoder (Union[T5Decoder, T5DecoderNoPastState]): decoder object
            device (torch.device): device of decoder object
            onnx_model_path (str): onnx path
            verbose (bool, optional): print verbose information. Defaults to True.
            use_external_data_format (bool, optional): use external data format or not. Defaults to False.
            use_int32_inputs (bool, optional): use int32 inputs
        rA         r   )r-   rJ   rK   r    rM   F)presentTNlogitsr!   r   r-   rJ   )r   r   )r!   r   r   ry   r   rK   )r   rA   crosszpast_decode_sequence_length + 1)parentsexist_okzdecoder.onnx   )
argsfexport_paramsinput_namesoutput_namesdynamic_axesopset_versiondo_constant_foldingru   rt   )load_external_data)save_as_external_dataall_tensors_to_one_file)
isinstancer@   r   rI   rW   r   rh   rB   r   get_past_namesrZ   rf   r   parentmkdirtempfileTemporaryDirectoryospathjoinr   tupleonnx
load_modelr   save)r   r    rs   rt   ru   rM   inputsrg   rB   
past_namespresent_namespresent_self_namesinput_past_namesoutput_present_namesr   r   r   nametmp_dir_nametemp_onnx_model_pathmodelr   r   r   export_onnx   s   






"zT5DecoderHelper.export_onnxr   c                 C   s   t d t|j  t|j  d}|jrMt|jd dks(J t	t|jd }t
|}t|jD ]\}}t|  ||| < q;| d|}|S )zRun inference of ONNX model.zstart onnxruntime_inference)r!   r      r   N)loggerdebugnumpyascontiguousarrayr   cpur   r*   lenr<   r   r   	enumeraterun)ort_sessionr   
ort_inputsr^   r   ipast_tensorort_outputsr   r   r   onnxruntime_inference^  s   

z%T5DecoderHelper.onnxruntime_inferencer   r   r   	max_casesc                 C   s  t |ddk}g d}g }|d| D ]\}}	}
t| tr d}
tj| j||	|
|||d}|  }t	
  | | }W d   n1 sFw   Y  t||}| jj}tt|d   |d  }|}td|  td| D ](}tt|d	 |   |d	|   }td
| d|  t||}qwt| trtd| D ],}tt|d |   |d	d|  |   }td| d|  t||}q|| td||	|
| q|S )zQCompare the result from PyTorch and OnnxRuntime to verify the ONNX model is good.past_key_self_0ztensor(float16)))r      rv   )r   rA   rw   )rv   r   r   )   rw   rA   Nr   )r    rL   rM   zlogits max_diff=rA   r   zself attention past state z
 max_diff=zcross attention past state zUbatch_size=%s, encode_sequence_length=%s, past_decode_sequence_length=%s, max_diff=%s)r   get_input_typer   r   rI   rW   r   rn   rh   r&   no_gradrr   r   rB   r   amaxabsr   r   r   rY   maxrZ   info)r   r   r    rM   r   rL   
test_casestest_cases_max_diffr-   rJ   rK   r   rg   torch_outputsr   rB   max_diffmax_diff_allr   r   r   r   verify_onnxr  s`   	



$,
,
zT5DecoderHelper.verify_onnxN)TFF)r   )r6   r7   r8   ro   r@   r   r&   r    strrp   r   rI   r   r
   r<   r   r   r   r   r   rr      sB    urr   ) loggingr   r   pathlibr   r   r   r&   io_binding_helperr   
onnx_modelr   past_helperr   
t5_encoderr   torch_onnx_export_helperr   transformersr   r	   onnxruntimer
   	getLoggerr6   r   r:   r;   r   r@   rI   rr   r   r   r   r   <module>   s&   
;(i