o
    ٷirL                     @   s~   d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlmZ d dl	m
Z
mZ e eZG dd dZG dd dZdS )    N)	Precision)
Gpt2Helper
Gpt2Inputsc                   @   s^   e Zd ZdddZdd Zdefdd	Zd
efddZdddZ	dddZ
dd Zdd ZdS )
Gpt2MetricTorch   c                 C   sj   |dkr|dks
J || _ || _| d| | _|| _d| _d| _d| _d| _d| _d | _	d | _
i | _d S )N   d   z vs r   )baseline	treatmentnametop_ktop_1_errortop_k_errortotal_samplesmax_logits_diffmax_logits_diff_no_pastbatch_top1_errorbatch_topk_errorseq_len_latency)selftreatment_namebaseline_namer    r   d/home/ubuntu/.local/lib/python3.10/site-packages/onnxruntime/transformers/models/gpt2/gpt2_tester.py__init__   s   
zGpt2Metric.__init__c                 C   s  | j | jkr`td td| j d| j  d | jdkrId| j | j }d| j | j }td| j d| j d	|d
d| j d| j d	|d
d td td| jd td| jd n	td| j d | j	rtd d}d}t
| j	 D ]H}t| j	| d }|dkrtd| d|d
d ntdd|  dd|d  d  d|d
d ||t| j	|  7 }|t| j	| 7 }q{td|| d
d d S d S )Nz---zMetrics for z (baseline=z):r   g      Y@zTotal=z Top1Error=z (z.2fz%) TopzError=z%)zMax logits diffs:z	with past  = z.6fz	empty past = z (baseline):z/Past sequence length range and average latency:     @@	z:         	z msz	[   z, r   z]:	zAverage Latency: )r
   r   printr   r   r   r   r   r   r   sortedkeys
statisticsmeanlen)r   top_1_error_ratetop_k_error_ratetotalcountkeyaverager   r   r   r   %   s4   
4.zGpt2Metric.printis_empty_pastc                 C   s8   ||    }|rt| j|| _|S t| j|| _|S N)absmaxr   r   )r   baseline_logitstreatment_logitsr+   diffr   r   r   diff_logitsC   s   zGpt2Metric.diff_logits
batch_sizec                 C   s>   |  j |7  _ tj|dftjd| _tj|dftjd| _d S )Nr   dtype)r   torchzerosboolr   r   )r   r3   r   r   r   start_batchL   s   zGpt2Metric.start_batchTc                 C   sb   |  |j|jd| |  |j|j| j| | |j|j|dk}|r/td| j d|  d S d S )Nr   r   zMax logits diffs of z: )
_eval_topktop_1_tokenstop_k_tokensr   r2   logitsr   r   )r   r
   r   past_seq_lenverbosemax_diffr   r   r   
eval_batchQ   s   zGpt2Metric.eval_batchc                 C   s   t t ||sL|dkr&|rtd| j  |  jt || O  _d S |r4td| d| j d |  jt || dj	dddkO  _d S d S )Nr   z!Generated tokens not matched for zTop z tokens not matched for z-. This will lead to wrong beam search results)dimr   )
r6   alleqr   r   r   logical_notr   sum	unsqueeze)r   baseline_topktreatment_topkr   r?   r   r   r   r:   Y   s    
zGpt2Metric._eval_topkc                 C   s,   |  j | j 7  _ |  j| j 7  _d S r,   )r   r   rF   r   r   r   r   r   r   	end_batchh   s   zGpt2Metric.end_batchc                 C   sF   |dkrt t|d nd}|| jvrg | j|< | j| | d S )Nr   r   )intmathlog2r   append)r   r>   latencyr)   r   r   r   add_latencyl   s   

zGpt2Metric.add_latencyN)r   r   )T)__name__
__module____qualname__r   r   r8   r2   rL   r9   rA   r:   rK   rQ   r   r   r   r   r      s    
	

r   c                
   @   s   e Zd Z			dddZdefddZdd	 Zd
d Zdd Ze	dddZ
e	dd Ze	dd Ze	ejddddddddf	ddZdS )
Gpt2TesterFr   c                 C   s   |j d | _|j d | _|| _|| _|| _|| _|d u| _|d u| _g | _	d| j|d|| g}t
|D ]}t||r?tjntj}| j	|| q3d | _d | _d | _|	| _|
| _d S )Nr   r   r   )shaper3   input_lengthn_layer	input_idsposition_idsattention_maskhas_position_idshas_attention_maskpastranger6   emptytypefloat16float32rO   tor=   r;   r<   r   top_k_required_order)r   rY   rZ   r[   num_attention_headshidden_size	num_layerdeviceis_fp16r   re   
past_shape_i
empty_pastr   r   r   r   t   s.   


zGpt2Tester.__init__returnc                 C   s   t | j| j| j| jS r,   )r   rY   rZ   r[   r^   rJ   r   r   r   
get_inputs   s   zGpt2Tester.get_inputsc              	      s  ddl m  tj|dt| }tj|r!td| d d S tj|dd  fdd	}g }||| j	d
 | j
rA||| jd | jrK||| jd t| jD ]}||| j| dt|  qPt|D ])\}}	ttj|d| dd}
|
|	  W d    n1 sw   Y  qddd | D }t|D ]B\}} t|| tjr|| n	||    }	ttj|d| dd}
|
|	  W d    n1 sw   Y  qtd|  d S )Nr   numpy_helpertest_data_set_z
Directory z existed. Skip saving test dataT)exist_okc                    s"   |   |   | d S r,   )rO   
from_arrayclonecpunumpy)input_tensorstorch_tensorr   rp   r   r   
add_tensor   s   "z-Gpt2Tester.save_test_data.<locals>.add_tensorrY   rZ   r[   past_input_z.pbwbc                 S   s   g | ]}|j qS r   )r   ).0outputr   r   r   
<listcomp>   s    z-Gpt2Tester.save_test_data.<locals>.<listcomp>output_zTest data saved to directory )onnxrq   ospathjoinstrexistsr   makedirsrY   r\   rZ   r]   r[   r_   rX   r^   	enumerateopenwriteSerializeToStringget_outputsrt   
isinstancerw   ndarrayru   rv   )r   sessionr   save_test_data_dirtest_case_idr   rz   rx   itensorfoutput_names_namer   rp   r   save_test_data   s<   *zGpt2Tester.save_test_datac                 C   sh  t |d tjrt|d n	|d    | _t	
| j| _t	
| j| j| j| _| j  | jdg|| _| jrWt| j| d gd| jd|| _| jrqt| jt| jdg| jgd|| _g | _t |d trt |d | _dS t!| j"D ](}t ||d  tjrt||d  n	||d    }| j#|| qdS )z7
        Update the inputs for next inference.
        r   r   N)$r   rw   r   r6   
from_numpyru   detachrv   r=   rU   predict_next_tokenr;   r   re   r<   reshaper3   rd   rY   r\   r   rW   rG   repeatrZ   r]   catr[   onestype_asr^   tuplelistr_   rX   rO   )r   r   stepri   r   past_ir   r   r   update   s6   0"*zGpt2Tester.updatec                 C   s2  t d | jdur| j|j   }|dkrt d|  t| j|jks/t d| j|j | jrCt| j|jksCt d| j|j | j	rWt| j
|j
ksWt d| j
|j
 t| jt|jkscJ t| jD ].\}}|j|j| jkswJ | dkr||j|    }|dkrt d	| d
|  qhdS )z3
        Compare inputs and logits output.
        zstart diff...Ng-C6?z$Max logits difference is too large: zInput_ids is differentzposition_ids is differentzattention_mask is differentr   zmax_past_diff[z]=)r   r=   r-   r.   r6   rC   rY   r\   rZ   r]   r[   r$   r^   r   rV   nelement)r   r
   max_io_diffr   r   max_past_diffr   r   r   r1      s<   
zGpt2Tester.diffr   c                 C   sb   | dddf }|dkrt |dd}|S t j|dddddd|f }|s/| \}}|S |S )z4
        Get top k topkens based on logits.
        Nr   T)
descending)r6   argmaxargsortsort)r=   r   required_orderlastTokenLogitsgeneratedTokenstopksorted_topk_r   r   r   r     s    zGpt2Tester.predict_next_tokenc                 C   s   g }t |D ]A}t| |d  tjrt| |d  n| |d  }t||d  tjr4t||d  n||d  }||   }|| qt	d|  dS )zO
        Compare the present outputs of two outputs from ONNX Runtime.
        r   zpresent_diff_max=N)
r_   r   rw   r   r6   r   r-   r.   rO   r   )onnx_outputonnx_io_outputrX   present_diff_maxr   onnx_present_ionnx_io_present_ir@   r   r   r   diff_present+  s   

zGpt2Tester.diff_presentc                 C   s*   ddl m} || }ddlm} |j|kS )z>
        Returns True if the ONNX model is quantized.
        r   )load)__producer__)r   r   !onnxruntime.quantization.quantizer   producer_name)onnx_model_pathr   modelquantize_producerr   r   r   is_quantized_onnx_model@  s   
z"Gpt2Tester.is_quantized_onnx_modelGpt2LMHeadModelT   r   .c           -      C   s  t d| d| d| dt| d|	 d |jj}|jj}|jj}|jj}d}|tjk}|r:d| 	 d j
v s:J | | tjd	d
d|j|d}tj|||d}d}|tjkr]dnd}t|||}t|||}t|d ||}t|D ]b\}}|	dkr||	kr nT|d dkrt |  |d }|dd}|dd}t|||||||||| 
}t|||||||||| 
} t|||||||d|| 
}!|!j}"||" ||" t  tj|"tjd}#t|D ]}$t|j d }%t|jd  d }&t ! }'t"||!# }(|$|&t ! |'  |!%|(|$| tj&| |# dd\})}*|$|&|*d  |%|)|$| tj|"|&|%|j|d}+t'||+ tj(| | # ||+dddd\},}*|$|&|*d  ||k rr| )| |,|| |d7 }| %|,|$| |
r|*|  t+|)|,| t d  t d!|!j, t d"|j, t d#| j, |j-|!||&|
d$ |j-|!| |&|
d$ |#|!j,|k. B }#t/|#r nqW d   n	1 sw   Y  |0  |0  qw|   |   |   dS )%z
        Test Generation using greedy beam search (without sampling) to compare PyTorch and ONNX model.
        It will print top 1 and top k errors on the given test inputs.
        zstart test generation: (top_k=z top_k_no_order=z max_steps=z test_inputs=z max_inputs=)r   rb             )r3   past_sequence_lengthsequence_lengthconfigmodel_class)
is_float16r   zQuantized OnnxOnnxz with IO Binding
   rY   rZ   Nr[   Fr4   r      )
total_runsr   )r   T)r   return_numpyinclude_copy_output_latencyzTop 1 tokens:z	Torchz	ONNXz	ONNX with IO binding)r?   )1r   r$   r   rX   n_headn_embdeos_token_idr   FLOAT16r   ra   evalrd   r   get_output_shapesget_output_buffersINT8r   r   getrU   r3   r9   r6   no_gradr7   r8   r_   r   rY   sizer^   timeitdefault_timerpytorch_inferencero   rQ   r   onnxruntime_inferenceauto_increase_buffer_size$onnxruntime_inference_with_binded_ior   r1   r   r;   rA   anyrC   rK   )-r   r   ri   test_inputs	precisionr   r   top_k_no_order	max_steps
max_inputsr?   r   r   rX   r   r   r   test_data_savedr   init_output_shapesoutput_buffersr   r   torch_metriconnx_metriconnx_io_metricr   inputsrY   rZ   r[   onnx_runneronnx_io_runnertorch_runnerr3   doner   seq_lenr>   
start_timepytorch_outputr   avg_latency_msoutput_shapesr   r   r   r   test_generationL  s  &









>
zGpt2Tester.test_generationN)Fr   F)r   F)rR   rS   rT   r   r   ro   r   r   r1   staticmethodr   r   r   r   FLOAT32r   r   r   r   r   rU   s   s4    

+'('

rU   )loggingrM   r   r"   r   rw   r6   benchmark_helperr   gpt2_helperr   r   	getLoggerrR   loggerr   rU   r   r   r   r   <module>   s   
^