o
    	TiB                  	   @   s  d dl Z d dlZd dlZd dlZd dlZd dlmZ d dlmZ d dl	m
Z
mZmZmZ ejddddZed	d
dZe jddZejddedd ejddedd ejddedd ejddedd ejddedd ejddedd e Zejdkrg dZnejd krg d!Znejd"krd#d$gZnejgZejZejZejZejZej Z e rej!" Z#ne rej$" Z#nej%& rej%" nd%Z#e'd&d' Zi Z(e)e d(d)d*Z*e+e*Z+e+,g d+ eeD ]Z-e
j.e-d)e#iej/d,Z0e.e-Z1e1j2e1_3d-e1_4g Z5e6eD ]y\Z7Z8e9d. e8d/ Z:e5;e:dd  e7ekr' n]e7d0 e d kre1e5d1d2d3<e#Z=e=j>de  e=_>e=j?de  e=_?e0j@d?i e=d2ed2d4ZAe1jBeAd2d5ZCd6d7 e6eCD ZCejDeCd8ZEg Z5e-e(vryg e(e-< e(e- FeEd9  q
e1e5d1d2d3<e#Z=e0j@d?i e=d2dd:ZAe1jBeAd2d5ZCd;d7 e6eCD ZCejDeCd8ZEe(e- FeEd9  eGe(e- ZGeHe(e- ZHe+,e-eGeHg eId<e- d=eG d>eH  dZ0e rej!J  qe rej$J  qej%J  qe*K  dS )@    N)load_dataset)tqdm)AutoModelForCausalLMAutoTokenizeris_torch_npu_availableis_torch_xpu_availablezybelkada/toxicityz%DaNLP/da-electra-hatespeech-detectionmeasurement)module_typezOxAISH-AL-LLM/wiki_toxictest)splitzEvaluate de-toxified models)descriptionz--model_typeallz(Relative path to the source model folder)defaulttypehelpz--output_fileztoxicity.csvz--batch_size@   z
Batch sizez--num_samplesi  zNumber of samplesz--context_lengthi  z--max_new_tokens   zMax new tokens for generation)ybelkada/gpt-neo-125m-detoxEleutherAI/gpt-neo-125MEleutherAI/gpt-neo-2.7Bybelkada/gpt-neo-2.7B-detoxybelkada/gpt-j-6b-sharded-bf16zybelkada/gpt-j-6b-detoxszgpt-neo)r   r   r   r   zgpt-jr   zybelkada/gpt-j-6b-detoxcpuc                 C   s   | d dkS )Nlabel    )xr   r   q/home/ubuntu/.local/lib/python3.10/site-packages/examples/research_projects/toxicity/scripts/evaluate-toxicity.py<lambda>K   s    r   w )newline)model_idmean_toxicitystd_toxicity)
device_maptorch_dtypeleft*   comment_textr   ptT)return_tensorspadding)	do_samplemax_new_tokens	use_cache)skip_special_tokensc                 C       g | ]\}}| t| d qS r    replaceinput_texts.0igenerated_textr   r   r   
<listcomp>m   s    r:   )predictionstoxicity)r-   r.   c                 C   r1   r2   r3   r6   r   r   r   r:   {   s     zModel: z	 - Mean: z - Std: r   )Largparsecsvevaluatenumpynptorchdatasetsr   r   transformersr   r   r   r   loadr<   dsArgumentParserparseradd_argumentstrint
parse_argsargs
model_typeMODELS_TO_TESTnum_samplesNUM_SAMPLES
batch_size
BATCH_SIZEoutput_filer.   context_lengthxpucurrent_devicedevicenpucudais_availablefilter
toxicitiesopenfilewriterwriterowr"   from_pretrainedbfloat16model	tokenizer	eos_token	pad_tokenpadding_sider5   	enumerater8   examplemanual_seed
input_textappendtoinputs	input_idsattention_maskgenerateoutputsbatch_decodegenerated_textscomputetoxicity_scoreextendmeanstdprintempty_cachecloser   r   r   r   <module>   s   









