o
    wiF`                     @   s  d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
mZ d dlmZmZmZmZ d dlmZ d dlmZ d dlZejdZejdejedZed	ZG d
d dZ		dPdede de!dee"ee" f fddZ#dededefddZ$dee" defddZ%	dQdee" deee"  dee" fddZ&dRd"d#Z'd$e(de(fd%d&Z)d'd( Z*dSd*e"d+e!fd,d-Z+dTd.d/Z,de"d0e"de"fd1d2Z-de"d0e"dee" fd3d4Z.dee" fd5d6Z/d7d8 Z0d9d: Z1dTd;d<Z2dQd=d>Z3d?e"dee" fd@dAZ4dee" fdBdCZ5d0e"dee" fdDdEZ6dee" fdFdGZ7dee" fdHdIZ8dQdJdKZ9dLdM Z:dNdOl;m<Z<m=Z= dS )U    N)defaultdict)ListOptionalSequenceDict)	Namespace)tabulate~	SACREBLEUz
.sacrebleu	sacrebleuc                   @   s*   e Zd ZdZedededefddZdS )ColorTmsgcolorreturnc                 C   s8   t js| S ttj| d}|r| |  tjj S | S )a
  Returns a colored version of the given message string.

        :param msg: The string to Color.format.
        :param color: The color specifier i.e. 'red', 'blue', 'green', etc.
        :return: A colored version of the string if the output is a terminal.
        N)r   ENABLE_COLORSgetattrcoloramaForeupperStyle	RESET_ALL)r   r   	_ansi_str r   L/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/sacrebleu/utils.pyformat"   s   zColor.formatN)__name__
__module____qualname__r   staticmethodstrr   r   r   r   r   r      s    r      Tscoreswidth	multiliner   c              
   C   s   d|  di}|rdnd}d}dtfdd}|  D ]W\}}g }	|D ]D}
t|
ts_|
jd| d	}|
jd
urPd}|d|
jd| d	d|
jd| d	d7 }|
jd
ur^||||
j 7 }n|
}|		| q"|rm|d7 }|	||< q|S )z,Formats the scores prior to tabulating them.System
 Fpc                 S   s*   d| dd}| dkrt |dS |d S )Nz(p = z.4f)g?red*)r   r   )r'   r   r   r   r   _color_p_value;   s   z+_format_score_lines.<locals>._color_p_value.fNTz (u    ± r(   u    (μ ± 95% CI))
popfloatitems
isinstancer   scoremeancip_valueappend)r!   r"   r#   
new_scoresp_val_break_charis_bootstrapr+   metricvalsnew_valsresult_strr   r   r   _format_score_lines3   s*   

.

r?   results
signaturesargsc              
   C   sf  |j dkrag }t|  }tt| d D ]@}i }| d | |d< tdt|D ]'}t| ||  | trB| ||  | ||| < q(| ||  | j||| < q(|| qt	t
j|dd dS |j }|dv rkd	}n|d
krqd}t| |j|d	kd} i }	d}
d}|  D ]0}| | }|d ds|
r|d dr|d }d}
t |d d|d< | | |	t |d< qt|	d|dddd|j dd}t	| t	  |jp|j}|r|jrdnd}|jr|jn|j}|jrdnd}d| d| d | }t d!d}t |d}t d"d#}t d$d%}t	d&t| d' | d' d&t|   t	d(| d |jr5t	d) nt	d* t	  t	d+| d,| d- t	d.| d/ t	d0 t	d1|  d2 t	  t	d3|  d4 t	d5 t	d6| d t	d7| d t	  t	d8| d9 t	d: t	  t	d; t	d< t	d; | D ]\}}t	d+|d=d |  qdS )>zEPrints out a nicely formatted table for multi-system evaluation mode.jsonr$   system      )indentNtext
fancy_gridlatexlatex_booktabs)r#   F r   z	Baseline:Tyellowcyankeys)rightcenterr,   r-   )headerstablefmtcolalignstralignnumalignfloatfmtzbootstrap resamplingzapproximate randomizationzresampling trialstrialszPaired z test with r&   baselinezNull hypothesisgreenzhighlighted in redr)   -r%   z' - Each system is pairwise compared to z]   Actual system score / bootstrap estimated true mean / 95% CI are provided for each metric.z3   Actual system score is provided for each metric.z - z: the system and the z translations are essentiallyzH   generated by the same underlying process. For a given system and the ,zR   the p-value is roughly the probability of the absolute score difference (delta)zD   or higher occurring due to chance, under the assumption that the z is correct.z2 - Assuming a significance threshold of 0.05, the z can be rejectedz`   for p-values < 0.05 (marked with "*"). This means that the delta is unlikely to be attributedzE   to chance, hence the system is significantly "different" than the z   Otherwise, the p-values are zJ - NOTE: Significance does not tell whether a system is "better" than the z but ratherzY   emphasizes the "difference" of the systems in terms of the replicability of the delta.z-----------------zMetric signaturesz<10)r   listrO   rangelenr1   r   __dict__r6   printrC   dumpsr?   r"   
startswithr   r   	paired_bs	paired_arpaired_bs_npaired_ar_nlowerr0   )r@   rA   rB   proper_json	dict_keysivaluejrS   new_dicthas_baselinebaseline_namenamevaltable	is_paired	test_typen_samples_or_trialstest_sample_typer   bline
bline_namenull_hyp
pval_colorsigr   r   r   print_results_table\   s   
(
r}   c              	   C   sB  |j dkr"t| dkrdd|  d }t| dS t| d  dS d| d v rBtd	}tt| D ]}|d
d | | | |< q3t| dkrPt| d  dS g }| D ]}z||	dd  W qT t
yo   t| Y qTw t|dkrt|}t|| D ]\}}|d| ||d }	}
t|	d| |
  qdS dS )z/Re-process metric strings to align them nicely.rC   rE   z[
z,
z
]r   Nu   μu   (\(μ = [0-9\.]+ ± [0-9\.]+\))c                 S   s   t |  dS )NrN   )r   r   group)mr   r   r   <lambda>   s    z&print_single_results.<locals>.<lambda>=>)r   r_   joinra   recompiler^   subr6   index
ValueErrormaxzip)r@   rB   ri   color_reidxlenslinew_lenleftrP   r   r   r   print_single_results   s<   


r   rD   refstest_setc                    st   t |  t fdd|D r8td |r1td td tdt d| d td	 td
 d S d S )Nc                 3   s    | ]	}t | kV  qd S Nr_   ).0
ref_streamn_hypsr   r   	<genexpr>   s    z'sanity_check_lengths.<locals>.<genexpr>z4System and reference streams have different lengths.zeThis could be an issue with your system output or with sacreBLEU's reference database if -t is given.z6For the latter, try cleaning out the cache by typing:
z  rm -r /r%   zDThe test sets will be re-downloaded the next time you run sacreBLEU.rE   )r_   anysacreloggererrorSACREBLEU_DIRsysexit)rD   r   r   r   r   r   sanity_check_lengths   s   



r   rtutf-8c                 C   s,   |  drtj| ||ddS t| ||ddS )zConvenience function for reading compressed or plain text files.
    :param file: The file to read.
    :param mode: The file mode (read, write).
    :param encoding: The file encoding.
    z.gzr%   )modeencodingnewline)endswithgzipopen)filer   r   r   r   r   
smart_open   s   
r   numc                 C   s   | dkrdS t | S )zp
    Floors the log function

    :param num: the number
    :return: log(num) floored to a very low number
            lc(	 )mathlog)r   r   r   r   my_log   s   
r   c                 C   sn   t | dkr
| d S t | d }t| d d d}|g| }| D ]}t|D ]}||  || 7  < q'q!|S )z,Aggregates list of numeric lists by summing.rE   r   r   )r_   typer^   )listssizeinit_valtotalllrk   r   r   r   sum_of_lists  s   
r   Fprefixstrip_prefixc                 C   sJ   |d7 }i }| j  D ]\}}||r"|r||dn|}|||< q|S )zbFilters argparse's `Namespace` into dictionary with arguments
    beginning with the given prefix._rL   )r`   r0   rc   replace)rB   r   r   dkvr   r   r   args_to_dict  s   
r   c                    s  | t vrtd|  t |  |}t |  |}d|v r.t|dkr.td td nd|v r4|}d|v rNd|vrNt	dd |D   fdd|D }g }|D ]M}||vrtd	| d
|  d| d td|  d| dd
|  d|vrt	dd |D }	td|	 d td ||}
|||
  qRdd |D }t|| |||}t| D ]}td
tdd | qdS )a  Prints to STDOUT the specified side of the specified test set.

    :param test_set: the test set to print
    :param langpair: the language pair
    :param requested_fields: the fields to print
    :param origlang: print only sentences with a given original language (2-char ISO639-1 code), "non-" prefix means negation
    :param subset: print only sentences whose document annotation matches a given regex
    No such test set allrE   z'Cannot use --echo all with other fieldsrefc                 S      g | ]	}| d r|qS r   rc   r   r-   r   r   r   
<listcomp>>      z"print_test_set.<locals>.<listcomp>c                    s   g | ]
}|d kr
|n qS r   r   r   replacement_refr   r   r   ?  s    zNo such field z in test set z for language pair r,   zavailable fields for r   z: z, c                 S   r   r   r   r   r   r   r   r   G  r   z<'ref' also allowed for backwards compatibility (will return r(   c                 S      g | ]}t |qS r   )r   )r   r   r   r   r   r   M      	c                 S   s   |   S r   )rstrip)xr   r   r   r   P  s    z print_test_set.<locals>.<lambda>N)DATASETS	Exception
fieldnames	get_filesr_   r   r   r   r   minr   r   r6   filter_subsetr   ra   map)r   langpairrequested_fieldsoriglangsubsetr   	all_filesfilesfieldsubrefr   streamslinesr   r   r   print_test_set'  s8   	
"

r   r   c                 C   $   | t vrtd|  t |  |S )a  
    Returns the source file for a given testset/langpair.
    Downloads it first if it is not already local.

    :param test_set: The test set (e.g., "wmt19")
    :param langpair: The language pair (e.g., "de-en")
    :return: the path to the requested source file
    r   )r   r   get_source_filer   r   r   r   r   r   S  s   	r   c                 C   r   )a@  
    Returns a list of one or more reference file paths for the given testset/langpair.
    Downloads the references first if they are not already local.

    :param test_set: The test set (e.g., "wmt19")
    :param langpair: The language pair (e.g., "de-en")
    :return: a list of one or more reference file paths
    r   )r   r   get_reference_filesr   r   r   r   r   b  s   	r   c                 C   r   )a_  
    Returns the path of the source file and all reference files for
    the provided test set / language pair.
    Downloads the references first if they are not already local.

    :param test_set: The test set (e.g., "wmt19")
    :param langpair: The language pair (e.g., "de-en")
    :return: a list of the source file and all reference files
    r   r   r   r   r   r   r   r   r   p  s   r   c                 C   s   t d|  d|  | ds| dr7dd l}|| }|j|d W d    d S 1 s0w   Y  d S | dr_dd l}|| d}|j|d W d    d S 1 sXw   Y  d S d S )	NzExtracting  to z.tar.gzz.tgzr   )pathz.zipr)r   infor   tarfiler   
extractallzipfileZipFile)filepathdestdirr   tarr   r   r   r   extract_tarball  s   "
"r   c                 C   sT   t  }t| d}|D ]}|| qW d    | S 1 s!w   Y  | S )Nrb)hashlibmd5r   update	hexdigest)	dest_pathr   infiler   r   r   r   
get_md5sum  s   
r   c              
   C   s  ddl }ddl}tj|}tj|dd | d}tj|dd tj|r0tj	|dkrt
d|  d	|  z4|j| $}t|d
}	|	|  W d   n1 sZw   Y  W d   n1 siw   Y  W n |jy   t
d td Y nw |durt|}
|
|krt
d|
 d| d t
d|d t
d td |durt|| W d   dS W d   dS W d   dS 1 sw   Y  dS )ao  Downloading utility.

    Downloads the specified test to the system location specified by the SACREBLEU environment variable.

    :param source_path: the remote uri to download
    :param dest_path: where to save the file
    :param extract_to: for tarballs, where to extract to
    :param expected_md5: the MD5 sum
    :return: the set of processed file names
    r   NT)exist_okz.lock<   )timeoutzDownloading r   wbzAn SSL error was encountered in downloading the files. If you're on a Mac, you may need to run the "Install Certificates.command" file located in the "Python 3" folder, often found under /ApplicationsrE   z5Fatal: MD5 sum of downloaded file was incorrect (got z, expected z).zPlease manually delete z and rerun the command.znIf the problem persists, the tarball may have changed, in which case, please contact the SacreBLEU maintainer.)urllib.requestsslosr   dirnamemakedirsportalockerLockexistsgetsizer   r   requesturlopenr   writereadSSLErrorr   r   r   r   r   )source_pathr   
extract_toexpected_md5urllibr  outdirlockfiler-   outcur_md5r   r   r   download_file  sB   
 


"r  c                 C   s,   | t vrtd|  t |  }||}|S )a  Downloads the specified test to the system location specified by the SACREBLEU environment variable.

    :param test_set: the test set to download
    :param langpair: the language pair (needed for some datasets)
    :return: the set of processed file names
    r   r   )r   r   dataset
file_pathsr   r   r   download_test_set  s
   
r  testsetc                 C   s   | t vrg S tt |  j S )z5Return a list of language pairs for a given test set.)r   r]   	langpairsrO   )r  r   r   r   get_langpairs_for_testset  s   r  c                   C   s   t t ddS )z%Return a list of available test sets.T)reverse)sortedr   rO   r   r   r   r   get_available_testsets  s   r   c                 C   sd   |  d}|d }|d }g }t D ]}| d| |jv s)| d| |jv r/||j q|S )z>Return a list of available test sets for a given language pairr[   r   rE   )splitr   valuesr  r6   rq   )r   partssrclangtrglangtestsetsr  r   r   r   #get_available_testsets_for_langpair  s   
r'  c              	   C   s   | du rg S t  }| dD ]]}t| }tjt|d|j| d }ddlm	} t
||r<||d D ]}|| q4|drkt|}|D ]}	|	d	r[td
d|	}
||
 qHW d   n1 sfw   Y  qtt|S )zDReturn a list of origlang values according to the raw XML/SGM files.Nr\   rawr   rE   WMTXMLDatasetr   .sgm<doc .* origlang="([^"]+)".*\n\1)setr!  r   r  r   r   r   r  dataset.wmt_xmlr*  r1   _unwrap_wmt21_or_lateraddr   r   rc   r   r   r  r]   )	test_setsr   	origlangsr   r  rawfiler*  r   finr   doc_origlangr   r   r   get_available_origlangs  s*   




r8  c                 C   s   | du rg S t  }| dD ]S}t| }ddlm} t||r?tjt	|d|j
| d }||}d|v r>|t |d O }q|tv ra|t dd	 t|  D O }|t d
d	 t|  D O }qtt|S )zkReturn a list of domain values according to the raw XML files and domain/country values from the SGM files.Nr\   rE   r)  r(  r   domainc                 s   s"    | ]}d | dd  V  qdS )country:r[   r   Nr!  r   r   r   r   r   r     s     z(get_available_subsets.<locals>.<genexpr>c                 s   s    | ]
}| d d V  qdS )r[   rE   Nr;  r<  r   r   r   r     s    )r/  r!  r   r0  r*  r1   r  r   r   r   r  r1  SUBSETSr"  r  r]   )r3  r   subsetsr   r  r*  r5  fieldsr   r   r   get_available_subsets  s"   

r@  c              
      sb  |du r
|du r
| S |du s|du rt d|dur%|dr%|dd }td}td}g  |dD ]}t| }tjt	|d|j
| d	 }	d
dlm}
 t||
r||	}d|v rb|d ntd}t|d |D ]2\}}|du ryd}n|dr||dd k}n||k}|dur|du st||sd} | qnq6|	dr!i }|dur|tvrtd| t| }t|	W}d}|D ]J}|dr|du rd}n|d|}|dr||dd k}n||k}|dur|d|}t|||dsd}|dr | qW d   n	1 sw   Y  q6td|	 fdd| D S )zRFilter sentences with a given origlang (or subset) according to the raw SGM files.NzPFiltering for --origlang or --subset needs a test (-t) and a language pair (-l).r:     r-  z.* docid="([^"]+)".*\nr\   r(  r   rE   r)  r9  r   Tznon-rF   Fr+  z,No subset annotation available for test set r,  r.  rL   z<seg zE--origlang and --subset supports only WMT *.xml and *.sgm files, not c                    s    g | ]}d d t | D qS )c                 S   s   g | ]\}}|r|qS r   r   )r   sentencekeepr   r   r   r   O  s    z,filter_subset.<locals>.<listcomp>.<listcomp>)r   )r   r   indices_to_keepr   r   r   O  s     z!filter_subset.<locals>.<listcomp>)r   rc   r   r   r!  r   r  r   r   r   r  r0  r*  r1   r1  	itertoolsrepeatr   searchr6   r   r=  r   r   r   get)systemsr3  r   r   r   re_origlangre_idr   r  r5  r*  r?  domainsr7  
doc_domaininclude_docdoc_to_tagsr6  r   doc_idr   rD  r   r     sl   








r   c                 C   s  |j }|jr	|jnt|j|j}t|dkrtd d S tt}|D ]s}d g}|j	d ur3||j	g7 }n	|t
|j|j7 }|D ]V}	t|g||j|j||	^}
}t|
dkrVq>d| }|	d u rd|d7 }n|	dru|d|	dd   7 }n|d|	 7 }|  D ]}||
|}|| t|
|f qq>q"td	d
 | D d }tdd
 t| d D }| D ]1\}}t|d| d}|D ]\}}t| d|dd|jd| d|jd| d qqd S )Nr   z@No subset information found. Consider using --origlang argument.z	origlang=z domain=ALLr:  z	 country=rA  z domain=c                 S   r   r   r   )r   r   r   r   r   r   w  r   z(print_subset_results.<locals>.<listcomp>rE   c                 S   s   g | ]	}t |d  jqS )rE   )r_   rq   )r   rr   r   r   r   r   x  r   <rM   z: sentences=z<6r&   z = r,   r-   )r"   r   r8  r   r   r_   ra   r   r]   r   r@  r   rc   r"  corpus_scorer6   r   rO   r0   r   r   rq   r2   )metricsfull_system	full_refsrB   r   r4  r@   r   r>  r   rD   r   keyr:   r2   max_left_widthmax_metric_widthr!   n_systemr   r   r   print_subset_resultsR  sL   



8r[  rE   )r   r=  )r    Tr   )r   r   )F)NN)>rF  rC   r  r   r   r   r   r   loggingr  collectionsr   typingr   r   r   r   argparser   r   r   r   
expanduserUSERHOMEenvironrI  r   r   	getLoggerr   r   dictintboolr   r?   r}   r   r   r   r/   r   r   r   r   r   r   r   r   r   r  r  r  r   r'  r8  r@  r   r[  r  r   r=  r   r   r   r   <module>   sp    	

)c(



,
	
.
>-