o
    wÖi.o  ã                   @   sd  d Z ddlZddlZddlZddlZddlZddlZddlmZ e	du r=e
dkr=e e¡ ¡ jd Zej deeƒ¡ dZ	ddlmZ ddlmZ dd	lmZmZmZmZ dd
lmZmZmZmZ ddlm Z m!Z!m"Z"m#Z# ddlm$Z$m%Z% ddl&m'Z( e )d¡Z*zddl+m,Z, ddl+m+Z+m-Z- e+e,e-ƒ W n	 e.yž   Y nw dd„ Z/dd„ Z0e
dkr°e0ƒ  dS dS )ap  
SacreBLEU provides hassle-free computation of shareable, comparable, and reproducible BLEU scores.
Inspired by Rico Sennrich's `multi-bleu-detok.perl`, it produces the official WMT scores but works with plain text.
It also knows all the standard test sets and handles downloading, processing, and tokenization for you.

See the [README.md] file for more information.
é    N)ÚdefaultdictÚ__main__é   Ú	sacrebleu)ÚDATASETS)ÚMETRICS)Ú
smart_openÚfilter_subsetÚget_langpairs_for_testsetÚget_available_testsets)Úprint_test_setÚprint_subset_resultsÚget_reference_filesÚdownload_test_set)Úargs_to_dictÚsanity_check_lengthsÚprint_results_tableÚprint_single_results)Ú#get_available_testsets_for_langpairÚColor)Ú__version__)ÚSIGPIPE)ÚsignalÚSIG_DFLc                  C   s<  t jdt jd} | jdddddd | jd	ddd
d | jddtd dd | jdddd dd | jdddd dd | jddd dd | jdtd dd | jddtd dd  | jd!d"td#d d$d% | jd&d#g d'd( | jd)d*td+d,d | jd-d.td/d0d d1d2„ tD ƒ}| jd3d4|dd5gd6d7 | jd8d9dd:d; |  d<¡}|jd=d>td? j 	¡ d@dAdBdC |jdDdEt
d dFdGtd? jdH › dItd? jdJ › dKdL |jdMdNtd? jd dOdPdC |jdQdRdSdddTdU |jdVdddWdXdY |  dZ¡}|jd[d\ttd] jd^d |jd_d`ttd] jdad |jdbttd] jdcd |jdddddedf |jdgdddhdf |jdidddjdf |  dk¡}|jdlddmd; |jdnddod; |jdpddqd; |jdrddsd; |  dt¡}|jdudvddwd; |jdxdytdzd{d |  d|¡}| ¡ }|jd}d~ddd; |jd€ddd‚d; |jdƒd„td…d†d |jd‡dˆtdzd‰d |jdŠd‹td+dŒd |  d¡}|jdŽddddd |jd‘d’ddd“d |jd”d•ddd–d |jd—d˜td+d™d |jdšd›dddœd |jddžddŸd; g d ¢}	|jd¡d¢d£|	d¤d¥ | jd¦d§d¨d© t¡dª |  ¡ }
d«tjv rtjd«  ¡ }||	v r||
_|
S )¬Nz³sacreBLEU: Hassle-free computation of shareable BLEU scores.
Quick usage: score your detokenized output against WMT'14 EN-DE:
    cat output.detok.de | sacrebleu -t wmt14 -l en-de)ÚdescriptionÚformatter_classz
--citationz--citeFÚ
store_truez"Dump the bibtex citation and quit.)ÚdefaultÚactionÚhelpz--listz(Print a list of all available test sets.z
--test-setz-tz`The test set to use (see also --list) or a comma-separated list of test sets to be concatenated.)Útyper   r   z--language-pairz-lÚlangpairz4Source-target language pair (2-char ISO639-1 codes).)Údestr   r   z
--origlangz-olÚoriglangzoUse a subset of sentences with a given original language (2-char ISO639-1 codes), "non-" prefix means negation.z--subsetÚsubsetzkUse a subset of sentences whose document annotation matches a given regex (see SUBSETS in the source code).z
--downloadzDownload a test set and quit.z--echoú+ax  Output the source (src), reference (ref), or other available field (docid, ref:A, ref:1 for example) to STDOUT and quit. You can get available fields with options `--list` and `-t`For example: `sacrebleu -t wmt21 --list`. If multiple fields are given, they are outputted with tsv format in the order they are given.You can also use `--echo all` to output all available fields.)Únargsr    r   r   z--inputz-iÚ*z)Read input from file(s) instead of STDIN.)r    r&   r   r   ÚrefszQOptional list of references. If given, it should preceed the -i/--input argument.)r&   r   r   z
--num-refsz-nrr   z[Split the reference stream on tabs, and expect this many references. (Default: %(default)s)z
--encodingz-eúutf-8z>Open text files with specified encoding (Default: %(default)s)c                 S   s   g | ]}|  ¡ ‘qS © )Úlower)Ú.0Úmr*   r*   úP/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/sacrebleu/sacrebleu.pyÚ
<listcomp>e   s    zparse_args.<locals>.<listcomp>z	--metricsz-mÚbleuz:Space-delimited list of metrics to compute (Default: bleu))Úchoicesr&   r   r   z--sentence-levelz-slz!Compute metric for each sentence.)r   r   zBLEU related argumentsz--smooth-methodz-sÚBLEUÚexpÚbleu_smooth_methodzSmoothing method: exponential decay, floor (increment zero counts), add-k (increment num/denom by k for n>1), or none. (Default: %(default)s))r1   r   r"   r   z--smooth-valuez-svÚbleu_smooth_valuezGThe smoothing value. Only valid for floor and add-k. (Defaults: floor: Úfloorz	, add-k: zadd-kú))r    r   r"   r   z
--tokenizez-tokÚbleu_tokenizez Tokenization method to use for BLEU. If not provided, defaults to `zh` for Chinese, `ja-mecab` for Japanese, `ko-mecab` for Korean and `13a` (mteval) otherwise.z--lowercasez-lcÚbleu_lowercasez;If True, enables case-insensitivity. (Default: %(default)s))r"   r   r   r   z--forceÚ
bleu_forcez9Insist that your tokenized input is actually detokenized.)r   r   r"   r   zchrF related argumentsz--chrf-char-orderz-ccÚCHRFz.Character n-gram order. (Default: %(default)s)z--chrf-word-orderz-cwz^Word n-gram order (Default: %(default)s). If equals to 2, the metric is referred to as chrF++.z--chrf-betazJDetermine the importance of recall w.r.t precision. (Default: %(default)s)z--chrf-whitespacezMInclude whitespaces when extracting character n-grams. (Default: %(default)s))r   r   r   z--chrf-lowercasez1Enable case-insensitivity. (Default: %(default)s)z--chrf-eps-smoothingz|Enables epsilon smoothing similar to chrF++.py, NLTK and Moses; instead of effective order smoothing. (Default: %(default)s)z@TER related arguments (The defaults replicate TERCOM's behavior)z--ter-case-sensitivez0Enables case sensitivity. (Default: %(default)s)z--ter-asian-supportzEEnables special treatment of Asian characters. (Default: %(default)s)z--ter-no-punctz+Removes punctuation. (Default: %(default)s)z--ter-normalizedzDApplies basic normalization and tokenization. (Default: %(default)s)z@Confidence interval (CI) estimation for single-system evaluationz--confidencez-ciz6Report confidence interval using bootstrap resampling.z--confidence-nz-ciniè  zOSet the number of bootstrap resamples for CI estimation (Default: %(default)s).z7Paired significance testing for multi-system evaluationz--paired-arz-parz„Perform paired test using approximate randomization (AR). This option is mutually exclusive with --paired-bs (Default: %(default)s).z--paired-bsz-pbszzPerform paired test using bootstrap resampling. This option is mutually exclusive with --paired-ar (Default: %(default)s).z--paired-ar-nz-parni'  zKNumber of trials for approximate randomization test (Default: %(default)s).z--paired-bs-nz-pbsnzZNumber of bootstrap resamples for paired bootstrap resampling test (Default: %(default)s).z--paired-jobsz-jz™If 0, launches as many workers as the number of systems. If > 0, sets the number of workers manually. This feature is currently not supported on Windows.zReporting related argumentsz--quietz-qzSuppress verbose messages.z--shortz-shz2Produce a shorter (less human readable) signature.z--score-onlyz-bzPrint only the computed score.z--widthz-wz,Floating point width (Default: %(default)s).z--detailz-dz?Print detailed information (split test sets based on origlang).z
--no-colorz-ncz.Disable the occasional use of terminal colors.)ÚjsonÚtextÚlatexz--formatz-fr<   zþSet the output format. `latex` is only valid for multi-system mode whereas `json` and `text` apply to single-system mode only. This flag is overridden if the SACREBLEU_FORMAT environment variable is set to one of the valid choices (Default: %(default)s).)r   r1   r   z	--versionz-VÚversionz%(prog)s {})r   r?   ÚSACREBLEU_FORMAT)ÚargparseÚArgumentParserÚRawDescriptionHelpFormatterÚadd_argumentÚstrÚintr   Úadd_argument_groupÚSMOOTH_DEFAULTSÚkeysÚfloatÚ
TOKENIZERSÚ
CHAR_ORDERÚ
WORD_ORDERÚBETAÚadd_mutually_exclusive_groupÚformatÚVERSIONÚ
parse_argsÚosÚenvironr+   )Ú
arg_parserÚavail_metricsÚ	bleu_argsÚ	chrf_argsÚter_argsÚ	sign_argsÚ	pair_argsÚpair_args_choiceÚreport_argsÚoutput_formatsÚargsÚ
_new_valuer*   r*   r.   rR   >   s4  üÿ
ÿÿÿÿ
ÿ
ÿÿÿ
ÿÿÿÿ
þÿþþþÿ
ÿ
ÿÿÿ
ÿ
ÿ
ÿ
ÿÿÿÿ

ÿÿ

ÿ
ÿÿÿÿ
ÿÿÿÿÿ
ÿÿ
rR   c            .         sž  t ƒ ‰ ˆ jpˆ j} ttj ¡ dddddt_ttj ¡ ddddt_tj	 
dd	¡s,ˆ jr0d	t_nd
d l}| ¡  ˆ jsCtjtjdd ˆ jrRtˆ jˆ jƒ t d
¡ ˆ jr²ˆ jr|ˆ jr_ˆ jgntˆ jƒD ]}tˆ j  |¡}t|› dd |¡› ƒ qdn1ˆ jrŽtdˆ j› dƒ tˆ jƒ}ntdƒ t ƒ }t!|ƒD ]}t| j" #¡ }t|d›d|› ƒ q™t d
¡ ˆ j$rÆt%ˆ j&ƒdkrÆt' (d¡ t d¡ ˆ j)rúˆ jsÖt' (d¡ t d¡ ˆ j *d¡D ]}dt| vrít' (d|› ¡ qÜtt| j)ƒ qÜt d
¡ ˆ j+dkr"ˆ jd ust%ˆ j,ƒdkr"t' (d¡ t' (d¡ t' (d¡ t d¡ ˆ jd urIˆ j *d¡D ]}|tvrGt' (d|›¡ t' (d¡ t d¡ q.ˆ jd u rmt%ˆ j,ƒd
krlt' (d¡ t' (d¡ t' (t ƒ ¡ t d¡ n]t%ˆ j,ƒd
kr€t' (d ¡ t d¡ nJˆ jd u r‘t' (d!¡ t d¡ n9ˆ j *d¡D ]2}t|ƒ}ˆ j|vrÈt' (d"ˆ j›¡ t' (d#|›d¡ |D ]}	t' (d$|	› ¡ q·t d¡ q—ˆ j-rÿˆ jd u sÚˆ jd u rät' .d%¡ t d¡ ˆ j *d¡D ]}t/|ˆ jˆ j-ˆ j0ˆ j1ƒ qêt d
¡ ˆ jrˆ j *d&¡d ˆ _2ˆ jd ur"ˆ j3d'kr"t' .d(¡ t' .d)¡ g }
ˆ jd u r1|
 4ˆ j,¡ n)ˆ j *d¡D ]"}t5|ˆ jƒ}t%|ƒd
krSt' .d*|› d+ˆ j› d,¡ |
 4|¡ q7d-d.„ t6t7t%|
d
 ƒˆ j+ƒƒD ƒ}|
D ]j}t8|ƒD ]b\}}t8t9|ˆ j:d/dƒD ]R\}}| ;¡ }ˆ j+dkr˜||  4|¡ q|j*d0ˆ j+d d1}t%|ƒˆ j+krÁt' (d2|› d3ˆ j+› d4t%|ƒ› d,¡ t d5¡ t8|ƒD ]\}}||  4|¡ qÅqqsqmt%|ƒˆ _+g g }‰ˆ j<d u rAt=j>tjj?ˆ j:d/}| @¡  ;¡  *d0¡}t%|ƒ}d6d.„ |D ƒ}d7d.„ t6|ƒD ƒ‰|D ].}| ;¡  *d0¡}t%|ƒ|kr+t' (d8¡ t d5¡ t8|ƒD ]\}}||  4| ;¡ ¡ q/qnWˆ j<D ]O}|}|ˆv rt| rb|ˆd
 krbt' Ad9|›d:¡ qDt' (|›d;¡ t' (d<¡ t d¡ g }t9|ˆ j:d/D ]
}| 4| ;¡ ¡ q}| 4|¡ ˆ 4|¡ qDt%ˆƒ}| r¹ˆ j<d u r°d=gd>d.„ t6|d ƒD ƒ ‰n	d?ˆd
 › ˆd
< ˆ j$rà|dkrÌt' (d@¡ t d¡ ˆ jBsÓ| rÝt' (dA¡ t d¡ dˆ _C| rò|dkròt' (dB¡ t d¡ |dkrˆ jBrt' (dC¡ t d¡ tDg |¢|¢ˆ jˆ jˆ j0ˆ j1ƒ}|d |… ||d … ‰}ˆD ]N}t%|ƒd
kridDˆ j›dE}ˆ j0d us@ˆ j1d ur_|dF7 }ˆ j0rP|dGˆ j0› 7 }ˆ j1r_|dHˆ j1› ˆ j1 7 }t' (|¡ t d¡ tE||ˆ jdI q$i }ˆ j&D ]}tFˆ | G¡ ddJ} || dK< | H¡ }tI| dZi | ¤Ž||< qxˆ j$rÕt| J¡ ƒd
 ˆd
 }!}tK|g|¢R Ž D ]^}"}#|! L|"|#¡}$|! M¡  Nˆ jO¡}%t|$ Nˆ jPˆ jQ|%¡ƒ q±t d
¡ ˆ jRrâˆ jNdLkrâdMˆ _N|dkr:g }&t!|ƒD ]6}|| jS|d ˆ jBrüˆ jTnddN}$||  M¡  Nˆ jNdLkrˆ jOnd	¡}%|& 4|$ Nˆ jPˆ jQ|%ˆ jNdLk¡¡ qítU|&ˆ ƒ ˆ jRr8tV||d
 |ˆ ƒ d S d S ‡‡fdOd.„t6|ƒD ƒ}'t' AdP|› dQ¡ | si }(tWtƒ})ˆ|)dR< |'D ]0\}}t!|ƒD ]&}|| jS|d dS}$||  M¡  Nˆ jO¡|(|$jX< |)|$jX  4|$ Nˆ jPd¡¡ qfq^n7ddTlYmZ}* ˆ jrœdUndV}+ˆ jr¥ˆ j[nˆ j\},|*|'|d |+|,ˆ j]dW}-|-j^ˆ __|-ƒ \}(})‡ fdXdY„|( `¡ D ƒ}(ta|)|(ˆ ƒ d S )[NÚrr)   TÚ
)ÚmodeÚencodingÚ	bufferingÚnewlineÚw)rc   rd   re   ÚNO_COLORFr   zsacreBLEU: %(message)s)ÚlevelrP   z: z, zThe available test sets for z are:zThe available test sets are:z<30r   z3Only one metric can be used in sentence-level mode.zI need a test set (-t).ú,ÚcitationzNo citation found for zfThe --num-refs argument allows you to provide any number of tab-delimited references in a single file.zWYou can only use it with externally provided references, however (i.e., not with `-t`),z5and you cannot then provide multiple reference files.zUnknown test set z6Please run with --list to see the available test sets.zhIf manual references given, make sure to provide them before the -i/--input argument to avoid confusion.zEOtherwise, I need a predefined test set (-t) from the following list:zPI need exactly one of (a) a predefined test set (-t) or (b) a list of referenceszZI need a language pair (-l). Use --list to see available language pairs for this test set.zNo such language pair zAvailable language pairs for z > z9--echo requires a test set (--t) and a language pair (-l)ú-Únonez\You are turning off BLEU's internal tokenizer presumably to supply your own tokenized files.z9Published numbers will not be comparable to other papers.z!No references found for test set ú/Ú.c                 S   s   g | ]}g ‘qS r*   r*   )r,   Úxr*   r*   r.   r/   ]  s    zmain.<locals>.<listcomp>)rd   ú	)ÚsepÚmaxsplitzFATAL: line z: expected z fields, but found é   c                 S   s   g | ]}|g‘qS r*   r*   )r,   Úsr*   r*   r.   r/     s    c                 S   ó   g | ]	}d |d › ‘qS ©zSystem r   r*   ©r,   Úir*   r*   r.   r/   „  ó    zRFATAL: the number of tab-delimited fields in the input stream differ across lines.z	Ignoring z& as it was also given as the baseline.z already used to name a system.z7Make sure to have a different basename for each system.ÚBaselinec                 S   rv   rw   r*   rx   r*   r*   r.   r/   ®  rz   z
Baseline: z8Only one system can be evaluated in sentence-level mode.z9Statistical tests are unavailable in sentence-level mode.zBPaired tests require multiple input systems given to --input (-i).z1Use paired tests (--paired) for multiple systems.z	Test set z contains no sentencez withz
 origlang=z subset=)Útest_set)Ústrip_prefixÚ
referencesr<   r=   )r~   Ún_bootstrapc                    s   g | ]
}ˆ | ˆ| f‘qS r*   r*   rx   )Ú	sys_namesÚsystemsr*   r.   r/     s    zFound z	 systems.ÚSystem)r~   )Ú
PairedTestÚbsÚar)r~   Ú	test_typeÚ	n_samplesÚn_jobsc                    s   i | ]\}}||  ˆ j¡“qS r*   )rP   Úshort)r,   ÚkÚv)r_   r*   r.   Ú
<dictcomp>9  s    zmain.<locals>.<dictcomp>r*   )brR   Ú	paired_bsÚ	paired_arÚopenÚsysÚstdinÚfilenoÚstdoutrS   rT   ÚgetÚno_colorr   ÚENABLE_COLORSÚcoloramaÚinitÚquietÚloggingÚbasicConfigÚINFOÚdownloadr   r!   ÚexitÚlistr|   r
   r   Ú
fieldnamesÚprintÚjoinr   r   Úsortedr   ÚstripÚsentence_levelÚlenÚmetricsÚsacreloggerÚerrorrk   ÚsplitÚnum_refsr(   ÚechoÚwarningr   r#   r$   Úbleu_trg_langr8   Úappendr   ÚrangeÚmaxÚ	enumerater   rd   ÚrstripÚinputÚioÚTextIOWrapperÚbufferÚreadlineÚinfoÚ
confidenceÚbleu_effective_orderr	   r   r   r+   Úupperr   ÚvaluesÚzipÚsentence_scoreÚget_signaturerP   r‰   ÚwidthÚ
score_onlyÚdetailÚcorpus_scoreÚconfidence_nr   r   r   ÚnameÚsignificancerƒ   Úpaired_bs_nÚpaired_ar_nÚpaired_jobsr‡   Úpaired_nÚitemsr   ).Úpaired_test_moder—   ÚpairÚfieldsÚtestsetsÚtestsetÚdescr|   Ú	langpairsÚlpÚconcat_ref_filesÚ	ref_filesÚ	full_refsÚrefnoÚref_fileÚlinenoÚliner(   ÚrefÚfull_systemsÚinputfhÚnum_sysÚsys_idxÚsentÚfnameÚsys_nameÚlinesÚoutputsÚsystemÚmessager§   rÆ   Úmetric_argsÚmetricÚ
hypothesisr~   ÚscoreÚsigÚresultsÚnamed_systemsÚsigsÚscoresrƒ   r†   r‡   Úpsr*   )r_   r€   r   r.   ÚmainÚ   sÞ  
þ





(






€


€


€


ÿÿÿ"$
ÿôÿ



ÿú













þ




þÿÿ
ÿýÿþ
rò   )1Ú__doc__rµ   rS   r   rš   ÚpathlibrA   Úcollectionsr   Ú__package__Ú__name__ÚPathÚ__file__ÚabsoluteÚparentsÚparentÚpathÚinsertrE   Údatasetr   r§   r   Úutilsr   r	   r
   r   r   r   r   r   r   r   r   r   r   r   Ú r   rQ   Ú	getLoggerr¨   r   r   r   ÚImportErrorrR   rò   r*   r*   r*   r.   Ú<module>   sF   
ÿ   g
ÿ