o
    [o™i(€  ã                   @   st   d dl Z d dlmZ d dlmZ d dlmZ d dlmZmZ eƒ Z	eƒ Z
G dd„ deƒZG dd	„ d	eƒZdd	gZdS )
é    N)ÚPerluniprops)ÚNonbreakingPrefixes)Úis_cjk)ÚVIRAMASÚNUKTASc                
       sX  e Zd ZdZed e d¡¡ƒZed e d¡¡d e	¡ d e
¡ ƒZed e d¡¡ƒZed e d¡¡ƒZed e d¡¡d e	¡ d e
¡ ƒZed e d¡¡ƒZe d	¡d
fZe d¡dfZdZdZdZe d e¡¡dfZe djed¡dfZe d¡dfZe d¡dfZe d¡dfZe d e¡¡dfZe d e¡¡dfZe d e¡¡dfZe d¡d fZ e d!¡d fZ!e d"¡d#fZ"e d$¡d%fZ#e d&¡d'fZ$e d(¡d'fZ%e d)¡d*fZ&e d+¡d,fZ'e d-¡d.fZ(e d/¡d-fZ)e d0jed1¡d2fZ*e d3jed1¡d2fZ+e d4jed1¡d2fZ,e d5 ee¡¡dfZ-d6jedd7fZ.e d8¡d9fZ/e d:¡dfZ0e d;¡dfZ1e d<¡d=fZ2e d>¡d?fZ3e d@¡dAfZ4e dB¡dCfZ5e dD¡dEfZ6e dF¡dGfZ7e dH¡dIfZ8e dJ¡d
fZ9e dK¡d
fZ:e dL¡dMfZ;e dN¡dOfZ<e dP¡dOfZ=e dQ¡dRfZ>e dS¡dTfZ?e dU¡dVfZ@e dW¡dXfZAe dY¡dZfZBe d[¡d\fZCe d]¡d^fZDe d_¡d`fZEe da¡dbfZFe dc¡ddfZGe de¡dffZHe dg¡dhfZIe di¡djfZJe dk¡dlfZKe dm¡dnfZLe do¡dpfZMe dq¡drfZNe ds¡dtfZOe du¡dvfZPe dw¡d
fZQe dx¡dfZRe dy¡dfZSe dz¡d{fZTe d|¡d}fZUe d~¡dfZVe d€¡dfZWe d‚¡dƒfZXe d„¡d…fZYe d@¡d†fZZe d‡¡dˆfZ[e d‰jedŠ¡d‹fZ\e dŒjeed¡d‹fZ]e dŽjedŠ¡d‹fZ^e djedŠ¡dfZ_e d‘jed’¡dfZ`e\e]e^e_e`gZae d‰jedŠ¡d‹fZbe d“jedŠ¡d‹fZce dŽjedŠ¡d‹fZde djedŠ¡d”fZeebecedeegZfe d‚¡d•fZge d–¡d—fZhd˜Zid™ZjdšZkd›ZldœZmg e‘e‘e ‘e!‘e"‘e#‘e$‘e%‘e&‘e'‘e(‘e*‘e+‘e,‘e-‘e.‘e/‘e0‘e1‘e2‘e3‘e4‘e5‘e6‘e7‘e8‘e9‘e:‘e;‘e<‘e=‘e>‘e?‘e@‘eA‘eB‘eC‘eD‘eE‘eF‘eG‘eH‘eI‘eJ‘eK‘eL‘eM‘eN‘eO‘eP‘Zne)eQeReSeTeUeVeWeXeYg
ZoeTeUeVeWeXeYeZe[gZpeiejekelemgZqg d¢Zrd¶‡ fd d¡„	Zsd¢d£„ Ztd¤d¥„ Zud¦d§„ Zvd¨d©„ Zwdªd«„ Zxd¬d­„ Zyd®d¯„ Zzd·d±d²„Z{	°	°	³	Ÿd¸d´dµ„Z|‡  Z}S )¹ÚMosesTokenizerz–
    This is a Python port of the Moses Tokenizer from
    https://github.com/moses-smt/mosesdecoder/blob/master/scripts/tokenizer/tokenizer.perl
    Ú ÚIsNÚIsAlnumÚIsScÚIsSoÚIsAlphaÚIsLowerz\s+ú z[\000-\037])z +r   )z^ r   )z $r   ú([^{}\s\.'\`\,\-])ú \1 ú ([{alphanum}])\-(?=[{alphanum}])©Úalphanumú\1 @-@ ú	\.([\.]+)z
 DOTMULTIúDOTMULTI\.([^\.])zDOTDOTMULTI ú
DOTMULTI\.ÚDOTDOTMULTIz
([^{}])[,]z\1 , z
[,]([^{}])z , \1z
([{}])[,]$z^``z`` z^"z^`([^`])z` \1z^'z`  z
([ ([{<])"z\1 `` z([ ([{<])``z([ ([{<])`([^`])z\1 ` \2z
([ ([{<])'z\1 ` z\.\.\.z _ELLIPSIS_ Ú
_ELLIPSIS_z([^{numbers}])[,]([^{numbers}]))Únumbersz\1 , \2z([{numbers}])[,]([^{numbers}])z([^{numbers}])[,]([{numbers}])z([;:@#\$%&{}{}])ú([{alphanum}])\/([{alphanum}])ú$1 \@\/\@ $2z([^.])([.])([\]\)}>"']*) ?$z\1 \2\3z([?!])z([\]\[\(\){}<>])z\(z-LRB-z\)z-RRB-z\[z-LSB-z\]z-RSB-z\{z-LCB-z\}z-RCB-z--z -- ú^ú$ú"z '' z([^'])' z\1 ' z([^'])'z'([sSmMdD]) z '\1 z'll z 'll z're z 're z've z 've zn't z n't z'LL z 'LL z'RE z 'RE z'VE z 'VE zN'T z N'T z ([Cc])annot z
 \1an not z ([Dd])'ye z \1' ye z ([Gg])imme z	 \1im me z ([Gg])onna z	 \1on na z ([Gg])otta z	 \1ot ta z ([Ll])emme z	 \1em me z ([Mm])ore'n z
 \1ore 'n z '([Tt])is z '\1 is z '([Tt])was z	 '\1 was z ([Ww])anna z	 \1an na z  *z^ *z *$ú&ú&amp;z\|ú&#124;ú<ú&lt;ú>ú&gt;z\'ú&apos;z\"ú&quot;ú&#91;ú]ú&#93;z([^{alpha}])[']([^{alpha}]))Úalphaz\1 ' \2z([^{alpha}{isn}])[']([{alpha}]))r-   Úisnz([{alpha}])[']([^{alpha}])z([{alpha}])[']([{alpha}])z\1 '\2z([{isn}])[']([s]))r.   z([^{alpha}])[']([{alpha}])z\1' \2z ' z\.' ?$z . ' z<\/?\S+\/?>z#<\S+( [a-zA-Z0-9]+\="?[^"]")+ ?\/?>z#<\S+( [a-zA-Z0-9]+\='?[^']')+ ?\/?>ú'[\w\-\_\.]+\@([\w\-\_]+\.)+[a-zA-Z]{2,}z/(http[s]?|ftp):\/\/[^:\/\s]+(\/\w+)*\/[\w\-\.]+)z"((https?|ftp|rsync)://|www\.)[^ ]*r/   z@[a-zA-Z0-9_]+z#[a-zA-Z0-9_]+ÚenNc                    s¾  t tˆ ƒ ¡  |ˆ _dd„ t |¡D ƒˆ _|rJg ˆ _t|dƒ#}|D ]}| ¡ }|r:| 	d¡s:|ˆ jvr:ˆ j 
|¡ q"W d   ƒ n1 sEw   Y  ‡ fdd„ˆ jD ƒˆ _ˆ jdv rÝd}ˆ jdv rm|td t d	¡¡ƒ7 }ˆ jd
v r~|td t d¡¡ƒ7 }ˆ jdv r§|td t d¡¡ƒ7 }|td t d¡¡ƒ7 }|td t d¡¡ƒ7 }ˆ  j|7  _ˆ  j|7  _t d ˆ j¡¡dfˆ _t djˆ jd¡dfˆ _t djˆ jd¡dfˆ _d S d S )Nc                 S   s   g | ]}|  ¡ ‘qS © )Ústrip)Ú.0Ú_nbpr1   r1   úG/home/ubuntu/.local/lib/python3.10/site-packages/sacremoses/tokenize.pyÚ
<listcomp>.  s    ÿz+MosesTokenizer.__init__.<locals>.<listcomp>Úrú#c                    s$   g | ]}ˆ   |¡r| d ¡d ‘qS )r   r   )Úhas_numeric_onlyÚ
rpartition)r3   Úw©Úselfr1   r5   r6   <  s    ýÿ)ÚzhÚjaÚkoÚcjkr   )r@   rA   ÚHangul)r>   rA   ÚHan)r?   rA   ÚHiraganaÚKatakanar   r   r   r   r   r   r   )Úsuperr   Ú__init__ÚlangÚnonbreaking_prefixesÚwordsÚNONBREAKING_PREFIXESÚopenr2   Ú
startswithÚappendÚNUMERIC_ONLY_PREFIXESÚstrÚjoinÚperlunipropsÚcharsr   r
   ÚreÚcompileÚformatÚPAD_NOT_ISALNUMÚAGGRESSIVE_HYPHEN_SPLITÚINTRATOKEN_SLASHES)r=   rH   Ú custom_nonbreaking_prefixes_fileÚfinÚlineÚ	cjk_chars©Ú	__class__r<   r5   rG   (  sN   ÿ
€üÿ
þ



þ
þîzMosesTokenizer.__init__c                 C   sJ   t  dd|¡}t  d¡}| |¡r#t  dd|¡}| d|¡}| |¡s|S )Nr   z DOTMULTI\1r   r   zDOTDOTMULTI \1r   )rT   ÚsubrU   Úsearch©r=   ÚtextÚdotmultir1   r1   r5   Úreplace_multidotsY  s   


þz MosesTokenizer.replace_multidotsc                 C   s8   t  d¡}| |¡r| d|¡}| |¡s
t  dd|¡S )Nr   z	DOTMULTI.ÚDOTMULTIÚ.)rT   rU   ra   r`   rb   r1   r1   r5   Úrestore_multidotsa  s
   


ÿz MosesTokenizer.restore_multidotsc                 C   s   t |ƒ t | jƒ¡ S ©N)ÚsetÚ
differencer   ©r=   rc   r1   r1   r5   Úislowerg  s   zMosesTokenizer.islowerc                 C   s   t t|ƒ t| jƒ¡ƒS ri   )Úanyrj   Úintersectionr   rl   r1   r1   r5   Ú
isanyalphaj  s   zMosesTokenizer.isanyalphac                 C   s   t t d|¡ƒS )Nz[\s]+(\#NUMERIC_ONLY\#))ÚboolrT   ra   rl   r1   r1   r5   r9   m  s   zMosesTokenizer.has_numeric_onlyc                 C   sÔ   |  ¡ }t|ƒ}t|ƒD ]X\}}t d|¡}|rd| d¡}d|v r&|  |¡sG|| jv r0|| jvsG||d krH||d  rH|  	||d  d ¡rHq|| jv r^|d |k r^t d||d  ¡r^q|d ||< qd 
|¡S )Nz	^(\S+)\.$é   rg   r   z^[0-9]+z .r   )ÚsplitÚlenÚ	enumeraterT   ra   Úgrouprp   rK   rO   rm   rQ   )r=   rc   ÚtokensÚ
num_tokensÚiÚtokenÚtoken_ends_with_periodÚprefixr1   r1   r5   Úhandles_nonbreaking_prefixesp  s.   
	


ÿþ
ÿ€
z+MosesTokenizer.handles_nonbreaking_prefixesc                 C   ó    | j D ]
\}}| ||¡}q|S ri   )ÚMOSES_ESCAPE_XML_REGEXESr`   ©r=   rc   ÚregexpÚsubstitutionr1   r1   r5   Ú
escape_xml™  ó   zMosesTokenizer.escape_xmlFc                 C   sZ   t |ƒ}| jD ]
\}}| ||¡}q|  |¡}| jD ]
\}}| ||¡}q|r)|S | ¡ S )z‚
        This is a Python port of the Penn treebank tokenizer adapted by the Moses
        machine translation community.
        )rP   ÚMOSES_PENN_REGEXES_1r`   r}   ÚMOSES_PENN_REGEXES_2rs   )r=   rc   Ú
return_strr   r‚   r1   r1   r5   Úpenn_tokenizež  s   
zMosesTokenizer.penn_tokenizeTc                    s  t ˆ ƒ‰ | j| jfD ]
\}}| |ˆ ¡‰ q
|rNdd„ |D ƒ}‡ fdd„|D ƒ}t|ƒdks/J ‚tt|ƒdd„ ddD ]\}	}
d	t |	ƒ d
¡ }ˆ  |
|¡‰ q:ˆ  	¡ ‰ 	 | j
\}}| |ˆ ¡‰ |rk| j\}}| |ˆ ¡‰ |  ˆ ¡‰ | j| j| jfD ]
\}}| |ˆ ¡‰ qx| jdkr—| jD ]
\}}| |ˆ ¡‰ q‹n| jdv r«| jD ]
\}}| |ˆ ¡‰ qŸn| j\}}| |ˆ ¡‰ |  ˆ ¡‰ | j\}}| |ˆ ¡ 	¡ ‰ | j\}}| |ˆ ¡‰ |rít|ƒD ]\}	}
d	t |	ƒ d
¡ }ˆ  ||
¡‰ qÙ|  ˆ ¡‰ |rù|  ˆ ¡‰ |rýˆ S ˆ  ¡ S )a  
        Python port of the Moses tokenizer.

            :param tokens: A single string, i.e. sentence text.
            :type tokens: str
            :param aggressive_dash_splits: Option to trigger dash split rules .
            :type aggressive_dash_splits: bool
        c                 S   s   g | ]	}t  |t j¡‘qS r1   )rT   rU   Ú
IGNORECASE)r3   Úpr1   r1   r5   r6   Æ  s    z+MosesTokenizer.tokenize.<locals>.<listcomp>c                    s$   g | ]}|  ˆ ¡D ]}| ¡ ‘q	qS r1   )Úfinditerrv   )r3   Úprotected_patternÚmatch©rc   r1   r5   r6   È  s    ýþÿiè  c                 S   s   t | d ƒS )Nrr   )rt   )Úpairr1   r1   r5   Ú<lambda>Ð  s    z)MosesTokenizer.tokenize.<locals>.<lambda>T)ÚkeyÚreverseÚTHISISPROTECTEDé   r0   )ÚfrÚit)rP   ÚDEDUPLICATE_SPACEÚ
ASCII_JUNKr`   rt   Úsortedru   ÚzfillÚreplacer2   rW   rX   re   ÚCOMMA_SEPARATE_1ÚCOMMA_SEPARATE_2ÚCOMMA_SEPARATE_3rH   ÚENGLISH_SPECIFIC_APOSTROPHEÚFR_IT_SPECIFIC_APOSTROPHEÚNON_SPECIFIC_APOSTROPHEr}   ÚTRAILING_DOT_APOSTROPHErh   rƒ   rs   )r=   rc   Úaggressive_dash_splitsr‡   ÚescapeÚprotected_patternsr   r‚   Úprotected_tokensry   rz   Úsubstituitionr1   rŽ   r5   Útokenize¯  s`   
þ


ý
ÿ
ÿ





zMosesTokenizer.tokenize)r0   N)F)FFTN)~Ú__name__Ú
__module__Ú__qualname__Ú__doc__rP   rQ   rR   rS   r	   r   r   r
   r   r   r   r   rT   rU   r—   r˜   Ú	MID_STRIPÚ
LEFT_STRIPÚRIGHT_STRIPrV   rW   rX   Ú REPLACE_DOT_WITH_LITERALSTRING_1Ú REPLACE_DOT_WITH_LITERALSTRING_2Ú REPLACE_DOT_WITH_LITERALSTRING_3rœ   r   rž   ÚDIRECTIONAL_QUOTE_1ÚDIRECTIONAL_QUOTE_2ÚDIRECTIONAL_QUOTE_3ÚDIRECTIONAL_QUOTE_4ÚDIRECTIONAL_QUOTE_5ÚDIRECTIONAL_QUOTE_6ÚDIRECTIONAL_QUOTE_7ÚDIRECTIONAL_QUOTE_8ÚREPLACE_ELLIPSISÚRESTORE_ELLIPSISÚCOMMA_1ÚCOMMA_2ÚCOMMA_3ÚSYMBOLSrY   ÚFINAL_PERIODÚPAD_QUESTION_EXCLAMATION_MARKÚPAD_PARENTHESISÚCONVERT_PARENTHESIS_1ÚCONVERT_PARENTHESIS_2ÚCONVERT_PARENTHESIS_3ÚCONVERT_PARENTHESIS_4ÚCONVERT_PARENTHESIS_5ÚCONVERT_PARENTHESIS_6ÚPAD_DOUBLE_DASHESÚPAD_START_OF_STRÚPAD_END_OF_STRÚCONVERT_DOUBLE_TO_SINGLE_QUOTESÚHANDLES_SINGLE_QUOTESÚ
APOSTROPHEÚCONTRACTION_1ÚCONTRACTION_2ÚCONTRACTION_3ÚCONTRACTION_4ÚCONTRACTION_5ÚCONTRACTION_6ÚCONTRACTION_7ÚCONTRACTION_8ÚCONTRACTION_9ÚCONTRACTION_10ÚCONTRACTION_11ÚCONTRACTION_12ÚCONTRACTION_13ÚCONTRACTION_14ÚCONTRACTION_15ÚCONTRACTION_16ÚCONTRACTION_17ÚCONTRACTION_18ÚCONTRACTION_19ÚCLEAN_EXTRA_SPACE_1ÚCLEAN_EXTRA_SPACE_2ÚCLEAN_EXTRA_SPACE_3ÚESCAPE_AMPERSANDÚESCAPE_PIPEÚESCAPE_LEFT_ANGLE_BRACKETÚESCAPE_RIGHT_ANGLE_BRACKETÚESCAPE_SINGLE_QUOTEÚESCAPE_DOUBLE_QUOTEÚESCAPE_LEFT_SQUARE_BRACKETÚESCAPE_RIGHT_SQUARE_BRACKETÚEN_SPECIFIC_1ÚEN_SPECIFIC_2ÚEN_SPECIFIC_3ÚEN_SPECIFIC_4ÚEN_SPECIFIC_5rŸ   ÚFR_IT_SPECIFIC_1ÚFR_IT_SPECIFIC_2ÚFR_IT_SPECIFIC_3ÚFR_IT_SPECIFIC_4r    r¡   r¢   ÚBASIC_PROTECTED_PATTERN_1ÚBASIC_PROTECTED_PATTERN_2ÚBASIC_PROTECTED_PATTERN_3ÚBASIC_PROTECTED_PATTERN_4ÚBASIC_PROTECTED_PATTERN_5r…   r†   r   ÚBASIC_PROTECTED_PATTERNSÚWEB_PROTECTED_PATTERNSrG   re   rh   rm   rp   r9   r}   rƒ   rˆ   r¨   Ú__classcell__r1   r1   r^   r5   r      s   "ÿ"ÿþ
þþûüÿþýüûúùø	÷
öõôóòñðïîíìëêéèçæåäãâá à!ß"Þ#Ý$Ü%Û&Ú'Ù(Ø)×*Ö+Õ,Ô-Ó.Ò/Ñ0Ð1Ï2Î6öøû1)
úr   c                       sÜ  e Zd ZdZed e d¡¡ƒZed e d¡¡ƒZ	ed e d¡¡ƒZ
e d¡dfZe d¡d	fZe d
¡dfZe d¡dfZe d¡dfZe d¡dfZe d¡dfZe d¡dfZe d¡dfZe d¡dfZe d¡dfZe d¡dfZe d¡dfZeeeeeeeeeeegZg d¢Zg d¢Zg d¢Ze d  d e¡d e¡d e¡¡¡Ze d! e
¡¡Z e d" e	¡¡Z!e d# e	¡¡Z"e d$ e	¡¡Z#e d%¡Z$e d&¡Z%d1‡ fd(d)„	Z&d*d+„ Z'd2d-d.„Z(d2d/d0„Z)‡  Z*S )3ÚMosesDetokenizerz›
    This is a Python port of the Moses Detokenizer from
    https://github.com/moses-smt/mosesdecoder/blob/master/scripts/tokenizer/detokenizer.perl

    r   r
   r   r   z \@\-\@ ú-z {2,}r   r#   ú|r%   r$   r'   r&   r)   r    r(   ú'r*   ú[r,   r+   r"   r!   z&bar;z&bra;z&ket;)6ÚNÚnÚAÚaõ   Ã„õ   Ã¤ÚssaÚSsau   ssÃ¤u   SsÃ¤Ústau   stÃ¤ÚStau   StÃ¤ÚhunÚHunÚhynÚHynÚhanrC   õ   hÃ¤nu   HÃ¤nu   hÃ¶nu   HÃ¶nÚunÚUnÚynÚYnÚanÚAnu   Ã¤nu   Ã„nu   Ã¶nu   Ã–nÚseenÚSeenÚllaÚLlau   llÃ¤u   LlÃ¤ÚltaÚLtau   ltÃ¤u   LtÃ¤ÚlleÚLleÚksiÚKsiÚkseÚKseÚttaÚTtaÚineÚIne)ÚniÚsiÚmmeÚnneÚnsa)	r@   u   kÃ¶r  r  Úpau   pÃ¤Úkaanu   kÃ¤Ã¤nÚkinz^({})({})?({})$u   ^[{}\(\[\{{\Â¿\Â¡]+$z^['][{}]z[{}][']$z^[{}]z^[\,\.\?\!\:\;\\\%\}\]\)]+$u   ^[\'\"â€žâ€œ`]+$r0   c                    s   t t| ƒ ¡  || _d S ri   )rF   rÿ   rG   rH   )r=   rH   r^   r1   r5   rG   ¥  s   
zMosesDetokenizer.__init__c                 C   r~   ri   )ÚMOSES_UNESCAPE_XML_REGEXESr`   r€   r1   r1   r5   Úunescape_xml©  r„   zMosesDetokenizer.unescape_xmlTc                 C   sè  d  d |¡¡}t|ƒ}| j\}}| ||¡}|r|  |¡}ddddddœ}d}d}	| ¡ }tt|ƒƒD ]¦\}
}t	|d ƒr`| j
dkr`|
dkrWt	||
d  d ƒrW|	|7 }	n|	|| 7 }	d}q4| j |¡ro|	|| 7 }	d}q4| j |¡r‹| j
d	kr„t d
|¡r„|	d7 }	|	|7 }	d}q4| j
dkr¡|
dkr¡| j |¡r¡|	|7 }	d}q4| j
dkrÇ|
dkrÇt d|d ¡rÇt d|d ¡rÇt d|¡rÇ|	|7 }	d}q4| j
dv rí|
t|ƒd krí| j |¡rí| j ||
d  ¡rí|	|| 7 }	d}q4| j
dkr/|
t|ƒd kr/| j |¡r/t d||
d  ¡r/t d||
d  tj¡r/|	|| ||
d   7 }	t|dƒ d}q4| j |¡r²|}t d|¡rAd}| |d¡||< | j
dkrX|dkrXd||< | j
dkrg|dkrgd||< || d dkr£| j
dkr’|dkr’|
dkr’t d||
d  ¡r’|	|7 }	d}q4|	|| 7 }	d}||  d7  < q4|	|7 }	d}||  d7  < q4| j
dkrÓt d||
d  ¡rÓ| j |¡rÓ|	|| 7 }	d}q4|	|| 7 }	d}q4| j\}}| ||	¡}	|	 ¡ }	|rð|	S |	 ¡ S )z«
        Python port of the Moses detokenizer.
        :param tokens: A list of strings, i.e. tokenized text.
        :type tokens: list(str)
        :return: str
        z {} r   r   )r  r    z``ú`z''r   r@   rr   éÿÿÿÿr•   z^[\?\!\:\;\\\%]$r0   Úcsz^[0-9]+$éþÿÿÿz^[.,]$)r•   r–   Úgaé   r”   u   ^[-â€“]$z^li$|^mail.*Nu   ^[â€žâ€œâ€]+$r    u   â€žu   â€œr  z[s]$Úfiz:$)rV   rQ   rP   rX   r`   r3  rs   ru   Úiterr   rH   ÚIS_CURRENCY_SYMBOLra   ÚIS_PUNCTrT   ÚIS_ENGLISH_CONTRACTIONrt   ÚIS_FRENCH_CONRTACTIONÚSTARTS_WITH_ALPHAr‰   ÚnextÚIS_OPEN_QUOTEÚgetÚFINNISH_REGEXÚ	ONE_SPACEr2   )r=   rw   r‡   Úunescaperc   r   r‚   Úquote_countsÚprepend_spaceÚdetokenized_textry   rz   Únormalized_quor1   r1   r5   r¨   ®  sÂ   




ÿ
ÿÿü
û

ÿþ
ÿþý


ÿÿ
þ
zMosesDetokenizer.tokenizec                 C   s   |   |||¡S )z&Duck-typing the abstract *tokenize()*.)r¨   )r=   rw   r‡   rF  r1   r1   r5   Ú
detokenizeG  s   zMosesDetokenizer.detokenize)r0   )TT)+r©   rª   r«   r¬   rP   rQ   rR   rS   r
   r   r   rT   rU   rX   rE  ÚUNESCAPE_FACTOR_SEPARATORÚUNESCAPE_LEFT_ANGLE_BRACKETÚUNESCAPE_RIGHT_ANGLE_BRACKETÚUNESCAPE_DOUBLE_QUOTEÚUNESCAPE_SINGLE_QUOTEÚ UNESCAPE_SYNTAX_NONTERMINAL_LEFTÚ!UNESCAPE_SYNTAX_NONTERMINAL_RIGHTÚUNESCAPE_AMPERSANDÚ UNESCAPE_FACTOR_SEPARATOR_LEGACYÚ'UNESCAPE_SYNTAX_NONTERMINAL_LEFT_LEGACYÚ(UNESCAPE_SYNTAX_NONTERMINAL_RIGHT_LEGACYr2  ÚFINNISH_MORPHSET_1ÚFINNISH_MORPHSET_2ÚFINNISH_MORPHSET_3rV   rD  r<  r>  r?  r@  r=  rB  rG   r3  r¨   rK  rþ   r1   r1   r^   r5   rÿ     sb    õ9ý


 rÿ   )rT   Úsacremoses.corpusr   r   Úsacremoses.utilr   Úsacremoses.indicr   r   rR   rI   Úobjectr   rÿ   Ú__all__r1   r1   r1   r5   Ú<module>   s          /