o
    5ti                     @   s4   d dl mZ d dlZddlmZ G dd deZdS )    )	lru_cacheN   )BaseTokenizerc                   @   s.   e Zd Zdd Zdd Zedddd Zd	S )
TokenizerRegexpc                 C   s   dS )Nre selfr   r   U/home/ubuntu/.local/lib/python3.10/site-packages/sacrebleu/tokenizers/tokenizer_re.py	signature	   s   zTokenizerRegexp.signaturec                 C   s:   t ddft ddft ddft ddfg| _d S )Nz([\{-\~\[-\` -\&\(-\+\:-\@\/])z \1 z([^0-9])([\.,])z\1 \2 z([\.,])([^0-9])z \1 \2z
([0-9])(-))r   compile_rer   r   r   r
   __init__   s
   
zTokenizerRegexp.__init__i   )maxsizec                 C   s*   | j D ]
\}}|||}qd| S )zCommon post-processing tokenizer for `13a` and `zh` tokenizers.

        :param line: a segment to tokenize
        :return: the tokenized line
         )r   subjoinsplit)r	   liner   replr   r   r
   __call__   s   zTokenizerRegexp.__call__N)__name__
__module____qualname__r   r   r   r   r   r   r   r
   r      s
    r   )	functoolsr   r   tokenizer_baser   r   r   r   r   r
   <module>   s    