o
    5t¾iZ  ã                   @   sn   d Z ddlmZ ddlmZ ddlmZ ddlZddlZdZe e¡Z	dZ
e e
¡ZdZe e¡Zd	d
„ ZdS )zA library for tokenizing text.é    )Úabsolute_import)Údivision)Úprint_functionNz
[^a-z0-9]+z\s+z^[a-z0-9]+$c                    sL   |   ¡ } t dt | ¡¡} t | ¡}ˆ r‡ fdd„|D ƒ}dd„ |D ƒ}|S )a,  Tokenize input text into a list of tokens.

  This approach aims to replicate the approach taken by Chin-Yew Lin in
  the original ROUGE implementation.

  Args:
    text: A text blob to tokenize.
    stemmer: An optional stemmer.

  Returns:
    A list of string tokens extracted from input text.
  ú c                    s,   g | ]}t |ƒd krt ˆ  |¡¡n|‘qS )é   )ÚlenÚsixÚ
ensure_strÚstem©Ú.0Úx©Ústemmer© úH/home/ubuntu/.local/lib/python3.10/site-packages/rouge_score/tokenize.pyÚ
<listcomp>8   s    $ÿztokenize.<locals>.<listcomp>c                 S   s   g | ]	}t  |¡r|‘qS r   )ÚVALID_TOKEN_REÚmatchr   r   r   r   r   <   s    )ÚlowerÚNON_ALPHANUM_REÚsubr   r	   Ú	SPACES_REÚsplit)Útextr   Útokensr   r   r   Útokenize"   s   

ÿr   )Ú__doc__Ú
__future__r   r   r   Úrer   ÚNON_ALPHANUM_PATTERNÚcompiler   ÚSPACES_PATTERNr   ÚVALID_TOKEN_PATTERNr   r   r   r   r   r   Ú<module>   s   


