o
    7t¾iÊ  ã                   @   s   d dl Z dd„ Zdd„ ZdS )é    Nc                 C   sH  | d }|  dd¡}t dd|¡}|  dd¡}|  dd	¡}|  d
d¡}|  dd¡}|  dd¡}|  dd¡}|  dd¡}|  dd¡}|  dd¡}t dd|¡}t dd|¡}t dd|¡}t dd|¡}t d d!|¡}|  d"d#¡}|  d$d%¡}|  d&d'¡}|  d(td)ƒ d( td)ƒ¡}|  d*d+¡}|  d,d+¡}|  d-d.¡}|  d/d0¡}|S )1NÚpagezs 'zs'z	/' [0-9]/z/'[0-9]/z @-@ ú-z @,@ ú,z @.@ Ú.z : z: z ; z; z . z. z ! z! z ? z? z , z, z\(\s*([^\)]*?)\s*\)z(\1)z\[\s*([^\]]*?)\s*\]z[\1]z{\s*([^}]*?)\s*}z{\1}z\"\s*([^\"]*?)\s*\"z"\1"z'\s*([^']*?)\s*'z'\1'z= = = =z====z= = =z===z= =z==ú é°   z 
Ú
z
 z N z 1 z 'sz's)ÚreplaceÚreÚsubÚchr)ÚdocÚstring© r   ú^/home/ubuntu/.local/lib/python3.10/site-packages/lm_eval/tasks/wikitext/preprocess_wikitext.pyÚwikitext_detokenizer   s4   r   c                 C   sD   |\}t t d| d ¡ƒ}t | d  d¡ƒ}||f||f||fdœS )Nz\s+r   zutf-8)Úword_perplexityÚbyte_perplexityÚbits_per_byte)Úlenr
   ÚsplitÚencode)r   ÚresultsÚloglikelihoodÚ_wordsÚ_bytesr   r   r   Úprocess_results'   s   ýr   )r
   r   r   r   r   r   r   Ú<module>   s    #