o
    
 ¢iG  ã                   @   s¾   d dl mZ d dlmZmZ d dlZd dlmZ ejdddd„ ƒZ	ejd	d
„ ƒZ
dd„ Zdd„ Zdd„ Zdd„ Zdd„ Zdd„ Zdd„ Zdd„ Zdd„ Zdd„ Zdd „ Zd!d"„ Zd#d$„ ZdS )%é    )ÚPath)ÚdumpsÚloadsN)ÚSentencePieceProcessorÚmodule)Úscopec                 C   s   t | jƒjS ©N)r   ÚfspathÚparent)Úrequest© r   úT/home/ubuntu/.local/lib/python3.10/site-packages/curated_tokenizers/tests/test_sp.pyÚtest_dir	   s   r   c                 C   s   t  t| d ƒ¡S )Nú	toy.model)r   Ú	from_fileÚstr)r   r   r   r   Ú	toy_model   s   r   c                 C   s‚   t t| d ƒdƒ}| ¡ }W d   ƒ n1 sw   Y  t |¡}t|ƒ | ¡ }||ks0J ‚t tƒ ¡}| ¡ tƒ ks?J ‚d S )Nr   Úrb)Úopenr   Úreadr   Úfrom_protobufÚ
_check_idsÚto_protobufÚbytes)r   ÚfÚdataÚsppÚserialized_datar   r   r   Útest_load_proto   s   
ÿ
r   c                   C   s>   t jtdd t d¡ W d   ƒ d S 1 sw   Y  d S )NzNo such file)Úmatchzbogus.model)ÚpytestÚraisesÚOSErrorr   r   r   r   r   r   Útest_load_unknown_file    s   "ÿr#   c                 C   ó2   |   d¡\}}|g d¢ksJ ‚|g d¢ksJ ‚d S )Nz	Test  nul)éï   i~  r   é   é   éç   )u   â–TÚestú u   â–ÚnÚul©Úencode©r   ÚidsÚpiecesr   r   r   Útest_handles_nul_character%   ó   r2   c                 C   ó   |   g d¢¡}|dksJ ‚d S )N©é   iÑ  é
   i³  é)   r7   éª   é¨   én   é   é   é   é   úI saw a girl with a telescope.)Údecode_from_ids©r   Údecodedr   r   r   Útest_decode_from_ids+   s   ÿrD   c                 C   r4   )N©õ   â–Iu   â–sawõ   â–au   â–girlu   â–withrG   u   â–tÚelÚesÚcÚoÚpeÚ.r@   )Údecode_from_piecesrB   r   r   r   Útest_decode_from_pieces2   s   ÿrO   c                 C   sr   t  t¡ |  d¡ W d   ƒ n1 sw   Y  t  t¡ |  g d¢¡ W d   ƒ d S 1 s2w   Y  d S )NÚtest)é   é   é   )r    r!   Ú	TypeErrorrN   ©r   r   r   r   Ú.test_decode_with_pieces_rejects_inccorect_typeG   s   ÿ"ÿrV   c                 C   r$   )Nr@   r5   rE   r-   r/   r   r   r   Útest_encodeN   r3   rW   c                 C   s   t | ƒ d S r   )r   rU   r   r   r   Útest_encode_as_idsb   s   rX   c                 C   s   |   d¡}|g d¢ksJ ‚d S )Nr@   rE   )Úencode_as_pieces)r   r1   r   r   r   Útest_encode_as_piecesf   s   
rZ   c                  C   sÜ  t ƒ } t t¡ |  d¡ W d   ƒ n1 sw   Y  t t¡ |  d¡ W d   ƒ n1 s2w   Y  t t¡ |  d¡ W d   ƒ n1 sLw   Y  t t¡ |  dg¡ W d   ƒ n1 sgw   Y  t t¡ |  dg¡ W d   ƒ n1 s‚w   Y  t t¡ |  	¡  W d   ƒ n1 s›w   Y  t t¡ |  
¡  W d   ƒ n1 s´w   Y  t t¡ |  ¡  W d   ƒ n1 sÍw   Y  t t¡ |  ¡  W d   ƒ d S 1 sçw   Y  d S )Nr@   rF   r6   )r   r    r!   ÚRuntimeErrorr.   Úencode_as_idsrY   rN   rA   Úbos_idÚeos_idÚunk_idÚpad_id)r   r   r   r   Útest_uninitialized_modely   s8   ÿÿÿÿÿ
ÿ
ÿ
ÿ
"ÿra   c                 C   sn   t | ƒdksJ ‚|  ¡ dksJ ‚|  ¡ dksJ ‚|  ¡ dks J ‚|  ¡ dks(J ‚|  d¡}|g d¢ks5J ‚d S )Niè  rQ   rR   r   éÿÿÿÿr@   r5   )Úlenr]   r^   r_   r`   r\   )r   r0   r   r   r   r      s   
r   c                 C   s  |   d¡|  ¡ ksJ ‚|   d¡|  ¡ ksJ ‚|   d¡|  ¡ ks!J ‚|   d¡|  ¡ ks,J ‚|  |  ¡ ¡dks7J ‚|  |  ¡ ¡dksBJ ‚|  |  ¡ ¡dksMJ ‚t t¡ |  d¡ W d   ƒ n1 sbw   Y  t t¡ |  t| ƒ¡ W d   ƒ d S 1 sw   Y  d S )Nz<s>z</s>z<unk>Úqotsarb   )	Úpiece_to_idr]   r^   r_   Úid_to_piecer    r!   Ú
ValueErrorrc   rU   r   r   r   Ú test_id_to_piece_and_piece_to_id™   s   ÿ"ÿrh   c                 C   s6   t | ƒ}t|ƒ}t|tƒsJ ‚| ¡ |  ¡ ksJ ‚d S r   )r   r   Ú
isinstancer   r   )r   Ú
serializedÚdeserializedr   r   r   Útest_pickle¨   s   rl   )Úpathlibr   Úpickler   r   r    Úcurated_tokenizersr   Úfixturer   r   r   r#   r2   rD   rO   rV   rW   rX   rZ   ra   r   rh   rl   r   r   r   r   Ú<module>   s*    



