o
    i,                     @   s   d dl mZmZ d dlmZ d dlmZmZmZ d dl	m
Z
 ddlmZ ddlmZ dZd	d
 ZG dd deZG dd deZG dd deZdgZdS )   )BaseDefaultsLanguage)Doc)DummyTokenizerload_config_from_strregistry)Vocab   )	LEX_ATTRS)
STOP_WORDSz?
[nlp]

[nlp.tokenizer]
@tokenizers = "spacy.th.ThaiTokenizer"
c                  C   s   dd } | S )Nc                 S   s
   t | jS )N)ThaiTokenizervocab)nlp r   J/home/ubuntu/.local/lib/python3.10/site-packages/spacy/lang/th/__init__.pythai_tokenizer_factory   s   
z5create_thai_tokenizer.<locals>.thai_tokenizer_factoryr   )r   r   r   r   create_thai_tokenizer   s   r   c                   @   s0   e Zd ZdeddfddZdedefddZdS )	r   r   returnNc                 C   s:   zddl m} W n ty   tdd w || _|| _d S )N    )word_tokenizezYThe Thai tokenizer requires the PyThaiNLP library: https://github.com/PyThaiNLP/pythainlp)pythainlp.tokenizer   ImportErrorr   )selfr   r   r   r   r   __init__   s   
zThaiTokenizer.__init__textc                 C   s,   t | |}dgt| }t| j||dS )NF)wordsspaces)listr   lenr   r   )r   r   r   r   r   r   r   __call__#   s   zThaiTokenizer.__call__)__name__
__module____qualname__r   r   strr   r   r   r   r   r   r      s    r   c                   @   s   e Zd ZeeZeZeZ	dS )ThaiDefaultsN)
r    r!   r"   r   DEFAULT_CONFIGconfigr
   lex_attr_gettersr   
stop_wordsr   r   r   r   r$   )   s    r$   c                   @   s   e Zd ZdZeZdS )ThaithN)r    r!   r"   langr$   Defaultsr   r   r   r   r)   /   s    r)   N)languager   r   tokensr   utilr   r   r   r   r   	lex_attrsr
   r(   r   r%   r   r   r$   r)   __all__r   r   r   r   <module>   s    
