o
    iS                     @   s   d dl mZ d dlmZ d dlmZ d dlmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZ 					
					ddedeeeee f deeeee f dededededefddZd	S )    )Path)Iterable)Union)AbsTokenizer)CharTokenizer)PhonemeTokenizer)SentencepiecesTokenizer)WordTokenizerNF<space>
token_typebpemodelnon_linguistic_symbolsremove_non_linguistic_symbolsspace_symbol	delimiterg2p_typereturnc                 C   s   | dkr|du rt d|rtdt|S | dkr,|r'|dur't||ddS t|dS | d	kr7t|||d
S | dkrCt||||dS t d|  )z*A helper function to instantiate TokenizerbpeNz*bpemodel is required if token_type = "bpe"zCremove_non_linguistic_symbols is not implemented for token_type=bpewordT)r   r   r   )r   char)r   r   r   phn)r   r   r   r   z2token_mode must be one of bpe, word, char or phn: )
ValueErrorRuntimeErrorr   r	   r   r   )r   r   r   r   r   r   r    r   T/home/ubuntu/.local/lib/python3.10/site-packages/funasr/tokenizer/build_tokenizer.pybuild_tokenizer   s<   

r   )NNFr
   NN)pathlibr   typingr   r   funasr.tokenizer.abs_tokenizerr   funasr.tokenizer.char_tokenizerr   "funasr.tokenizer.phoneme_tokenizerr   (funasr.tokenizer.sentencepiece_tokenizerr   funasr.tokenizer.word_tokenizerr	   strboolr   r   r   r   r   <module>   s>    