o
    ¾e¦iš  ã                   @   sF   d Z ddlmZ ddlmZ e e¡ZG dd„ deƒZeZ	ddgZ
dS )	z$Tokenization classes for FNet model.é   )Úloggingé   )ÚAlbertTokenizerc                   @   s   e Zd ZdZddgZdS )ÚFNetTokenizeraz  
    Construct an FNet tokenizer. Based on [Unigram](https://huggingface.co/docs/tokenizers/python/latest/components.html?highlight=unigram#models).

    This tokenizer inherits from [`AlbertTokenizer`] which contains most of the main methods. Users should refer to
    this superclass for more information regarding those methods.

    Args:
        do_lower_case (`bool`, *optional*, defaults to `True`):
            Whether or not to lowercase the input when tokenizing.
        keep_accents (`bool`, *optional*, defaults to `False`):
            Whether or not to keep accents when tokenizing.
        bos_token (`str`, *optional*, defaults to `"[CLS]"`):
            The beginning of sequence token that was used during pretraining. Can be used a sequence classifier token.
        eos_token (`str`, *optional*, defaults to `"[SEP]"`):
            The end of sequence token.
        unk_token (`str`, *optional*, defaults to `"<unk>"`):
            The unknown token. A token that is not in the vocabulary cannot be converted to an ID and is set to be this
            token instead.
        sep_token (`str`, *optional*, defaults to `"[SEP]"`):
            The separator token, which is used when building a sequence from multiple sequences, e.g. two sequences for
            sequence classification or for a text and a question for question answering. It is also used as the last
            token of a sequence built with special tokens.
        pad_token (`str`, *optional*, defaults to `"<pad>"`):
            The token used for padding, for example when batching sequences of different lengths.
        cls_token (`str`, *optional*, defaults to `"[CLS]"`):
            The classifier token which is used when doing sequence classification (classification of the whole sequence
            instead of per-token classification). It is the first token of the sequence when built with special tokens.
        mask_token (`str`, *optional*, defaults to `"[MASK]"`):
            The token used for masking values. This is the token used when training this model with masked language
            modeling. This is the token which the model will try to predict.
    Ú	input_idsÚtoken_type_idsN)Ú__name__Ú
__module__Ú__qualname__Ú__doc__Úmodel_input_names© r   r   úh/home/ubuntu/transcripts/venv/lib/python3.10/site-packages/transformers/models/fnet/tokenization_fnet.pyr      s     r   ÚFNetTokenizerFastN)r   Úutilsr   Úalbert.tokenization_albertr   Ú
get_loggerr   Úloggerr   r   Ú__all__r   r   r   r   Ú<module>   s   
%