o
    i                     @   s   d dl Z d dlmZmZ d dlZd dlmZ ddlm	Z	 ddl
mZmZ ddlmZ e r3d d	lmZ ed
dG dd de	jjZdgZdS )    N)OptionalUnion)pad_model_inputs   )keras)is_keras_nlp_availablerequires   )GPT2Tokenizer)BytePairTokenizer)	keras_nlp)backendsc                
       s   e Zd ZdZ		ddeeef dee dee dee f fddZ	e
d	efd
dZe
deeejf fddZe
dd Zdd Zddee fddZ  ZS )TFGPT2Tokenizera7  
    This is an in-graph tokenizer for GPT2. It should be initialized similarly to other tokenizers, using the
    `from_pretrained()` method. It can also be initialized with the `from_tokenizer()` method, which imports settings
    from an existing standard tokenizer object.

    In-graph tokenizers, unlike other Hugging Face tokenizers, are actually Keras layers and are designed to be run
    when the model is called, rather than during preprocessing. As a result, they have somewhat more limited options
    than standard tokenizer classes. They are most useful when you want to create an end-to-end model that goes
    straight from `tf.string` inputs to outputs.

    Args:
        vocab (dict[str, int]): Vocabulary dict for Byte Pair Tokenizer
        merges (list[str]): Merges list for Byte Pair Tokenizer
    Nvocabmerges
max_lengthpad_token_idc                    s6   t    || _|| _|| _|| _t|||d| _d S )N)sequence_length)super__init__r   r   r   r   r   tf_tokenizer)selfr   r   r   r   	__class__ a/home/ubuntu/.local/lib/python3.10/site-packages/transformers/models/gpt2/tokenization_gpt2_tf.pyr   !   s   
zTFGPT2Tokenizer.__init__	tokenizerc                 O   s0   dd |j D }| }| ||g|R i |S )ag  Creates TFGPT2Tokenizer from GPT2Tokenizer

        Args:
            tokenizer (GPT2Tokenizer)

        Examples:

        ```python
        from transformers import AutoTokenizer, TFGPT2Tokenizer

        tokenizer = AutoTokenizer.from_pretrained("openai-community/gpt2")
        tf_tokenizer = TFGPT2Tokenizer.from_tokenizer(tokenizer)
        ```
        c                 S   s   g | ]}d  |qS ) )join).0mr   r   r   
<listcomp>@   s    z2TFGPT2Tokenizer.from_tokenizer.<locals>.<listcomp>)	bpe_ranks	get_vocab)clsr   argskwargsr   r   r   r   r   from_tokenizer0   s   zTFGPT2Tokenizer.from_tokenizerpretrained_model_name_or_pathc                 O   s0   t j|g|R i |}| j|g|R i |S )a_  Creates TFGPT2Tokenizer from pretrained GPT2Tokenizer

        Args:
            pretrained_model_name_or_path (Union[str, os.PathLike]): Path to pretrained model

        Examples:

        ```python
        from transformers import TFGPT2Tokenizer

        tf_tokenizer = TFGPT2Tokenizer.from_pretrained("openai-community/gpt2")
        ```
        )r
   from_pretrainedr'   )r$   r(   init_inputsr&   r   r   r   r   r)   D   s   zTFGPT2Tokenizer.from_pretrainedc                 C   s   | di |S )zCreates TFGPT2Tokenizer from configurations

        Args:
            config (Dict): Dictionary with keys such as stated in `get_config`.
        Nr   r   )r$   configr   r   r   from_configV   s   zTFGPT2Tokenizer.from_configc                 C   s   | j | j| j| jdS )Nr   r   r   r   r-   )r   r   r   r   
get_config_   s
   zTFGPT2Tokenizer.get_configc                 C   sV   |  |}t|}| jd ur&|d ur|n| j}|d ur&t||| jd\}}||dS )N)max_seq_length	pad_value)attention_mask	input_ids)r   tf	ones_liker   r   r   )r   xr   r2   r1   r   r   r   callg   s   




zTFGPT2Tokenizer.call)NN)N)__name__
__module____qualname____doc__dictstrintlistr   r   classmethodr
   r'   r   osPathLiker)   r,   r.   r6   __classcell__r   r   r   r   r      s*    

r   )r@   typingr   r   
tensorflowr3   tensorflow_textr   modeling_tf_utilsr   utils.import_utilsr   r   tokenization_gpt2r
   keras_nlp.tokenizersr   layersLayerr   __all__r   r   r   r   <module>   s    
f