o
    ei                     @   sN   d dl mZmZmZ d dlmZ ddlmZ ddiZG dd deZ	dgZ
d	S )
    )	Tokenizerdecodersnormalizers)BPE   )TokenizersBackendtokenizer_fileztokenizer.jsonc                       s~   e Zd ZdZeZdZddgZeZ									
dde
ee
ef B dB de
ee
 B dB de
de
de
de
de
f fddZ  ZS )Siglip2TokenizerzN
    Gemma tokenizer + SigLIP2 training default: lowercase normalization.
    left	input_idsattention_maskN<unk><bos><eos><pad><mask>vocabmerges	unk_token	bos_token	eos_token	pad_token
mask_tokenc           
   
      s   |d u rt |dt |dt |dt |dt |di}|| _|p g | _tt| j| jdt |d dd| _ttdd	t	 t
 g| j_td	d| j_t jd|||||d
| t| drpt| jtrp| jd| jj t| dd }	|	d ur|	jd urtt |	jg|	_d S d S d S )Nr         r      T)r   r   fuse_unkr   dropoutbyte_fallbacku   ▁ )r   r   r   r   r   init_kwargstokenizer_class
_tokenizer )str_vocab_mergesr   r   r"   r   SequenceReplaceByteFallbackFusedecoderr   
normalizersuper__init__hasattr
isinstancer    dict
setdefault	__class____name__getattr	Lowercase)
selfr   r   r   r   r   r   r   kwargsbackendr3   r#   n/home/ubuntu/transcripts/venv/lib/python3.10/site-packages/transformers/models/siglip2/tokenization_siglip2.pyr.   (   sL   

zSiglip2Tokenizer.__init__)NNr   r   r   r   r   )r4   
__module____qualname____doc__VOCAB_FILES_NAMESvocab_files_namespadding_sidemodel_input_namesr   modelr$   r1   intlistr.   __classcell__r#   r#   r:   r;   r	      s8    r	   N)
tokenizersr   r   r   tokenizers.modelsr   tokenization_utils_tokenizersr   r?   r	   __all__r#   r#   r#   r;   <module>   s   
A