o
    $i=
                     @   sb   d Z ddlZddlmZmZmZ erddlmZmZ eddef Z	e
eZde	de	fd	d
ZdS )z'Utility functions for batch processing.    N)TYPE_CHECKINGAnyUnion)PreTrainedTokenizerPreTrainedTokenizerFastr   r   	tokenizerreturnc                    s   t | dd t| dr| j}  du rt | dd t| jt| jt | ddt| G  fddd| j}d| jj |_|| _| S )a  Get tokenizer with cached properties.
    This will patch the tokenizer object in place.
    By default, transformers will recompute multiple tokenizer properties
    each time they are called, leading to a significant slowdown. This
    function caches these properties for faster access.
    Args:
        tokenizer: The tokenizer object.
    Returns:
        The patched tokenizer object.
    chat_templateNr   all_special_tokens_extendedc                       sX   e Zd ZefddZefddZefddZe fddZfd	d
ZdS )z-get_cached_tokenizer.<locals>.CachedTokenizerc                        S N self)tokenizer_all_special_idsr   Z/home/ubuntu/veenaModal/venv/lib/python3.10/site-packages/ray/llm/_internal/batch/utils.pyall_special_ids.      z=get_cached_tokenizer.<locals>.CachedTokenizer.all_special_idsc                    r   r   r   r   )tokenizer_all_special_tokensr   r   all_special_tokens2   r   z@get_cached_tokenizer.<locals>.CachedTokenizer.all_special_tokensc                    r   r   r   r   )%tokenizer_all_special_tokens_extendedr   r   r
   6   r   zIget_cached_tokenizer.<locals>.CachedTokenizer.all_special_tokens_extendedc                    r   r   r   r   )r	   r   r   r	   :   r   z;get_cached_tokenizer.<locals>.CachedTokenizer.chat_templatec                    r   r   r   r   )tokenizer_lenr   r   __len__>   s   z5get_cached_tokenizer.<locals>.CachedTokenizer.__len__N)	__name__
__module____qualname__propertyr   r   r
   r	   r   r   r	   r   r   r   r   r   r   CachedTokenizer-   s    r   Cached)	getattrhasattrr   setr   r   len	__class__r   )r   r   r   r   r   get_cached_tokenizer   s   


r%   )__doc__loggingtypingr   r   r   transformersr   r   AnyTokenizer	getLoggerr   loggerr%   r   r   r   r   <module>   s    
