o
    -i]                     @   sl   d dl mZ d dlmZmZ d dlmZ d dlmZ ddl	m
Z
 ddlmZ ddlmZ G d	d
 d
eZdS )    )Path)Anyoverload)BatchEncoding)ChatCompletionMessageParam   )encode_messages)CachedHfTokenizer)TokenizerLikec                       s  e Zd ZedddddeeB dededB dedB dd	f
 fd
dZdeddf fddZ		dHde
d de
eeef  dB dee
e B fddZdefddZede
e fddZede
e fddZedefddZedefddZedefddZedefd d!Zedefd"d#Zedefd$d%Zedefd&d'Zdefd(d)Zdefd*d+Z		,		dId-ee
e B d.edB d/ed0ed1edB dd2fd3d4Zdeeef fd5d6Zdeeef fd7d8Z			,dJd-ed0edB d1edB d/ede
e f
d9d:Ze d;edefd<d=Z!e d;e
e de
e fd>d=Z!d;ee
e B dee
e B fd?d=Z!d;e
e defd@dAZ"dKdBe
e eB dCedefdDdEZ#	dKdBe
e dCede
e fdFdGZ$  Z%S )LDeepseekV32TokenizerFNtrust_remote_coderevisiondownload_dirpath_or_repo_idr   r   r   returnr
   c                   s*   t  j|g|R |||d|}t|S )Nr   )superfrom_pretrainedr   )clsr   r   r   r   argskwargs	tokenizer	__class__ Y/home/ubuntu/veenaModal/venv/lib/python3.10/site-packages/vllm/tokenizers/deepseek_v32.pyr      s   
z$DeepseekV32Tokenizer.from_pretrainedr   c                    s:   t    || _t|dd| _| j | _t| j| _d S )Nname_or_path )	r   __init__r   getattrr   get_added_vocab_added_vocablen_added_vocab_size)selfr   r   r   r   r   %   s
   
zDeepseekV32Tokenizer.__init__messagesr   toolsc                    s     dd}  dd}|p|}d}|sd}  d|}| }|d ur8t|dkr8|dddi ||d d	< |d
 d dk}t||d}	t|fi |	}
  ddrh fdddD }| j|
fddi|S |
S )NthinkingFenable_thinkingchatconversationr   rolesystemr&   user)thinking_modedrop_thinkingtokenizeTc                    s   i | ]}| v r| | qS r   r   ).0kr   r   r   
<dictcomp>H   s    z<DeepseekV32Tokenizer.apply_chat_template.<locals>.<dictcomp>)
truncation
max_lengthadd_special_tokens)getcopyr"   insertdictr   encode)r$   r%   r&   r   r'   r(   r/   r*   r0   encode_config
prompt_strtokenizer_kwargsr   r4   r   apply_chat_template.   s4   
z(DeepseekV32Tokenizer.apply_chat_templatec                 C   s   t | dS )Nr   )r"   r=   r$   r   r   r   num_special_tokens_to_addS   s   z.DeepseekV32Tokenizer.num_special_tokens_to_addc                 C      | j jS N)r   all_special_tokensrB   r   r   r   rF   V      z'DeepseekV32Tokenizer.all_special_tokensc                 C   rD   rE   )r   all_special_idsrB   r   r   r   rH   Z   rG   z$DeepseekV32Tokenizer.all_special_idsc                 C   rD   rE   )r   bos_token_idrB   r   r   r   rI   ^   rG   z!DeepseekV32Tokenizer.bos_token_idc                 C   rD   rE   )r   eos_token_idrB   r   r   r   rJ   b   rG   z!DeepseekV32Tokenizer.eos_token_idc                 C   rD   rE   )r   pad_token_idrB   r   r   r   rK   f   rG   z!DeepseekV32Tokenizer.pad_token_idc                 C   rD   rE   )r   is_fastrB   r   r   r   rL   j   rG   zDeepseekV32Tokenizer.is_fastc                 C   rD   rE   )r   
vocab_sizerB   r   r   r   rM   n   rG   zDeepseekV32Tokenizer.vocab_sizec                 C   rD   rE   )r   max_token_idrB   r   r   r   rN   r   rG   z!DeepseekV32Tokenizer.max_token_idc                 C   rD   rE   )r   truncation_siderB   r   r   r   rO   v   rG   z$DeepseekV32Tokenizer.truncation_sidec                 C   s   t t| S rE   )hashidrB   r   r   r   __hash__z      zDeepseekV32Tokenizer.__hash__c                 C   s   | j | j S rE   )rM   r#   rB   r   r   r   __len__}   s   zDeepseekV32Tokenizer.__len__Ttext	text_pairr8   r6   r7   r   c                 C   s   | j |||||dS )N)rV   r8   r6   r7   )r   )r$   rU   rV   r8   r6   r7   r   r   r   __call__   s   zDeepseekV32Tokenizer.__call__c                 C   
   | j  S rE   )r   	get_vocabrB   r   r   r   rY         
zDeepseekV32Tokenizer.get_vocabc                 C   rX   rE   )r!   r:   rB   r   r   r   r       rZ   z$DeepseekV32Tokenizer.get_added_vocabc                 C   s   | j j||||dS )N)r6   r7   r8   )r   r=   )r$   rU   r6   r7   r8   r   r   r   r=      s   zDeepseekV32Tokenizer.encodetokensc                 C      d S rE   r   r$   r[   r   r   r   convert_tokens_to_ids      z*DeepseekV32Tokenizer.convert_tokens_to_idsc                 C   r\   rE   r   r]   r   r   r   r^      r_   c                 C      | j |S rE   )r   r^   r]   r   r   r   r^      rS   c                 C   r`   rE   )r   convert_tokens_to_stringr]   r   r   r   ra      rS   z-DeepseekV32Tokenizer.convert_tokens_to_stringidsskip_special_tokensc                 C      | j j||dS N)rc   )r   decoder$   rb   rc   r   r   r   rf      s   zDeepseekV32Tokenizer.decodec                 C   rd   re   )r   convert_ids_to_tokensrg   r   r   r   rh      s   z*DeepseekV32Tokenizer.convert_ids_to_tokensrE   )NTFN)NNT)F)&__name__
__module____qualname__classmethodstrr   boolr   r
   r   listr<   r   intrA   rC   propertyrF   rH   rI   rJ   rK   rL   rM   rN   rO   rR   rT   rW   rY   r    r=   r   r^   ra   rf   rh   __classcell__r   r   r   r   r      s    

%


" r   N)pathlibr   typingr   r   transformersr   vllm.entrypoints.chat_utilsr   deepseek_v32_encodingr   hfr	   protocolr
   r   r   r   r   r   <module>   s   