o
    i                     @   sf   d Z ddlZddlZddlmZ ddlmZ ddlmZ ddl	m
Z
 eeZG d	d
 d
Zd
gZdS )zTokenization classes for RAG.    N)Optional   )BatchEncoding)logging   )	RagConfigc                   @   s   e Zd Zdd Zdd Zedd Zdd Zd	d
 Zdd Z	dd Z
dd Z						ddee deee  dee dee dedee dedefddZdS )RagTokenizerc                 C   s   || _ || _| j | _d S N)question_encoder	generatorcurrent_tokenizer)selfr
   r    r   e/home/ubuntu/veenaModal/venv/lib/python3.10/site-packages/transformers/models/rag/tokenization_rag.py__init__   s   zRagTokenizer.__init__c                 C   sb   t j|rtd| dt j|dd t j|d}t j|d}| j| | j| d S )NzProvided path (z#) should be a directory, not a fileT)exist_okquestion_encoder_tokenizergenerator_tokenizer)	ospathisfile
ValueErrormakedirsjoinr
   save_pretrainedr   )r   save_directoryquestion_encoder_pathgenerator_pathr   r   r   r   #   s   zRagTokenizer.save_pretrainedc                 K   sZ   ddl m} |dd }|d u rt|}|j||jdd}|j||jdd}| ||dS )N   )AutoTokenizerconfigr   )r    	subfolderr   )r
   r   )auto.tokenization_autor   popr   from_pretrainedr
   r   )clspretrained_model_name_or_pathkwargsr   r    r
   r   r   r   r   r$   ,   s   
zRagTokenizer.from_pretrainedc                 O   s   | j |i |S r	   )r   r   argsr'   r   r   r   __call__>   s   zRagTokenizer.__call__c                 O      | j j|i |S r	   )r   batch_decoder(   r   r   r   r,   A      zRagTokenizer.batch_decodec                 O   r+   r	   )r   decoder(   r   r   r   r.   D   r-   zRagTokenizer.decodec                 C      | j | _d S r	   )r
   r   r   r   r   r   _switch_to_input_modeG      z"RagTokenizer._switch_to_input_modec                 C   r/   r	   )r   r   r0   r   r   r   _switch_to_target_modeJ   r2   z#RagTokenizer._switch_to_target_modeNlongestT	src_texts	tgt_texts
max_lengthmax_target_lengthpaddingreturn_tensors
truncationreturnc              	   K   s   t dt |d u r| jj}| |fd||||d|}	|d u r"|	S |d u r*| jj}| d|d||||d|}
|
d |	d< |	S )Nu4  `prepare_seq2seq_batch` is deprecated and will be removed in version 5 of 🤗 Transformers. Use the regular `__call__` method to prepare your inputs and the tokenizer under the `with_target_tokenizer` context manager to prepare your targets. See the documentation of your specific tokenizer for more detailsT)add_special_tokensr:   r7   r9   r;   )text_targetr=   r:   r9   r7   r;   	input_idslabelsr   )warningswarnFutureWarningr   model_max_length)r   r5   r6   r7   r8   r9   r:   r;   r'   model_inputsr@   r   r   r   prepare_seq2seq_batchM   sB   		z"RagTokenizer.prepare_seq2seq_batch)NNNr4   NT)__name__
__module____qualname__r   r   classmethodr$   r*   r,   r.   r1   r3   liststrr   intboolr   rF   r   r   r   r   r      sB    	


r   )__doc__r   rA   typingr   tokenization_utils_baser   utilsr   configuration_ragr   
get_loggerrG   loggerr   __all__r   r   r   r   <module>   s   

_