o
    i	                  
   @   s  d dl mZ d dlmZ d dlmZmZ ddlmZm	Z	m
Z
 dZdZd ZdZdZdZdZd Zd	efd
dZedddd Zde
fddZdede	dedee	ef fddZd	edefddZedddd Zde
fddZdede	dedee	ef fddZd S )!    )	lru_cache)Tuple)ModelRagged   )PieceAdapterBackpropTPieceAdapterInOutTPieceAdapterModelT         piece_idc                 C   s,   | t krtS | tkrtS | tkrtS | t S N)_SPP_UNK_FAIRSEQ_UNK_SPP_BOS_FAIRSEQ_BOS_SPP_EOS_FAIRSEQ_EOS_FAIRSEQ_OFFSETr    r   r/home/ubuntu/.local/lib/python3.10/site-packages/spacy_curated_transformers/tokenization/sentencepiece_adapters.py_update_to_fairseq   s   r      )maxsizec                 C   
   |  tS r   )	vectorizer   xpr   r   r   _update_to_fairseq_vectorized      
r    returnc                   C      t dtdS )zqAlign the original fairseq vocab used by pre-trained HF transformer
    models with the sentencepiece vocabulary.xlmr_adapterforward)r   xlmr_adapter_forwardr   r   r   r   build_xlmr_adapter#   s   r(   modelXis_trainc                 C   @   t | jj}g }|D ]}|t||j|jd q
|dd fS )Ndatalengthsc                 S      g S r   r   dYr   r   r   <lambda>:       z&xlmr_adapter_forward.<locals>.<lambda>)r    opsr   appendr   dataXdr/   r)   r*   r+   update_to_fairseqX_xlmrtokens_piecesr   r   r   r'   -      r'   c                 C   s   | t krtS | t S r   )r   r   _CAMEMBERT_FAIRSEQ_OFFSETr   r   r   r   _camembert_update_to_fairseq=   s   r>   c                 C   r   r   )r   r>   r   r   r   r   '_camembert_update_to_fairseq_vectorizedD   r!   r?   c                   C   r#   )zzAlign the original fairseq vocab used by pre-trained Camembert
    HF transformer model with the sentencepiece vocabulary.camembert_adapterr%   )r   camembert_adapter_forwardr   r   r   r   build_camembert_adapterI   s   rB   c                 C   r,   )Nr-   c                 S   r0   r   r   r1   r   r   r   r3   _   r4   z+camembert_adapter_forward.<locals>.<lambda>)r?   r5   r   r6   r   r7   r/   r8   r   r   r   rA   R   r<   rA   N)	functoolsr   typingr   	thinc.apir   r   typesr   r   r	   r   r=   r   r   r   r   r   r   intr   r    r(   boolr'   r>   r?   rB   rA   r   r   r   r   <module>   sL    




	
