o
    ٷi                     @  sp   d dl mZ d dlZd dlZd dlZd!ddZd"ddZd#d$ddZ			d%d&ddZd'ddZ	d#d(dd Z
dS ))    )annotationsNblobpathstrreturnbytesc              
   C  s   d| vrt | ddd}| W  d    S 1 sw   Y  | dr4dd l}|| }|  |jS zdd l}W n tyK } ztd|d }~ww |	| d}| W  d    S 1 saw   Y  d S )Nz://rbr   	buffering)zhttp://zhttps://Oblobfile is not installed. Please install it by running `pip install blobfile`.)
openread
startswithrequestsgetraise_for_statuscontentblobfileImportErrorBlobFile)r   fr   respr   e r   A/home/ubuntu/.local/lib/python3.10/site-packages/tiktoken/load.py	read_file   s,    

$r   dataexpected_hashboolc                 C  s   t |  }||kS N)hashlibsha256	hexdigest)r   r   actual_hashr   r   r   
check_hash   s   r#   
str | Nonec                 C  s  d}dt jv rt jd }ndt jv rt jd }ndd l}t j| d}d}|dkr/t| S t| 	 
 }t j||}t j|r|t|ddd	}| }W d    n1 s[w   Y  |d u sit||rk|S zt | W n	 ty{   Y nw t| }	|rt|	|std
|  d| ddd l}
z6t j|dd |d t|
  d }t|d}||	 W d    n1 sw   Y  t || W |	S  ty   |rւ Y |	S w )NTTIKTOKEN_CACHE_DIRDATA_GYM_CACHE_DIRr   zdata-gym-cacheF r   r   z'Hash mismatch for data downloaded from z (expected z<). This may indicate a corrupted download. Please try again.)exist_ok.z.tmpwb)osenvirontempfilepathjoin
gettempdirr   r   sha1encoder!   existsr   r   r#   removeOSError
ValueErroruuidmakedirsr   uuid4writerename)r   r   user_specified_cache	cache_dirr-   	cache_key
cache_pathr   r   contentsr7   tmp_filenamer   r   r   read_file_cached$   sV   


rB   Fvocab_bpe_fileencoder_json_filevocab_bpe_hashencoder_json_hashclobber_one_byte_tokensdict[bytes, int]c                   sf  dd t dD }dd |D  d}t dD ]}||vr-|| | td| < |d7 }qt|dks6J t| | }dd |d	dd
 D }	d fdddd t|D }
~t|
}|	D ]\}}||
|| < |d7 }qbdd l}|	t||}fdd|
 D }|dd  |dd  |r|D ]}t|dkr|| |
|< q|
|ksJ |
S )Nc                 S  s(   g | ]}t | rt |d kr|qS ) )chrisprintable.0br   r   r   
<listcomp>b   s   ( z3data_gym_to_mergeable_bpe_ranks.<locals>.<listcomp>   c                 S  s   i | ]}t ||qS r   )rJ   rL   r   r   r   
<dictcomp>d   s    z3data_gym_to_mergeable_bpe_ranks.<locals>.<dictcomp>r      c                 S  s   g | ]}t | qS r   )tuplesplit)rM   	merge_strr   r   r   rO   o   s    
valuer   r   r   c                   s   t  fdd| D S )Nc                 3  s    | ]} | V  qd S r   r   rL   data_gym_byte_to_byter   r   	<genexpr>r   s    zKdata_gym_to_mergeable_bpe_ranks.<locals>.decode_data_gym.<locals>.<genexpr>r   )rX   rY   r   r   decode_data_gymq   s   z8data_gym_to_mergeable_bpe_ranks.<locals>.decode_data_gymc                 S  s   i | ]
\}}t |g|qS r   r\   )rM   irN   r   r   r   rQ   v   s    c                   s   i | ]	\}} ||qS r   r   )rM   kv)r]   r   r   rQ      s    s   <|endoftext|>s   <|startoftext|>)rX   r   r   r   )rangeappendrJ   lenrB   decoderT   	enumeratejsonloadsitemspop)rC   rD   rE   rF   rG   rank_to_intbytenrN   vocab_bpe_contents
bpe_merges	bpe_ranksfirstsecondrf   encoder_jsonencoder_json_loadedr_   r   )rZ   r]   r   data_gym_to_mergeable_bpe_ranksZ   s>   

rs   rn   tiktoken_bpe_fileNonec              
   C  s   zdd l }W n ty } ztd|d }~ww ||d*}t|  dd dD ]\}}|t|d t|	  d  q)W d    d S 1 sKw   Y  d S )	Nr   r
   r*   c                 S  s   | d S )NrR   r   )xr   r   r   <lambda>   s    z#dump_tiktoken_bpe.<locals>.<lambda>)key       
)
r   r   r   sortedrh   r:   base64	b64encoder   r2   )rn   rt   r   r   r   tokenrankr   r   r   dump_tiktoken_bpe   s   &"r   c                 C  sv   t | |}i }| D ]-}|sqz| \}}t||t|< W q ty8 } ztd|d|  |d }~ww |S )NzError parsing line z in )rB   
splitlinesrT   intr|   	b64decode	Exceptionr6   )rt   r   r@   retliner~   r   r   r   r   r   load_tiktoken_bpe   s   
r   )r   r   r   r   )r   r   r   r   r   r   r   )r   r   r   r$   r   r   )NNF)rC   r   rD   r   rE   r$   rF   r$   rG   r   r   rH   )rn   rH   rt   r   r   ru   )rt   r   r   r$   r   rH   )
__future__r   r|   r   r+   r   r#   rB   rs   r   r   r   r   r   r   <module>   s    

9
: