o
    5ti                     @   s   d dl Z d dlZd dlZd dlZd dlZd dlmZ d dlmZ d dl	Z	d dl
Z
d dlZdedefddZG dd	 d	ZG d
d dZG dd dZG dd dZG dd dZdS )    N)Path)Anyobjreturnc                 C   s&   t | tjfr|  S tdt|  )zAJSON serializer for objects not serializable by default json codezType %s not serializable)
isinstancedatetime	isoformat	TypeErrortype)r    r   T/home/ubuntu/.local/lib/python3.10/site-packages/lm_eval/decontamination/archiver.pyjson_serial   s   r   c                   @   s:   e Zd ZddededdfddZdddd	Zdd
dZdS )Archive   	file_pathcompression_levelr   Nc                 C   sT   || _ tj|}|rtj|dd t| j d| _tj|d| _	| j	
| j| _d S )NTexist_okwb)level)r   ospathdirnamemakedirsopenfh	zstandardZstdCompressorcctxstream_writer
compressor)selfr   r   dir_namer   r   r   __init__   s   zArchive.__init__c                 C   s6   |d u ri }| j tj||dtddd  d S )N)textmeta)defaultUTF-8   
)r    writejsondumpsr   encode)r!   datar%   r   r   r   add_data!   s   zArchive.add_datac                 C   s&   | j tj | j  | j  d S N)r    flushr   FLUSH_FRAMEr   closer!   r   r   r   commit+   s   
zArchive.commit)r   r/   r   N)__name__
__module____qualname__strintr#   r.   r4   r   r   r   r   r      s    	
r   c                   @   s4   e Zd ZdddZ			dded	ed
efddZdS )Readerr   Nc                 C   s   d S r/   r   r3   r   r   r   r#   3   s   zReader.__init__FT

get_metaautojoin_paragraphspara_joinerc                 c   s    t |dR}|| _t }t||}t|}|D ]2}	t	|	t
r,|r(J |	V  q|	d }
|r<t	|
tr<||
}
|rL|
d|	v rG|	d ni fV  q|
V  qW d    d S 1 s[w   Y  d S )Nrbr$   r%   )r   r   r   ZstdDecompressorioBufferedReaderstream_reader	jsonlinesr;   r   r9   listjoin)r!   filer=   r>   r?   r   r   readerrdrobr$   r   r   r   read6   s&   


"zReader.readr5   )FTr<   )r6   r7   r8   r#   boolr9   rL   r   r   r   r   r;   2   s    
r;   c                   @   s4   e Zd ZddeddfddZdddZdd	d
ZdS )TextArchiverb+moder   Nc                 C   sN   || _ tj|}|rtj|dd tj|st|  t| j || _	d S )NTr   )
r   r   r   r   r   existsr   touchr   r   )r!   r   rP   r"   r   r   r   r#   U   s   zTextArchive.__init__c                 C   s   | j |dd  d S )Nr'   r(   )r   r)   r,   )r!   r-   r   r   r   r.   `   s   zTextArchive.add_datac                 C   s   | j   | j   d S r/   )r   r0   r2   r3   r   r   r   r4   c   s   
zTextArchive.commit)rO   r5   )r6   r7   r8   r9   r#   r.   r4   r   r   r   r   rN   T   s    
rN   c                   @   s>   e Zd ZdddZddefddZd	d
 Zdd Zdd ZdS )
TextReaderr   Nc                 C   
   || _ d S r/   )r   )r!   r   r   r   r   r#   i      
zTextReader.__init__'  update_frequencyc           
   
   c   s   d}d}t | jdddx}tjtj| jddddQ}tj| dtjd	6}t	|j
d
D ]'}|d}|d7 }||krO| }|| }	|}||	 d}|d d V  q/W d    n1 saw   Y  W d    n1 spw   Y  W d    d S W d    d S 1 sw   Y  d S )Nr   rutf-8encodingTbyte   )totaldynamic_ncolsunit
unit_scalelengthaccess    )r   r   tqdmr   r   getsizemmapfilenoACCESS_READiterreadlinedecodetellupdate)
r!   rW   current_file_positionline_counterr   progressmmap_objlinenew_file_pos
bytes_readr   r   r   	read_tqdmn   s:   

PzTextReader.read_tqdmc              	   c   s    d}t | jdddM}tj| dtjd)}t|jdD ]}|d}| }|| }|}|d d |fV  qW d    n1 sCw   Y  W d    d S W d    d S 1 s[w   Y  d S )	Nr   rX   utf8rZ   rb   re   rY   rf   )	r   r   ri   rj   rk   rl   rm   rn   ro   )r!   rq   r   rt   ru   rv   raw_bytes_readr   r   r   read_and_tell   s   
"zTextReader.read_and_tellc              	   c   s    t | jdddA}tj| dtjd}t|jdD ]}|d}|d d V  qW d    n1 s5w   Y  W d    d S W d    d S 1 sMw   Y  d S )	NrX   ry   rZ   r   rb   re   rY   rf   )r   r   ri   rj   rk   rl   rm   rn   )r!   r   rt   ru   r   r   r   rL      s   
"zTextReader.readc                 c   sd    t | jddd}	 | }|dks|dkrn|d d V  qW d    d S 1 s+w   Y  d S )NrX   ry   rZ   Trf    )r   r   rm   )r!   r   ru   r   r   r   	read_slow   s   "zTextReader.read_slowr5   )rV   )	r6   r7   r8   r#   r:   rx   r{   rL   r}   r   r   r   r   rS   h   s    
rS   c                   @   s   e Zd ZdddZdd ZdS )ZStdTextReaderr   Nc                 C   rT   r/   )rH   )r!   rH   r   r   r   r#      rU   zZStdTextReader.__init__c                 c   sN    | j d d }td td| j   t|}| E d H  t| d S )Nz"Decompressing file, please wait...zzstd -d )rH   printr   systemrS   rx   remove)r!   decompressed_filerI   r   r   r   rx      s   zZStdTextReader.read_tqdmr5   )r6   r7   r8   r#   rx   r   r   r   r   r~      s    
r~   )r   rB   r*   ri   r   pathlibr   typingr   rE   rg   r   r9   r   r   r;   rN   rS   r~   r   r   r   r   <module>   s     	"<