o
    ॵi{                     @   s   d dl Z G dd deZdS )    Nc                   @   s0   e Zd ZdZdd fddZdd Zdd	 Zd
S )LazyDatasetz]
    Lazy load dataset from disk.

    Each line of data file is a preprocessed example.
    c                 C   s
   t | S N)jsonloads)s r   c/home/ubuntu/.local/lib/python3.10/site-packages/modelscope/preprocessors/nlp/space/lazy_dataset.py<lambda>   s   
 zLazyDataset.<lambda>c                 C   s   || _ || _|| _dg| _t|ddd}| dkr)| j|  | dksW d   n1 s3w   Y  | j  t|ddd| _	dS )z
        Initialize lazy dataset.

        By default, loading .jsonl format.

        :param data_file
        :type str

        :param transform
        :type callable
        r   rzutf-8)encoding N)
	data_file	transformreaderoffsetsopenreadlineappendtellpopfp)selfr   r   r   r   r   r   r   __init__   s   
zLazyDataset.__init__c                 C   s
   t | jS r   )lenr   )r   r   r   r   __len__"   s   
zLazyDataset.__len__c                 C   s@   | j | j| d | | j   }| jjr| j|}|S )Nr   )	r   seekr   r   r   stripr   with_mlm"create_token_masked_lm_predictions)r   idxsampler   r   r   __getitem__%   s
   zLazyDataset.__getitem__N)__name__
__module____qualname____doc__r   r   r!   r   r   r   r   r      s
    r   )r   objectr   r   r   r   r   <module>   s   