o
    ॵi-                     @   s   d dl Z d dlZd dlZd dlmZ d dlmZ d dlm	Z	 d dl
mZmZmZ dd Zdd	 ZG d
d deZG dd dZdS )    N)str2bool)batch)LazyDataset)RandomSamplerSequentialSamplerSortedSamplerc                 C   s>   t j|sJ | dt||d}t|| |j||d}|S )Nz doesn't exist)reader)
collate_fnis_test)ospathexistsr   
DataLoaderTrainer)
batch_sizer   hparamsfiler	   r
   datasetdata_loader r   b/home/ubuntu/.local/lib/python3.10/site-packages/modelscope/preprocessors/nlp/space/data_loader.pyget_data_loader   s   r   c           
      C   sV   g }|D ] }t j|| d|jj d}|t| |||||dkd qt|}	|	S )N.z.jsonltrain)r   r   r   r   r	   r
   )r   r   joinBPETextFieldtokenizer_typeappendr   SequentialDataLoaderWrapper)
r   r   r   
data_pathsr	   	data_typedata_loaders	data_pathr   r   r   r   r   get_sequential_data_loader   s$   r#   c                   @   s<   e Zd ZdZedd Z			dddZdd	 Zd
d ZdS )r   z Implement of DataLoader. c                 C   s$   |j dtdd |j dtdd |S )Nz	--shuffleT)typedefaultz--sort_pool_sizer   )add_argumentr   int)clsgroupr   r   r   add_cmdline_argument2   s   zDataLoader.add_cmdline_argumentNFc           	         s   || _ || _|j| _|j| _ d u r |jr|st| nt| | jdkr-|s-t | j  fdd}| jdks:|r<dnd}t|||d| _	|rTt
t|| | _d S t
t|| | _d S )Nr   c                  3   s     D ]} | V  qd S Nr   )idxsamplerr   r   r   M   s   z#DataLoader.__init__.<locals>.reader   FT)r   	drop_last)r   r	   gpusort_pool_sizeshuffler   r   r   r   r   mathfloorlenceilnum_batches)	selfr   r   r   r	   r.   r
   r   r0   r   r-   r   __init__8   s    

zDataLoader.__init__c                 C   s   | j S r+   )r8   r9   r   r   r   __len__V   s   zDataLoader.__len__c                 #   s2       D ]} fdd|D } |V  qd S )Nc                    s   g | ]} j | qS r   )r   ).0r,   r;   r   r   
<listcomp>[   s    z'DataLoader.__iter__.<locals>.<listcomp>)r   r	   )r9   batch_indicessamplesr   r;   r   __iter__Y   s
   zDataLoader.__iter__)NNF)	__name__
__module____qualname____doc__classmethodr*   r:   r<   rA   r   r   r   r   r   /   s    
	
r   c                   @   s$   e Zd Zdd Zdd Zdd ZdS )r   c                 C   s   || _ dd | j D | _d S )Nc                 S   s   i | ]}|j j|j qS r   )r   	data_filer=   r   r   r   r   
<dictcomp>c   s    
z8SequentialDataLoaderWrapper.__init__.<locals>.<dictcomp>)r!   data_file_to_dataset)r9   r!   r   r   r   r:   a   s   z$SequentialDataLoaderWrapper.__init__c                 c   s*    | j D ]}|D ]	}|jj|fV  qqd S r+   )r!   r   rG   )r9   r   	tmp_batchr   r   r   rA   h   s   
z$SequentialDataLoaderWrapper.__iter__c                 C   s   t dd | jD S )Nc                 S   s   g | ]}t |qS r   )r6   rH   r   r   r   r>   n   s    z7SequentialDataLoaderWrapper.__len__.<locals>.<listcomp>)npsumr!   r;   r   r   r   r<   m   s   z#SequentialDataLoaderWrapper.__len__N)rB   rC   rD   r:   rA   r<   r   r   r   r   r   _   s    r   )r4   r   numpyrL   'modelscope.preprocessors.nlp.space.argsr   (modelscope.preprocessors.nlp.space.batchr   /modelscope.preprocessors.nlp.space.lazy_datasetr   *modelscope.preprocessors.nlp.space.samplerr   r   r   r   r#   objectr   r   r   r   r   r   <module>   s   0