o
    Ti                     @   sL   d dl Zd dlmZ ddlmZ dd Zdd Zd	d
 Zdd Z	dd Z
dS )    N)logger   )MMapIndexedDatasetBuilderc                 C   s   | dkr|dkrt jS |dkrt jS |dkrt jS t jS |dkr'| dkr't jS |dkr2| dkr2t jS |d	kr=| d
kr=t jS t jS )Nr      i  l       ii  i ii   )	npuint8uint16uint32uint64int8int16int32int64)	min_value	max_value r   g/home/ubuntu/.local/lib/python3.10/site-packages/deepspeed/runtime/data_pipeline/data_sampling/utils.pyfind_fit_int_dtype   s   r   c                    s,   t j| |t|d d  fddt|D S )Nr   )dtypenumc                    s    g | ]} |  |d   fqS )r   r   ).0ipartition_boundariesr   r   
<listcomp>#   s     zsplit_index.<locals>.<listcomp>)r   linspaceintrange)	start_idxend_idxnum_partitionsr   r   r   split_index!   s   r"   c                 C   s4   t dt| |}t || d || d |}||fS )Nr   r   )r"   len)datasetnum_workers	worker_idnum_threadsworker_splitsthread_splitsr   r   r   split_dataset&   s   r*   c                 C   s$   t d|  d t|  d|dS )Nz!Creating mmap dataset builder at .z.bin)r   )r   infor   )fnamer   r   r   r   create_mmap_dataset_builder,   s   r.   c                 C   s.   |    | | d td| d d S )Nz.idxz"Finalized mmap dataset builder at r+   )end_documentfinalizer   r,   )builderr-   r   r   r   close_mmap_dataset_builder1   s   r2   )numpyr   deepspeed.utilsr   indexed_datasetr   r   r"   r*   r.   r2   r   r   r   r   <module>   s   