o
    Ti&                     @   sV  d Z ddlZddlZddlZddlZddlZddlZddlmZ ddlm	Z	 ddl
mZmZ ddlmZmZmZmZmZ dZd	Zd
dgddgdgdgddgdZG dd deZdd Zdd Zdd Zdd Zdd Zdd Zdd  Zd!d" Zd#d$ Z d%d& Z!d'd( Z"d)d* Z#d+d, Z$d-d. Z%d/d0 Z&d1d2 Z'd3d4 Z(d5d6 Z)e*d7kre(  dS dS )8zM
Functionality of swapping optimizer tensors to/from (NVMe) storage devices.
    N)AsyncIOBuilder)
GDSBuilder   )Jobrun_job)READ_OP_DESCWRITE_OP_DESCBENCH_LOG_DIRREAD_LOG_DIRWRITE_LOG_DIRz--handleds_io1M8M       F   )
block_sizequeue_depthsequential_requestssingle_submitio_parallelc                   @   s   e Zd Zdd ZdS )SweepConfigc                 C   s   t |j| _t|j| _| j| j |j | _|j	 | _
|j| _|j| _|j| _t d|j d|j | _|jr@|  jd7  _|jrL|  jd7  _d S d S )Nz	 --loops z --io_size z --gpuz
 --use_gds)get_ftd_mapnvme_dirfolder_to_device_mappingget_sweep_config_dictsweep_configsearch_spaceupdateno_readreadno_writewriteflush_page_cacheflush_cachelog_dirverboseOTHER_OPTIONSloopsio_sizeother_optionsgpugds)selfargs r/   Q/home/ubuntu/.local/lib/python3.10/site-packages/deepspeed/nvme/perf_run_sweep.py__init__"   s   

zSweepConfig.__init__N)__name__
__module____qualname__r1   r/   r/   r/   r0   r       s    r   c                 C   sB   t  sd}t| t  | jrt sd}t| t  d S d S d S )Nz
            Failing because environment is not properly configured for deepspeed async i/o module.
            Possible fix: apt install libaio-dev.
        zl
            Failing because environment is not properly configured for deepspeed GDS I/O operator.
        )async_io_setupprintquitr,   gds_io_setup)r.   	error_msgr/   r/   r0   validate_arguments2   s   
r:   c               	   C   s   t  } | jddddd | jdtd dd | jd	d
dd | jdd
dd | jdtddd | jdd
dd | jdd
dd | jdd
dd | jdttdtjdt d | jdtddd | jdd
dd | 	 }|j
rwtd |  t| |S )!Nz
--nvme_dir+TzPDirectory in which to perform I/O tests. A writeable directory on a NVMe device.)nargsrequiredhelpz--sweep_configz*Performance sweep configuration json file.)typedefaultr>   z	--no_read
store_truez&Disable read performance measurements.)actionr>   z
--no_writez'Disable write performance measurements.z	--io_size400Mz?Number of I/O bytes to read/write for performance measurements.z--gpuz9Test tensor transfers between GPU device and NVME device.z--gdsz3Run the sweep over NVIDIA GPUDirectStorage operatorz--flush_page_cachezmPage cache will not be flushed and reported read speeds may be higher than actual ***Requires sudo access***.z	--log_dirz7Output directory for performance log files. Default is .z--loopsr   zCount of operation repetitionsz	--verbosezPrint debugging information.zargs = )argparseArgumentParseradd_argumentstrr	   ospathjoinint
parse_argsr&   r6   r:   )parserr.   r/   r/   r0   parse_sweep_argumentsC   sD   rO   c                 C   s:   t dt|   t| D ]\}}t | d|  qd S )Nzcmd line count =  z:  )r6   len	enumerate)	cmd_linesicmdr/   r/   r0   dump_cmd_liness   s   rU   c                 C   s0   dd t | D }ddd |D g}d|iS )Nc                 S   s   g | ]\}}| d | qS ):r/   ).0devdirr/   r/   r0   
<listcomp>z       zget_ftd_map.<locals>.<listcomp> c                 s   s    | ]}|V  qd S Nr/   )rW   ftdr/   r/   r0   	<genexpr>{   s    zget_ftd_map.<locals>.<genexpr>r   )rQ   rK   )nvme_dir_listftd_listftd_argr/   r/   r0   r   y   s   r   c                 C   sD   | d u rt S t| }t|}W d    |S 1 sw   Y  |S r]   )DEFAULT_SWEEP_CONFIGopenjsonload)sweep_config_jsonfpr   r/   r/   r0   r      s   

r   c                    s>   dd   fdd|   D }ttj| }dd |D }|S )Nc                 S   sV   g }|D ]$}t |tur|d|  d|  q|r#|d|   q|d q|S )Nz--r\   )r?   boolappend)key
value_list	flat_listvr/   r/   r0   flatten_options   s   z,get_sweep_cmd_lines.<locals>.flatten_optionsc                    s   g | ]	\}} ||qS r/   r/   )rW   rk   valuero   r/   r0   rZ          z'get_sweep_cmd_lines.<locals>.<listcomp>c                 S   s   g | ]}t |qS r/   )listrW   rT   r/   r/   r0   rZ      s    )itemsrs   	itertoolsproduct)sweep_config_dictrm   cmd_listr/   rq   r0   get_sweep_cmd_lines   s
   rz   c                 C   s>   | D ]}|d urt || t || t || t || qd S r]   )r   )
sweep_jobssync_jobflush_cache_jobr&   perf_jobr/   r/   r0   launch_sweep   s   


r   c                 C   s   i }| D ]G}|  }t|dkrd ||d < qt|dkr9|d dkr0t|dd  ||d < q|d ||d < qt|dkrKt|dd  ||d < q|S )Nr   r      --folder_to_device_mapping)splitrP   )cmd_linetagsparam_valuefieldsr/   r/   r0   create_cmd_tags   s   r   c                    s   d}d}dd d}d}|d|dd	 d
|d|di|d|dd d|d|di fdd}fdd} ||||g}| g}	t |}
|D ]}||
v rY|	|||
|  qH|	|| qHd|	}|d7 }|S )Nz--queue_depthz--block_sizez--single_submitz--sequential_requestsr   z--io_paralleldbssingle
sequentialr^   pr   r   blockoverlapc                    s(   |  }|  fv r|S |   | S r]   r/   )tagrp   SEQUENTIAL_REQUESTSSINGLE_SUBMITtag_defaulttag_mapr/   r0   get_default_value   s   z'get_log_file.<locals>.get_default_valuec                    s     |  }|d u r
|S | | S r]   r/   )r   rp   tag_key)r   r/   r0   get_config_value   s   z&get_log_file.<locals>.get_config_value_z.txt)r   rj   rK   )
io_op_descr   QUEUE_DEPTH
BLOCK_SIZEFTD_MAPIO_PARALLELr   r   tag_listlog_tagscmd_tagsr   log_filer/   r   r0   get_log_file   s@   
	
r   c                 C   sR   t jt tg}g }|D ]}t j|t| |}t|| |d}|| q|S )N)r   output_file)rI   rJ   rK   script_pathPERF_SCRIPTr   r   rj   )r   r%   rR   py_cmd	perf_jobsrT   r   jobr/   r/   r0   create_perf_jobs   s   r   c                   C   s   t jt jtjd S )Nr   )rI   rJ   dirnamerealpathsysargvr/   r/   r/   r0   r      s   r   c                   C   
   t   S r]   )r   is_compatibler/   r/   r/   r0   r5         
r5   c                   C   r   r]   )r   r   r/   r/   r/   r0   r8      r   r8   c                 C   s*   t j| sJ d|  dt|  d S )NzError: cannot remove z - folder not found)rI   rJ   isdirshutilrmtree)folderr/   r/   r0   remove_folder  s   r   c                    V    fdd|D }t j jt }t j|dd tt||d}t||| j	d d S )Nc                    s   g | ]}d  j  g| qS )z--read r*   rt   r   r/   r0   rZ   
  r[   z"run_read_sweep.<locals>.<listcomp>Texist_okr   r%   rR   r{   r|   r}   r&   )
rI   rJ   rK   r%   r
   makedirsr   r   r   r&   )r   r}   r|   rR   read_cmd_lines
log_folderr   r/   r   r0   run_read_sweep	     
r   c                    r   )Nc                    s   g | ]	} j  g| qS r/   r   rt   r   r/   r0   rZ     rr   z#run_write_sweep.<locals>.<listcomp>Tr   r   r   )
rI   rJ   rK   r%   r   r   r   r   r   r&   )r   r}   r|   rR   write_cmd_linesr   r   r/   r   r0   run_write_sweep  r   r   c                 C   sh   t | }t|j}|jrtg dd}nd }tdgd}|jr&t|||| |jr2t|||| d S d S )N)sudozbash -cz#'echo 1 > /proc/sys/vm/drop_caches')r   sync)	r   rz   r   r$   r   r    r   r"   r   )r.   r   rR   r}   r|   r/   r/   r0   
sweep_main'  s   
r   c                  C   s"   t  } td| j  t|  d S )Nz&Running DeepNVMe performance sweep on )rO   r6   r   r   )r.   r/   r/   r0   main9  s   r   __main__)+__doc__rI   r   rE   re   rv   r   deepspeed.ops.op_builderr   r   
ds_aio_jobr   r   perf_sweep_utilsr   r   r	   r
   r   r'   r   rc   objectr   r:   rO   rU   r   r   rz   r   r   r   r   r   r5   r8   r   r   r   r   r   r2   r/   r/   r/   r0   <module>   sR   	0	4
