o
    Ti                     @   s   d Z ddlZddlmZ ddlmZ ddlmZ dZ	dZ
e	Zdd	 Zd
d Zg fddZG dd deZG dd deZG dd deZdd Zdd ZdS )zC
Functionality of swapping tensors to/from (NVMe) storage devices.
    N)logger)get_accelerator)commi   i   c                 C   .   t ||D ]\}}| ||ddksJ qd S Nr   )zipasync_preadswap_handletensor_buffers
swap_pathsbufferpath r   W/home/ubuntu/.local/lib/python3.10/site-packages/deepspeed/runtime/swap_tensor/utils.pyswap_in_tensors      r   c                 C   r   r   )r   async_pwriter	   r   r   r   swap_out_tensors   r   r   c              
   C   sX   t d| tt| D ]}||vr)ddt|  }t d||t| | qd S )Nz{}:.   z
  {} {} {})r   infoformatsortedvarslengetattr)objnameexclude_listargdotsr   r   r   print_object   s   r"   c                   @   sl   e Zd Zdd Zdd Zdd Zdd Zd	d
 Zdd Zdd Z	dd Z
dd Zdd Zdd Zdd ZdS )
SwapBufferc                 C   s   || _ |   d S N)r   resetselfr   r   r   r   __init__(   s   zSwapBuffer.__init__c                 C   s"   d| _ i | _i | _i | _d| _d S r   )offsetswap_tensorscompute_tensorsr   num_elemr'   r   r   r   r%   ,   s
   
zSwapBuffer.resetc                 C   s,   |  || |\}}|j|j ||fS r$   )allocate_tensornumeldatacopy_r'   tensor	swap_pathaligned_numelswap_tensorcompute_tensorr   r   r   insert_tensor3   s   zSwapBuffer.insert_tensorc                 C   s   |  |sJ | j| jvsJ | j}| jd||}|dd|}|| j|< || j|< || j|< |  j|7  _|  j|7  _| j| | j| fS r   )	has_spacer)   r*   r   narrowr+   r   r,   )r'   r4   r/   r5   allocate_offsetr6   dest_tensorr   r   r   r.   8   s   


zSwapBuffer.allocate_tensorc                 C   s   | j | | j kS r$   )r)   r   r/   r'   r/   r   r   r   r9   H      zSwapBuffer.has_spacec                 C      dd | j  D S )Nc                 S      g | ]}|qS r   r   .0r3   r   r   r   
<listcomp>L       z/SwapBuffer.get_swap_tensors.<locals>.<listcomp>)r*   valuesr-   r   r   r   get_swap_tensorsK   r>   zSwapBuffer.get_swap_tensorsc                 C   r?   )Nc                 S   r@   r   r   )rB   r   r   r   r   rC   O   rD   z-SwapBuffer.get_swap_paths.<locals>.<listcomp>)r   rE   r-   r   r   r   get_swap_pathsN   r>   zSwapBuffer.get_swap_pathsc                 C   r?   )Nc                 S   r@   r   r   rA   r   r   r   rC   R   rD   z2SwapBuffer.get_compute_tensors.<locals>.<listcomp>)r+   rE   r-   r   r   r   get_compute_tensorsQ   r>   zSwapBuffer.get_compute_tensorsc                 C   s   | j S r$   )r,   r-   r   r   r   get_num_elemT   s   zSwapBuffer.get_num_elemc                 C      | j |d S r$   )r*   getr'   r)   r   r   r   get_swap_tensorW      zSwapBuffer.get_swap_tensorc                 C   rJ   r$   )r+   rK   rL   r   r   r   get_compute_tensorZ   rN   zSwapBuffer.get_compute_tensorc                 C   s   |  |d S r$   )r   rL   r   r   r   get_swap_path]      zSwapBuffer.get_swap_pathN)__name__
__module____qualname__r(   r%   r8   r.   r9   rF   rG   rH   rI   rM   rO   rP   r   r   r   r   r#   &   s    r#   c                   @   sp   e Zd Zdd Zdd Zdd Zdd Zd	d
 Zdd Zdd Z	dd Z
dddZdddZdd Zdd ZdS )SwapBufferPoolc                 C   s0   t dd |D sJ dd |D | _d| _d S )Nc                 S   s   g | ]}t  |qS r   )r   	is_pinnedrB   bufr   r   r   rC   d   s    z+SwapBufferPool.__init__.<locals>.<listcomp>c                 S   s   g | ]}t |qS r   )r#   rW   r   r   r   rC   e       r   )allbufferscurrent_index)r'   r[   r   r   r   r(   c   s   
zSwapBufferPool.__init__c                 C   s   d| _ | jD ]}|  qd S r   )r\   r[   r%   r&   r   r   r   r%   h   s   

zSwapBufferPool.resetc                 C   s,   |  |r|  |||\}}||fS dS N)NN)r9   _get_current_bufferr.   )r'   r/   r4   r5   r6   r7   r   r   r   r.   m      
zSwapBufferPool.allocate_tensorc                 C   s,   |  |r|  |||\}}||fS dS r]   )r9   r^   r8   r2   r   r   r   r8   t   r_   zSwapBufferPool.insert_tensorc                 C   "   g }|   D ]}|| 7 }q|S r$   )_get_used_buffersrF   )r'   r*   r   r   r   r   rF   {      zSwapBufferPool.get_swap_tensorsc                 C   r`   r$   )ra   rG   )r'   r   r   r   r   r   rG      rb   zSwapBufferPool.get_swap_pathsc                 C   r`   r$   )ra   rH   )r'   r+   r   r   r   r   rH      rb   z"SwapBufferPool.get_compute_tensorsc                 C   sF   |   |r	dS | jt| jd krdS |  jd7  _|   |S )NT   F)r^   r9   r\   r   r[   r=   r   r   r   r9      s   zSwapBufferPool.has_spaceFc                 C   R   |   }|  }tdd |D sJ t||| |s%t|| ks'J d S d S )Nc                 S      g | ]}|d uqS r$   r   rB   pr   r   r   rC      rY   z+SwapBufferPool.swap_out.<locals>.<listcomp>)rF   rG   rZ   r   r   waitr'   
aio_handleasync_opr*   r   r   r   r   swap_out      zSwapBufferPool.swap_outc                 C   rd   )Nc                 S   re   r$   r   rf   r   r   r   rC      rY   z*SwapBufferPool.swap_in.<locals>.<listcomp>)rF   rG   rZ   r   r   rh   ri   r   r   r   swap_in   rm   zSwapBufferPool.swap_inc                 C   s   | j | j S r$   r[   r\   r-   r   r   r   r^      rQ   z"SwapBufferPool._get_current_bufferc                 C   s   | j d | jd  S )Nrc   ro   r-   r   r   r   ra      r>   z SwapBufferPool._get_used_buffersN)F)rR   rS   rT   r(   r%   r.   r8   rF   rG   rH   r9   rl   rn   r^   ra   r   r   r   r   rU   a   s    




rU   c                   @   s,   e Zd Zdd Zdd Zdd Zdd Zd	S )
SwapBufferManagerc                    s   | _ || _ | _ fddt|D | _dd t|D | _i | _| jd   | d | _t	
 dkrCdg}t| d|d d S d S )	Nc                    s(   g | ]}t  jtjd  dddqS )cpu)devicedtyper   )align_bytes)r   
pin_memorytorchzeros)rB   _rs   	num_elemsr   r   rC      s    z.SwapBufferManager.__init__.<locals>.<listcomp>c                 S   r@   r   r   )rB   ir   r   r   rC      rD   r   i   @all_buffersrp   )r   r   r   )rz   countrs   ranger|   free_buffer_indexused_buffer_indexelement_size	gigabytesdistget_rankr"   )r'   rz   r}   rs   r   r   ry   r   r(      s   zSwapBufferManager.__init__c                 C   s   || j ksJ || jksJ |t| jkrd S | j| d  }| jd |  | _g }|D ]}| j| dd|}|| || jt|< q,|S r   )	rs   rz   r   r   r|   r:   appendr   id)r'   rz   r}   rs   used_indicesr[   r{   
tmp_bufferr   r   r   allocate   s   
zSwapBufferManager.allocatec                 C   s   | j |t| j|dS )N)rz   r}   rs   )r   r   r   )r'   rz   rs   r   r   r   allocate_all   s   zSwapBufferManager.allocate_allc                    s^   g }|D ]	}| t| qt fdd|D sJ |D ]} j  j|   j|= qd S )Nc                    s   g | ]}| j v qS r   )r   )rB   b_idr-   r   r   rC      s    z*SwapBufferManager.free.<locals>.<listcomp>)r   r   rZ   r   r   )r'   r[   
buffer_idsrX   r   r   r-   r   free   s   
zSwapBufferManager.freeN)rR   rS   rT   r(   r   r   r   r   r   r   r   rp      s
    rp   c                 C   sB   ||   ksJ d| d|    ||   k r| dd|S | S )Nz
num_elems z
 > buffer r   )r/   r:   )r   rz   r   r   r   get_sized_buffer   s   r   c                 C   s   dd t | |D }|S )Nc                 S   s   g | ]	\}}t ||qS r   )r   )rB   r   rz   r   r   r   rC      s    z%get_sized_buffers.<locals>.<listcomp>)r   )buffer_listnum_elems_listswap_buffersr   r   r   get_sized_buffers   s   r   )__doc__rv   deepspeed.utils.loggingr   deepspeed.acceleratorr   	deepspeedr   r   MIN_AIO_BYTESAIO_ALIGNED_BYTESMIN_SWAPPABLE_BYTESr   r   r"   objectr#   rU   rp   r   r   r   r   r   r   <module>   s    ;T1