o
    i.                     @   s\  d Z ddlZddlZddlZddlmZ ddlmZ ddlmZ ddl	m
Z
 ddlmZ ddlmZ dd	lmZ d
dlmZ ddlmZ ddlZdZeded Zeded ZdZdZeejeef Ze
G dd dZdedejfddZ 	d#de!ej de"eje#ej f de$deegef dB fddZ%d#d edeegef dB fd!d"Z&dS )$
    N)as_completed)ThreadPoolExecutor)copy_context)	dataclass)Queue)Any)Callable   )debug   empty_like_raw_alloci   SC_PAGE_SIZESC_PHYS_PAGESl           i   c                   @   sb   e Zd ZU eed< eee  ed< ee ed< eej	eej	 f ed< e
ed< dd Zdd	 Zd
S )ZeroGPUTensorPackbase_dirbatchesbig_tensorsfakes
total_sizec                 C   s   | j  dt|  S )N/)r   idself r   M/home/ubuntu/.local/lib/python3.10/site-packages/spaces/zero/torch/packing.pypath&   s   zZeroGPUTensorPack.pathc                 C   s*   z
t |   W d S  ty   Y d S w N)osremover   FileNotFoundErrorr   r   r   r   __del__(   s
   zZeroGPUTensorPack.__del__N)__name__
__module____qualname__str__annotations__listTensorWithSizesdicttorchTensorintr   r#   r   r   r   r   r      s   
 r   fdtensorc           
      C   s   t |}|  }t t}| }| t }| t }|||||  d|j	|
  || ttj||  || }d}	|	|k rZ|	t| ||	d  7 }	|	|k sHd S d S Nr   )r,   
empty_likeuntyped_storagesizeUntypedStorageVM_MAX_SIZEdata_ptr	PAGE_SIZEset_shapestridecopy_
memoryviewctypesc_charfrom_addressr    write)
r/   r0   cloner4   buffer
buffer_ptroffsetpaddingmvwritten_bytesr   r   r   rA   /   s   



"
rA   tensorsr   offload_dircallbackc              	      s  |d u rdd n|}g }g }g }| D ]}|  |  }|| t  }	||||	fg7 }qg d}
}t|dd dD ]1\}}}	|	tkrK||||	fg7 }q9||	7 }|tkrb||
g7 }|||	fg|	}
}q9|
|||	fg7 }
q9|
rr||
g7 }dd | D   fdd	|D } fd
d	|D } fdd| D }t||||tdd	 |D d}t	|
 tjtjB tjB }zNtdd	 |D }|tdd	 |D 7 }|dkrt|d| |D ]}|D ]\}}}t|| || qq|D ]\}}}t|| || q|W t| S t| w )Nc                 S      d S r   r   bytesr   r   r   <lambda>E       zpack_tensors.<locals>.<lambda>r   c                 S   s   | d S )Nr   r   )itemr   r   r   rO   Q   s    )keyc                 S   s   i | ]}|t |qS r   r   ).0r0   r   r   r   
<dictcomp>_       z pack_tensors.<locals>.<dictcomp>c                    s   g | ]} fd d|D qS )c                        g | ]\}}} | ||fqS r   r   rS   r0   r4   asizeget_metar   r   
<listcomp>`        z+pack_tensors.<locals>.<listcomp>.<listcomp>r   )rS   batchrY   r   r   r[   `   s    z pack_tensors.<locals>.<listcomp>c                    rV   r   r   rW   rY   r   r   r[   a   r\   c                    s   i | ]	\}} | |qS r   r   )rS   r0   	fake_listrY   r   r   rT   b   s    c                 S   s   g | ]\}}}|qS r   r   )rS   _r4   r   r   r   r[   i   rU   )r   r   r   r   r   c                 S   s    g | ]}|D ]^ }}|qqS r   r   )rS   r]   r_   aligned_sizer   r   r   r[   n   r\   c                 S      g | ]^ }}|qS r   r   rS   r_   r`   r   r   r   r[   o   rU   )numelelement_sizer8   sortedBUFFER_SIZEitemsr   sumr    openr   O_CREATO_WRONLYO_DIRECTposix_fallocaterA   close)rI   r   rJ   rK   r   r   tensors_with_sizesr0   r4   r`   current_batchcurrent_sizebatches_metabig_tensors_meta
fakes_metapackr/   total_asizer]   r_   r   rY   r   pack_tensors>   s^   






rw   ru   c           	   
      s   d u rdd n  t  t  ttD ]}tt  qdtdtj	dtfdddtffdd	} fd
d}t
d?}t tjtjB }z#|t j|||t j|g}t|D ]}|  qiW t| nt| w W d    d S 1 sw   Y  d S )Nc                 S   rL   r   r   rM   r   r   r   rO      rP   zpack_to_cuda.<locals>.<lambda>r/   rC   r4   c                 S   sN   t tj| | }d}||k r%|t| ||d  g7 }||k sd S d S r1   )r=   r>   r?   r@   r7   r    readv)r/   rC   r4   rG   
read_bytesr   r   r   read   s
   zpack_to_cuda.<locals>.readc                    s   j D ]}  }tdd |D }| || | qjD ])^ }}d}||k rJ  }tt|| }| || | ||7 }||k s,q!d S )Nc                 S   ra   r   r   rb   r   r   r   r[      rU   z5pack_to_cuda.<locals>.disk_to_pin.<locals>.<listcomp>r   )r   getrh   putr   minrf   )r/   r]   rC   
batch_sizer_   r`   ry   	read_size)free_buffersru   rz   read_buffersr   r   disk_to_pin   s    

z!pack_to_cuda.<locals>.disk_to_pinc               	      s  d} j D ]s} }d}g }|D ]\}}}|||||  jddg7 }||7 }qtj  | d}t||D ],\\}}}	}
tjg |jdd}|	|

 d|j| }j| D ]}||_q]||7 }q;t } | | t | 7 } qjD ]n\}}}	tj|tjdd}
d}||k rɈ }tt|| }|d | |
||| < ||7 }tj  | t } | | t | 7 } ||k stjg |jdd}|	|

 d|j| }j| D ]}||_qq|td|  d S )Nr   T)non_blockingcuda)dtypedeviceztotal_duration_in_callback=)r   r{   r   r,   synchronizer|   zipr0   r   r9   r3   r:   r;   r   datatimeperf_counterr   emptyuint8r}   rf   r   )total_duration_in_callbackr]   rC   rE   cuda_storagesr0   r4   r`   batch_total_sizer_   cuda_storagecuda_tensorfaket0r   )rK   r   ru   r   r   r   pin_to_cuda   sP   







z!pack_to_cuda.<locals>.pin_to_cudar   )r   rangeBUFFER_COUNTr|   r,   
ByteTensorrf   
pin_memoryr.   r-   r   r    ri   r   O_RDONLYrl   submitr   runr   resultrn   )	ru   rK   r_   r   r   er/   futuresfuturer   )rK   r   ru   rz   r   r   pack_to_cuda~   s&   
)
"r   r   )'__doc__r   r>   r    concurrent.futuresr   r   contextvarsr   dataclassesr   queuer   typingr   r	   utilsr   r   r,   r8   sysconfTOTAL_MEMORYr}   r6   rf   r   tupler-   r.   r*   r   rA   setr+   r)   r'   rw   r   r   r   r   r   <module>   sD    
&@