o
    i}F                     @   sf  U d Z ddlZddlZddlZddlZddlmZ ddlmZ ddlm	Z	 ddl
mZ ddlmZ ddlmZ dd	lmZ dd
lmZ ddlZddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlm Z  ddl!m!Z! ddl"m#Z# ddl$m%Z% ddl&m'Z' ddl&m(Z( ddl&m)Z) ddl*T ddl"m+Z+ ddl,m-Z- dZ.ej/j0fZ1ej/j2ej3j4j2j5fZ6d Z7ej/j8Z9ej:Z;ej<Z=ej>j?Z?ej@jAZBej@jCZDej@jEZFej@jGZHej@jIZJej@jKZLej@jMZNej@jOZPej@jQjRZSej@j%ZTeUej@d!dZVe ZWeXej/ej/dB f eYd"< g ZZe[e' eYd#< G d$d% d%ej/Z\d&ej/fd'd(Z]d)d* Z^G d+d, d,eZ_G d-d. d.eZ`e_ Zae` Zbd/d0 Zcedd1d2 Zed3d4 Zfd5d6 ZgG d7d8 d8ehZiG d9d: d:eid;Zjd<d= Zkd>d? Zld@dA ZmdBdC ZndDdE ZodFepfdGdHZqdIdJ ZrdKepfdLdMZsdNdO ZtdYdPeeugef dB fdQdRZvdYdPeeugef dB fdSdTZwdUdV ZxdWdX ZydS )Z
    N)defaultdict)ProcessPoolExecutor)ThreadPoolExecutor)nullcontext)copy_context)Path)Any)Callable)TorchFunctionMode)resolve_name)is_traceable_wrapper_subclass)transform_subclass)TorchDispatchMode)tree_map_only)WeakTensorKeyDictionary   )Config   )tqdm)malloc_trim   cudart)ZeroGPUTensorPack)pack_tensors)pack_to_cuda)*)empty_like_raw_alloc)AliasIdg?z4Cannot apply function {} on disk-offloaded Tensor {}_maybe_exchange_devicecuda_aliasestensor_packsc                   @   s   e Zd ZdS )ZeroGPUTensorN)__name__
__module____qualname__ r'   r'   N/home/ubuntu/.local/lib/python3.10/site-packages/spaces/zero/torch/patching.pyr#   K   s    r#   tensorc                 C   s0   t | | jd}|j| jkrt| j|| jd}|S )N)requires_grad)require_grad)r   r*   	__class___tensor_make_subclassr)   faker'   r'   r(   
empty_fakeN   s   r0   c                  O   s^   t | rt| d  }trd| g| dd  R } t|d }tr+d| |d< | |fS )Nr   zcuda:r   device)len
isinstanceintget)argskwargsindexr'   r'   r(   no_int_deviceU   s
   r9   c                   @   (   e Zd Zddeeef dB fddZdS )ZeroGPUFunctionModer'   Nr7   c              	      s  |d u ri n|}t jjjkrt|i |\}}|i |S t jjks+t jjkrS|d}| 	t jj
||d fdt jjkrCdndi|d urOd|iS i S t jj
krt|dkrt|dd  i |\}}d|v rwd|dini }t jjj|i |\}	}
}}| 	t jj
||d f|	|
|d|B S t jjjkr|\} }|tv rt|  }d u rttt|t| }||_|t| < n| tv rt| = || _d S t jjjkr|\}|tv rt jddd	S ndt jjkr|\}|tv rt|  }d u r|
d
}|j}t|_z|fi |W ||_S ||_w n/t jjkrM|\}|tv rMt|  }d u r@ttt||fi |}d|_|S d  |d }	d urpt |	}	|	jdkrnt d|d< d nd i t dt jffdd}tt j||}tt j||}dhkr durd t|dkrt|d  }rڈt jj t j!j"j#j$t j!j"j%j$hv r|  t&|fddW  d    S 1 sw   Y  |i |}' D ]
\}}t||_qt j!j"j(jt jj)hv r|d } | tv   ht*|t jst+v r?d } t|dkr*t*|d t jr*|d } || us4t,v r?ddhkr?t-ddt jf fdd}tt j||S )Nmemory_formatr   r1   cudacpur   copy)r1   dtyper<   r8   metaTFr)   c                    sP   | t vrdhO | S t |   }d u rttt | | |< dhO |S NFT)r!   	ExceptionOFFLOADED_ERROR_MESSAGEformatr   )r)   original)funcinputs_are_cudaswappedr'   r(   swap   s   

z4ZeroGPUFunctionMode.__torch_function__.<locals>.swapc                    s    |S Nr'   )_t)rH   r'   r(   <lambda>   s    z8ZeroGPUFunctionMode.__torch_function__.<locals>.<lambda>zhExpected all tensors to be on the same device, but found at least two devices, cuda:0 (ZeroGPU) and cpu!c                    s8   | v r dur|  S  dur| S t | }| t|< |S rC   )r0   r!   r.   )r=   rJ   r'   r(   register   s   z8ZeroGPUFunctionMode.__torch_function__.<locals>.register).torch_C_nn	_parse_tor9   Tensorr=   r>   r5   __torch_function__tor2   popdata__set__r!   rD   rE   rF   r   r0   r1   __get____repr__r,   r#   untyped_storage_zerogputypesetr   r   detachopsatenaliasdefaultcloner   itemsr8   __getitem__r3   OPS_INPUTS_CHECK_NO_RETURNOPS_INPUT_CHECK_SELF_RETURNRuntimeError)selfrH   typesr6   r7   r<   parse_to_argsparse_to_kwargs
copy_kwargr1   r@   rM   targettarget_originalrG   r)   original_classresrK   args_kwargs_wrapper_tensorr/   rP   r'   )r=   rH   rI   rJ   r(   rV   _   s   








 

"
 	z&ZeroGPUFunctionMode.__torch_function__r'   N)r$   r%   r&   dictstrr	   rV   r'   r'   r'   r(   r;   ]   s     r;   c                   @   r:   )DefaultDispatchModer'   Nr7   c                 C   s   ||i |pi S rL   r'   )rl   rH   rm   r6   r7   r'   r'   r(   __torch_dispatch__   s   z&DefaultDispatchMode.__torch_dispatch__rx   )r$   r%   r&   ry   rz   r	   r|   r'   r'   r'   r(   r{      s     r{   c                  O   sV   d}| d }d urt|}|jdkrd}|d= tjjj| i |}|r)d|_|S )NFr1   r=   T)r5   rQ   r1   r_   rR   StorageBase__new__r^   )r6   r7   r=   r1   storager'   r'   r(   _untyped_storage_new_register   s   

r   c                 C   s(   t | drtjdddS tjjj| S )Nr^   r=   r   rA   )hasattrrQ   r1   rR   r}   r[   )rl   r'   r'   r(   _untyped_storage_device	  s   
r   c                  O   @   t j  tjtd| |dW  d    S 1 sw   Y  d S Nr'   r6   r7   )rQ   rR   DisableTorchFunctionfunction_moderV   r-   r   r'   r'   r(   #_tensor_make_subclass_function_mode     $r   c                  O   r   r   )rQ   rR   r   r   rV   _asarrayr   r'   r'   r(   _asarray_function_mode  r   r   c                   @      e Zd Zdd ZdS )_DeviceStringOnlyMetac                 C   s
   t |tS rL   )r3   _device)clsinstancer'   r'   r(   __instancecheck__  s   
z'_DeviceStringOnlyMeta.__instancecheck__N)r$   r%   r&   r   r'   r'   r'   r(   r         r   c                   @   r   )_DeviceStringOnlyc                 O   s    t |i |\}}t|i |S rL   )r9   r   )r   r6   r7   r'   r'   r(   r~     s   z_DeviceStringOnly.__new__N)r$   r%   r&   r~   r'   r'   r'   r(   r     r   r   )	metaclassc                   C   s   t d)NzCUDA must not be initialized in the main process on Spaces with Stateless GPU environment.
You can look at this Stacktrace to find out which part of your code triggered a CUDA init)rk   r'   r'   r'   r(   _cuda_init_raise   s   r   c                 C   s   | dv sJ | S )N>   r   r'   )r1   r'   r'   r(   _cuda_dummy_exchange_device(  s   r   c                   C   s   t   t  ttj_ttj_	t
tj_tt_tt_ttj_ttj_dd tj_dd tj_dd tj_dd tj_dd tj_dd tj_dd tj_d	d tjj_d
d tj_td urdt tjdt t! "  d S )Nc                   S      dS )NTr'   r'   r'   r'   r(   rO   8      zpatch.<locals>.<lambda>c                   S   r   )Nr   r'   r'   r'   r'   r(   rO   9  r   c                   S   r   )Nr   r'   r'   r'   r'   r(   rO   :  r   c                  W      d S rL   r'   )r6   r'   r'   r(   rO   ;  r   c                  _      t S rL   )CUDA_DEVICE_CAPABILITYr   r'   r'   r(   rO   <  r   c                  _   r   rL   )CUDA_DEVICE_PROPERTIESr   r'   r'   r(   rO   =  r   c                  _   r   rL   )CUDA_DEVICE_NAMEr   r'   r'   r(   rO   >  r   c                  _   r   rL   ) CUDA_MEMORY_STATS_AS_NESTED_DICTr   r'   r'   r(   rO   ?  r   c                   S   r   rL   r   r'   r'   r'   r(   rO   @  r   r    )#r   	__enter__dispatch_moder   rQ   rU   _make_subclassr   UntypedStorager~   r   r1   r   asarrayr   r   rR   
_cuda_initr   r=   _exchange_deviceis_availabledevice_countcurrent_devicesynchronizeget_device_capabilityget_device_propertiesget_device_namememorymemory_stats_as_nested_dictr   _cuda_maybe_exchange_devicesetattrbitsandbytespatchr'   r'   r'   r(   r   ,  s*   r   c                   C   s   zt d d d  td d d  W n	 ty   Y nw ttj_tjj	j
tj_
tjj	jtj_tt_tt_ttj_ttj_ttj_ttj_ttj_ttj_ttj_ttj_t tj_!t"tjj#_$t%tj_&t'd urjt(tjdt t) *  d S )Nr    )+r   __exit__r   rk   r-   rQ   rU   r   rR   r}   r~   r   r1   r   r   r   r   _cuda_exchange_devicer=   r   _cuda_availabler   _cuda_device_countr   _cuda_current_devicer   _cuda_synchronizer   _cuda_get_device_capabilityr   _cuda_get_device_propertiesr   _cuda_get_device_namer   !_cuda_memory_stats_as_nested_dictr   r   _cuda_cudartr   r   r   r   unpatchr'   r'   r'   r(   r   F  s2   
r   c                  C   s6   dd t  D } dd | D }tdd | D S )Nc                 S   s   g | ]}|d ur|qS rL   r'   .0r)   r'   r'   r(   
<listcomp>c      z(_total_unpacked_size.<locals>.<listcomp>c                 S   s   i | ]}t ||qS r'   )r   from_tensorr   r'   r'   r(   
<dictcomp>d  r   z(_total_unpacked_size.<locals>.<dictcomp>c                 S   s   g | ]
}|  |  qS r'   )numelelement_sizer   r'   r'   r(   r   e  s    )r!   valuessum)tensorsdedupedr'   r'   r(   _total_unpacked_sizeb  s   r   offload_dirc                 C   s  t  }i }tt}t D ]%\}}|d ur2t|}||vr'|||< ||hO }|||   |g7  < qt }td urBt|ddddnt	 }|}|d urO|j
ndd }	t||| |	d}
W d    n1 sew   Y  t|
 | D ]}|D ]}d t|< qwqs|S )NBTzZeroGPU tensors packing)totalunit
unit_scaledescc                 S   r   rL   r'   rM   r'   r'   r(   rO   }  r   z_pack.<locals>.<lambda>callback)r`   r   listr!   rg   r   r   r   r   r   updater   r"   appendr   )r   	originalsoriginals_dedupfakesr/   rG   original_id
total_sizeprogressr   pack	fake_listr'   r'   r(   _packh  s>   



r   c                  C   s>   t jtjdd ttjjdd ttj} t  t	  | S )NT)ignore_errors)parents)
shutilrmtreer   zerogpu_offload_dirr   mkdirr   gccollectr   r   r'   r'   r(   r     s   
r   nvidia_uuidc                 C   s   | t jd< tdg  d S )NCUDA_VISIBLE_DEVICESr   )osenvironrQ   rU   r=   )r   r'   r'   r(   init  s   
r   c                   C   s   t  tdd tD  S )Nc                 S   s   g | ]}|j qS r'   r   )r   r   r'   r'   r(   r     s    zsize.<locals>.<listcomp>)r   r   r"   r'   r'   r'   r(   size  s   r   r   c                 C   s   | d ur| ndd } t  t }i }t D ];\}}|d urPt|}t|}||vrP| |	  |k r>|
 jdd}n| }|||< | | |	   qtj  t D ]\}}|d urj|t| |_qZtD ]}t|| d qmt   d S )Nc                 S   r   rL   r'   r   r'   r'   r(   rO     r   z_move.<locals>.<lambda>T)non_blockingr   )r   PINNED_MEMORY_RATIO_LIMITr!   rg   rQ   rU   r   r   r   r   
pin_memoryr=   r   rY   r"   r   r   move)r   pinned_limitmovedr/   rG   r   original_cudatensor_packr'   r'   r(   _move  s,   



r   c                 C   sb   | d ur| ndd } t d}|jt jt| d  W d    n1 s%w   Y  tj  d S )Nc                 S   r   rL   r'   r   r'   r'   r(   rO     r   zmove.<locals>.<lambda>r   r   )	r   submitr   runr   resultrQ   r=   r   )r   er'   r'   r(   r     s
   
r   c                  C   sJ   t tdd} | tjj}| W  d    S 1 sw   Y  d S )Nfork)
mp_context)r   multiprocessingget_contextr   rQ   r=   _is_in_bad_forkr   )r   fr'   r'   r(   is_in_bad_fork  s   $r  c                  C   s   ddl m}  | S )Nr   r   ) r   r  r'   r'   r(   r     s   r   rL   )z__doc__r   r   r   r   collectionsr   concurrent.futuresr   r   
contextlibr   contextvarsr   pathlibr   typingr	   r
   rQ   torch.overridesr   r   torch.utils._python_dispatchr   r   r   torch.utils._pytreer   torch.utils.weakr   configr   r   utilsr   r  r   packingr   r   r   staticr   rm   r   r   rU   equalri   set_rb   rc   source_Tensorrj   rE   r   r-   r   r   r1   r   rR   r   r=   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   getattrr   r!   ry   __annotations__r"   r   r#   r0   r9   r;   r{   r   r   r   propertyr   r   r   r_   r   r   r   r   r   r   r   rz   r   r   r   r   r4   r   r   r  r   r'   r'   r'   r(   <module>   s    

 
