o
    Ti                     @   s@   d dl Z d dlmZ d dl mZ d dlmZ G dd deZdS )    N)CallableTensor)versionc                   @   sn   e Zd ZdZejZejZej	Z
ejZdddZdedejdefd	d
ZdejdefddZdd Zdd ZdS )OnDeviceaB  
    Create modules/tensors w. specific devices and dtypes. Examples:

    Create MyModule which consists of many different sub-modules and parameters. In this case we can create
    MyModule as a collection of 'meta' tensors by passing `device='meta'` or we can create the module _directly_
    on a CUDA device by passing `device=f'cuda:{local_rank}'` (where `local_rank` is the local GPU id.

    with OnDevice(dtype=torch.float16, device='meta'):
        model = MyModel()

    with OnDevice(dtype=torch.float16, device=f'cuda:{local_rank}'):
        model = MyModel()

    metaTc                 C   s@   || _ || _|| _|dkrtdttjkrtdd S d S )Nr   z1.10zCMeta tensor support is not available, please upgrade to torch 1.10+)dtypeenableddevicepkg_versionparsetorch__version__NotImplementedError)selfr   r
   r	    r   R/home/ubuntu/.local/lib/python3.10/site-packages/deepspeed/utils/init_on_device.py__init__!   s   zOnDevice.__init__fntarget_fp_dtypereturnc                    s   dt f fdd}|S )Nr   c                     s>   | dd d u rj|d<  | i |}| r|}|S )Nr
   )getr
   is_floating_pointto)argskwargstensorr   r   r   r   r   
wrapped_fn,   s   

z2OnDevice.fp_tensor_constructor.<locals>.wrapped_fnr   )r   r   r   r   r   r   r   fp_tensor_constructor*   s   zOnDevice.fp_tensor_constructorr   c                    s   dt f fdd}|S )Nr   c                    s,   t jdjdj| }| r| }|S )Nr   )r
   )r   _orig_torch_emptyr
   	new_emptyr   r   )clsr   r   r   r   r   r   
new_tensor8   s   
z8OnDevice.get_new_tensor_fn_for_dtype.<locals>.new_tensorr   )r   r   r$   r   r#   r   get_new_tensor_fn_for_dtype6   s   z$OnDevice.get_new_tensor_fn_for_dtypec                 C   sr   | j sd S tjjtj_| | jtj_| | j| jt_	| | j
| jt_| | j| jt_| | j| jt_d S N)r	   r   r   __new____old_new__r%   r   r   r    empty_orig_torch_zeroszeros_orig_torch_onesones_orig_torch_fullfull)r   r   r   r   	__enter__@   s   zOnDevice.__enter__c                 C   s:   | j sd S tjjtj_| jt_| jt_| j	t_
| jt_d S r&   )r	   r   r   r(   r'   r    r)   r*   r+   r,   r-   r.   r/   )r   exc_type	exc_value	tracebackr   r   r   __exit__J   s   zOnDevice.__exit__N)r   T)__name__
__module____qualname____doc__r   r)   r    r+   r*   r-   r,   r/   r.   r   r   r   r   r%   r0   r4   r   r   r   r   r      s    
	

r   )	r   typingr   r   	packagingr   r   objectr   r   r   r   r   <module>   s
   