o
    Ti                     @   s:   d dl Z d dlmZ ddlmZ G dd dee jjZdS )    N)get_accelerator   )	CUDAGraphc                       sH   e Zd Zd fdd	Zdd Zdd Zdd	 Z		
	
	
dddZ  ZS )DSUNetTc                    sh   t  j|d || _|j| _| jj| _| jj| _| jj| _d| _| jjdd | jj	t
jd d| _d S )N)enable_cuda_graphr   F)requires_grad)memory_format)super__init__unetin_channelsdevicedtypeconfig	fwd_countrequires_grad_totorchchannels_lastcuda_graph_created)selfr   r   	__class__ b/home/ubuntu/.local/lib/python3.10/site-packages/deepspeed/model_implementations/diffusers/unet.pyr
      s   



zDSUNet.__init__c                 O   st   t t|D ]}t|| r| j| ||  q|D ]}t|| r/| j| ||  qt | j	 | j
S N)rangelenr   	is_tensorstatic_inputscopy_static_kwargsr   replay_graph_cuda_graphsstatic_output)r   inputskwargsikr   r   r   _graph_replay   s   zDSUNet._graph_replayc                 O   sT   | j r"| jr| j|i |}|S | j|i | | j|i |}|S | j|i |S r   )r   r   r)   _create_cuda_graph_forward)r   r%   r&   outputsr   r   r   forward$   s   zDSUNet.forwardc                 O   s   t j }|t j  t j| tdD ]
}| j|i |}qW d    n1 s-w   Y  t j | t 	 | _
|| _|| _t | j
 | j| ji | j| _W d    n1 scw   Y  d| _d S )N   T)r   cudaStreamwait_streamcurrent_streamstreamr   r+   r   create_graphr#   r   r!   capture_to_graphr$   r   )r   r%   r&   cuda_streamr'   retr   r   r   r*   /   s   

zDSUNet._create_cuda_graphNc                 C   s(   |r| j |||||dS |  ||||S )N)cross_attention_kwargs)r   )r   sample	timestampencoder_hidden_statesreturn_dictr8   timestep_condadded_cond_kwargsr   r   r   r+   B   s   zDSUNet._forward)T)TNNN)	__name__
__module____qualname__r
   r)   r-   r*   r+   __classcell__r   r   r   r   r      s    
r   )r   deepspeed.acceleratorr   features.cuda_graphr   nnModuler   r   r   r   r   <module>   s   