o
    Tid                     @   s:   d dl Z d dlmZ ddlmZ G dd dee jjZdS )    N)get_accelerator   )	CUDAGraphc                       s   e Zd Zd fdd	Zdd ZdddZd	d
 Zdd Zdd ZdddZ	dd Z
dd Zdd Zdd Zdd ZdddZ  ZS )DSVAETc                    sT   t  j|d || _|j| _| jj| _| jj| _| jjdd d| _d| _d| _	d S )N)enable_cuda_graphF)requires_grad)
super__init__vaeconfigdevicedtyperequires_grad_decoder_cuda_graph_createdencoder_cuda_graph_createdall_cuda_graph_created)selfr
   r   	__class__ a/home/ubuntu/.local/lib/python3.10/site-packages/deepspeed/model_implementations/diffusers/vae.pyr	      s   


zDSVAE.__init__c                 O   t   t t|D ]}t|| r| j| ||  q|D ]}t|| r/| j| ||  qt | j	 | j
S N)rangelentorch	is_tensorstatic_decoder_inputscopy_static_decoder_kwargsr   replay_graph_decoder_cuda_graphstatic_decoder_outputr   inputskwargsikr   r   r   _graph_replay_decoder      zDSVAE._graph_replay_decoderNc                 C      | j j||dS N)return_dict)r
   decode)r   xr,   	generatorr   r   r   _decode"      zDSVAE._decodec                 O      t j }|t j  t j| tdD ]
}| j|i |}qW d    n1 s-w   Y  t j | t 	 | _
|| _|| _t | j
 | j| ji | j| _W d    n1 scw   Y  d| _d S N   T)r   cudaStreamwait_streamcurrent_streamstreamr   r0   r   create_graphr!   r   r   capture_to_graphr"   r   r   r$   r%   cuda_streamr&   retr   r   r   _create_cuda_graph_decoder%      

z DSVAE._create_cuda_graph_decoderc                 O   T   | j r"| jr| j|i |}|S | j|i | | j|i |}|S | j|i |S r   )r   r   r(   r?   r0   r   r$   r%   outputsr   r   r   r-   8      zDSVAE.decodec                 O   r   r   )r   r   r   r   static_encoder_inputsr   static_encoder_kwargsr   r    _encoder_cuda_graphstatic_encoder_outputr#   r   r   r   _graph_replay_encoderC   r)   zDSVAE._graph_replay_encoderc                 C   r*   r+   )r
   encode)r   r.   r,   r   r   r   _encodeM   r1   zDSVAE._encodec                 O   r2   r3   )r   r5   r6   r7   r8   r9   r   rK   r   r:   rG   rE   rF   r;   rH   r   r<   r   r   r   _create_cuda_graph_encoderP   r@   z DSVAE._create_cuda_graph_encoderc                 O   rA   r   )r   r   rI   rL   rK   rB   r   r   r   rJ   c   rD   zDSVAE.encodec                 O   r   r   )r   r   r   r   static_inputsr   static_kwargsr   r    _all_cuda_graphstatic_outputr#   r   r   r   _graph_replayn   r)   zDSVAE._graph_replayc                 O   rA   r   )r   cuda_graph_createdrQ   _create_cuda_graph_forwardrB   r   r   r   forwardx   rD   zDSVAE.forwardc                 O   r2   r3   )r   r5   r6   r7   r8   r9   r   rT   r   r:   rO   rM   rN   r;   rP   r   r<   r   r   r   rS      r@   zDSVAE._create_cuda_graphc                 C   s   |  ||||S r   )r
   )r   sample	timestampencoder_hidden_statesr,   r   r   r   rT      r1   zDSVAE._forward)T)TN)__name__
__module____qualname__r	   r(   r0   r?   r-   rI   rK   rL   rJ   rQ   rU   rS   rT   __classcell__r   r   r   r   r      s    




r   )r   deepspeed.acceleratorr   features.cuda_graphr   nnModuler   r   r   r   r   <module>   s   