o
    Ti                     @   s:   d dl Z d dlmZ ddlmZ G dd dee jjZdS )    N)get_accelerator   )	CUDAGraphc                       sF   e Zd Zd fdd	Zdd Zdd Zdd	 Zd
d Zdd Z  Z	S )DSClipEncoderFc                    sx   t  j|d | j|j_|| _| jj| _| jj| _ddg| _d d g| _d d g| _	d d g| _
d d g| _d| _| jj| _d S )N)enable_cuda_graphFr   )super__init___build_causal_attention_mask
text_modelencdevicedtypecuda_graph_createdstatic_inputsstatic_kwargsstatic_output_cuda_graphsiterconfig)selfr   r   	__class__ m/home/ubuntu/.local/lib/python3.10/site-packages/deepspeed/model_implementations/transformers/clip_encoder.pyr      s   







zDSClipEncoder.__init__c                 C   sJ   t j||||t  d}|t t |j |d |	d}|S )N)r   r      )
torchemptyr   current_device_namefill_tensorfinfomintriu_	unsqueeze)r   bszseq_lenr   maskr   r   r   r	      s
   

z*DSClipEncoder._build_causal_attention_maskc                 O   s   t t|D ]}t|| r| j| j | ||  q|D ]}t|| r5| j| j | ||  qt 	| j
| j  | j| j S N)rangelenr   	is_tensorr   r   copy_r   r   replay_graphr   r   )r   inputskwargsikr   r   r   _graph_replay"   s   zDSClipEncoder._graph_replayc                 O   sh   | j r,| j| j r| j|i |}n| j|i | | j|i |}| jd d | _|S | j|i |S )Nr   r   )r   r   r   r1   _create_cuda_graphr   )r   r-   r.   outputsr   r   r   forward,   s   zDSClipEncoder.forwardc                 O   s
  t j }|t j  t j| tdD ]
}| j|i |}qW d    n1 s-w   Y  t j | t 	 | j
| j< || j| j< || j| j< t | j
| j  | j| j| j i | j| j | j| j< W d    n1 sxw   Y  d| j| j< d S )N   T)r   cudaStreamwait_streamcurrent_streamstreamr(   _forwardr   create_graphr   r   r   r   capture_to_graphr   r   )r   r-   r.   cuda_streamr/   retr   r   r   r2   8   s"   

z DSClipEncoder._create_cuda_graphc                 O   s   | j |i |S r'   )r   )r   r-   r.   r   r   r   r;   L   s   zDSClipEncoder._forward)F)
__name__
__module____qualname__r   r	   r1   r4   r2   r;   __classcell__r   r   r   r   r      s    
r   )r   deepspeed.acceleratorr   features.cuda_graphr   nnModuler   r   r   r   r   <module>   s   