o
    Ti                     @   sX   d dl T ddlmZ ddlZddlmZ ddlmZ G dd	 d	eZ	G d
d deZ
dS )   )*    )DeepSpeedGPTInferenceN)	Parameter   )TransformerPolicyc                       s&   e Zd Z fddZdddZ  ZS )DS_CLIPContainerc                    s   t  jdi | d S )N )super__init__)selfkwargs	__class__r	   [/home/ubuntu/.local/lib/python3.10/site-packages/deepspeed/module_inject/containers/clip.pyr      s   zDS_CLIPContainer.__init__Nc                 C   s4   |d ur|n| j }t|| jd| _| j| jj_| jS )N)mp_group)ds_model_configr   r   modulescale_attentionconfig)r   r   _configr	   r	   r   create_module   s   zDS_CLIPContainer.create_moduleN)__name__
__module____qualname__r   r   __classcell__r	   r	   r   r   r      s    r   c                       sB   e Zd Zd fdd	Zdd ZdddZddd	Zd
d Z  ZS )HFCLIPLayerPolicyFc                    s\   t  j|ddd || _d| _tjd u r,zdd l}|jjj	j
t_W d S    d t_Y d S d S )NT)pre_attn_normr   r   )r
   r   client_modulecuda_graph_supportedr   _orig_layer_classtransformersmodelsclipmodeling_clipCLIPEncoderLayer)r   r   	inferencer"   r   r	   r   r      s   
zHFCLIPLayerPolicy.__init__c                 C   s&   | j jjjjd | j jj| j jjtfS )Nr   )	r   	self_attnq_projweightshape	num_headslayer_norm1epsDEFAULT_INTERMEDIATE_SIZEr   r	   r	   r   get_hidden_heads)   s
   z"HFCLIPLayerPolicy.get_hidden_headsc           
      C   s   | j jjj}| j jjj}| j jjj}| j jjj}| j jjj}| j jjj}ttj	|||fdd|d}ttj	|||fdd|d}	||	| j jj
j| j jj
jfS )Nr   )dim)requires_grad)r   r(   r)   r*   biask_projv_projr   torchcatout_proj)
r   enable_trainingqwqbkwkbvwvbqkvwqkvbr	   r	   r   	attention/   s   

zHFCLIPLayerPolicy.attentionc                 C   s,   | j jjj| j jjj| j jjj| j jjjfS r   )r   mlpfc1r*   r4   fc2)r   r:   r	   r	   r   rD   ?   s
   



zHFCLIPLayerPolicy.mlpc                 C   s$   | j jj| j jj| j jj| j jjfS r   )r   layer_norm2r*   r4   r-   r0   r	   r	   r   	layernormE   s
   zHFCLIPLayerPolicy.layernorm)F)	r   r   r   r   r1   rC   rD   rH   r   r	   r	   r   r   r      s    

r   )base3deepspeed.model_implementations.transformers.ds_gptr   r7   torch.nn.parameterr   policyr   BaseTransformerContainerr   r   r	   r	   r	   r   <module>   s   