o
    Ni(                     @   s   d dl Z d dlmZmZ d dlZd dlmZ d dlm  mZ	 ddl
mZ G dd dejZG dd deZG d	d
 d
eZdS )    N)OptionalUnion   )TRANSFORMERS_MODEL_CONFIGc                       s0   e Zd ZdZejfdedef fddZ  Z	S )_BaseAdaptedAttentionzEBase module, which defines adaption prompts for multiple model types.
model_typeadapter_lenc              	      s   t |tr	tdt   || _|| _|| _t|	 j
}t| jdr)| jj}n| jjj}t| jdr:| jj| _n| jjj| _ttjd||||d | _ttjd||d| _dS )aT  
        Initialize object.

        Args:
            model_type: The transformer model type. This is used to retrieve the right method to
                compute query states.
            adapter_len: The length of the adaption prompt to insert.
            model: The original transformer attention module that is being wrapped.
        z)Unable to stack multiple adaption promptshidden_size	num_headsr   )devicedtypeN)
isinstancer   
ValueErrorsuper__init__r   modelr   next
parametersr   hasattrr	   configr
   num_attention_headsnn	Parametertorchemptynormal_adaption_promptzerosadaption_gate)selfr   r   r   target_dtyper   r	   	__class__ U/home/ubuntu/.local/lib/python3.10/site-packages/peft/tuners/adaption_prompt/layer.pyr      s"   




z_BaseAdaptedAttention.__init__)
__name__
__module____qualname____doc__r   float32strintr   __classcell__r#   r#   r!   r$   r      s    $r   c                       s   e Zd ZdZ fddZ							ddeeej  deeej	  deej d	eej d
eej	 deej dee
 dee
 deeej	eej	 f df fddZ  ZS )AdaptedAttentionGPTzEThis module wraps a GPT2Attention module and injects adaption promptsc                    >   |j jjtjtjfvr|j jjntj}t j||||d d S N)r    )	c_projweightr   r   int8uint8r)   r   r   r   r   r   r   r    r!   r#   r$   r   J      $zAdaptedAttentionGPT.__init__NFhidden_states
layer_pastattention_mask	head_maskencoder_hidden_statesencoder_attention_mask	use_cacheoutput_attentionsreturn.c	              
   K   s  | j d
|||||||d|	}
	 |
d }|
dd  }t| j j}|jd }|jd }|jd }t| j || jj|dd\}}}|d| j	| j
| j j|ddddd}|d| j	| j
| j j|ddddd}t| j j}|| j ||d}|j}t||dd|t| j j }| jtj|dtjd	| }t||dd||d}|| }||}|f| }|S )N)r6   r8   r9   r:   r;   r<   r=   r   r      dim)r6   r:      rA   r   r#   )r   r   r   k_proj_layershapegetattrr   splitviewr   r
   head_dimrepeat	transposecompute_query_statesr   r   matmultomathsqrtr   Fsoftmaxr)   reshape)r   r6   r7   r8   r9   r:   r;   r<   r=   kwargsattn_outputsattn_outputadd_outputsc_attn_layerbszq_len	embed_dim_keyvalue	adapter_k	adapter_vrM   query_statesprevious_dtypescoresadapter_outputhidden_stateoutputr#   r#   r$   forwardP   sJ   



"**

zAdaptedAttentionGPT.forward)NNNNNFF)r%   r&   r'   r(   r   r   tupler   FloatTensorTensorboolr   rh   r,   r#   r#   r!   r$   r-   G   s:    		r-   c                       s(   e Zd ZdZ fddZdd Z  ZS )AdaptedAttentionzGThis module wraps a LLamaAttention module and injects adaption prompts.c                    r.   r/   )	q_projr1   r   r   r2   r3   r)   r   r   r4   r!   r#   r$   r      r5   zAdaptedAttention.__init__c                 K   s,  | ddr
td| jdi |^}}|jd }|jd }|jd }t| j j}t| j j}t| j j}	| jj	j
| jj	j }
||krUt| j|| jj|dd\}}}nt| j|| j}t| j|| j}t| jdrr| jj}n| jjj}|d| j||
 | jj|ddddd}|d| j||
 | jj|ddddd}tj||
dd	}tj||
dd	}t| j j}|dd
| ji|}|j}t||dd|t| jj }| j t!j"|dtj#d| }t||dd$||d}|	durt| j|	|}|| }||}|g|R S )aK  
        Forward pass for the adapter which wraps the original LlamaAttention module.

        "Official" paper implementation:
        https://github.com/ZrrSkywalker/LLaMA-Adapter/blob/41c3546fe1997ab8a65809dc8d8f9252b19d9faf/llama/model.py#L141

        Args:
            kwargs: See the original LlamaAttention module.
        output_attentionFz,output_attention is not currently supported.r   r   r?   r@   r
   )repeatsrA   r   rB   rC   rD   Nr#   )%getNotImplementedErrorr   rF   r   r   rE   v_proj_layero_proj_layerk_projin_featuresout_featuresrG   r   rH   r   r
   r   r   rI   r   rJ   rK   rL   r   repeat_interleaverM   r   rN   rO   rP   rQ   r   rR   rS   r)   rT   )r   rU   rg   r]   rZ   r[   r\   rE   rs   rt   factorr^   r_   r
   r`   ra   rM   rb   rc   rd   re   r#   r#   r$   rh      sR   



$



zAdaptedAttention.forward)r%   r&   r'   r(   r   rh   r,   r#   r#   r!   r$   rm      s    rm   )rP   typingr   r   r   torch.nnr   torch.nn.functional
functionalrR   r   r   Moduler   r-   rm   r#   r#   r#   r$   <module>   s   .P