o
    8wi                     @   sP   d dl Z d dlZd dlmZ d dlm  mZ ddlmZ G dd dej	Z
dS )    N   )TRANSFORMERS_MODEL_CONFIGc                       s2   e Zd ZdZdedef fddZdd Z  ZS )AdaptedAttentionzGThis module wraps a LLamaAttention module and injects adaption prompts.
model_typeadapter_lenc              	      s   t |trJ t   || _|| _|| _t| j	}|j
jjtjtjfvr,|j
jjntj}t| jdr:| jj}n| jjj}ttjd||||d | _ttjd||d| _dS )aT  
        Initialize object.

        Args:
            model_type: The transformer model type. This is used to retrieve the right method to
                compute query states.
            adapter_len: The length of the adaption prompt to insert.
            model: The original transformer attention module that is being wrapped.
        hidden_sizer   )devicedtypeN)
isinstancer   super__init__r   modelr   next
parametersr   q_projweightr	   torchint8uint8float32hasattrr   confignn	Parameteremptynormal_adaption_promptzerosadaption_gate)selfr   r   r   r   target_dtyper   	__class__ ^/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/peft/tuners/adaption_prompt/layer.pyr      s   

$

zAdaptedAttention.__init__c                 K   s,  | ddr
td| jdi |^}}|jd }|jd }|jd }t| j j}t| j j}t| j j}	| jj	j
| jj	j }
||krUt| j|| jj|dd\}}}nt| j|| j}t| j|| j}t| jdrr| jj}n| jjj}|d| j||
 | jj|ddddd}|d| j||
 | jj|ddddd}tj||
dd	}tj||
dd	}t| j j}|dd
| ji|}|j}t||dd|t| jj }| j t!j"|dtj#d| }t||dd$||d}|	durt| j|	|}|| }||}|g|R S )aK  
        Forward pass for the adapter which wraps the original LlamaAttention module.

        "Official" paper implementation:
        https://github.com/ZrrSkywalker/LLaMA-Adapter/blob/41c3546fe1997ab8a65809dc8d8f9252b19d9faf/llama/model.py#L141

        Args:
            kwargs: See the original LlamaAttention module.
        output_attentionFz,output_attention is not currently supported.r   r      )dim	num_heads)repeatsr'   r      )r'   r	   Nr#   )%getNotImplementedErrorr   shaper   r   k_proj_layerv_proj_layero_proj_layerk_projin_featuresout_featuresgetattrr   splitr   r(   r   num_attention_headsviewr   head_dimrepeat	transposer   repeat_interleavecompute_query_statesr	   matmultomathsqrtr   Fsoftmaxr   reshape)r   kwargsoutput_bszq_len	embed_dimr/   r0   r1   factorkeyvaluer(   	adapter_k	adapter_vr=   query_statesprevious_dtypescoresadapter_outputr#   r#   r$   forward>   sR   



$



zAdaptedAttention.forward)	__name__
__module____qualname____doc__strintr   rT   __classcell__r#   r#   r!   r$   r      s    #r   )r@   r   torch.nnr   torch.nn.functional
functionalrB   r   r   Moduler   r#   r#   r#   r$   <module>   s   