o
    8wÖiª  ã                   @   sf   d dl Z d dlZd dlmZ dejdejfdd„Zdd„ Zdejdejfd	d
„Zde	de
fdd„ZdS )é    NÚxÚreturnc                 C   sH   | dd| j d d …f }| d| j d d d…f }tj| |fddS )aÈ  
    Rotate half the hidden dims of the input.

    This function was duplicated verbatim from:
    https://github.com/huggingface/transformers/blob/1de8ce9ee1191ba761a593ac15d9ccbf5851bfc5/src/transformers/models/llama/modeling_llama.py#L126

    This was done to eliminate the Llama transformers implementation as a dependency of this file. Note that some other
    functions were also adapted from the transformers implementation but were modified.
    .Néÿÿÿÿé   )Údim)ÚshapeÚtorchÚcat)r   Úx1Úx2© r   ú^/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/peft/tuners/adaption_prompt/utils.pyÚllama_rotate_half   s   
r   c                 C   s¸   t |jƒdkrB|dd…ddd…df }| d|jd d|jd ¡}t | |jd ddd¡d|¡}t | |jd ddd¡d|¡}n||  d¡}||  d¡}| | t| ƒ|  }|S )a®  
    Apply rotary position embedding to query states in the Llama model.

    This function was adapted from:
    https://github.com/huggingface/transformers/blob/1de8ce9ee1191ba761a593ac15d9ccbf5851bfc5/src/transformers/models/llama/modeling_llama.py#L133

    It was modified to remove unnecessary processing of key states. The method is compatible with transformers <=
    4.34.2 and also with the latest version (>=4.35).
    é   Né   é   r   r   )Úlenr   Úrepeatr   ÚgatherÚ	unsqueezer   )ÚqÚcosÚsinÚposition_idsÚgather_indicesÚq_embedr   r   r   Úllama_apply_rotary_pos_emb#   s    "r   Úmodelc                 K   s  |  d¡}|  d¡}|  d¡}| ¡ \}}}t| dƒr| j}n| jj}|  |¡ |||| j¡ 	dd¡}	| j
j| j
j }
|  |¡ ||||
 | j¡ 	dd¡}|}|durjt|tƒrb||d jd	 7 }n|| | j¡7 }d
|v rˆ|d
 \}}| d¡}| d¡}|	| t|	ƒ|  S dt | jj¡jvr¢| j||d\}}t|	|||ƒS d}|du rÏ|du r¸tj||| |jd}n| || j¡}tj||| |jd}| d¡}d|i}dt | jj¡jv rã|| |d< | j|fi |¤Ž\}}t|jƒdkrÿ| d¡}| d¡}|	| t|	ƒ|  S )a  
    Compute query states for Llama models specifically. They need to be recomputed as the forward() method of the
    original LlamaModel in the transformers library does not return them. See the related discussion in the PR:
    https://github.com/huggingface/peft/pull/268
    Úhidden_statesr   Úpast_key_valueÚ	num_headsr   r   Nr   éþÿÿÿÚposition_embeddings)Úseq_len)Údevicer#   r   ) ÚgetÚsizeÚhasattrr    ÚconfigÚnum_attention_headsÚq_projÚviewÚhead_dimÚ	transposeÚk_projÚin_featuresÚout_featuresÚv_projÚ
isinstanceÚtupler   Úget_seq_lengthÚ	layer_idxr   r   ÚinspectÚ	signatureÚ
rotary_embÚforwardÚ
parametersr   r   Úaranger$   Úget_usable_lengthr   )r   Úkwargsr   r   r   ÚbszÚq_lenÚ_r    Úquery_statesÚfactorÚvalue_statesr#   r   r   Úpast_seen_tokensÚnew_cache_positionsÚrotary_emb_kwargsr   r   r   Úllama_compute_query_states<   sL   



 $





rG   Úparamsc                 C   s   |   d¡d  d¡S )zEReturn True if module is trainable under adaption prompt fine-tuning.Ú.r   Ú	adaption_)ÚsplitÚ
startswith)rH   r   r   r   Úis_adaption_prompt_trainable‚   s   rM   )r6   r   Útorch.nnÚnnÚTensorr   r   ÚModulerG   ÚstrÚboolrM   r   r   r   r   Ú<module>   s   F