import copy

import torch
from torch.nn import CrossEntropyLoss

from peft.utils.integrations import gather_params_ctx


class CPTEmbedding(torch.nn.Module):
    """
    CPTEmbedding is a custom embedding layer designed for Context-aware Prompt Tuning (CPT) in PEFT. It initializes
    embeddings, applies prompt-specific projections, and computes loss using label masks.
    """

    def __init__(self, config, word_embeddings):
        """
        Initializes the CPTEmbedding module.

        Args:
            config (Namespace):
                Configuration object containing model hyperparameters and CPT-specific settings.
            word_embeddings (torch.nn.Embedding):
                The base word embedding layer used to initialize CPT embeddings.
        """
        super().__init__()
        self.config = copy.deepcopy(config)
        num_virtual_tokens = config.num_virtual_tokens

        # Base embedding for the virtual prompt tokens.
        self.embedding = torch.nn.Embedding(num_virtual_tokens, config.token_dim)

        # Outside of inference mode, initialize the base embedding from the
        # configured vocabulary token ids.
        if not config.inference_mode:
            assert config.num_virtual_tokens == len(config.cpt_token_ids)

            init_token_ids = torch.LongTensor(config.cpt_token_ids).to(word_embeddings.weight.device)
            with gather_params_ctx(word_embeddings.parameters()):
                word_embedding_weights = word_embeddings(init_token_ids).detach().clone()
            word_embedding_weights = word_embedding_weights.to(torch.float32)
            self.embedding.weight = torch.nn.Parameter(word_embedding_weights)

        # The trainable delta adjustment starts at zero.
        self.delta_embedding = torch.nn.Embedding(num_virtual_tokens, config.token_dim)
        self.delta_embedding.weight.data = torch.zeros_like(self.delta_embedding.weight).to(torch.float32)

        self.set_updated_tokens()

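    # Note, inferred from forward() below rather than from any original comment:
    # the base embedding is evaluated under torch.no_grad(), so gradients flow
    # only through delta_embedding, whose per-token norm is capped by
    # get_projection().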
ÿzCPTEmbedding.__init__c                 C   sR   t  ¡  |  |¡}W d  ƒ n1 sw   Y  |  ¡ | jj_|  |¡}|| S )a#  
        Computes the prompt embeddings and applies delta adjustments.

        Args:
            indices (torch.Tensor):
                Indices of the tokens to be embedded.

        Returns:
            torch.Tensor:
                Sum of prompt embeddings and delta embeddings.
        N)r   Úno_gradr   Úget_projectionr   r   r   )r    ÚindicesÚprompt_embeddingsÚdelta_prompt_embeddingsr&   r&   r'   Úforward?   s   
ÿ
zCPTEmbedding.forwardc                    sx   t  | jj¡ ¡ }t  |d¡dk}t  |d¡dk}t  |d¡dk}||B |B ‰ ˆ  dd¡‰ ‡ fdd„}| jj 	|¡ dS )	zq
        Sets up a backward hook to selectively update token gradients based on the CPT token type mask.
        é   é   é   é   éÿÿÿÿc                    s   | ˆ   | j¡ } | S )N)r   r   )Úgrad©Úmaskr&   r'   Úbackward_hook_   s   z6CPTEmbedding.set_updated_tokens.<locals>.backward_hookN)
r   ÚTensorr	   Úcpt_tokens_type_maskÚlongÚ	remainderÚviewr   r   Úregister_hook)r    Útensor_ICL_maskÚmask_input_templateÚ
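    # Mask convention, inferred from the remainder checks above and in
    # get_epsilon()/calculate_loss(): each entry of cpt_tokens_type_mask encodes
    # a token role via value % 4 -- 1: input template, 2: input, 3: output
    # template, 0: label/output tokens. The hook therefore trains template and
    # input deltas while freezing the deltas of label tokens.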
    def get_epsilon(self):
        cpt_tokens_type_mask = self.config.cpt_tokens_type_mask

        MIN_VALUE = 1e-10

        # Scale the configured projection radii by sqrt(token_dim / 2048).
        normalized_format_eps = self.config.opt_projection_format_epsilon * torch.sqrt(
            torch.Tensor([self.config.token_dim / 2048])
        )
        normalized_input_eps = self.config.opt_projection_epsilon * torch.sqrt(
            torch.Tensor([self.config.token_dim / 2048])
        )

        epsilon = torch.ones_like(torch.Tensor(cpt_tokens_type_mask)).to(torch.float32) * MIN_VALUE
        cpt_tokens_type_mask = torch.Tensor(cpt_tokens_type_mask).long()

        epsilon[(cpt_tokens_type_mask > 0) & (torch.remainder(cpt_tokens_type_mask, 4) == 1)] = normalized_format_eps
        epsilon[(cpt_tokens_type_mask > 0) & (torch.remainder(cpt_tokens_type_mask, 4) == 3)] = normalized_format_eps
        epsilon[(cpt_tokens_type_mask > 0) & (torch.remainder(cpt_tokens_type_mask, 4) == 2)] = normalized_input_eps

        return epsilon

    def get_projection(self):
        """
        Applies epsilon-based projection to the delta embeddings to control their norm.
        """
        with torch.no_grad():
            new_embeddings_weights = self.delta_embedding.weight.clone().to(self.delta_embedding.weight.device)
            token_norm = torch.norm(new_embeddings_weights, p=2, dim=1)
            projection_mask = token_norm > 0
            if torch.any(projection_mask):
                epsilon = self.get_epsilon().to(self.delta_embedding.weight.device)
                new_embeddings_weights[projection_mask] *= (
                    epsilon[projection_mask] / token_norm[projection_mask].clamp(min=epsilon[projection_mask])
                ).view(-1, 1)
            return new_embeddings_weights

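    # The rescaling in get_projection() is a Euclidean ball projection: a delta
    # row with norm n and budget eps is multiplied by eps / max(n, eps), so rows
    # with n <= eps are left unchanged and larger rows are pulled back onto the
    # eps-sphere.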
þ$özCPTEmbedding.get_projectionc                 C   s€  | j j}| j }| |¡}|ddd…dd…f  ¡ }|ddd…f  ¡ }|ddd…f  ¡ }| ¡  ¡ dk ¡ }	|j\}
}}tddd}|| 	|
| |¡| 	|
| ¡ƒ}| 	|
|¡}|	 ¡  ¡  
¡ }t|
ƒD ]K}|| dk|| d	 dk@ }|| |  ¡ }t || ¡j|d
 
¡ }d}t |dg¡D ]}|||| |k< ||j9 }q“|jdkr°||  |9  < qe||	 ||	   ¡ }|| _| S )al  
        Computes the loss for CPT models with optional exponential decay.

        Args:
            base_model_output (ModelOutput):
                Output from the base model containing logits.
            labels (torch.Tensor):
                Ground-truth labels for the input tokens.
            cpt_type_mask (torch.Tensor):
                Token type mask used for filtering valid loss terms.
            config (Namespace):
                Configuration object containing loss-related hyperparameters.

        Returns:
            ModelOutput:
                The base model output with computed loss.
        .Nr2   r/   iœÿÿÿÚnone)Ú	reductionÚignore_indexr   r.   )r   Údecay)Úlogitsr   r   Ú
contiguousr   r   ÚboolÚshaper   r;   ÚfloatÚrangeÚuniquer   rD   ÚflipÚopt_loss_decay_factorÚopt_weighted_loss_typeÚmeanÚloss)Úbase_model_outputÚlabelsÚcpt_type_maskr	   r   Ú	lm_logitsÚshift_logitsÚshift_labelsÚshift_cpt_type_maskÚshift_labels_boolÚ
batch_sizeÚ
seq_lengthÚ
vocab_sizeÚloss_fctrb   Úshift_labels_weightsÚiÚ
idx_labelsÚ
labels_idsÚexponential_decayÚdecay_valueÚlabel_mask_idxr&   r&   r'   Úcalculate_loss   s8   
ÿ
€zCPTEmbedding.calculate_loss)Ú__name__Ú
__module__Ú__qualname__Ú__doc__r   r-   r   rI   r)   Ústaticmethodrv   Ú__classcell__r&   r&   r$   r'   r      s    "r   )	r   r   Útorch.nnr   Úpeft.utils.integrationsr   r   ÚModuler   r&   r&   r&   r'   Ú<module>   s
   
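

# ---------------------------------------------------------------------------
# Minimal usage sketch (illustrative only, not part of the original module).
# It builds a stand-in config with just the attributes CPTEmbedding reads
# above; in real use the config is a peft CPTConfig and this layer is driven
# by a PeftModel rather than called directly.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    from types import SimpleNamespace

    base_embeddings = torch.nn.Embedding(num_embeddings=32, embedding_dim=8)

    # Hypothetical 4-token prompt: input template, input, output template, label.
    demo_config = SimpleNamespace(
        num_virtual_tokens=4,
        token_dim=8,
        cpt_token_ids=[5, 6, 7, 8],
        cpt_tokens_type_mask=[1, 2, 3, 4],
        inference_mode=False,
        opt_projection_epsilon=0.1,
        opt_projection_format_epsilon=0.1,
    )

    cpt = CPTEmbedding(demo_config, base_embeddings)
    out = cpt(torch.arange(demo_config.num_virtual_tokens).unsqueeze(0))
    print(out.shape)  # torch.Size([1, 4, 8])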