o
    5ti&                  
   @   s   d dl mZ d dlmZ d dlmZ d dlmZmZm	Z	m
Z
mZ d dlZd dlmZ d dlmZmZ d dlmZ d d	lmZ d d
lmZ edeeef deeef de	dedf fddZedG dd deZdS )    )contextmanager)partial)Path)AnyCallable	GeneratorOptionalUnionN)	PeftModel)Tensornn)PreTrainedModel)register_model)HFLMmodelhook_to_steerreturnc              	   #   s    dt f fdd}g }t  }| j D ]\}}||v r+|||}|| qt|t|kr;td| zdV  W |D ]}|	  qBdS |D ]}|	  qMw )z
    Context manager that temporarily hooks models and steers them.

    Args:
        model: The transformer model to hook
        hook_to_steer: Dictionary mapping hookpoints to steering functions

    Yields:
        None
    	hookpointc                    s"   dt jdtdtf fdd}|S )Nmoduleinputoutputc                    s>   t |tr  |d g|dd  R }|S   |}|S )Nr      )
isinstancetuple)r   r   r   )r   r    M/home/ubuntu/.local/lib/python3.10/site-packages/lm_eval/models/hf_steered.pyhook_fn   s
   
 z+steer.<locals>.create_hook.<locals>.hook_fn)r   Moduler   r   )r   r   r   )r   r   create_hook   s   	zsteer.<locals>.create_hookz&Not all hookpoints could be resolved: N)
strlistkeys
base_modelnamed_modulesregister_forward_hookappendlen
ValueErrorremove)r   r   r   handles
hookpointsnamer   handler   r   r   steer   s$   


r.   steeredc                       s   e Zd ZU eeef ed< 	ddededee f fddZe	defdd	Z
e	d
ededee fddZe		dd
edededee dee f
ddZdd Z fddZ fddZ  ZS )SteeredModelr   N
pretrained
steer_pathdevicec                    s|  t  jd||d| |ds|dr3t|d}tj|dd}W d   n1 s-w   Y  n|dr>| |}ntd	| i }| D ]m\}}	|	d
 }
|	d 	| j
	| jj}t|	dd}|	dd}|	dd}|dur|	| j
	| jj}|
dkr|du sJ dt| j|| |d||< qK|
dkrt| j|t| |||d||< qKtd|
 || _dS )a  
        HFLM with a steered forward pass.

        To load steering vectors directly, provide the path to a pytorch (.pt) file with content in the following format:

        {
            hookpoint: {
                "steering_vector": <torch.Tensor>,
                "steering_coefficient": <float>,
                "action": <Literal["add", "clamp"]>,
                "bias": <torch.Tensor | None>,
                "head_index": <int | None>,
            },
            ...
        }

        To derive steering vectors from a sparse model loadable with sparsify or sae_lens,
        provide the path to a CSV file with the following columns (example rows are provided below):

        loader,action,sparse_model,hookpoint,feature_index,steering_coefficient,head_index,sae_id,description,
        sparsify,add,EleutherAI/sae-pythia-70m-32k,layers.3,30,10.0,,,,
        sae_lens,add,gemma-scope-2b-pt-res-canonical,layers.20,12082,240.0,,layer_20/width_16k/canonical,increase dogs,
        )r1   r3   z.ptz.pthrbT)weights_onlyNz.csvzUnknown steer file type: actionsteering_vectorsteering_coefficientg      ?
head_indexbiasaddz+Bias is not supported for the `add` action.)vectorr9   clamp)	directionvaluer:   r9   zUnknown hook type: r   )super__init__endswithopentorchloadderive_steer_configr(   itemstor3   r   dtypefloatgetr   r;   r=   normr   )selfr1   r2   r3   kwargsfsteer_configr   r   
steer_infor6   r7   r8   r9   r:   	__class__r   r   rA   @   sL   

zSteeredModel.__init__c                    sv  ddl }||}i }t|d dkrddlm} t|d dkr5ddlm  i dtd	tf fd
d}| D ]\}}|	dd}	|d }
|d }t
|d }t|d }|	dd}|dkrt|
}| rp||| n||
|}|jdus}J |j| }|j}n)|dkr||
|d	 d}|j| }|j}|dks||r|jj}ntd| |	|||d||< q9|S )zVDerive a dictionary of steering vectors from sparse model(/s) specified in a CSV file.r   Nloadersparsify)SparseCodersae_lens)SAEsae_releasesae_idc                    s,   | |f}|vr  | |d |< | S )Nr   )from_pretrained)rY   rZ   	cache_keyrX   	sae_cacher   r   load_from_sae_lens   s   z<SteeredModel.derive_steer_config.<locals>.load_from_sae_lensr6   r;   sparse_modelr   feature_indexr8   )rY   rZ    zUnknown loader: )r6   r8   r7   r:   )pandasread_csvanyrU   rV   rW   rX   r    iterrowsrK   intrJ   r   existsload_from_diskload_from_hubW_decb_decisnacfg	hook_namer(   )clsr2   pddf
steer_datarV   r_   _rowr6   sparse_namer   ra   r8   rT   	name_pathsparse_coderr7   r:   r   r]   r   rF      sR   



z SteeredModel.derive_steer_configactsr<   r9   c                 C   sP   |dur"|dddd|ddf | |dddd|ddf< |S || }|S )a.  Adds the given vector to the activations.

        Args:
            acts (Tensor): The activations tensor to edit of shape [batch, pos, ..., features]
            vector (Tensor): A vector to add of shape [features]
            head_index (int | None): Optional attention head index to add to
        Nr   )rp   ry   r<   r9   r   r   r   r;      s
   8zSteeredModel.addr>   r?   r:   c           	      C   s   |dur|| }|durD|dddd|ddf }|| j ddd}||| ks+J | }||||   |dddd|ddf< ntj || ddd}||||   }|dur^|| S |S )ac  Clamps the activations to a given value in a specified direction. The direction
        must be a unit vector.

        Args:
            acts (Tensor): The activations tensor to edit of shape [batch, pos, ..., features]
            direction (Tensor): A direction to clamp of shape [features]
            value (float): Value to clamp the direction to
            head_index (int | None): Optional attention head index to clamp
            bias (Tensor | None): Optional bias to add to the activations

        Returns:
            Tensor: The modified activations with the specified direction clamped
        NT)dimkeepdim)sumclonerD   )	rp   ry   r>   r?   r9   r:   xprojclampedr   r   r   r=      s   *zSteeredModel.clampc              	   O   s|   t  0 t| j| j | jj|i |W  d    W  d    S 1 s'w   Y  W d    d S 1 s7w   Y  d S N)rD   no_gradr.   r   r   forwardrM   argsrN   r   r   r   r   	  s   
"zSteeredModel.forwardc                    D   t | j| j t j|i |W  d    S 1 sw   Y  d S r   )r.   r   r   r@   _model_callr   rR   r   r   r        $zSteeredModel._model_callc                    r   r   )r.   r   r   r@   _model_generater   rR   r   r   r     r   zSteeredModel._model_generater   )__name__
__module____qualname__dictr    r   __annotations__r   rA   classmethodrF   r   rg   r;   rJ   r=   r   r   r   __classcell__r   r   rR   r   r0   <   sH   
 L>(r0   )
contextlibr   	functoolsr   pathlibr   typingr   r   r   r   r	   rD   peft.peft_modelr
   r   r   transformersr   lm_eval.api.registryr   lm_eval.models.huggingfacer   r   r    r.   r0   r   r   r   r   <module>   s(    

,