o
    8wi*                     @  s   d dl mZ d dlZd dlmZ d dlZd dlmZ d dl	m
Z
 d dlm
  mZ d dlmZ d dlmZmZ d dlmZmZ G dd de
jeZdS )	    )annotationsN)Optional)
BufferDict)BaseTunerLayercheck_adapters_to_merge)check_deepspeed_zero3_enabledgather_params_ctxc                      s   e Zd ZdZdZ	d-d. fddZedd Zd/ddZdd Z	dd Z
d0d1d"d#Zd2d$d%Zd&d' Zd3d)d*Zd3d+d,Z  ZS )4TrainableTokensLayer)trainable_tokens_delta)token_indicestrainable_tokens_originalN
base_layer	nn.Moduleadapter_namestrr   	list[int]tied_adapterOptional[TrainableTokensLayer]returnNonec                   sx   t    || _|| _|| _|r|gng | _| js(ti | _	t
i | _i | _n| jj	| _	| jj| _| jj| _g | _d S N)super__init__r   _active_adapterkwargs_tied_adapterr   nnParameterDictr
   r   r   r   merged_adapters)selfr   r   r   r   r   	__class__ _/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/peft/tuners/trainable_tokens/layer.pyr   %   s   





zTrainableTokensLayer.__init__c                 C  s   | j r| j d S d S )Nr   )r   )r   r"   r"   r#   r   H   s   
z!TrainableTokensLayer.tied_adapterweighttorch.Tensorrows	embed_dimintc                 C  s   d}t dt j }t|gdd" t |kr ||  }nt jt	||f|j
|d}W d   n1 s7w   Y  tj||d |S )zDeepSpeed zero3 specific code to initialize trainable tokens.

        Ensures that only the necessary weights are collected to a single rank, initialized, and then shared with all
        ranks.
        r   cudaN)modifier_rankdtypedevice)src)torchr-   r)   current_devicer   distget_rankcloneemptylenr,   	broadcast)r   r$   r&   r'   src_rankr-   token_weightsr"   r"   r#   _collect_token_weightsN   s   
z+TrainableTokensLayer._collect_token_weightsc                 K  s   | dd rd S |d | j|< | dd}|  j}|  j}|r8t r/| || j| |}n| j| j|  }ntjt	| j| |f|j
|jd}tj| dd| j|< | | j|< | | d S )Nr   r   init_weightsTr+   )requires_grad)getr   get_base_layerr$   embedding_dimr   r9   r/   randnr5   r,   r-   r   	Parameterr3   r
   r   %_move_adapter_to_device_of_base_layer)r   r   r   r:   r$   r'   valuesr"   r"   r#   update_layerg   s$   
	
z!TrainableTokensLayer.update_layerc                 C  sd   t |dkrdS t }t|| j D ]}t| j| }t ||r*td| d|| qdS )a  Raises an error if the token indices of the given adapter names are overlapping.
        This is currently not supported and can lead to undefined behavior of the model if no specific merging between
        the overlapping indices' values is applied.
           NzToken indices of adapter zy are already defined and would result in undefined merging behavior. Only disjunct token indices are currently supported.)r5   setr   r   intersection
ValueErrorupdate)r   adapter_namesindicesr   	index_setr"   r"   r#   _check_overlapping_tokens   s   
z.TrainableTokensLayer._check_overlapping_tokensF
safe_mergeboolrI   Optional[list[str]]c                 C  s   t | |}|s	d S | | | jjj}|D ]/}t| j| |j	}| j
| |}|jd||d}|rDt| sDtd| dq|| jj_| j| d S )Nr   dimindexsourcez1NaNs detected in the merged weights. The adapter z seems to be broken)r   rL   r   r$   datar/   tensorr   tor-   r
   
index_copyisfiniteallrG   r   extend)r   rM   rI   mergedr   rR   deltasr"   r"   r#   merge   s   



zTrainableTokensLayer.mergec                 C  s   | j s
td d S t| jdkrB| j }t| j| 	| j
jj}| j| 	| j
j}| j
jjjd||d t| jdksd S d S )Nz Already unmerged. Nothing to do.r   rP   )r[   warningswarnr5   r   popr/   rU   r   rV   r   r$   r-   r   rT   index_copy_)r   r   rR   	originalsr"   r"   r#   unmerge   s   

zTrainableTokensLayer.unmergec                 C  sN   | j j}|D ]}t| j| |j}| j| |}|jd||d}q|S )Nr   rP   )	r   r$   r/   rU   r   rV   r-   r
   rW   )r   active_adaptersWr   rR   r\   r"   r"   r#   get_merged_weights   s   z'TrainableTokensLayer.get_merged_weightsxc              	   O  s   | j s|s| jr|   | j|g|R i |}|S | jr+| j|g|R i |}|S | | | |}t| jtjj	rUt
j||| jj| jj| jj| jj| jjd}|S t| jtjjrft
j||d}|S td)N)inputr$   padding_idxmax_norm	norm_typescale_grad_by_freqsparse)rh   r$   zZTrainableTokensLayer wraps an unknown layer type, maybe you are targeting the wrong layer?)disable_adaptersr[   rc   r   rL   rf   
isinstancer/   r   	EmbeddingF	embeddingri   rj   rk   rl   rm   LinearlinearrG   )r   rg   rd   argsr   resultre   r"   r"   r#   forward_adapters   s<   
#!

	z%TrainableTokensLayer.forward_adaptersc                 O  s   | j || jg|R i |S r   )rw   rd   )r   rg   ru   r   r"   r"   r#   forward   s   zTrainableTokensLayer.forwardr   )
r   r   r   r   r   r   r   r   r   r   )r$   r%   r&   r%   r'   r(   r   r%   )FN)rM   rN   rI   rO   r   r   )r   r   )rg   r%   r   r%   )__name__
__module____qualname__adapter_layer_namesother_param_namesr   propertyr   r9   rC   rL   r]   rc   rf   rw   rx   __classcell__r"   r"   r    r#   r	      s    #

$


)r	   )
__future__r   r^   typingr   r/   torch.distributeddistributedr1   torch.nnr   torch.nn.functional
functionalrq   peft.tuners._buffer_dictr   peft.tuners.tuners_utilsr   r   peft.utils.integrationsr   r   Moduler	   r"   r"   r"   r#   <module>   s   