o
    Ni.                     @  s   d dl mZ d dlZd dlmZ d dlZd dlmZ d dl	m
Z
 d dlm
  mZ d dlmZ d dlmZmZmZ d dlmZmZ G dd de
jeZdS )	    )annotationsN)Optional)
BufferDict)BaseTunerLayer_get_in_out_featurescheck_adapters_to_merge)check_deepspeed_zero3_enabledgather_params_ctxc                      s   e Zd ZdZdZ	d-d. fddZedd Zd/ddZdd Z	dd Z
d0d1d"d#Zd2d$d%Zd3d&d'Zd4d)d*Zd4d+d,Z  ZS )5TrainableTokensLayer)trainable_tokens_delta)token_indicestrainable_tokens_originalN
base_layer	nn.Moduleadapter_namestrr   	list[int]tied_adapterOptional[TrainableTokensLayer]returnNonec                   s   t    || _|| _|| _|r|gng | _| js(ti | _	t
i | _i | _n| jj	| _	| jj| _| jj| _g | _t|  \}}|| _|| _d S N)super__init__r   _active_adapterkwargs_tied_adapterr   nnParameterDictr   r   r   r   merged_adaptersr   get_base_layerin_featuresout_features)selfr   r   r   r   r   r!   r"   	__class__ V/home/ubuntu/.local/lib/python3.10/site-packages/peft/tuners/trainable_tokens/layer.pyr   %   s    





zTrainableTokensLayer.__init__c                 C  s   | j r| j d S d S )Nr   )r   )r#   r&   r&   r'   r   L   s   
z!TrainableTokensLayer.tied_adapterweighttorch.Tensorrows	embed_dimintc                 C  s   d}t dt j }t|gdd* t r(t r(t |kr(|| 	 }nt j
t||f|j|d}W d   n1 s?w   Y  tj||d |S )zDeepSpeed zero3 specific code to initialize trainable tokens.

        Ensures that only the necessary weights are collected to a single rank, initialized, and then shared with all
        ranks.
        r   cudaN)modifier_rankdtypedevice)src)torchr1   r-   current_devicer	   distis_availableis_initializedget_rankcloneemptylenr0   	broadcast)r#   r(   r*   r+   src_rankr1   token_weightsr&   r&   r'   _collect_token_weightsR   s   
z+TrainableTokensLayer._collect_token_weightsc                 K  s   | dd rd S |d | j|< | dd}|  j}t|  dr'|  j}n|  j}|rEt r<| || j| |}n| j| j|  }nt	j
t| j| |f|j|jd}tj| dd| j|< | | j|< | | d S )Nr   r   init_weightsTembedding_dimr/   )requires_grad)getr   r    r(   hasattrrA   r!   r   r?   r3   randnr;   r0   r1   r   	Parameterr9   r   r   %_move_adapter_to_device_of_base_layer)r#   r   r   r@   r(   r+   valuesr&   r&   r'   update_layerk   s(   
	
z!TrainableTokensLayer.update_layerc                 C  sd   t |dkrdS t }t|| j D ]}t| j| }t ||r*td| d|| qdS )a  Raises an error if the token indices of the given adapter names are overlapping.
        This is currently not supported and can lead to undefined behavior of the model if no specific merging between
        the overlapping indices' values is applied.
           NzToken indices of adapter zy are already defined and would result in undefined merging behavior. Only disjunct token indices are currently supported.)r;   setr   r   intersection
ValueErrorupdate)r#   adapter_namesindicesr   	index_setr&   r&   r'   _check_overlapping_tokens   s   
z.TrainableTokensLayer._check_overlapping_tokensF
safe_mergeboolrO   Optional[list[str]]c                 C  s   t | |}|s	d S | | | jjj}|D ]/}t| j| |j	}| j
| |}|jd||d}|rDt| sDtd| dq|| jj_| j| d S )Nr   dimindexsourcez1NaNs detected in the merged weights. The adapter z seems to be broken)r   rR   r   r(   datar3   tensorr   tor1   r   
index_copyisfiniteallrM   r   extend)r#   rS   rO   mergedr   rX   deltasr&   r&   r'   merge   s   



zTrainableTokensLayer.mergec                 C  s   | j s
td d S t| jdkrB| j }t| j| 	| j
jj}| j| 	| j
j}| j
jjjd||d t| jdksd S d S )Nz Already unmerged. Nothing to do.r   rV   )ra   warningswarnr;   r   popr3   r[   r   r\   r   r(   r1   r   rZ   index_copy_)r#   r   rX   	originalsr&   r&   r'   unmerge   s   

zTrainableTokensLayer.unmergec                 C  sN   | j j}|D ]}t| j| |j}| j| |}|jd||d}q|S )Nr   rV   )	r   r(   r3   r[   r   r\   r1   r   r]   )r#   active_adaptersWr   rX   rb   r&   r&   r'   get_merged_weights   s   z'TrainableTokensLayer.get_merged_weightsxc              	   O  s   | j s|s| jr|   | j|g|R i |}|S | jr+| j|g|R i |}|S | | | |}t| jtjj	ret
j||| jj| jj| jj| jj| jjd}|  }|d urc|||j }|S t| jtjjrvt
j||d}|S td)N)inputr(   padding_idxmax_norm	norm_typescale_grad_by_freqsparse)rn   r(   zZTrainableTokensLayer wraps an unknown layer type, maybe you are targeting the wrong layer?)disable_adaptersra   ri   r   rR   rl   
isinstancer3   r   	EmbeddingF	embeddingro   rp   rq   rr   rs   _get_embed_scaler\   r0   LinearlinearrM   )r#   rm   rj   argsr   resultrk   embed_scaler&   r&   r'   forward_adapters   sB   
(&

	z%TrainableTokensLayer.forward_adaptersc                 O  s   | j || jg|R i |S r   )r   rj   )r#   rm   r|   r   r&   r&   r'   forward  s   zTrainableTokensLayer.forwardr   )
r   r   r   r   r   r   r   r   r   r   )r(   r)   r*   r)   r+   r,   r   r)   )FN)rS   rT   rO   rU   r   r   )r   r   )r   r)   )rm   r)   r   r)   )__name__
__module____qualname__adapter_layer_namesother_param_namesr   propertyr   r?   rI   rR   rc   ri   rl   r   r   __classcell__r&   r&   r$   r'   r
      s    '

)


.r
   )
__future__r   rd   typingr   r3   torch.distributeddistributedr5   torch.nnr   torch.nn.functional
functionalrw   peft.tuners._buffer_dictr   peft.tuners.tuners_utilsr   r   r   peft.utils.integrationsr   r	   Moduler
   r&   r&   r&   r'   <module>   s   