o
    Ni$                     @   sz   d dl Z d dlZd dlmZ d dlZd dlmZ d dlm  mZ	 d dl
mZmZ G dd deZG dd dejeZdS )    N)Optional)BaseTunerLayercheck_adapters_to_mergec                   @   sL   e Zd ZdZdZdejfddZ		dded	efd
dZ	dd Z
dd ZdS )
ShiraLayer)shira_weight)rscalingshira_indices
base_layerc                 K   s|   || _ i | _i | _ti | _i | _|jj| _	d| _
g | _|  }t|tjr/|j|j}}ntd|| _|| _|| _d S )NFz)Only nn.Linear layers supported currently)r
   r   r   nnParameterDictr   r	   weightshapeweight_shape_disable_adaptersmerged_adaptersget_base_layer
isinstanceLinearin_featuresout_featuresNotImplementedErrorkwargs)selfr
   r   r   r    r   K/home/ubuntu/.local/lib/python3.10/site-packages/peft/tuners/shira/layer.py__init__    s   

zShiraLayer.__init__TFinit_weightsinference_modec           
      K   sd  |dkrt d| || j|< d| j|< || j| j  }|| j| j kr4t d| d| j| j  d|r;t|nt|}tj	|
| jjj
| jjjdd| j|< |d urt|dk}	t|	d d|	d	 dgd
tj| j|< | j| 
| jjj| j|< | j| jd	 | j| jd krt d
| d| j | | | j| j|d d S )Nr   z?`r` should be a positive integer value but the value passed is g      ?zThe set rank zP results in more shira params than the total number of params in the base layer z and this is not allowed.T)requires_grad   zFThe SHiRA indices and weights are not the same dimensions for adapter z
 in layer )r   )
ValueErrorr   r   r   r   torchzerosrandnr   	Parametertor
   r   dtypedevicer   wherecat	unsqueezeintr	   r   %_move_adapter_to_device_of_base_layerset_adapteractive_adapters)
r   adapter_namemaskr   r   r   r   num_shira_weightshira_init_weightmask_indicesr   r   r   update_layer6   s:   	
 
zShiraLayer.update_layerc                 C   s   t j| j|  d S N)r   initzeros_r   )r   r0   r   r   r   reset_shira_parametersf   s   z!ShiraLayer.reset_shira_parametersc                 C   s   || j vrd S || j |< d S r6   )r   )r   adapterscaler   r   r   	set_scalei   s   
zShiraLayer.set_scaleN)TF)__name__
__module____qualname__adapter_layer_namesother_param_namesr   Moduler   boolr5   r9   r<   r   r   r   r   r      s    
0r   c                       s   e Zd Z			ddededededd	f
 fd
dZddedeee  dd	fddZ	dddZ
dejfddZdejdejfddZdef fddZ  ZS )r   r   FTr0   r   fan_in_fan_outr   returnNc                    sV   t    tj| |fi | || _| j|  urtd|| _| j||||d d S )Nz)SHiRA does not support nested base layers)r   )	superr   r   rD   r
   r   r!   _active_adapterr5   )r   r
   r1   r0   r   rD   r   r   	__class__r   r   r   r   s   

zLinear.__init__
safe_mergeadapter_namesc                 C   s   t | |}|s	dS |D ]A}|| j v rL|  }|r;|jj }|| |7 }t	|
 s6td| d||j_n|j j| |7  _| j| qdS )a^  
        Merge the active adapter weights into the base weights

        Args:
            safe_merge (`bool`, *optional*):
                If True, the merge operation will be performed in a copy of the original weights and check for NaNs
                before merging the weights. This is useful if you want to check if the merge operation will produce
                NaNs. Defaults to `False`.
            adapter_names (`List[str]`, *optional*):
                The list of adapter names that should be merged. If None, all active adapters will be merged. Defaults
                to `None`.
        Nz1NaNs detected in the merged weights. The adapter z seems to be broken)r   r   keysr   r   datacloneget_delta_weightr"   isfiniteallr!   r   append)r   rJ   rK   active_adapterr
   orig_weightsr   r   r   merge   s$   


zLinear.mergec                 C   sj   | j s
td d S t| jdkr3| j }|| j v r*|  j	 j
| |8  _
t| jdksd S d S )Nz Already unmerged. Nothing to do.r   )mergedwarningswarnlenr   popr   rL   r   r   rM   rO   )r   rS   r   r   r   unmerge   s   

zLinear.unmergec                 C   sD   | j | | j| j| j |< t| j | | j| | j|  | jS )z
        Compute the delta weight for the given adapter.

        Args:
            adapter (str):
                The name of the adapter for which the delta weight should be computed.
        )r	   r&   r   r(   r"   sparse_coo_tensorr   r   )r   r:   r   r   r   rO      s   
zLinear.get_delta_weightxc                 O   s   | j r| jr
|   | j|g|R i |}|S | jr)| j|g|R i |}|S t| jjj}| jD ]}|| j	
 vr>q4|| |7 }q4tj||| jjd}|S )N)bias)disable_adaptersrV   r[   r
   copydeepcopyr   rM   r/   r   rL   rO   Flinearr^   )r   r]   argsr   result
new_weightrS   r   r   r   forward   s   

zLinear.forwardc                    s   t   }d| S )Nzshira.)rF   __repr__)r   reprH   r   r   rh      s   
zLinear.__repr__)r   FT)FN)rE   N)r=   r>   r?   strr,   rC   r   r   listrU   r[   r"   TensorrO   rg   rh   __classcell__r   r   rH   r   r   p   s(    	 
'
r   )r`   rW   typingr   r"   torch.nnr   torch.nn.functional
functionalrb   peft.tuners.tuners_utilsr   r   r   rB   r   r   r   r   r   <module>   s   V