o
    Ni                     @  s   d dl mZ d dlZd dlmZmZmZ d dlZd dlm	Z	 d dl
m	  mZ d dlmZmZ ddlmZ G dd de	jZG d	d
 d
e	jZG dd deZG dd de	jeZG dd de	jeZdddZdS )    )annotationsN)AnyOptionalUnion)BaseTunerLayercheck_adapters_to_merge   )	OFTConfigc                      s*   e Zd ZdZd fdd	Zdd Z  ZS )MultiplicativeDropoutLayerz>
    Implements the multiplicative dropout layer for OFT.
            c                   s   t    || _dS )z
        Initializes the multiplicative dropout layer.

        Parameters:
        p (float): The probability of dropping out a block. Defaults to 0.0.
        N)super__init__p)selfr   	__class__ I/home/ubuntu/.local/lib/python3.10/site-packages/peft/tuners/oft/layer.pyr   !   s   

z#MultiplicativeDropoutLayer.__init__c           	      C  s   | j rc| jdkrc|jd |jd krtd|j\}}}|dkr"|S t| j| }|| }ttj||jdtj	||jdg}|t
| |dd}tj||jd|dd}d| | ||  }|S )aI  
        Applies multiplicative dropout to the input tensor.

        Parameters:
        x (Tensor): The input tensor of shape (D, H, H), where `D` represents
                    the number of OFT blocks, and `H` is the size of the square blocks along the last two dimensions,
                    the block size in OFT.
        r   z4The last two dimensions of input should be the same!r   device)trainingr   shape
ValueErrorinttorchcatonesr   zerosrandpermvieweyerepeat)	r   xDH_num_to_replace	num_zerosmask
eye_matrixr   r   r   forward+   s   	&z"MultiplicativeDropoutLayer.forward)r   )__name__
__module____qualname____doc__r   r,   __classcell__r   r   r   r   r
      s    
r
   c                      s|   e Zd Z						d% fdd	Zdd	 Zd
d Z	d&d'ddZd(ddZd)ddZdd Z	dd  Z
d!d" Zd#d$ Z  ZS )*OFTRotationModuleFiUMu?r   r   T   c                   s   t    || _|| _|| _|| _tt	||| _
|| _|| _|| _|| _|	| _|
| _t||d\}}| jd|dd | jd|dd d S )Nr   rowsF)
persistentcols)r   r   r
n_elements
block_sizein_featuresnn	Parameterr   emptyweightcoftepsblock_sharekernel_sizeuse_cayley_neumannnum_cayley_neumann_termstriu_indicesregister_buffer)r   r9   r:   r;   r<   rA   rB   rC   rD   rE   rF   r6   r8   r   r   r   r   I   s   
zOFTRotationModule.__init__c                 C  sL   |j d }tj||||j|jd}||d d | j| jf< ||dd }|S )Nr   r   dtyper   r   )r   r   r   r   rJ   r6   r8   	transpose)r   vecr;   
batch_sizematrixr   r   r   _pytorch_skew_symmetrich   s
   
z)OFTRotationModule._pytorch_skew_symmetricc                 C  s$   |j d }|d d | j| jf }|S )Nr   )r   r6   r8   )r   rN   r;   rM   rL   r   r   r   _pytorch_skew_symmetric_invp   s   
z-OFTRotationModule._pytorch_skew_symmetric_invQtorch.Tensorr;   r   rE   boolnum_neumann_termsreturnc                 C  s  |j \}}|j}| ||}|r`tj||j|jd|dd}	|dkr_|	j|dd |dkr_t||}
|	j|
dd |
}t	d|d D ]}t||}|	j|dd qDt||}|	| n&tj|j d |jd
d	||j d |j d }tjj|| || d
d}	|	|S )z
        Perform the Cayley parametrization on a batch of skew-symmetric matrices.

        Args:
            data: A batch of skew-symmetric matrices of shape (b, r, c).
        rI   r   g       @)alpha      r   r   r   F)left)r   rJ   rO   r   r"   r   r#   add_bmmrange	unsqueezeexpandlinalgsolveto)r   rQ   r;   rE   rT   br'   previous_dtypeQ_skewR	Q_squaredQ_powerid_matr   r   r   _cayley_batchw   s.   



zOFTRotationModule._cayley_batchh㈵>c           	      C  s   |  || j}|d tt|jd  }tj|d|df|j|j	d
d|}|| }tj|| ddd}||k }t||||||   }| || jS )Nr   r   rI   )r   rW   T)dimkeepdim)rO   r;   r   sqrttensorr   r   sizer   rJ   r]   	expand_asnormrS   whererP   )	r   rQ   rB   oft_RIdiff	norm_diffr*   outr   r   r   _project_batch   s   "z OFTRotationModule._project_batchrs   rankc                   sJ    j d dkr fddt|D }n fddt|D }tj| }|S )Nr   r   c                   s   g | ]} d  qS ))r   .r   .0irs   r   r   
<listcomp>   s    z5OFTRotationModule._block_diagonal.<locals>.<listcomp>c                   s   g | ]} |d f qS ).r   rz   r}   r   r   r~      s    )r   r\   r   
block_diag)r   rs   ry   blocksAr   r}   r   _block_diagonal   s
   
z!OFTRotationModule._block_diagonalc                 C  s   |j \}}}}t| jtr| j| j}}n| j\}}d }}	d }
}|d|
  | | d }|d|  | |	 d }|d||d||	}|dddddd }||| | d}|S )z
        Unfold with stride=1, padding=0 to preserve spatial dimensions. Only use kernel_size from base layer to define
        patch size.
        r   r   rW   rX      r5   r   )r   
isinstancerD   r   unfoldpermute
contiguousr!   )r   r$   rM   in_channels	in_heightin_widthkernel_heightkernel_widthstride_hstride_wpad_hpad_w
out_height	out_width
x_unfoldedr   r   r   _unfold   s   
zOFTRotationModule._unfoldc                 C  s   |\}}}}t | jtr| j| j}}n| j\}}|| d }	|| d }
|||	|
|||}|dddddd }tj|||| | |	|
 ||f||fdd}|S )	z;
        Fold back to preserve spatial dimensions.
        r   r   rX   rW   r   r5   )r   r   )output_sizerD   stride)r   rD   r   r!   r   r   Ffold)r   r   
orig_shaperM   r   r   r   r   r   r   r   
x_reshapedx_foldedr   r   r   _fold   s   
zOFTRotationModule._foldc                 C  s@  |j }|| jj kr|| jj }|j}| jr7t  | j| j| j| j	d W d    n1 s2w   Y  | 
| j| j| j| j}t|dkrN| |}|j}| jrZ| j| j n| j}|jd d }|jg ||| jR  }| jr||dd}td||}	ntd||}	|	j| }
t|dkr| |
|}
|
|S )NrB   r   r   r   z...rk,rkc->...rc)rJ   r@   ra   r   rA   r   no_gradcopy_rx   rB   ri   r;   rE   rF   lenr   rC   r<   r9   reshaper#   einsumr   )r   r$   required_dtyper   orth_rotatefolded_shapery   
batch_dimsr   x_rotated_reshaped	x_rotatedr   r   r   r,      s2   



zOFTRotationModule.forwardc                 C  s   | j }| jr(t  | j|| jd}| j | W d   n1 s#w   Y  | || j| j	| j
}| js9| jn| j| j }| ||S )
        Compute the delta weight for the given adapter.

        Args:
            adapter (str):
                The name of the adapter for which the delta weight should be computed.
        r   N)r@   rA   r   r   rx   rB   r   ri   r;   rE   rF   rC   r9   r<   r   )r   r@   r   ry   r   r   r   
get_weight  s   
zOFTRotationModule.get_weight)Fr3   Fr4   Tr5   )Tr5   )
rQ   rR   r;   r   rE   rS   rT   r   rU   rR   )rj   )rs   rR   ry   r   rU   rR   )r-   r.   r/   r   rO   rP   ri   rx   r   r   r   r,   r   r1   r   r   r   r   r2   H   s$    
)
)r2   c                   @  s~   e Zd ZU dZdZded< dZded< d"ddZed#ddZ	dd Z
d$ddZd%d&ddZ	d'd(ddZdd Zd d! ZdS ))OFTLayerz#
    Implements the OFT layer.
    r}   ztuple[str, ...]adapter_layer_names)r9   oft_block_sizeoft_dropoutother_param_names
base_layer	nn.ModulerU   Nonec                 K  s  || _ ti | _i | _i | _i | _ti | _d| _g | _d| _	|| _
|  }t|tjr6|j|j}}nt|tjrD|j|j}}nt|drVt|drV|j|j}}n{t|drht|drh|j|j}}nit|dr{|jjdkr{|j|j}}nVt|d	r|jjd
kr|j|j}}nC|jjdkr|j|j}}n5t|dr|jjdkr|j|j}}n"t|drt|dr|j|j}}nd\}}tdt| dt || _|| _dS )z
        Initializes the OFT layer.

        Note, currently only support linear layer and convolutional layer, with further support for other layers to be
        added soon.

        Parameters:
        base_layer: the pretrained model layer
        FT
infeaturesoutfeatures
input_sizer   	codebooksQuantizedLinearw_bitWQLinear_GEMM
EetqLinearW_q	HQQLinearr<   out_features)NNzUnsupported layer type 'z(' encountered, proceed at your own risk.N)r   r=   
ModuleDictrs   r   r9   r   _disable_adaptersmerged_adapterscast_input_dtype_enabledkwargsget_base_layerr   Linearr<   r   Conv2dr   out_channelshasattrr   r   r   r   r   r-   warningswarntypeUserWarning)r   r   r   r<   r   r   r   r   r   @  sF   

zOFTLayer.__init__set[str]c                 C  s
   h | j S Nr}   )r   r   r   r   _available_adapters{  s   
zOFTLayer._available_adaptersc                 C  s   || j vrd S td d S )NFScaling operation for OFT not supported! Automatically set scale to 1.)scalingr   r   )r   adapterscaler   r   r   	set_scale  s   
zOFTLayer.set_scaler   floatc                 C  s6   |dkrd S | j D ]}|| j vrq	td q	d S )Nr   r   active_adaptersrs   keysr   r   r   r   active_adapterr   r   r   scale_layer  s   
zOFTLayer.scale_layerNc                 C  s*   | j D ]}|| j vrqtd qd S )Nz>Unscaling operation for OFT not supported! Keeping scale to 1.r   r   r   r   r   unscale_layer  s
   
zOFTLayer.unscale_layerFinference_moderS   c                 K  sx  	 |dkrt |d}nt }| jt||i |dkrK|dkrK| j| dks.|| jkrC|}| | j|}t	d| d| d t
| j| }n5|dkr||dkr|| j| dks_|| jkrt|}| | j|}t	d| d| d t
| j| }ntd	||d
  d }t|s|nd
||| j||||	|
d	| j|< | || || j|< || j|< | | | j| j|d dS )ze
        Update the linear layer with trainable OFT weights. Override for other layer types.
        r   r   r   Invalid `oft_block_size` (!)! Adjusted `oft_block_size` to ().Invalid `r` ()! Adjusted `r` to (ZSomething went wrong, please report this error: https://github.com/huggingface/peft/issuesr   rW   )rA   rB   rC   rE   rF   r   N)r
   r=   Identityr   updater   r<   adjust_oft_parametersr   r   r   r   r2   rs   reset_oft_parametersr9   r   %_move_adapter_to_device_of_base_layerset_adapterr   )r   adapter_namer9   r   module_dropoutrA   rB   rC   init_weightsrE   rF   r   r   oft_dropout_layerold_oft_block_sizeold_rr:   r   r   r   update_layer  sN   



zOFTLayer.update_layerc                 C  sf   |du rt jj| j| jddd dS || j v r1|du r*t j| j| j dS td|dS )z+
        Reset the OFT parameters.
        Fr   g?)meanstdNTz$Unknown initialization init_weights=)r=   initnormal_rs   r@   r   zeros_r   )r   r   r   r   r   r   r     s   zOFTLayer.reset_oft_parametersc                 C  s   ||k r|}||kr|| dkr|d7 }||kr|| dksn|S |}|dkr;|| dkr;|d8 }|dkr;|| dks-|| || krE|S |S )zY
        Adjust the OFT parameters to be divisible by the in_features dimension.
        r   r   r   )r   r<   paramshigher_paramslower_paramsr   r   r   r     s   zOFTLayer.adjust_oft_parameters)r   r   rU   r   )rU   r   )r   r   rU   r   r   rU   r   Fr   rS   )r-   r.   r/   r0   r   __annotations__r   r   propertyr   r   r   r   r   r   r   r   r   r   r   r   6  s   
 
;

Sr   c                      sr   e Zd ZdZ											d/d0 fddZd1d2d"d#Zd3d$d%Zd4d'd(Zd5d+d,Zd6 fd-d.Z	  Z
S )7r   zOFT implemented in Linear layer   r   r   Fr3   r5   Tr   strr9   r   r   r   r   rA   rS   rB   rC   rE   rF   fan_in_fan_outr   Union[bool, str]is_target_conv_1d_layerrU   r   c                   sR   t    tj| |fi | || _|| _| j|||||||||	|
d
 || _d S N)r   r   rA   rB   rC   r   rE   rF   )r   r   r   r  _active_adapterr   r  )r   r   r   r9   r   r   rA   rB   rC   rE   rF   r  r   r  r   r   r   r   r     s"   

zLinear.__init__N
safe_mergeadapter_namesOptional[list[str]]c                 C  s  t | |}|s	dS |D ]{}|| jv r|  }|jj}|rV|jj}| |}t|dd}t	||
|j}t|dd}t| sLtd| d| 
||j_n*|jj}| |}t|dd}t	||
|j}t|dd}| 
||j_| j| qdS )ab  
        Merge the active adapter weights into the base weights

        Args:
            safe_merge (`bool`, *optional*):
                If `True`, the merge operation will be performed in a copy of the original weights and check for NaNs
                before merging the weights. This is useful if you want to check if the merge operation will produce
                NaNs. Defaults to `False`.
            adapter_names (`List[str]`, *optional*):
                The list of adapter names that should be merged. If `None`, all active adapters will be merged.
                Defaults to `None`.
        Nr   r   z1NaNs detected in the merged weights. The adapter z seems to be broken)r   r   r   r@   rJ   dataget_delta_weightr   rK   mmra   isfiniteallr   r   r   appendr   r  r  r   r   
orig_dtypeorig_weightsoft_matr   r   r   merge6  s6   




zLinear.mergec                 C  s   | j s
td dS |  }|jj}t| jdkrl| j }|| j	
 v rc| |}|j}|tjkr8|tj}|  jj}t|dd}ttj||||}t|dd}|||j_t| jdksdS dS zW
        This method unmerges all merged adapter layers from the base weights.
        z Already unmerged. Nothing to do.Nr   r   )mergedr   r   r   r@   rJ   r   r   poprs   r   r
  r   float32ra   r	  rK   r  r_   invr   r   r  r   r  rc   r  r   r   r   unmergee  s$   



 zLinear.unmerge!tuple[torch.Tensor, torch.Tensor]c                 C     | j |  S r   rs   r   r   r   r   r   r   r
       	zLinear.get_delta_weightr$   rR   c                 O     |j }| jr| jr|   | j|g|R i |}n>| jr*| j|g|R i |}n.| jD ]}|| j vr7q-| j| }| ||j	j }||}q-| j|
|g|R i |}|
|}|S r   rJ   disable_adaptersr  r  r   r   rs   r   _cast_input_dtyper@   ra   r   r$   argsr   rc   resultr   rs   r   r   r   r,     s    



zLinear.forwardc                      t   }d| S Nzoft.r   __repr__r   repr   r   r   r+       
zLinear.__repr__)r   r   r   Fr3   FFr5   FTF)r   r   r9   r   r   r   r   r   rA   rS   rB   r   rC   rS   rE   rS   rF   r   r  rS   r   r  r  rS   rU   r   FNr  rS   r  r  rU   r   r   rU   r  )r$   rR   rU   rR   rU   r   )r-   r.   r/   r0   r   r  r  r
  r,   r+  r1   r   r   r   r   r     s&    %
/

r   c                      s~   e Zd ZdZ										d6d7 fddZ	d8d9d d!Zd:d;d&d'Zd<d(d)Zd=d+d,Zd>d2d3Z	d? fd4d5Z
  ZS )@r   zOFT implemented in Conv2d layerr   r   Fr   r3   Tr5   r   r   r   r   r9   r   r   r  rS   r   r   rA   rB   rC   r   r  rE   rF   rU   r   c                   sD   t    t| | || _|| _| j|||||||	|
||d
 d S r  )r   r   r   r  r  r   )r   r   r   r9   r   r  r   rA   rB   rC   r   rE   rF   r   r   r   r   r     s    

zConv2d.__init__r   c                 K  s  |dkr
t |d}nt }| jt||i |  }|jd dkr(td| j	|j
d  |j
d  }|dkrb|dkrb|| dksG||kr[|}| ||}td| d| d t|| }n1|dkr|dkr|| dkst||kr|}| ||}td	| d
| d t|| }ntd||d  d }t|s|nd|||||||j
|	|
d
| j|< | || || j|< || j|< | | | j| j|d dS )zE
        Update the conv2d layer with trainable OFT weights.
        r   r   r   r   z1Conv2d with dilation > 1 is not supported by OFT.r   r   r   r   r   r   rW   )rA   rB   rC   rD   rE   rF   r   N)r
   r=   r   r   r   r   r   dilationr   r<   rD   r   r   r   r   r2   rs   r   r9   r   r   r   r   )r   r   r9   r   r   rA   rB   rC   r   rE   rF   r   r   r   r   conv_filter_dimr   r   r:   r   r   r   r     sV   



zConv2d.update_layerNr  r  r  c                 C  s  t | |}|s	dS |D ]}|| j v r|  }|jj}|rm|jj }| |}|	| j
| j|jd  |jd  }t|dd}t|||j}t|dd}|	| j
| j|jd |jd }| ||j_nN| |}|jj }|	| j
| j|jd  |jd  }t|dd}t|||j}t|dd}|	| j
| j|jd |jd }| ||j_| j| qdS )a^  
        Merge the active adapter weights into the base weights

        Args:
            safe_merge (`bool`, *optional*):
                If True, the merge operation will be performed in a copy of the original weights and check for NaNs
                before merging the weights. This is useful if you want to check if the merge operation will produce
                NaNs. Defaults to `False`.
            adapter_names (`List[str]`, *optional*):
                The list of adapter names that should be merged. If None, all active adapters will be merged. Defaults
                to `None`.
        Nr   r   )r   rs   r   r   r@   rJ   r	  cloner
  r!   r   r<   rD   r   rK   r  ra   r   r   r  r  r   r   r   r    sF   


zConv2d.mergec                 C  s4  | j s
td dS |  }|jj}t| jdkr| j }|| j	
 v r| |}|j}|tjkr8|tj}|  jj }|| j| j|  jd  |  jd  }t|dd}ttj||||}t|dd}|| j| j|  jd |  jd }|||j_t| jdksdS dS r  )r  r   r   r   r@   rJ   r   r   r  rs   r   r
  r   r  ra   r	  r5  r!   r   r<   rD   rK   r  r_   r  r  r   r   r   r  S  s8   



  zConv2d.unmerger  c                 C  r  r  r  r  r   r   r   r
  w  r   zConv2d.get_delta_weightr$   rR   r&  r   r   c                 O  r!  r   r"  r%  r   r   r   r,     s    



zConv2d.forwardc                   r(  r)  r*  r,  r   r   r   r+    r.  zConv2d.__repr__)
r   r   Fr   Fr3   FTFr5   )r   r   r   r   r9   r   r   r   r  rS   r   r   rA   rS   rB   r   rC   rS   r   r  rE   rS   rF   r   rU   r   r   r   r/  r0  r   r1  )r$   rR   r&  r   r   r   rU   rR   r2  )r-   r.   r/   r0   r   r   r  r  r
  r,   r+  r1   r   r   r   r   r     s(    0M
9
$
r   targettorch.nn.Moduler   r   
oft_configr	   rU   Optional[torch.nn.Module]c                 K  s   d }t | tr|  }n| }t |tjjr t| |fi |}|S t |tjjr@|d r7td d |d< |_	t| |fi |}|S )Nr  zjfan_in_fan_out is set to True but the target module is `torch.nn.Linear`. Setting fan_in_fan_out to False.F)
r   r   r   r   r=   r   r   r   r   r  )r6  r   r8  r   
new_moduletarget_base_layerr   r   r   dispatch_default  s   


r<  )r6  r7  r   r   r8  r	   rU   r9  )
__future__r   r   typingr   r   r   r   torch.nnr=   torch.nn.functional
functionalr   peft.tuners.tuners_utilsr   r   configr	   Moduler
   r2   r   r   r   r<  r   r   r   r   <module>   s$   , o Y  y