o
    NÆÏiC  ã                   @  sŽ   d dl mZ d dlZd dlZd dlmZmZmZ d dlZd dl	m
Z
 d dlm
  mZ d dlmZmZ G dd„ deƒZG dd„ de
jeƒZdS )	é    )ÚannotationsN)ÚAnyÚOptionalÚUnion)ÚBaseTunerLayerÚcheck_adapters_to_mergec                   @  sj   e Zd ZdZdZd$dd„Z		d%d&dd„Zd'dd„Zd'dd„Zd'dd„Z	d'dd„Z
d(dd „Zd)d*d"d#„Zd!S )+Ú	MissLayer)Ú
miss_block)Úmiss_rÚmiss_dropoutÚmiss_mini_rÚ
base_layerú	nn.ModuleÚreturnÚNonec                 K  s~   || _ i | _t i ¡| _i | _t i ¡| _d| _g | _	d| _
|| _|  ¡ }t|tjƒr6|j|j| _| _d S tdt|ƒ› ƒ‚)NFTzUnsupported layer type )r   r
   ÚnnÚ
ModuleDictr   r   ÚParameterDictr	   Ú_disable_adaptersÚmerged_adaptersÚcast_input_dtype_enabledÚkwargsÚget_base_layerÚ
isinstanceÚLinearÚin_featuresÚout_featuresÚ
ValueErrorÚtype)Úselfr   r   © r    úJ/home/ubuntu/.local/lib/python3.10/site-packages/peft/tuners/miss/layer.pyÚ__init__!   s   zMissLayer.__init__FÚadapter_nameÚstrÚrÚintÚmini_rÚinit_weightsú
bool | strÚinference_modeÚboolc           
      K  s<  |dkrt d|› ƒ‚|| j|< || j|< |dkr tj|d}nt ¡ }|| j|< |  ¡ }	t|	tj	ƒrCtj
t || j¡dd| j|< n
tdt|	ƒj› ƒ‚|dkrj| j| dks_| j| dkrct d	ƒ‚|  ||¡ n%|d
kr| j| dkryt dƒ‚|  |||¡ n|rŠ|  ||¡ n|  |¡ |  |¡ | j| j|d dS )zôInternal function to create miss adapter

        Args:
            adapter_name (`str`): Name for the adapter to add.
            r (`int`): Rank for the added adapter.
            init_weights (`bool`): Whether to initialize weights.
        r   z?`r` should be a positive integer value but the value passed is ç        )ÚpT©Úrequires_gradz0MiSS is not implemented for base layers of type Úbatz=The weight matrix must be fully divisible into [r, r] blocks.ÚminizÍmini_r is divided along the out_features dimension. For optimal performance and implementation simplicity,it is recommended that out_features be divisible by mini_r.Error: {self.out_features} % mini_r != 0)r*   N)r   r
   r   r   ÚDropoutÚIdentityr   r   r   r   Ú	ParameterÚtorchÚzerosr   r	   Ú	TypeErrorr   Ú__name__r   Úreset_bat_parametersÚreset_mini_parametersÚreset_miss_parametersÚreset_miss_parameters_randomÚ%_move_adapter_to_device_of_base_layerÚset_adapterÚactive_adapters)
r   r#   r%   r'   r   r(   r*   r   Úmiss_dropout_layerr   r    r    r!   Úupdate_layer4   s6   


 ÿ

zMissLayer.update_layerc                 C  s"   t jt || j¡dd| j|< d S ©NTr.   ©r   r4   r5   r6   r   r	   ©r   r#   r%   r    r    r!   r;   m   s   "zMissLayer.reset_miss_parametersc                 C  s(   t jt | j| ||¡dd| j|< d S rB   rC   rD   r    r    r!   r9   p   s   (zMissLayer.reset_bat_parametersc                 C  s    t jt ||¡dd| j|< d S rB   )r   r4   r5   r6   r	   )r   r#   r%   r'   r    r    r!   r:   s   ó    zMissLayer.reset_mini_parametersc                 C  s    t jj| j| t d¡d d S )Né   )Úa)r   ÚinitÚkaiming_uniform_r	   ÚmathÚsqrt)r   r#   r    r    r!   r<   v   rE   z&MissLayer.reset_miss_parameters_randomÚscaleÚfloatc                 C  s6   |dkrd S | j D ]}|| j ¡ vrq	t d¡ q	d S )Né   zGScaling operation for MiSS not supported! Automatically set scale to 1.©r?   r	   ÚkeysÚwarningsÚwarn©r   rL   Úactive_adapterr    r    r!   Úscale_layery   s   
üzMissLayer.scale_layerNc                 C  s*   | j D ]}|| j ¡ vrqt d¡ qd S )Nz?Unscaling operation for MiSS not supported! Keeping scale at 1.rO   rS   r    r    r!   Úunscale_layerƒ   s
   
üzMissLayer.unscale_layer)r   r   r   r   ©F)r#   r$   r%   r&   r'   r&   r(   r)   r*   r+   r   r   )r#   r$   )rL   rM   r   r   ©N©r   r   )r8   Ú
__module__Ú__qualname__Úadapter_layer_namesÚother_param_namesr"   rA   r;   r9   r:   r<   rU   rV   r    r    r    r!   r      s    
ù
9




r   c                      sr   e Zd ZdZ				d*d+‡ fdd„Zd,d-dd„Zd.dd„Zd/d0dd„Zd/d0d d!„Zd1d&d'„Z	d2‡ fd(d)„Z
‡  ZS )3Ú
MissLinearz,
    MiSS implemented in a dense layer.
    r   r,   Tr#   r$   r%   r&   r'   r   rM   r(   úUnion[bool, str]r   r   c                   sH   t ƒ  ¡  tj| |fi |¤Ž || _| j|||||fi |¤Ž || _d S rX   )Úsuperr"   r   Ú_active_adapterrA   Úmiss_fn)r   r   r#   r%   r'   r   r(   r   ©Ú	__class__r    r!   r"      s
   


zMissLinear.__init__FNÚ
safe_merger+   Úadapter_namesúOptional[list[str]]c                 C  s`  t | |ƒ}|s	dS |D ]¢}|| j ¡ v r­|  ¡ }|jj}|rg|jj ¡ }| jdkr4|  	||¡}||7 }n| jdkrE|  
|| jjj¡}|}n|  
|| jjj¡}|}t |¡ ¡ s_td|› dƒ‚| |¡|j_n@| jdkr|  	|| jjj¡}|j j| |¡7  _n&| jdkr—|  
|| jjj¡}| |¡|j_n|  
|| jjj¡}| |¡|j_| j |¡ qdS )ab  
        Merge the active adapter weights into the base weights

        Args:
            safe_merge (`bool`, *optional*):
                If `True`, the merge operation will be performed in a copy of the original weights and check for NaNs
                before merging the weights. This is useful if you want to check if the merge operation will produce
                NaNs. Defaults to `False`.
            adapter_names (`List[str]`, *optional*):
                The list of adapter names that should be merged. If `None`, all active adapters will be merged.
                Defaults to `None`.
        Nr0   r1   z1NaNs detected in the merged weights. The adapter z seems to be broken)r   r	   rP   r   ÚweightÚdtypeÚdataÚclonerb   Úget_delta_weightÚget_delta_weight_missr   r5   ÚisfiniteÚallr   Útor   Úappend)r   re   rf   rT   r   Ú
orig_dtypeÚorig_weightÚdelta_weightr    r    r!   Úmerge    sB   




ÿ

€ÞzMissLinear.mergec                 C  sÆ   | j s
t d¡ dS t| jƒdkra| j ¡ }|  ¡ }|jj}|| j	 
¡ v rX|  ¡ jj ¡ }| jdkr;| j||dd}n| jdkrI| j||dd}n| j||dd}| |¡|j_t| jƒdksdS dS )zW
        This method unmerges all merged adapter layers from the base weights.
        z Already unmerged. Nothing to do.Nr   r0   T)Úrer1   )ÚmergedrQ   rR   Úlenr   Úpopr   rh   ri   r	   rP   rj   rk   rb   rl   rm   rp   )r   rT   r   rr   rs   rt   r    r    r!   ÚunmergeÖ   s    



ózMissLinear.unmergerv   útorch.Tensorc                 C  sd  | j | j}| j | j}|jdko|tjkp|tjk}| j | }|r&| ¡ }| |j¡}| 	d¡}|rw| 
| 	d¡| || 	d¡| |¡ dddd¡}	t | 	d¡¡ |j¡}
t |
| ¡}| |j¡}|	| | }| dddd¡j
|jŽ }n(| 
| 	d¡| || 	d¡| |¡ dddd¡| | }| dddd¡j
|jŽ }|r°|j|d}| |¡| j | _|S )úÂ
        Compute the delta weight for the given adapter.

        Args:
            adapter (str):
                The name of the adapter for which the delta weight should be computed.
        Úcpuéÿÿÿÿr   rN   é   é   ©ri   )r	   Údeviceri   r   r5   Úfloat16Úbfloat16rM   rp   ÚsizeÚreshapeÚpermuteÚeyeÚinverseÚshaperj   )r   Úadapterrs   rv   r‚   ri   Úcast_to_fp32Úweight_missr%   ÚoÚoneÚinv_I_plus_bÚwÚoutput_tensorr    r    r!   rl   í   s4   

0.ÿþÿzMissLinear.get_delta_weightc                 C  sž  | j | j}| j | j}|jdko|tjkp|tjk}| j | }|r&| ¡ }| d¡}| d¡}	| d¡}
| j	dkrE| 
d|	| j|  ¡}||
 dkrû||
 }||
 }||
 }|r©|dd…d|…f  d||
¡ ddd¡|  ddd¡j|dd…d|…f jŽ |dd…d|…f< |dd…|d…f | dd¡dd…d|…f  |dd…|d…f< nO|dd…d|…f  d||
¡ ddd¡|  ddd¡j|dd…d|…f jŽ |dd…d|…f< |dd…|d…f | dd¡dd…d|…f  |dd…|d…f< |}n@|r| d| d¡|
 |
¡ ddd¡| }| ddd¡j|jŽ }n| d| d¡|
 |
¡ ddd¡| }| ddd¡j|jŽ }|rM|j|d}| |¡| j | _|S )	r|   r}   r~   r   r1   rN   Nr   r   )r	   r‚   ri   r   r5   rƒ   r„   rM   r…   rb   Úrepeatr   r†   r‡   rŠ   Ú	transposerp   rj   )r   r‹   rs   rv   r‚   ri   rŒ   r   r   r   r%   Ú	last_sizeÚn_blockÚn_block_sizer’   r‘   r    r    r!   rm     sP   




*
þÿ.ÿ*
þÿ.ÿ&&z MissLinear.get_delta_weight_missÚxÚargsr   r   c                 O  sÚ  |j }| jr| jr|  ¡  | j|g|¢R i |¤Ž}nÌ| jr*| j|g|¢R i |¤Ž}n¼| jdkrg| jjj ¡ }| j	D ]}|| j
 ¡ vrCq9|  ||¡}|| }q9|  ||j ¡}|  | jj|j ¡}	tj|||	d}n| j|g|¢R i |¤Ž}| j	D ]o}|| j
 ¡ vr€qv| j
| }
| jdkr—|
 d| jj| j|  ¡}
| j| }|
 d¡}| d¡| dkr½|| d¡|  | }t |d|f¡}|  ||
j ¡}|tj||ƒjg |jd d… ¢| d¡| ‘|‘R Ž dd|
  }qv| |¡}|S )	Nr0   )Úinputrh   Úbiasr1   rN   r   r~   éþÿÿÿ)Údim)ri   Údisable_adaptersrw   rz   r   rb   rh   rj   rk   r?   r	   rP   rl   Ú_cast_input_dtyper›   ÚFÚlinearr“   r   r   r   r…   Úpadr5   Úsumr†   rŠ   rp   )r   r˜   r™   r   Úprevious_dtypeÚresultrs   rT   rt   r›   ÚmissÚdropoutr%   Úpadding_sizer    r    r!   Úforward^  sB   







D
zMissLinear.forwardc                   s   t ƒ  ¡ }d| S )Nzmiss.)r`   Ú__repr__)r   Úreprc   r    r!   rª   ‡  s   
zMissLinear.__repr__)r   r   r,   T)r#   r$   r%   r&   r'   r&   r   rM   r(   r_   r   r   )FN)re   r+   rf   rg   r   r   rY   rW   )rv   r+   r   r{   )r˜   r{   r™   r   r   r   r   r{   )r   r$   )r8   rZ   r[   Ú__doc__r"   ru   rz   rl   rm   r©   rª   Ú__classcell__r    r    rc   r!   r^   ‹   s    ù
6.
C)r^   )Ú
__future__r   rJ   rQ   Útypingr   r   r   r5   Útorch.nnr   Útorch.nn.functionalÚ
functionalr    Úpeft.tuners.tuners_utilsr   r   r   ÚModuler^   r    r    r    r!   Ú<module>   s   p