o
    NÆÏi§D  ã                   @   s  d dl Z d dlmZmZmZ d dlZd dlmZ d dlm	Z	m
Z
 ddlmZmZ G dd„ de	ƒZG dd	„ d	ejeƒZd
ededejdejfdd„Zd
ededejdejdeejejf f
dd„Zd
ededejdejdejf
dd„Zdejjdededeejj fdd„ZdS )é    N)ÚAnyÚOptionalÚUnion)ÚBaseTunerLayerÚcheck_adapters_to_mergeé   )Ú
RoadConfigÚRoadVariantc                   @   s„   e Zd ZU dZdZeedf ed< dZeedf ed< dde	j
d	ed
dfdd„Zed
ee fdd„ƒZ	ddefdd„Zdd„ ZdS )Ú	RoadLayeru¢  
    Road layer.

    Generally the idea of RoAD is to split the input vector into many 2D vectors and rotate each 2D vector with its own
    2D rotation matrix. For additional flexibility, each rotation matrix is multiplied by a trainable scale.

    when applied to vector R @ x each pair of elements of x is transformed like this: `yâ‚€ = xâ‚€ * Î± * cosÎ¸ - xâ‚™ * Î± *
    sinÎ¸` and `yâ‚™ = xâ‚€ * Î± * sinÎ¸ + xâ‚™ * Î± * cosÎ¸`

    The scales Î± and angles Î¸ are learned for each pair of elements and, moreover, each of the 4 instances in the
    rotation matrix may actually be different (when using variant 2 or 4).

    Note that instead of using two consecutive elements xâ‚€ xâ‚ we first split the whole vector into groups and pair
    elements from the first with the second half of the same group, which allows for more efficient inference
    implementation.

    The adapter needs to only store the angles Î¸ and scales Î±, rather than the full matrix R and the inference
    implementation only needs to do elementwise vector multiplications.

    For merging the weights, we make use of the following formula: R @ (W @ x + b) = (R @ W) @ x + R @ b. The lhs part
    is how it is used in unmerged state (using efficient elementwise implementation instead of matrix multiplication)
    and the rhs part is how it is used in merged state where (R @ W) becomes the new weight matrix and R @ b becomes
    the new bias.

    )Ú
road_thetaÚ
road_alpha.Úadapter_layer_names)ÚvariantÚ
group_sizeÚother_param_namesFÚ
base_layerÚephemeral_gpu_offloadÚreturnNc                 K   s~   || _ i | _i | _t i ¡| _t i ¡| _d| _g | _|  	¡ }t
|tjƒr-|j|j}}n
tdt|ƒ› dƒ‚|| _|| _d S )NFzUnsupported layer type 'z)' encountered, cannot apply RoAd adapter.)r   r   r   ÚnnÚParameterDictr   r   Ú_disable_adaptersÚmerged_adaptersÚget_base_layerÚ
isinstanceÚLinearÚin_featuresÚout_featuresÚ
ValueErrorÚtype)Úselfr   r   Úkwargsr   r   © r!   úJ/home/ubuntu/.local/lib/python3.10/site-packages/peft/tuners/road/layer.pyÚ__init__8   s   
zRoadLayer.__init__c                 C   s
   h | j £S ©N)r   )r   r!   r!   r"   Ú_available_adaptersJ   s   
zRoadLayer._available_adaptersÚinference_modec                 C   sÐ   || j |< || j|< | j| dkrtd|› dƒ‚|dkr#| jd }n|dkr+| j}n|dkr5| jd }ntd|› d	ƒ‚t t |¡¡| j|< t t |¡¡| j	|< |  
||¡ |  |¡ | j| j|d
 d S )Nr   zDThe out_features of the base layer must be divisible by group_size (z) when using RoadLayer.Úroad_1é   Úroad_2Úroad_4úUnsupported variant úB for RoadLayer. Supported variants are road_1, road_2, and road_4.)r&   )r   r   r   r   r   Ú	ParameterÚtorchÚemptyr   r   Úreset_parametersÚ%_move_adapter_to_device_of_base_layerÚset_adapterÚactive_adapters)r   Úadapter_namer   r   Úinit_weightsr&   Úsizer!   r!   r"   Úupdate_layerN   s(   


ÿ
ÿ
zRoadLayer.update_layerc                 C   sl   |du r t jj| j| jddd t jj| j| jddd d S t j | j| j¡ t j | j| j¡ d S )NFg        g      à?)ÚmeanÚstdg      ð?)r   ÚinitÚnormal_r   Údatar   Úzeros_Úones_)r   r4   r5   r!   r!   r"   r0   q   s   zRoadLayer.reset_parameters)F)Ú__name__Ú
__module__Ú__qualname__Ú__doc__r   ÚtupleÚstrÚ__annotations__r   r   ÚModuleÚboolr#   ÚpropertyÚsetr%   r7   r0   r!   r!   r!   r"   r
      s   
 	ú
ú#r
   c                       sÊ   e Zd Z			ddedededeeef dd	f
‡ fd
d„Zdd„ Z	de
jdedede
jfdd„Zde
jdedee dede
jf
dd„Zddedeee  dd	fdd„Zd dd„Zdef‡ fdd„Z‡  ZS )!r   r'   é@   Tr4   r   r   r5   r   Nc                    s:   t ƒ  ¡  tj| |fi |¤Ž || _| j||||d d S )N)r5   )Úsuperr#   r
   Ú_active_adapterr7   )r   r   r4   r   r   r5   r    ©Ú	__class__r!   r"   r#   |   s   
	
üzLinear.__init__c                 O   s`   |  dd¡}|du rdS t|ƒt|ƒkr%dt|ƒ› dt|ƒ› d}t|ƒ‚| jr.d}t|ƒ‚dS )zMCheck if the arguments are compatible with the configs and state of the modelÚadapter_namesNzNLength of `adapter_names` should be the same as the number of inputs, but got z and z respectively.z`Cannot pass `adapter_names` when there are merged adapters, please call `unmerge_adapter` first.)ÚgetÚlenr   Úmerged)r   ÚxÚargsr    rO   Úmsgr!   r!   r"   Ú_check_forward_args‘   s   ÿÿÿüzLinear._check_forward_argsrS   rT   r    c                 O   s  | j |g|¢R i |¤Ž | dd ¡}| jr*| jr|  ¡  | j|g|¢R i |¤Ž}|S | jr;| j|g|¢R i |¤Ž}|S |d urO| j|g|¢R d|i|¤Ž}|S | j|g|¢R i |¤Ž}|j}| jD ]&}|| j	vriqa|  
|| j| j¡}t| j| | j| | j| | j| |ƒ}qa| |¡}|S )NrO   )rV   ÚpopÚdisable_adaptersrR   Úunmerger   Ú_mixed_batch_forwardÚdtyper3   r%   Ú_cast_input_dtyper   Ú_apply_roadr   r   r   Úto)r   rS   rT   r    rO   ÚresultÚtorch_result_dtypeÚactive_adapterr!   r!   r"   Úforward¤   s8   éëî

û
zLinear.forwardrO   c                   sÆ   | j |g|¢R i |¤Ž}t|ƒ}g }|D ]‰ | ‡ fdd„t|ƒD ƒ¡ qt|ƒD ]7\}}	|	dkr2q)|	| jvr8q)| j|	 jj}
|||   |
¡}t	| j
|	 | j|	 | j|	 | j|	 |ƒ||| < q)|S )Nc                    s   g | ]
\}}|ˆ kr|‘qS r!   r!   )Ú.0ÚindexÚitem©Úadapterr!   r"   Ú
<listcomp>Ï   s    z/Linear._mixed_batch_forward.<locals>.<listcomp>Ú__base__)r   rI   ÚappendÚ	enumerater%   r   r<   r[   r^   r]   r   r   r   )r   rS   rO   rT   r    r_   Úunique_adaptersÚsub_batch_indices_listÚira   r[   Ú	sub_batchr!   rf   r"   rZ   Å   s(   
ûzLinear._mixed_batch_forwardFÚ
safe_mergec           	      C   s|  t | |ƒ}|s	dS |D ]°}|| jv r»|  ¡ }|jj}t| j| | j| | j| j	| j
| j	ƒ}|r„|jj	 ¡ }t | |¡|¡}t |¡ ¡ sOtd|› dƒ‚| ¡  |¡|j_	|jdurƒ|j ¡ }t | |¡|¡}t |¡ ¡ sztd|› dƒ‚| ¡  |¡|j_	n1|jj	}t | |¡|¡}| ¡  |¡|j_	|jdurµ|jj	}t | |¡|¡}| ¡  |¡|j_	| j |¡ qdS )ab  
        Merge the active adapter weights into the base weights

        Args:
            safe_merge (`bool`, *optional*):
                If `True`, the merge operation will be performed in a copy of the original weights and check for NaNs
                before merging the weights. This is useful if you want to check if the merge operation will produce
                NaNs. Defaults to `False`.
            adapter_names (`List[str]`, *optional*):
                The list of adapter names that should be merged. If `None`, all active adapters will be merged.
                Defaults to `None`.
        Nz1NaNs detected in the merged weights. The adapter z seems to be brokenz.NaNs detected in the merged bias. The adapter )r   r%   r   Úweightr[   Ú_get_delta_weightr   r   r   r<   r   Úcloner.   Úmatmulr^   ÚisfiniteÚallr   Ú
contiguousÚbiasr   rj   )	r   rp   rO   ra   r   Ú
orig_dtypeÚroad_RÚorig_weightÚ	orig_biasr!   r!   r"   Úmergeæ   sP   



ü
ÿ


ÿ€
€ÕzLinear.mergec                 C   sê   | j s
t d¡ dS t| jƒdkrs| j ¡ }|| jv rj|  ¡ j}|j	}t
| j| | j| | j| j| j| jƒ}tj | tj¡¡ |¡}t ||j¡}| ¡ |_|  ¡ jdurjt ||  ¡ jj¡}| ¡ |  ¡ j_t| jƒdksdS dS )zW
        This method unmerges all merged adapter layers from the base weights.
        z Already unmerged. Nothing to do.Nr   )rR   ÚwarningsÚwarnrQ   r   rW   r%   r   rq   r[   rr   r   r   r   r<   r   r.   ÚlinalgÚinvr^   Úfloat32rt   rw   rx   )r   ra   rq   ry   rz   Ú
inv_road_Rr{   r|   r!   r!   r"   rY   %  s*   





ü
ìzLinear.unmergec                    s   t ƒ  ¡ }d| S )Nzroad.)rK   Ú__repr__)r   ÚreprM   r!   r"   r„   B  s   
zLinear.__repr__)r'   rJ   T)FN)r   N)r?   r@   rA   rD   r	   Úintr   rG   r#   rV   r.   ÚTensorr   rb   ÚlistrZ   r   r}   rY   r„   Ú__classcell__r!   r!   rM   r"   r   z   s>    úýüû
úø!ÿÿÿÿ
þ !
?r   r   r   r   r   c           
      C   sÄ   t | |||ƒ\}}t |¡}|jd }| dd|d ¡d d …ddgd d …f  ¡ }t |¡ dd|d |¡d d …ddgd d …d d …f }	|	d d …dd d …d d …f  d9  < |	 ||¡}	||	7 }|S )Nr   éÿÿÿÿr(   r   )Ú_prepare_colsr.   ÚdiagÚshapeÚreshapeÚflatten)
r   r   r   r   Ú	first_colÚ
second_colÚoutput_tensorr6   Úswapped_second_colÚrotated_diag_second_colr!   r!   r"   rr   G  s   

,6$rr   r   c           
      C   sX  | dkr6|  d|d ¡jddd ¡ }|  d|d ¡jddd ¡ }| ¡ }| ¡ }|| }|| }||fS | dkrN| ¡ }| ¡ }|| }|| }||fS | dkr¤|  dd|¡}|d d …dd d …f  ¡  ¡ }|d d …dd d …f  ¡  ¡ }|  dd|¡}|d d …dd d …f  ¡ }|d d …dd d …f  ¡ }	|| }|	| }||fS td	| › d
ƒ‚)Nr'   rŠ   r(   r   ©Údimr)   r*   r   r+   r,   )rŽ   Úrepeat_interleaver   ÚcosÚsinr   )
r   r   r   r   Ú	theta_cosÚ	theta_sinr   r‘   Úalpha_1Úalpha_2r!   r!   r"   r‹   [  s6   éðü
ÿr‹   rS   c                 C   s€   t | |||ƒ\}}| dd|d ¡}|d d …dd d …f }|d d …dd d …f }	tj|	 |fdd |j¡}
|| |
|  }|S )NrŠ   r(   r   r   r•   )r‹   rŽ   r.   Ústackr   )r   r   r   r   rS   r   r‘   Ú	x_groupedÚx1Úx2Úrotate_half_xr_   r!   r!   r"   r]   ƒ  s   r]   Útargetr4   Úroad_configc                 K   s@   d }t | tƒr|  ¡ }n| }t |tjjƒrt| |fi |¤Ž}|S r$   )r   r   r   r.   r   r   )r£   r4   r¤   r    Ú
new_moduleÚtarget_base_layerr!   r!   r"   Údispatch_default’  s   

r§   )r~   Útypingr   r   r   r.   Útorch.nnr   Úpeft.tuners.tuners_utilsr   r   Úconfigr   r	   r
   rF   r   r†   r‡   rr   rC   r‹   r]   rD   r§   r!   r!   r!   r"   Ú<module>   sR   ` Nÿÿÿÿ
þ(ÿÿÿÿ
ÿÿþý
û