o
    Ni                     @  s   d dl mZ d dlZd dlmZmZ d dlZd dlmZ ddl	m
Z
 eejejejf ZG dd dejZG dd	 d	ejZdS )
    )annotationsN)OptionalUnion   )XLoraConfigc                      s&   e Zd Zd fdd	Zdd Z  ZS )TemperatureScaledSoftmax      ?c                   s"   t    || _tjdd| _d S )N)dim)super__init__temperaturennSoftmaxsoftmax)selfr   	__class__ P/home/ubuntu/.local/lib/python3.10/site-packages/peft/tuners/xlora/classifier.pyr      s   
z!TemperatureScaledSoftmax.__init__c                 C  s   || j  }| |S )N)r   r   )r   logitsscaled_logitsr   r   r   forward"   s   

z TemperatureScaledSoftmax.forward)r   )__name__
__module____qualname__r   r   __classcell__r   r   r   r   r      s    r   c                      sV   e Zd ZdZd fddZ		d d!ddZ		d d!ddZd"ddZd#ddZ  Z	S )$XLoraClassifierz7
    A classifier to select LoRA layers for XLora.
    model	nn.Moduleconfigr   	n_classesintn_layersdevicetorch.devicec           
        s  t    || _|| _|| _g | _t| jjd| _|j	| _
d| _t| j| _|jdk}g }| jjdkr\|jrKtj|j|| dd|| j}ntj|j|dd|| j}n| jjdkrftd|tj|j|jdd|| j |t  |r|tj|jd	 t|jd
 D ])}	|tj|j|jdd|| j |t  |r|tj|jd	 q|jrtj|j|| dd|| j}ntj|j|dd|| j}tjg ||R  | _dS )z
        Construct an X-LoRA classifier from a model, config and some metadata. Note that n_layers is the number of LoRA
        adapter layers, not the number of model layers.
        )r   Fg        r   T)biasr   z'X-LoRA depth must be strictly positive.)p   N)r   r   r!   r#   r    log_scalingsr   softmax_temperaturer   scaling_pass_valueoverride_scaling_pass_valuescalings_loggingnext
parametersdtypexlora_dropout_pxlora_depthlayerwise_scalingsr   Linearhidden_sizeto
ValueErrorappend
xlora_sizeReLUDropoutrange
Sequentiallayers)
r   r   r    r!   r#   r$   add_dropoutr>   last_r   r   r   r   .   s>   

&"((& zXLoraClassifier.__init__N	input_idsOptional[torch.LongTensor]inputs_embedsOptional[torch.FloatTensor]returntorch.Tensorc                 O  sd   |dur|j d }|j}|j d }n|j d }|j}|j d }t||| j| jf| jj|| jdS )aP  
        Make some dummy scalings for the scalings pass (the one to get the logits for the X-LoRA classifier). These are
        of shape (batch_size, seq_len, n_layers, n_classes) and filled with the override scalings pass value. Note that
        n_layers is the number of LoRA adapter layers, not the number of model layers.
        Nr   r   )r$   r0   )	shaper$   torchfullr#   r!   r,   r6   r0   )r   rB   rD   argskwargs
batch_sizer$   seq_lenr   r   r   make_dummy_scalingse   s   


z#XLoraClassifier.make_dummy_scalingsc                 O  s   |dur|j d }|j d }n
|j d }|j d }|j}|d }	| j|	}
| jjs8|
d}
|
dd| jd}
|
	||| j| j
}| jjrK| |}| jrT| j| |S )zt
        Using the hidden states of the model, predict `n_classes` LoRA alpha values. Returns the scalings.
        Nr   r   r	   r(   )rH   hidden_statesr>   r   r    r3   	unsqueezeexpandr#   reshaper!   enable_softmaxr   r-   r)   r8   )r   resultrB   rD   rK   rL   rM   rN   rP   hidden_stater   scalingsr   r   r   r      s"   




zXLoraClassifier.forward/dict[int, tuple[list[int], list[torch.Tensor]]]c                 C  sd   i }t | jD ](\}}|jd }||vr|g|gf||< q|| d | || d | q|S )aL  
        Returns bucketed scalings, bucketed by seq_len. Each value consists of the positions (the first) and the
        associated tensors. The positions are paired with the associated tensors and give the position in the scaling
        log. Each scaling is a tensor of shape (batch_size, seq_len, n_layers, n_classes)).
        r   r   )	enumerater)   rH   r8   )r   seqlens_mapiscalingrN   r   r   r   _get_bucketed_scalings   s   
z&XLoraClassifier._get_bucketed_scalingsvalueUnion[Number, None]c                 C  s*   |d u rd| j  | _n|| _| j| j_d S )Nr   )r!   r,   r    r+   )r   r^   r   r   r    _set_override_scaling_pass_value   s   z0XLoraClassifier._set_override_scaling_pass_value)
r   r   r    r   r!   r"   r#   r"   r$   r%   )NN)rB   rC   rD   rE   rF   rG   )rF   rX   )r^   r_   )
r   r   r   __doc__r   rO   r   r]   r`   r   r   r   r   r   r   )   s    9
.r   )
__future__r   builtinstypingr   r   rI   torch.nnr   r    r   r"   floatboolNumberModuler   r   r   r   r   r   <module>   s   