o
    Ni                     @  st   d dl mZ d dlZd dlmZmZ d dlmZmZm	Z	 d dl
mZ d dlmZ ddlmZ eG d	d
 d
eZdS )    )annotationsN)	dataclassfield)LiteralOptionalUnion)
PeftConfig)PeftType   )random_maskc                      s   e Zd ZU dZedddidZded< eddd	idZd
ed< edddidZded< edddidZ	ded< edddidZ
ded< edddidZded< edddidZded<  fddZ  ZS )ShiraConfiga  
    This is the configuration class to store the configuration of a [`ShiraModel`].

    Args:
        r (`int`, *optional*, defaults to `32`):
            For a given target module, the number of SHiRA parameters is computed as r(m+n), where the original tensor
            dimensions are m x n. This means the number of SHiRA parameters is the same as that for a LoRA adapter.
            SHiRA is a high rank adapter. Setting this r parameter does not restrict the rank to this value.
        mask_type (`str`, defaults to `random`):
            Type of mask function. Defaults to a random sparse mask. An optional user-defined mask_fn to compute the
            mask value can also be supplied by instantiating `config = ShiraConfig(...)` and then setting
            `config.mask_fn = <your custom mask function>`. For a pretrained weight with shape m x n, the custom mask
            function must return only one mask (shape: m x n) which must be binary 0 or 1 with num_shira_parameters =
            r(m + n) for linear layers. Device and dtype of mask must be same as base layer's weight's device and
            dtype. Please see mask_functions.py for more details and to see the default random sparse mask
            implementation.
        random_seed (`int`, *optional*, defaults to `None`):
            random seed for the torch generator for random_mask.
        target_modules (`Union[List[str], str]`):
            List of module names or regex expression of the module names to replace with SHiRA. For example, ['q', 'v']
            or '.*decoder.*(SelfAttention|EncDecAttention).*(q|v)$'. Only linear layers are supported.
        fan_in_fan_out (`bool`):
            Set this to True if the layer to replace stores weight like (fan_in, fan_out). For example, gpt-2 uses
            `Conv1D` which stores weights like (fan_in, fan_out) and hence this should be set to `True`.
        init_weights (`bool`, defaults to `True`):
            Initialize SHiRA weight to have zero values. If set to False, SHiRA weights are initialized to randn values
            instead of zeros and this is used only for testing.
        modules_to_save (`List[str]`):
            List of modules apart from SHiRA layers to be set as trainable and saved in the final checkpoint.
        helpa3  For a given target module, the number of SHiRA parameters is computed as r(m+n), where the original tensor dimensions are m x n. This means the number of SHiRA parameters is the same as that for a LoRA adapter. SHiRA is a high rank adapter. Setting this r parameter does not restrict the rank to this value.)defaultmetadataintrrandomao  Type of mask function. Defaults to a random sparse mask. An optional user-defined mask_fn to compute the mask value can also be supplied by instantiating `config = ShiraConfig(...)` and then setting `config.mask_fn = <your custom mask function>`. For a pretrained weight with shape m x n, the custom mask function must return only one mask (shape: m x n) which must be binary 0 or 1 with num_shira_parameters = r(m + n) for linear layers. Device and dtype of mask must be same as base layer's weight's device and dtype. Please see mask_functions.py for more details and to see the default random sparse mask implementation.zLiteral['random']	mask_typeNz3random seed for the torch generator for random_maskzOptional[int]random_seedzList of module names or regex expression of the module names to replace with SHiRA.For example, ['q', 'v'] or '.*decoder.*(SelfAttention|EncDecAttention).*(q|v)$'. Only linear layers are supported.zOptional[Union[list[str], str]]target_modulesFzMSet this to True if the layer to replace stores weight like (fan_in, fan_out)boolfan_in_fan_outTzInitialize SHiRA weight to have zero values. If set to False, SHiRA weights are initialized to randn values instead of zeros and this is used only for testing.init_weightsa  List of modules apart from SHiRA layers to be set as trainable and saved in the final checkpoint. For example, in Sequence Classification or Token Classification tasks, the final layer `classifier/score` are randomly initialized and as such need to be trainable and saved.zOptional[list[str]]modules_to_savec                   sh   t    tj| _t| jtrt| jn| j| _| j	dkr"t
| _d S | js/td| j	d d | _d S )Nr   zArgument self.mask_type=ze is not recognized, please supply your own masking function by calling `config.mask_fn = my_mask_fn`.)super__post_init__r	   SHIRA	peft_type
isinstancer   listsetr   r   mask_fninference_modewarningswarn)self	__class__ L/home/ubuntu/.local/lib/python3.10/site-packages/peft/tuners/shira/config.pyr   t   s   



zShiraConfig.__post_init__)__name__
__module____qualname____doc__r   r   __annotations__r   r   r   r   r   r   r   __classcell__r)   r)   r'   r*   r      sN   
 

r   )
__future__r   r$   dataclassesr   r   typingr   r   r   peft.configr   
peft.utilsr	   mask_functionsr   r   r)   r)   r)   r*   <module>   s   