o
    Ni6                     @   s&   d dl mZ d dlZG dd dZdS )    )OptionalNc                   @   s  e Zd ZdZ								d(dee dee dee d	ee d
edee dedee fddZdd Z	dd Z
dd ZdejfddZedeejejejf fddZed)deejejf fddZd)ddZd)dd Zdejfd!d"Zed*d$eded%efd&d'ZdS )+IncrementalPCAay  
    An implementation of Incremental Principal Components Analysis (IPCA) that leverages PyTorch for GPU acceleration.
    Adapted from https://github.com/scikit-learn/scikit-learn/blob/main/sklearn/decomposition/_incremental_pca.py

    This class provides methods to fit the model on data incrementally in batches, and to transform new data based on
    the principal components learned during the fitting process.

    Args:
        n_components (int, optional): Number of components to keep. If `None`, it's set to the minimum of the
            number of samples and features. Defaults to None.
        copy (bool): If False, input data will be overwritten. Defaults to True.
        batch_size (int, optional): The number of samples to use for each batch. Only needed if self.fit is called.
            If `None`, it's inferred from the data and set to `5 * n_features`. Defaults to None.
        svd_driver (str, optional): name of the cuSOLVER method to be used for torch.linalg.svd. This keyword
            argument only works on CUDA inputs. Available options are: None, gesvd, gesvdj, and gesvda. Defaults to
            None.
        lowrank (bool, optional): Whether to use torch.svd_lowrank instead of torch.linalg.svd which can be faster.
            Defaults to False.
        lowrank_q (int, optional): For an adequate approximation of n_components, this parameter defaults to
            n_components * 2.
        lowrank_niter (int, optional): Number of subspace iterations to conduct for torch.svd_lowrank.
            Defaults to 4.
        lowrank_seed (int, optional): Seed for making results of torch.svd_lowrank reproducible.
    NTF   n_componentscopy
batch_size
svd_driverlowrank	lowrank_qlowrank_niterlowrank_seedc	           	      C   sL   || _ || _|| _|| _|| _|| _|| _|| _d | _| jr$| 	  d S d S )N)
r   r   r   r   r	   r
   r   r   n_features__validate_lowrank_params)	selfr   r   r   r   r	   r
   r   r    r   N/home/ubuntu/.local/lib/python3.10/site-packages/peft/utils/incremental_pca.py__init__.   s   zIncrementalPCA.__init__c                 C   sD   | j d u r| jd u rtd| jd | _ d S | j | jk r tdd S )NzKn_components must be specified when using lowrank mode with lowrank_q=None.   z8lowrank_q must be greater than or equal to n_components.)r
   r   
ValueError)r   r   r   r   r   G   s   

z'IncrementalPCA._validate_lowrank_paramsc                 C   s   t jj|d| jdS )NF)full_matricesdriver)torchlinalgsvdr   r   Xr   r   r   _svd_fn_fullO   s   zIncrementalPCA._svd_fn_fullc                 C   sr   | j d u}tjj|d# |rt| j  tj|| j| jd\}}}|||jfW  d    S 1 s2w   Y  d S )N)enabled)qniter)	r   r   randomfork_rngmanual_seedsvd_lowrankr
   r   mH)r   r   seed_enabledUSVr   r   r   _svd_fn_lowrankR   s   

$zIncrementalPCA._svd_fn_lowrankreturnc                 C   s   t jt jg}t|t jst j|t jd}n| jr| }|j\}}| j	du r'n!| j	|kr8t
d| j	 d| d| j	|krHt
d| j	 d| |j|vrS|t j}|S )z
        Validates and converts the input data `X` to the appropriate tensor format.

        Args:
            X (torch.Tensor): Input data.

        Returns:
            torch.Tensor: Converted to appropriate format.
        )dtypeNzn_components=z invalid for n_features=z<, need more rows than columns for IncrementalPCA processing.z6 must be less or equal to the batch number of samples )r   float32float64
isinstanceTensortensorr   cloneshaper   r   r+   to)r   r   valid_dtypes	n_samples
n_featuresr   r   r   _validate_dataZ   s&   





zIncrementalPCA._validate_datac                 C   sH  | j d dkr|||fS |dkr |du rtd|du r tdtj| j d g| jd}|| }|du rBtj| j d tj| jd}n|| }| jdtjd}|| | }|| }	| |	 }
|
jdtjd }|
	  |
jdtjd}||| 8 }|du r|| }n|| }|
 | }|| || || |    }|| }|||fS )	aj  
        Computes the incremental mean and variance for the data `X`.

        Args:
            X (torch.Tensor): The batch input data tensor with shape (n_samples, n_features).
            last_mean (torch.Tensor): The previous mean tensor with shape (n_features,).
            last_variance (torch.Tensor): The previous variance tensor with shape (n_features,).
            last_sample_count (torch.Tensor): The count tensor of samples processed before the current batch.

        Returns:
            Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: Updated mean, variance tensors, and total sample count.
        r   Nz6last_mean should not be None if last_sample_count > 0.z:last_variance should not be None if last_sample_count > 0.device   )r+   r9   )dimr+   )r2   r   r   r0   r9   zerosr-   sumsquaresquare_double)r   	last_meanlast_variancelast_sample_countnew_sample_countupdated_sample_countlast_sumnew_sumupdated_meanTtemp
correctionnew_unnormalized_varianceupdated_variancelast_unnormalized_variancelast_over_new_countupdated_unnormalized_variancer   r   r   _incremental_mean_and_var}   s@   


z(IncrementalPCA._incremental_mean_and_varc                 C   s   |rt jt | dd}t | |t| jd f }nt jt |dd}t |t|jd |f }| |d| jd  dd9 } ||dd9 }| |fS )a[  
        Adjusts the signs of the singular vectors from the SVD decomposition for deterministic output.

        This method ensures that the output remains consistent across different runs.

        Args:
            u (torch.Tensor): Left singular vectors tensor.
            v (torch.Tensor): Right singular vectors tensor.
            u_based_decision (bool, optional): If True, uses the left singular vectors to determine the sign flipping.
                Defaults to True.

        Returns:
            Tuple[torch.Tensor, torch.Tensor]: Adjusted left and right singular vectors tensors.
        r   r;   r:   N)r   argmaxabssignranger2   view)uvu_based_decisionmax_abs_colssignsmax_abs_rowsr   r   r   	_svd_flip   s   zIncrementalPCA._svd_flipc                 C   s`   |r|  |}|j\}}| jdu rd| | _| j|| j| jpddD ]}| j|| dd q"| S )a\  
        Fits the model with data `X` using minibatches of size `batch_size`.

        Args:
            X (torch.Tensor): The input data tensor with shape (n_samples, n_features).
            check_input (bool, optional): If True, validates the input. Defaults to True.

        Returns:
            IncrementalPCA: The fitted IPCA model.
        N   r   )min_batch_sizeF)check_input)r7   r2   r   gen_batchesr   partial_fit)r   r   rb   r5   r6   batchr   r   r   fit   s   



zIncrementalPCA.fitc                 C   s  t | d }|r| |}|j\}}|r0d| _d| _tjdg|jd| _|| _	| j
s0t||| _
|| j	kr9td| || j| j| j\}}}|rN||8 }n-tj|dd}	||	8 }t| j | | }
|
| j|	  }t| jd| j ||f}| jr| |\}}}n| |\}}}| j||dd	\}}|d
 |d  }|d
 t||  }|| _|d| j
 | _|d| j
 | _|| _|| _|d| j
 | _|d| j
 | _| j
||fvr|| j
d  | _| S tjd|jd| _| S )ak  
        Incrementally fits the model with batch data `X`.

        Args:
            X (torch.Tensor): The batch input data tensor with shape (n_samples, n_features).
            check_input (bool, optional): If True, validates the input. Defaults to True.

        Returns:
            IncrementalPCA: The updated IPCA model after processing the batch.
        components_Nr   r8   z]Number of features of the new batch does not match the number of features of the first batch.rR   )rS   r:   F)r[   r   r:   g        )hasattrr7   r2   mean_var_r   r0   r9   n_samples_seen_r   r   minr   rQ   meansqrtr@   vstacksingular_values_rX   rg   r	   r)   r   r_   r=   explained_variance_explained_variance_ratio_noise_variance_)r   r   rb   
first_passr5   r6   col_meancol_varn_total_samplescol_batch_meanmean_correction_factormean_correctionr&   r'   Vtexplained_varianceexplained_variance_ratior   r   r   rd      s`   




zIncrementalPCA.partial_fitc                 C   s&   || j  }t| | jj|jS )a  
        Applies dimensionality reduction to `X`.

        The input data `X` is projected on the first principal components previously extracted from a training set.

        Args:
            X (torch.Tensor): New data tensor with shape (n_samples, n_features) to be transformed.

        Returns:
            torch.Tensor: Transformed data tensor with shape (n_samples, n_components).
        )ri   r   mmr@   rg   rI   r3   r+   r   r   r   r   	transform-  s   
zIncrementalPCA.transformr   nra   c                 c   s^    d}t t| | D ]}|| }|| | krqt||V  |}q|| k r-t|| V  dS dS )a  Generator to create slices containing `batch_size` elements from 0 to `n`.

        The last slice may contain less than `batch_size` elements, when `batch_size` does not divide `n`.

        Args:
            n (int): Size of the sequence.
            batch_size (int): Number of elements in each batch.
            min_batch_size (int, optional): Minimum number of elements in each batch. Defaults to 0.

        Yields:
            slice: A slice of `batch_size` elements.
        r   N)rW   intslice)r   r   ra   start_endr   r   r   rc   <  s   zIncrementalPCA.gen_batches)NTNNFNr   N)T)r   )__name__
__module____qualname____doc__r   r   boolstrr   r   r   r)   r   r/   r7   staticmethodtuplerQ   r_   rf   rd   r   rc   r   r   r   r   r      sT    	
#8

Gr   )typingr   r   r   r   r   r   r   <module>   s   