o
    
۾i+                     @   s   d dl Z d dlmZ d dlmZ d dlmZ d dlmZm	Z	 d dl
mZ d dlmZ ddlmZ dd	lmZmZ G d
d deZG dd deZdS )    N)PretrainedConfig)
LoRAConfig)split_tensor_along_last_dim tensor_model_parallel_all_reduce)RowParallelLinear)current_platform   )BaseLinearLayerWithLoRA)_fully_sharded_can_replace_not_fully_sharded_can_replacec                       s   e Zd Zdeddf fddZdejdejfddZd	ejdejfd
dZdejdeje	ejejdB f B fddZ
ee	ddejdedededB def
ddZ  ZS )RowParallelLinearWithLoRA
base_layerreturnNc                    s*   t  | | jj| _| jj| _d| _d S Nr   )super__init__r   input_size_per_partition
input_sizeoutput_sizen_slices)selfr   	__class__ X/home/ubuntu/.local/lib/python3.10/site-packages/vllm/lora/layers/row_parallel_linear.pyr      s   


z"RowParallelLinearWithLoRA.__init__lora_ac                 C   s6   | j }| j| }| jd | }|d d ||f }|S r   )r   tp_rank)r   r   
shard_size	start_idxend_idxr   r   r   slice_lora_a   s
   
z&RowParallelLinearWithLoRA.slice_lora_alora_bc                 C   s   |S Nr   )r   r!   r   r   r   slice_lora_b&   s   z&RowParallelLinearWithLoRA.slice_lora_binput_c                 C   s   | j jr|}nt|| jd}|| j  }| jdks| j jr dn| j j}| ||}| j j	r8| jdkr8t
|}n|}| j jrB| j jnd}| j jsJ|S ||fS )a*  Forward of RowParallelLinear

        Args:
            input_: tensor whose last dimension is `input_size`. If
                    `input_is_parallel` is set, then the last dimension
                    is `input_size // tp_size`.

        Returns:
            - output
            - bias
        )num_partitionsr   Nr   )r   input_is_parallelr   tp_sizer   
contiguousskip_bias_addbiasapplyreduce_resultsr   return_bias)r   r$   input_parallelsplitted_inputbias_output_paralleloutputoutput_biasr   r   r   forward)   s$   
z!RowParallelLinearWithLoRA.forwardsource_layerlora_configpacked_modules_listmodel_configc                 C   s   t |tu S r"   )typer   clsr5   r6   r7   r8   r   r   r   can_replace_layerS   s   	z+RowParallelLinearWithLoRA.can_replace_layerr"   )__name__
__module____qualname__r   r   torchTensorr    r#   tupler4   classmethodr   nnModuler   listr   boolr<   __classcell__r   r   r   r   r      s.    	
*r   c                       s   e Zd ZdZdejdejfddZddejdejdB dejfd	d
Zee		dde
jdedededB def
 fddZ  ZS ) RowParallelLinearWithShardedLoRAa  
    Differs from RowParallelLinearWithLoRA by slicing the
    LoRA B's also.

    Based on S-LoRA, slicing happens along the output dim.
    This yields a combined partial sum from the row parallel base
    layer and column partitioned output from the LoRA.
    r!   r   c                 C   s@   | j d jd }| j| }| jd | }|||d d f }|S )Nr      r   )lora_b_stackedshaper   )r   r!   r   r   r   r   r   r   r#   n   s
   
z-RowParallelLinearWithShardedLoRA.slice_lora_bNxr*   c           
      C   s   | j j| j ||}|d|jd }|d|jd |j}}tj| j|jd | jd jd ftj	|j
d}| j||| jd}t sG|}| jdkrPt|}| jd jd }| j| }| jj||| j| j|dd}	t sq|	}|j| }|S )	Nr   rJ   )dtypedeviceg      ?r   T)offset_start	add_input)r   quant_methodr+   viewrL   r@   zerosr   lora_a_stackedfloat32rP   punica_wrapper
add_shrinkr   can_update_inplacer'   r   rK   r   
add_expandoutput_slices)
r   rM   r*   r2   out_orig_shapebuffershrunk_bufferr   rQ   lora_outputr   r   r   r+   u   s:   

	
	
z&RowParallelLinearWithShardedLoRA.applyr5   r6   r7   r8   c                    s   t  j||||ddS )NF)r5   r6   r7   r8   decorate)r   r<   r:   r   r   r   r<      s   
z2RowParallelLinearWithShardedLoRA.can_replace_layerr"   )r=   r>   r?   __doc__r@   rA   r#   r+   rC   r
   rD   rE   r   rF   r   rG   r<   rH   r   r   r   r   rI   d   s$    	"+rI   )r@   torch.nnrD   transformersr   vllm.config.lorar   vllm.distributedr   r   !vllm.model_executor.layers.linearr   vllm.platformsr   base_linearr	   utilsr
   r   r   rI   r   r   r   r   <module>   s   O