o
    
۾iO                     @   sv   d dl Z d dlmZ d dlm  mZ d dlmZ d dlm	Z	 d dl
mZ d dlmZ ddlmZ G dd	 d	eZdS )
    N)PretrainedConfig)
LoRAConfig)VocabParallelEmbedding)current_platform   )BaseLayerWithLoRAc                       s   e Zd Zdeddf fddZ	ddedededB ddfd	d
ZdefddZ	dede
jee
j B de
jee
j B fddZde
jde
jfddZe	ddejdedededB def
ddZedd Z  ZS )VocabParallelEmbeddingWithLoRA
base_layerreturnNc                    s   t    || _|  |  d S N)super__init__r	   )selfr	   	__class__ ]/home/ubuntu/.local/lib/python3.10/site-packages/vllm/lora/layers/vocal_parallel_embedding.pyr      s   
z'VocabParallelEmbeddingWithLoRA.__init__	max_loraslora_configmodel_configc                 C   s   | j jdkr:| j jj| j j| j j| j j  | _| j jj| j j | j jj	| j j f| _
| j jj| j jd  d nd | _
d | _tj|| j j|jf|j| j jjd| _tj|d| j j|jf|j| j jjd| _| j| jjd | jjd  | jjd | _d S )Nr   )dtypedevicer      )r	   "num_added_embeddings_per_partitionweightdata num_org_embeddings_per_partitionembeddings_weightsshard_indicesadded_vocab_start_indexorg_vocab_sizeadded_vocab_end_indexembeddings_slicefill_torchzerosmax_lora_rank
lora_dtyper   lora_a_stackedembedding_dimlora_b_stackedviewshapelora_a_stacked_2d)r   r   r   r   r   r   r   create_lora_weights   sP   

	


z2VocabParallelEmbeddingWithLoRA.create_lora_weightsindexc                 C   s   d| j |< d| j|< d S )Nr   )r(   r*   )r   r/   r   r   r   
reset_loraH   s   
z)VocabParallelEmbeddingWithLoRA.reset_loralora_alora_bc                 C   s   t |tjsJ t |tjsJ | | | j|d |jd d |jd f j|jdd | j|dd |jd d |jd f j|dd d S )Nr   r   T)non_blocking)	
isinstancer$   Tensorr0   r(   r,   copy_Tr*   )r   r/   r1   r2   r   r   r   set_loraL   s   
$&
z'VocabParallelEmbeddingWithLoRA.set_loraxc                 C   s   |j d }| jjd d | }t|| | j}| j|}|}|jdkr3|	|j d |j d  d}|jdkrF|	|j d |j d  d}| jj
||| jdd}t sW|}||S )Nr   r      T)	add_input)r,   punica_wrapper_embeddings_indicesF	embeddingr-   r	   forwardndimr+   add_lora_embeddingr*   r   can_update_inplaceview_as)r   r9   
num_tokens	indices_1full_lora_a_embeddingsfull_outputfull_output_orglora_outputr   r   r   rA   _   s.   




z&VocabParallelEmbeddingWithLoRA.forwardsource_layerpacked_modules_listc                 C   s   t |tu S r   )typer   )clsrL   r   rM   r   r   r   r   can_replace_layer   s   z0VocabParallelEmbeddingWithLoRA.can_replace_layerc                 C   s   | j jS r   )r	   r   )r   r   r   r   r      s   z%VocabParallelEmbeddingWithLoRA.weightr   )__name__
__module____qualname__r   r   intr   r   r.   r0   r$   r5   listr8   rA   classmethodnnModuleboolrP   propertyr   __classcell__r   r   r   r   r      sH    

1
 	r   )r$   torch.nnrW   torch.nn.functional
functionalr?   transformersr   vllm.config.lorar   3vllm.model_executor.layers.vocab_parallel_embeddingr   vllm.platformsr   baser   r   r   r   r   r   <module>   s   