o
    -iA)                     @   sd  U d dl Z d dlmZmZ d dlZd dlmZmZ d dlm	Z	 d dl
mZ d dlmZ d dlmZ d dlmZ d d	lmZmZmZmZmZmZmZmZmZmZmZmZmZm Z m!Z!m"Z"m#Z# d d
l$m%Z% d dl&m'Z' d dl(m)Z)m*Z* erd dl+m,Z, d dl-m.Z. d dl/m0Z0 ee1Z2d a3dd Z4e#eeeee!e eeeeeee"eehZ5e6e7e  e8d< de	j9de:fddZ;	d5de	j9de<dede=dedB de	j9fddZ>	d5dddd de<dededB defd!d"Z?de	j9d#e@d$e	j9de	j9fd%d&ZA	d5d'e@d(ed) deBe@e:f fd*d+ZCd'e@de:fd,d-ZDde	j9de=e@ fd.d/ZEd0e@de@fd1d2ZFde	j9deGe@e=e@ f fd3d4ZHdS )6    N)TYPE_CHECKINGOptional)HfHubHTTPErrorHFValidationError)nn)PretrainedConfig)envs)
LoRAConfig)init_logger)BaseLayerWithLoRAColumnParallelLinearWithLoRA#ColumnParallelLinearWithShardedLoRAFusedMoE3DWithLoRAFusedMoEWithLoRALogitsProcessorWithLoRA/MergedColumnParallelLinearVariableSliceWithLoRA"MergedColumnParallelLinearWithLoRA)MergedColumnParallelLinearWithShardedLoRAMergedQKVParallelLinearWithLoRA&MergedQKVParallelLinearWithShardedLoRAQKVParallelLinearWithLoRA QKVParallelLinearWithShardedLoRAReplicatedLinearWithLoRARowParallelLinearWithLoRA RowParallelLinearWithShardedLoRAVocabParallelEmbeddingWithLoRA)FusedMoE)
LinearBase)get_moe_expert_mappingget_packed_modules_mapping)LogitsProcessor)ParallelLMHead)WeightsMapperc                   C   s   t d7 a t S )N   )_GLOBAL_LORA_ID r%   r%   L/home/ubuntu/veenaModal/venv/lib/python3.10/site-packages/vllm/lora/utils.pyget_lora_id2   s   r'   _all_lora_classesmodelreturnc                 C   s(   t dd |  D rtd dS dS )z@Checks if the model contains FusedMoE layers and warns the user.c                 s   s    | ]}t |tV  qd S N)
isinstancer   ).0moduler%   r%   r&   	<genexpr>N   s    zis_moe_model.<locals>.<genexpr>z8MoE model detected. Using fused MoE LoRA implementation.TF)anymoduleslogger	info_once)r)   r%   r%   r&   is_moe_modelL   s   
r4   layer	max_loraslora_configpacked_modules_listmodel_configc                 C   s>   t D ]}|j| |||dr|| }|||| |  S q| S )N)source_layerr7   r8   r9   )r(   can_replace_layercreate_lora_weights)r5   r6   r7   r8   r9   lora_clsinstance_layerr%   r%   r&   
from_layerT   s   	r?   r    lm_headr!   c                 C   s0   t | |j|jj|jj| }|||| |S r+   )r   embedding_dimweightdtypedeviceget_sharded_to_full_mappingr<   )r5   r@   r6   r7   r9   retr%   r%   r&   from_layer_logits_processori   s   rG   module_name
new_modulec                 C   s<   |  d|ddd }|dd }t||| |S )z1Replace a submodule in a model with a new module..N)get_submodulejoinsplitsetattr)r)   rH   rI   parenttarget_namer%   r%   r&   replace_submodule{   s   rR   nameweights_mapperr"   c                 C   s   |  dr| dd} |r|| n| } d|  } n	|r || n| } |  dr)dnd}| d}|d dkrS|d d	ksB|d d
krSd||d }||d d	kfS |d dks_|d dkrpd||d }||d dkfS t|  d)a  Parse the name of lora weights.

    args:
        name: the name of the fine-tuned LoRA, e.g.
            base_model.model.dense1.weight
        weights_mapper: maps the name of weight, e.g.
            `model.` -> `language_model.model.`,
    return:
        tuple(module_name, is_lora_a):
            module_name: the name of the module, e.g. model.dense1,
            is_lora_a whether the tensor is lora_a or lora_b.
    zbase_model.model.    r   rJ   rK   rB   lora_Alora_Blora_embedding_Alora_embedding_Bz is unsupported LoRA weight)
startswithreplace	_map_namerN   rM   
ValueError)rS   rT   start_indexpartsnew_namer%   r%   r&   parse_fine_tuned_lora_name   s   


$rc   c                 C   s   d}|  |S )N)z.embed_tokens.base_layer.weightz.lm_head.base_layer.weight)endswith)rS   embedding_suffixesr%   r%   r&   is_base_embeddding_weights   s   
rf   c                 C   s   t  }|  D ]8\}}t|dd}|dur|D ]}|| qt|tfr/||dd  t|tfr?||dd  qt|S )z2
    In vLLM, all linear layers support LoRA.
    embedding_modulesNrJ   rK   )	setnamed_modulesgetattraddr,   r   rN   r   list)r)   supported_lora_modulesrS   r.   rg   r%   r%   r&   get_supported_lora_modules   s   rn   	lora_pathc                    s   t j r S  drt j S t j rt j S tjr>ddl	m
}m ddlm}  fdd}||f}d}n fdd}ttf}d	}z| }W |S  |y`   t|   Y S w )
a'  
    Resolves the given lora_path to an absolute local path.

    If the lora_path is identified as a Hugging Face model identifier,
    it will download the model and return the local snapshot path.
    Otherwise, it treats the lora_path as a local file path and
    converts it to an absolute path.

    Parameters:
    lora_path (str): The path to the lora model, which can be an absolute path,
                     a relative path, or a Hugging Face model identifier.

    Returns:
    str: The resolved absolute local path to the lora model.
    ~r   )InvalidParametersnapshot_download)	HTTPErrorc                      s
    dS )N)model_idr%   r%   ro   rr   r%   r&   <lambda>   s   
 z+get_adapter_absolute_path.<locals>.<lambda>z&Error downloading the ModelScope modelc                      s   t j dS )N)repo_id)huggingface_hubrr   r%   )ro   r%   r&   rv      s    z'Error downloading the HuggingFace model)ospathisabsr\   
expanduserexistsabspathr   VLLM_USE_MODELSCOPE modelscope.hub.snapshot_downloadrq   rr   requestsrs   r   r   r2   	exception)ro   rq   rs   download_fndownload_exceptions	error_loglocal_snapshot_pathr%   ru   r&   get_adapter_absolute_path   s,   

r   c                 C   sH   t | r t|  }rt| }| jsdd |D |d< |S tdt| S )Nc                 S   s&   g | ]\}}}}d |vr| dqS )z..rJ   )rstrip)r-   _weight_namer%   r%   r&   
<listcomp>  s
    
z2process_packed_modules_mapping.<locals>.<listcomp>expertszGTo support LoRA for MoE model, 'get_expert_mapping' must be implemented)r4   r   r   is_3d_moe_weightAttributeError)r)   moe_packed_mappingpacked_modules_mappingr%   r%   r&   process_packed_modules_mapping	  s   
r   r+   )Iry   typingr   r   rx   huggingface_hub.utilsr   r   torchr   transformersr   vllmr   vllm.config.lorar	   vllm.loggerr
   vllm.lora.layersr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   $vllm.model_executor.layers.fused_moer   !vllm.model_executor.layers.linearr   vllm.model_executor.utilsr   r   +vllm.model_executor.layers.logits_processorr    3vllm.model_executor.layers.vocab_parallel_embeddingr!    vllm.model_executor.models.utilsr"   __name__r2   r$   r'   r(   rh   type__annotations__Moduleboolr4   intrl   r?   rG   strrR   tuplerc   rf   rn   r   dictr   r%   r%   r%   r&   <module>   s   
L




,	$7