o
    
۾i                     @   s   d dl mZ d dlmZ d dlZd dlmZ d dlmZ d dl	m
Z
 G dd deZeG dd	 d	Zd
ejdejfddZdd Zdd Z	ddedeedf deedf dedededB dedee dB deeedB f fddZdS )    )	dataclass)EnumN)try_get_optimal_moe_config)next_power_of_2c                   @   s   e Zd ZdZdZdZdS )LoRAMappingType         N)__name__
__module____qualname__LANGUAGETOWER	CONNECTOR r   r   J/home/ubuntu/.local/lib/python3.10/site-packages/vllm/lora/layers/utils.pyr      s    r   c                   @   sP   e Zd ZU eedf ed< eedf ed< dZeed< ej	Z
eed< dd Zd	S )
LoRAMapping.index_mappingprompt_mappingF
is_prefilltypec                 C   s   t | j| _t | j| _d S N)tupler   r   )selfr   r   r   __post_init__   s   zLoRAMapping.__post_init__N)r
   r   r   r   int__annotations__r   boolr   r   r   r   r   r   r   r   r      s   
 r   
base_layerreturnc                 C   sz   t | dr	| jjS t | dr| jjS t | dr| jjS t | dr$| jjS t | dr-| jjS t | dr6| jjS td|  )z7Returns the device for where to place the LoRA tensors.weightweight_packedqweight	w2_weightw2_weight_packed
w2_qweightzUnsupported base layer: )	hasattrr    devicer!   r"   r#   r$   r%   
ValueError)r   r   r   r   _get_lora_device    s   





r)   c                        fdd}|S )zv
    decorator which adds the condition of not using fully sharded loras
    intended to wrap can_replace_layer()
    c                     s<   d|v r	| dnd}|r|d j nd} | i |o|S )NdecorateTlora_config)popfully_sharded_loras)argskwargsr+   	conditioncan_replacer   r   dec?   s   z+_not_fully_sharded_can_replace.<locals>.decr   r3   r4   r   r2   r   _not_fully_sharded_can_replace9      r6   c                    r*   )zl
    decorator which adds the condition of fully sharded loras
    intended to wrap can_replace_layer()
    c                     s    | i |o|d j S )Nr,   )r.   )r/   r0   r2   r   r   r4   M   s   z'_fully_sharded_can_replace.<locals>.decr   r5   r   r2   r   _fully_sharded_can_replaceG   r7   r8   op_typew1_shape.w2_shaperanktop_kdtypeMblock_shapec           	      C   sh   t |||||| }| dv rt|ddt||d< |S | dv r2tdt|ddt||d< |S )N)fused_moe_lora_w13_shrinkfused_moe_lora_w2_shrinkBLOCK_SIZE_N@   )fused_moe_lora_w13_expandfused_moe_lora_w2_expand   BLOCK_SIZE_K    )r   copymingetr   max)	r9   r:   r;   r<   r=   r>   r?   r@   configr   r   r   try_get_optimal_moe_lora_configU   s   

rO   r   )dataclassesr   enumr   torchtorch.nnnn.vllm.model_executor.layers.fused_moe.fused_moer   vllm.utils.math_utilsr   r   r   Moduler'   r)   r6   r8   strr   r   listdictrO   r   r   r   r   <module>   s@   


	