o
    
۾i)                     @   s  U d dl Z d dlZd dl mZ d dlmZ d dlmZ d dlZd dlm	Z	 d dl
mZ d dlmZ d dlmZ d d	lmZ eeZe Zi Zeeed
f eejd
f f ed< i Zeeed
f eejd
f f ed< deej dejfddZdeej dedejfddZ e jde!de"dB dedB fddZ#e j		d$de!dededededede"dB dedB dee!edB f fd d!Z$ed%dejdB de"fd"d#Z%dS )&    N)	lru_cache)Path)Any)envs)init_logger)vllm_is_batch_invariant)current_platform)next_power_of_2._LORA_A_PTR_DICT_LORA_B_PTR_DICTlora_a_weightsdevicec           
      C   sF  t dd | D }t| }r|S g }g }g }g }| D ]C}|jdkr3|ddks,J |jdd}n|jdks:J | s@J ||  ||	d ||	d ||	d qt
| dkrptj||tjd	}	n| d }	t
t|dkst
t|dkst
t|dkrtd
|	|d |d |d ft|< t|S )a  
    `_LORA_A_PTR_DICT` collects the required information during `profile_run`,
    After this, it remains constant and subsequent usage is through LUT.
    Refer to:
    https://github.com/triton-lang/triton/blob/release/3.1.x/python/tutorials/08-grouped-gemm.py
    c                 s       | ]}|  V  qd S Ndata_ptr.0lora_weight r   R/home/ubuntu/.local/lib/python3.10/site-packages/vllm/lora/ops/triton_ops/utils.py	<genexpr>        z"_get_lora_a_ptr.<locals>.<genexpr>      dim   r      r   dtypez+All LoRA weights must have the same stride.)tupler
   getndimsizesqueezeis_contiguousappendr   stridelentorchtensoruint64set
ValueError)
r   r   keyvalueslora_strides_d0lora_strides_d1lora_strides_d2tensor_ptrslora_a_weightlora_ptr_tensorr   r   r   _get_lora_a_ptr   s<   

r7   lora_weightsoffset_startc                 C   s  t dd | D }t| }r|S g }g }g }g }g }	g }
|}| D ]W}|jdkr9|ddks2J |jdd}n|jdks@J | sFJ ||  ||	d ||	d |	|	d || ||d7 }|
|d q"t
| dkrtj||tjd	}tj||tjd	}n|d }|d }t
t|dkrt
t|dkrt
t|	dkrt
t|
dkr|d }|d }|	d }|
d }d
}ntj||d}tj||d}tj|	|d}tj|
|d}d}t|
}||||||||ft|< t|S )a  
     `_LORA_B_PTR_DICT` collects the required information during `profile_run`,
    After this, it remains constant and subsequent usage is through LUT.
    Refer to:
    https://github.com/triton-lang/triton/blob/release/3.1.x/python/tutorials/08-grouped-gemm.py

    c                 s   r   r   r   r   r   r   r   r   T   r   z"_get_lora_b_ptr.<locals>.<genexpr>r   r   r   r   r   r   r   Tr   F)r!   r   r"   r#   r$   r%   r&   r'   r   r(   r)   r*   r+   r,   r-   max)r8   r9   r   r/   r0   slice_offset_lstr4   r1   r2   r3   hidden_sizesslice_offsetlora_b_weightr6   slice_start_tensorlora_strides_d0_tensorlora_strides_d1_tensorlora_strides_d2_tensorhidden_sizes_tensorsame_strideMAX_Nr   r   r   _get_lora_b_ptrI   sn   



rG   op_type
add_inputsreturnc                 C   s   t j}|d urztsztj }|dd}|dd}d }| dkr2| d|   dt|  d}n
| d|   d}t	| d| }|
 sStd|  d S td| d	 tt|}t|}W d    |S 1 ssw   Y  |S d }|S )
N _-expandz.json/z No LoRA kernel configs found in z%Using tuned LoRA kernel configs from .)r   VLLM_TUNED_CONFIG_FOLDERis_batch_invariantr*   cudaget_device_namereplaceupperstrr   existsloggerwarning_once	info_onceopenjsonload)rH   rI   user_defined_config_foldergpu_nameconfig_fnameconfig_pathfconfig_datar   r   r   load_lora_op_config   s.   
 
re   	max_lorasbatchhidden_sizerank
num_slicesmoe_intermediate_sizec              
      s  | dv sJ i }| dkr+|dk rdnd}	t rd}	dd|dk r d	nd|	d
dddd d	}n<| dv r?dtdt|dd
dddd}n(| dv rVddtdtdt|d
dddd}ndtdtd| dd
ddd d}|| dkrq||fn||f\t| |}
|
std |S |
tp|
t|
	 fddd }
|
t| }
|
tp|
t|
	 fddd }
|
tp|
t|
	 fddd }
|
tp|
t|
	 fddd }
|d ur| |
t p|
t|
	  fddd }
|
d usJ |
S )N)shrinkrN   fused_moe_lora_w13_shrinkfused_moe_lora_w13_expandfused_moe_lora_w2_shrinkfused_moe_lora_w2_expandrl      @      r             r   r   )	block_mblock_nblock_ksplit_k	num_warpsnum_ctasgroup_size_m
num_stagesmax_nreg)rm   ro   r   )rw   rx   ry   r{   r~   r}   rz   )rn   rp   )rw   rx   ry   r{   r|   r~   r   z!Using default LoRA kernel configsc                       t t|   S r   absintx)rf   r   r   <lambda>      z%get_lora_op_configs.<locals>.<lambda>)r/   c                    r   r   r   r   )mr   r   r     r   c                    r   r   r   r   )kr   r   r     r   c                    r   r   r   r   )nr   r   r   $  r   c                    r   r   r   r   )ir   r   r   ,  r   )
rR   minr	   r;   re   rY   rZ   r"   rW   keys)rH   rf   rg   rh   ri   rj   rI   rk   defaultrz   rd   r   )r   r   r   rf   r   r   get_lora_op_configs   s   
		

r   c                 C   s   t  ot dotj S )zz
    Refer to: https://github.com/triton-lang/triton/blob/v3.5.0/python/tutorials/11-programmatic-dependent-launch.py
    Z   )r   is_cudahas_device_capabilityr   VLLM_LORA_DISABLE_PDLr:   r   r   r   supports_pdl3  s
   r   )NNr   )&	functoolsr]   r   pathlibr   typingr   r*   vllmr   vllm.loggerr   *vllm.model_executor.layers.batch_invariantr   vllm.platformsr   vllm.utils.math_utilsr	   __name__rY   rR   r
   dictr!   r   r+   __annotations__r   listTensorr   r7   rG   rW   boolre   r   r   r   r   r   r   <module>   sb   
&&0
P  	x 