import logging
from typing import Callable, Dict, Iterable, List, Optional, Set, Tuple, Union

import torch

from sglang.srt.distributed import divide
from sglang.srt.lora.eviction_policy import get_eviction_policy
from sglang.srt.lora.layers import BaseLayerWithLoRA
from sglang.srt.lora.lora import LoRAAdapter
from sglang.srt.lora.lora_config import LoRAConfig
from sglang.srt.lora.lora_registry import LoRARef
from sglang.srt.lora.utils import (
    EMBEDDING_NAMES,
    ROW_PARALLELISM_LINEAR_LORA_NAMES,
    LoRAType,
    get_hidden_dim,
    get_normalized_target_modules,
    get_stacked_multiply,
    get_target_module_name,
)
from sglang.srt.utils.hf_transformers_utils import AutoConfig

logger = logging.getLogger(__name__)


class EmptySlot:
    """
    Singleton class to represent an empty slot in the memory pool.
    This is used to improve readability by not using special str as a placeholder.
    """

    __slots__ = ()

    def __repr__(self):
        return "|EMPTY|"

    def __new__(cls):
        if not hasattr(cls, "_instance"):
            cls._instance = super().__new__(cls)
        return cls._instance


EMPTY_SLOT = EmptySlot()
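# EmptySlot.__new__ caches a single instance, so every EmptySlot() call returns
# the same object; slot occupancy checks can therefore compare directly against
# EMPTY_SLOT.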
class LoRAMemoryPool:
    """Class for memory pool management of lora modules"""

    def __init__(
        self,
        base_hf_config: AutoConfig,
        max_loras_per_batch: int,
        dtype: torch.dtype,
        tp_size: int,
        tp_rank: int,
        max_lora_rank: int,
        target_modules: Set[str],
        base_model: torch.nn.Module,
        eviction_policy: str,
        lora_added_tokens_size: int,
    ):
        self.base_hf_config: AutoConfig = base_hf_config
        self.num_layer: int = base_hf_config.num_hidden_layers
        self.max_loras_per_batch: int = max_loras_per_batch
        self.dtype: torch.dtype = dtype
        self.tp_size: int = tp_size
        self.tp_rank: int = tp_rank
        self.lora_added_tokens_size: int = lora_added_tokens_size
        self.target_modules: Set[str] = target_modules
        self.max_lora_rank: int = max_lora_rank
        self.eviction_policy = get_eviction_policy(eviction_policy)

        # Per-layer LoRA weight buffers: module name -> list (one tensor per
        # layer) with the slot index as the leading dimension.
        self.A_buffer: Dict[str, List[torch.Tensor]] = {}
        self.B_buffer: Dict[str, List[torch.Tensor]] = {}

        # Buffers for the embedding-like modules (embed_tokens / lm_head),
        # which are not per-layer.
        self.embedding_A_buffer: Dict[str, torch.Tensor] = {}
        self.embedding_B_buffer: Dict[str, torch.Tensor] = {}
        self.lm_head_A_buffer: Dict[str, torch.Tensor] = {}
        self.lm_head_B_buffer: Dict[str, torch.Tensor] = {}
        self.new_embeddings_buffer: Dict[str, torch.Tensor] = {}
        self.embedding_dim: int = self.base_hf_config.hidden_size

        # Two-way mapping between adapter UIDs and buffer slots.
        self.uid_to_buffer_id: Dict[Optional[str], int] = {}
        self.buffer_id_to_uid: List[Union[Optional[str], EmptySlot]] = [
            EMPTY_SLOT
        ] * self.max_loras_per_batch

        self.init_buffers(base_model)
    def can_support(self, config: Union[LoRAConfig, Iterable[LoRAConfig]]) -> bool:
        """
        Check if the memory pool can support the given LoRA adapters.
        """

        def _can_support(config: LoRAConfig) -> bool:
            """
            Check if the memory pool can support a single LoRA adapter.
            """
            if config.r > self.max_lora_rank:
                return False
            if config.lora_added_tokens_size > self.lora_added_tokens_size:
                return False
            target_module_names = get_normalized_target_modules(config.target_modules)
            if "all" in target_module_names:
                return True
            return target_module_names.issubset(self.target_modules)

        if isinstance(config, LoRAConfig):
            return _can_support(config)
        else:
            return all(_can_support(x) for x in config)
    def get_lora_A_shape(
        self, module_name: str, base_model: torch.nn.Module, max_lora_dim: int, layer_idx: int
    ) -> Tuple[int]:
        """
        Given a module_name (might be a stacked name), return the hidden dims of modules' input and output.
        """
        input_dim, _ = get_hidden_dim(
            module_name, self.base_hf_config, base_model, layer_idx
        )
        c = get_stacked_multiply(module_name)
        if self.tp_size > 1 and module_name in ROW_PARALLELISM_LINEAR_LORA_NAMES:
            input_dim = divide(input_dim, self.tp_size)
        return (
            self.max_loras_per_batch,
            max_lora_dim * c,
            input_dim,
        )

    def get_embedding_lora_A_shape(
        self, module_name: str, base_model: torch.nn.Module, max_lora_dim: int, layer_idx: int
    ) -> Tuple[int]:
        input_dim, _ = get_hidden_dim(
            module_name, self.base_hf_config, base_model, 0, self.lora_added_tokens_size
        )
        return (self.max_loras_per_batch, max_lora_dim, input_dim)

    def get_lora_B_shape(
        self, module_name: str, base_model: torch.nn.Module, max_lora_dim: int, layer_idx: int
    ) -> Tuple[int]:
        _, output_dim = get_hidden_dim(
            module_name, self.base_hf_config, base_model, layer_idx
        )
        if self.tp_size > 1 and module_name not in ROW_PARALLELISM_LINEAR_LORA_NAMES:
            output_dim = divide(output_dim, self.tp_size)
        return (
            self.max_loras_per_batch,
            output_dim,
            max_lora_dim,
        )

    def get_embedding_lora_B_shape(
        self, module_name: str, base_model: torch.nn.Module, max_lora_dim: int, layer_idx: int
    ) -> Tuple[int]:
        _, output_dim = get_hidden_dim(
            module_name, self.base_hf_config, base_model, 0, self.lora_added_tokens_size
        )
        return (self.max_loras_per_batch, output_dim, max_lora_dim)

    def init_buffers(self, base_model: torch.nn.Module):
        device = next(base_model.parameters()).device

        def init_buffer(
            buffer: Dict[str, List[torch.Tensor]],
            target_modules: Set[str],
            get_lora_shape_fn: Callable[[str, torch.nn.Module, int, int], Tuple[int]],
        ):
            # Embedding-like modules are handled by init_embedding_buffer below.
            target_modules = target_modules - set(EMBEDDING_NAMES)
            for module_name in target_modules:
                buffer[module_name] = [
                    torch.empty(
                        get_lora_shape_fn(
                            module_name, base_model, self.max_lora_rank, idx
                        ),
                        dtype=self.dtype,
                        device=device,
                    )
                    for idx in range(self.num_layer)
                ]

        def init_embedding_buffer(
            buffer: Dict[str, torch.Tensor],
            target_modules: Set[str],
            get_lora_shape_fn: Callable[[str, torch.nn.Module, int, int], Tuple[int]],
        ):
            target_modules = target_modules & set(EMBEDDING_NAMES)
            for module_name in target_modules:
                buffer[module_name] = torch.empty(
                    get_lora_shape_fn(module_name, base_model, self.max_lora_rank, 0),
                    dtype=self.dtype,
                    device=device,
                )

        if self.lora_added_tokens_size > 0:
            # Extra embedding rows for tokens added on top of the base vocabulary.
            self.new_embeddings_buffer["input_embeddings"] = torch.empty(
                (
                    self.max_loras_per_batch,
                    self.lora_added_tokens_size,
                    self.embedding_dim,
                ),
                dtype=self.dtype,
                device=device,
            )
        if "embed_tokens" in self.target_modules:
            init_embedding_buffer(
                self.embedding_A_buffer,
                self.target_modules,
                self.get_embedding_lora_A_shape,
            )
            init_embedding_buffer(
                self.embedding_B_buffer,
                self.target_modules,
                self.get_embedding_lora_B_shape,
            )
        if "lm_head" in self.target_modules:
            init_embedding_buffer(
                self.lm_head_A_buffer,
                self.target_modules,
                self.get_embedding_lora_A_shape,
            )
            init_embedding_buffer(
                self.lm_head_B_buffer,
                self.target_modules,
                self.get_embedding_lora_B_shape,
            )
        init_buffer(self.A_buffer, self.target_modules, self.get_lora_A_shape)
        init_buffer(self.B_buffer, self.target_modules, self.get_lora_B_shape)

    def prepare_lora_batch(
        self,
        cur_uids: Set[Optional[str]],
        lora_adapters: Dict[str, LoRAAdapter],
        lora_modules: List[Dict[str, BaseLayerWithLoRA]],
        lora_refs: Dict[str, LoRARef],
        lora_embed_tokens_module: Dict[str, BaseLayerWithLoRA],
        lora_lm_head_module: Dict[str, BaseLayerWithLoRA],
    ):
        def get_available_buffer_slot() -> int:
            # Prefer a slot that has never been filled.
            for buffer_id in range(self.max_loras_per_batch):
                if self.buffer_id_to_uid[buffer_id] == EMPTY_SLOT:
                    return buffer_id

            # Collect eviction candidates: resident adapters that are neither
            # part of the current batch nor pinned.
            candidates = set()
            for buffer_id in range(self.max_loras_per_batch):
                uid = self.buffer_id_to_uid[buffer_id]
                if uid in cur_uids:
                    continue
                if uid is not None:
                    lora_ref = lora_refs.get(uid)
                    if lora_ref and lora_ref.pinned:
                        continue
                candidates.add(uid)

            if not candidates:
                raise ValueError(
                    "No available buffer slots found. Please ensure the number of "
                    "active (pinned) loras is less than max_loras_per_batch."
                )

            # Let the eviction policy choose among real adapters when possible.
            non_none_candidates = candidates - {None}
            candidates_to_use = (
                non_none_candidates if non_none_candidates else candidates
            )
            victim_uid = self.eviction_policy.select_victim(candidates_to_use)
            victim_buffer_id = self.uid_to_buffer_id.pop(victim_uid)
            self.eviction_policy.remove(victim_uid)
            self.buffer_id_to_uid[victim_buffer_id] = EMPTY_SLOT
            logger.debug(
                f"Evicting LoRA {victim_uid} from buffer slot {victim_buffer_id}."
            )
            return victim_buffer_id

        for uid in cur_uids:
            self.eviction_policy.mark_used(uid)

        for uid in cur_uids:
            if uid not in self.uid_to_buffer_id:
                buffer_id = get_available_buffer_slot()
                lora_adapter = lora_adapters.get(uid, None)
                self.load_lora_weight_to_buffer(
                    uid,
                    buffer_id,
                    lora_adapter,
                    lora_modules,
                    lora_embed_tokens_module,
                    lora_lm_head_module,
                )
                self.uid_to_buffer_id[uid] = buffer_id
                self.buffer_id_to_uid[buffer_id] = uid

    def load_lora_weight_to_buffer(
        self,
        uid: str,
        buffer_id: int,
        lora_adapter: LoRAAdapter,
        lora_modules: List[Dict[str, BaseLayerWithLoRA]],
        lora_embed_tokens_module: Dict[str, BaseLayerWithLoRA],
        lora_lm_head_module: Dict[str, BaseLayerWithLoRA],
    ):
        def load_lora_weight_tensor(
            buffer_view: torch.Tensor, weight: Optional[torch.Tensor]
        ):
            if weight is None:
                # The adapter provides no weight for this module: zero the
                # buffer view so the slot contributes nothing.
                buffer_view.zero_()
            else:
                assert (
                    buffer_view.shape == weight.shape
                ), f"LoRA buffer shape {buffer_view.shape} does not match weight shape {weight.shape}."
                buffer_view.copy_(weight, non_blocking=True)

        if uid is None:
            # The base model occupies this slot: zeroing the A matrices makes
            # the LoRA delta vanish.
            for i in range(self.num_layer):
                for k in self.A_buffer.keys():
                    self.A_buffer[k][i][buffer_id] *= 0
            for k in self.embedding_A_buffer.keys():
                self.embedding_A_buffer[k][buffer_id] *= 0
            for k in self.lm_head_A_buffer.keys():
                self.lm_head_A_buffer[k][buffer_id] *= 0
            return

        assert lora_adapter is not None
        lora_rank = lora_adapter.config.r
        for layer_id in range(self.num_layer):
            layer_weights = lora_adapter.layers[layer_id].weights
            temp_A_buffer: Dict[str, Optional[torch.Tensor]] = {
                target_module: None for target_module in self.A_buffer
            }
            temp_B_buffer: Dict[str, Optional[torch.Tensor]] = {
                target_module: None for target_module in self.B_buffer
            }
            for name, weights in layer_weights.items():
                target_module = get_target_module_name(name, self.target_modules)
                if "lora_A" in name:
                    temp_A_buffer[target_module] = weights
                else:
                    temp_B_buffer[target_module] = weights

            if self.tp_size > 1:
                cur_layer_modules = lora_modules[layer_id]
                for module_name, module in cur_layer_modules.items():
                    target_module = get_target_module_name(
                        module_name, self.target_modules
                    )
                    if temp_A_buffer[target_module] is None:
                        # This module is not targeted by the adapter.
                        continue
                    temp_A_buffer[target_module] = module.slice_lora_a_weights(
                        temp_A_buffer[target_module], self.tp_rank
                    )
                    temp_B_buffer[target_module] = module.slice_lora_b_weights(
                        temp_B_buffer[target_module], self.tp_rank
                    )

            for name, weights in temp_A_buffer.items():
                c = get_stacked_multiply(name)
                target_buffer = self.A_buffer[name][layer_id]
                buffer_view = target_buffer[buffer_id][: lora_rank * c, :]
                load_lora_weight_tensor(buffer_view, weights)

            for name, weights in temp_B_buffer.items():
                target_buffer = self.B_buffer[name][layer_id]
                buffer_view = target_buffer[buffer_id][:, :lora_rank]
                load_lora_weight_tensor(buffer_view, weights)

        if lora_adapter.embedding_layers:
            org_vocab_size = self.base_hf_config.vocab_size
            lora_added_tokens_size = lora_adapter.config.lora_added_tokens_size
            if lora_adapter.added_tokens_embeddings:
                for name, weights in lora_adapter.added_tokens_embeddings.items():
                    if "input_embeddings" in name:
                        buffer_view = self.new_embeddings_buffer["input_embeddings"][
                            buffer_id
                        ][:lora_added_tokens_size]
                        load_lora_weight_tensor(buffer_view, weights)
            for name, weights in lora_adapter.embedding_layers.items():
                target_module = get_target_module_name(name, self.target_modules)
                if target_module == "embed_tokens" and (
                    "lora_embedding_A" in name or "lora_A" in name
                ):
                    buffer_view = self.embedding_A_buffer[target_module][buffer_id][
                        :lora_rank, : org_vocab_size + lora_added_tokens_size
                    ]
                    load_lora_weight_tensor(buffer_view, weights)
                elif target_module == "embed_tokens" and (
                    "lora_embedding_B" in name or "lora_B" in name
                ):
                    buffer_view = self.embedding_B_buffer[target_module][buffer_id][
                        :, :lora_rank
                    ]
                    load_lora_weight_tensor(buffer_view, weights)
                elif target_module == "lm_head" and (
                    "lora_embedding_A" in name or "lora_A" in name
                ):
                    buffer_view = self.lm_head_A_buffer[target_module][buffer_id][
                        :lora_rank, :
                    ]
                    load_lora_weight_tensor(buffer_view, weights)
                elif target_module == "lm_head" and (
                    "lora_embedding_B" in name or "lora_B" in name
                ):
                    buffer_view = self.lm_head_B_buffer[target_module][buffer_id][
                        : org_vocab_size + self.lora_added_tokens_size, :lora_rank
                    ]
                    load_lora_weight_tensor(buffer_view, weights)
    def get_embedding_tensor(
        self, target_module: str, lora_type: LoRAType
    ) -> Optional[torch.Tensor]:
        """
        Get LoRA tensor for non-layer modules (embed_tokens, lm_head).

        Args:
            target_module: Module name, either "embed_tokens" or "lm_head"
            lora_type: Either LoRAType.LORA_A or LoRAType.LORA_B

        Returns:
            The corresponding buffer tensor, or None if not available
        """
        if target_module == "added_tokens":
            if self.lora_added_tokens_size is not None and self.lora_added_tokens_size > 0:
                return self.new_embeddings_buffer["input_embeddings"]
            return None
        elif target_module == "embed_tokens":
            if lora_type == LoRAType.LORA_A:
                return self.embedding_A_buffer[target_module]
            return self.embedding_B_buffer[target_module]
        elif target_module == "lm_head":
            if lora_type == LoRAType.LORA_A:
                return self.lm_head_A_buffer[target_module]
            return self.lm_head_B_buffer[target_module]
        raise ValueError(
            f"Invalid target_module '{target_module}'. Expected 'embed_tokens' or 'lm_head'."
        )

    def get_tensor(
        self, target_module: str, layer_id: int, lora_type: LoRAType
    ) -> torch.Tensor:
        if lora_type == LoRAType.LORA_A:
            return self.A_buffer[target_module][layer_id]
        return self.B_buffer[target_module][layer_id]

    def get_buffer_id(self, lora_uid: str) -> int:
        return self.uid_to_buffer_id[lora_uid]
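# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the pool API above): a minimal, standalone
# model of the slot bookkeeping that prepare_lora_batch performs, using plain
# Python containers instead of GPU buffers. The names below
# (_demo_slot_assignment, assign, ...) are hypothetical, and the first-match
# victim choice stands in for the pluggable eviction policy of the real pool.


def _demo_slot_assignment():
    max_slots = 2
    empty = object()  # stand-in for EMPTY_SLOT
    slot_to_uid = [empty] * max_slots
    uid_to_slot = {}

    def assign(uid, active_uids):
        # An adapter that is already resident keeps its slot.
        if uid in uid_to_slot:
            return uid_to_slot[uid]
        # Otherwise prefer a slot that was never filled.
        for slot, occupant in enumerate(slot_to_uid):
            if occupant is empty:
                break
        else:
            # All slots occupied: evict a resident adapter that is not part of
            # the active batch (the real pool asks its eviction policy instead).
            victims = [u for u in slot_to_uid if u not in active_uids]
            if not victims:
                raise ValueError("all slots are held by the active batch")
            slot = uid_to_slot.pop(victims[0])
        uid_to_slot[uid] = slot
        slot_to_uid[slot] = uid
        return slot

    batch = ["lora-a", "lora-b"]
    for uid in batch:
        print(uid, "->", assign(uid, set(batch)))
    # A third adapter now forces an eviction.
    print("lora-c", "->", assign("lora-c", {"lora-c"}))


if __name__ == "__main__":
    _demo_slot_assignment()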