o
    پix[                     @   s  d Z ddlZddlmZ ddlmZmZ ddlZddlmZ ddl	m
Z
 ddlmZmZmZ ddlmZmZmZmZ dd	lmZmZ dd
lmZmZ ddlmZ ddlmZ ddlmZ ddl m!Z!m"Z" ddl#m$Z$m%Z% ddl&m'Z'm(Z( ddl)m*Z*m+Z+ ddl,m-Z- e.e/Z0d"ddZ1G dd dej2Z3G dd dej4Z5G dd dej4Z6G dd dej4Z7G dd dej4Z8G d d! d!ej4Z9e9gZ:dS )#z=Inference-only OPT model compatible with HuggingFace weights.    N)Iterable)OptionalUnion)nn)	OPTConfig)get_pp_groupget_tensor_model_parallel_rank$get_tensor_model_parallel_world_size)ColumnParallelLinearQKVParallelLinearReplicatedLinearRowParallelLinear)LogitsProcessorLogitsProcessorOutput)PoolerPoolingType)QuantizationConfig)RadixAttention)get_layer_id)ParallelLMHeadVocabParallelEmbedding)ForwardBatchPPProxyTensors)default_weight_loaderkv_cache_scales_loader)
add_prefixmake_layers)get_exception_tracebackreluc                 C   sB   |   } | dkrt S | dkrt S | dkrtj S t S )zSelect an activation function by name

    Args:
        name: str
            activation function name,
            one of ["relu", "gelu", "swish", "sigmoid"],
            default "relu".
    r   gelusigmoid)lowerr   ReLUGELUtorchSigmoidIdentity)name r(   I/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/models/opt.pyget_activation8   s   	
r*   c                       s:   e Zd Zdedef fddZdejf fddZ  ZS )OPTLearnedPositionalEmbeddingnum_embeddingsembedding_dimc                    s   d| _ t || j  | d S )N   )offsetsuper__init__)selfr,   r-   	__class__r(   r)   r1   M   s   z&OPTLearnedPositionalEmbedding.__init__	positionsc                    s   t  || j S N)r0   forwardr/   )r2   r5   r3   r(   r)   r7   T      z%OPTLearnedPositionalEmbedding.forward)	__name__
__module____qualname__intr1   r$   Tensorr7   __classcell__r(   r(   r3   r)   r+   K   s    r+   c                       sb   e Zd Z				ddedededed	ee d
eddf fddZde	j
dede	j
fddZ  ZS )OPTAttentionr   TN 	embed_dim	num_headslayer_idbiasquant_configprefixreturnc           	   
      s   t    || _t }|}|| dksJ || | _|| | _| jd | _t|| j|||td|d| _	t
||||td|d| _t| j| j| j| j||td|d| _d S )Nr   g      qkv_projrD   rE   rF   o_projattn)num_kv_headsrC   rE   rF   )r0   r1   rA   r	   rB   head_dimscalingr   r   rH   r   out_projr   rK   )	r2   rA   rB   rC   rD   rE   rF    tensor_model_parallel_world_sizetotal_num_headsr3   r(   r)   r1   Z   s@   
	

zOPTAttention.__init__hidden_statesforward_batchc           
      C   sD   |  |\}}|jddd\}}}| ||||}| |\}	}|	S )N   )chunksdim)rH   chunkrK   rO   )
r2   rR   rS   qkv_qkvattn_outputoutputr(   r(   r)   r7      s
   zOPTAttention.forward)r   TNr@   )r9   r:   r;   r<   boolr   r   strr1   r$   r=   r   r7   r>   r(   r(   r3   r)   r?   X   s6    ,r?   c                	       sT   e Zd Z			ddededee def fdd	Zd
e	j
dede	j
fddZ  ZS )OPTDecoderLayerr   Nr@   configrC   rE   rF   c              	      s   t    || _|j| _t| j|j||j|td|d| _	|j
| _
tj| j|jd| _t| j|j|j|td|d| _t|j| _t|j| j|j|td|d| _tj| j|jd| _d S )N	self_attn)rA   rB   rC   rD   rE   rF   elementwise_affinefc1rI   fc2)r0   r1   rc   hidden_sizerA   r?   num_attention_headsenable_biasr   rd   do_layer_norm_beforer   	LayerNormlayer_norm_elementwise_affineself_attn_layer_normr
   ffn_dimrg   r*   activation_functionactivation_fnr   rh   final_layer_normr2   rc   rC   rE   rF   r3   r(   r)   r1      sB   
zOPTDecoderLayer.__init__rR   rS   rG   c                 C   s   |}| j r
| |}| j||d}|| }| j s| |}|}| j r'| |}| |\}}| |}| |\}}|| }| j sF| |}|S )NrR   rS   )rl   ro   rd   rs   rg   rr   rh   )r2   rR   rS   residualrZ   r(   r(   r)   r7      s&   




zOPTDecoderLayer.forwardr   Nr@   )r9   r:   r;   r   r<   r   r   ra   r1   r$   r=   r   r7   r>   r(   r(   r3   r)   rb      s(    *rb   c                       sz   e Zd Z			ddededee def fdd	Z		dd
e	j
de	j
dedee dee	j
 dee	j
ef fddZ  ZS )
OPTDecoderr   Nr@   rc   rC   rE   rF   c                    s  t     | _ j| _ j| _t | _t j j	t
d|d| _t j j| _ j	 jkr?t j j	dt
d|d| _nd | _ j	 jkrXt j	 jdt
d|d| _nd | _ jrl jsltj j jd| _nd | _t j fdd	| jj| jjd
d\| _| _| _d S )Nembed_tokensrF   Fproject_outrI   
project_inre   c                    s   t  | |dS )N)rc   rC   rE   rF   )rb   )idxrF   rc   rE   r(   r)   <lambda>  s    z%OPTDecoder.__init__.<locals>.<lambda>zmodel.layers)pp_rankpp_sizerF   )r0   r1   rc   max_position_embeddingsmax_target_positions
vocab_sizer   pp_groupr   word_embed_proj_dimr   ry   r+   ri   embed_positionsr   r{   r|   rl   _remove_final_layer_normr   rm   rn   rs   r   num_hidden_layersrank_in_group
world_sizelayersstart_layer	end_layerrt   r3   r~   r)   r1      sX   



zOPTDecoder.__init__	input_idsr5   rS   pp_proxy_tensorsinput_embedsrG   c           
      C   s   | j jr#|d u r| |}| |}| jd ur| |\}}|| }n
|d us)J |d }| j| j| j D ]}	|	||d}q6| j jsIt	d|iS | j
d urS| 
|}| jd ur_| |\}}|S )NrR   ru   )r   is_first_rankry   r   r|   r   r   r   is_last_rankr   rs   r{   )
r2   r   r5   rS   r   r   
pos_embedsrZ   rR   layerr(   r(   r)   r7   &  s(   






zOPTDecoder.forwardrw   )NN)r9   r:   r;   r   r<   r   r   ra   r1   r$   r=   r   r   r   r7   r>   r(   r(   r3   r)   rx      s8    Jrx   c                       s   e Zd Z		ddedee deddf fddZ	dd	ej	d
ej	de
dee deej	 deej	ef fddZdeddfddZ  ZS )OPTModelNr@   rc   rE   rF   rG   c                    sB   t    || _|j| _|j| _t | _t||t	d|d| _
d S )Ndecoderrc   rE   rF   )r0   r1   rc   pad_token_idpadding_idxr   r   r   rx   r   r   r2   rc   rE   rF   r3   r(   r)   r1   I  s   
zOPTModel.__init__r   r5   rS   r   r   c                 C   s   | j |||||dS )N)r   r   rS   )r   )r2   r   r5   rS   r   r   r(   r(   r)   r7   ^  s   zOPTModel.forwardquantization_param_pathc                 C   sz   t  }t }t|||| jj| jjjD ]'\}}t| jj	| t
js(| jj	| j}t|jdr7||j_||j_qtdd S )Nk_scalez8Self attention has no KV cache scaling factor attribute!)r	   r   r   rc   r   r4   
model_type
isinstancer   r   r   r&   rd   hasattrrK   r   v_scaleRuntimeError)r2   r   tp_sizetp_rank	layer_idxscaling_factorlayer_self_attnr(   r(   r)   load_kv_cache_scalesn  s$   
zOPTModel.load_kv_cache_scalesNr@   r6   )r9   r:   r;   r   r   r   ra   r1   r$   r=   r   r   r   r7   r   r>   r(   r(   r3   r)   r   G  s6    
r   c                       s&  e Zd ZddgZ		d3dedee def fdd	Z			
d4de	j
de	j
dedee dee	j
 dedefddZdeeee	j
f  ddfddZedd Zedd ZdejfddZdd Zdd  Z	"d5d#ed$ed%edee	j
 fd&d'Zd(d) Zd*d+ Zd,d- Zd.d/ Z d0eddfd1d2Z!  Z"S )6OPTForCausalLMz.down_proj.z.o_proj.Nr@   rc   rE   rF   c                    s   t    || _|| _t||td|d| _| jjr!| jjj	| _
nt|j|jtd|d| _
t|| _ttjdd| _d| _t | _g d| _d S )	Nmodelr   lm_headrz   T)pooling_type	normalizeF))	.qkv_projz.q_projr[   )r   z.k_projr\   )r   z.v_projr]   )r0   r1   rc   rE   r   r   r   tie_word_embeddingsr   ry   r   r   r   r   r   logits_processorr   r   LASTpoolercapture_aux_hidden_statesr   r   stacked_params_mappingr   r3   r(   r)   r1     s$   

zOPTForCausalLM.__init__Fr   r5   rS   r   r   get_embeddingrG   c           	      C   sX   | j |||||d}d }| jr|\}}| jjr*|s$| j||| j||dS | ||S |S )N)r   r5   rS   r   r   )aux_hidden_states)r   r   r   r   r   r   r   )	r2   r   r5   rS   r   r   r   rR   r   r(   r(   r)   r7     s*   	zOPTForCausalLM.forwardweightsc                 C   s(  g d}t | jdd}|D ]\}}|dr|dd}t|}|d ur8t| jdr8|| jjk s7|| jjkr8q|D ](\}}}	||vrDq:|||}|	drT||vrTq:|| }
|
j
}||
||	  n.|	drm||vrmq||vrrq|| v r|| }
t|
d	t}||
| qtd
| d qd S )N))rH   q_projr[   )rH   k_projr\   )rH   v_projr]   F)remove_duplicater   zdecoder.zmodel.decoder.r   z.biasweight_loaderz
Parameter z not found in params_dict)dictnamed_parameters
startswithreplacer   r   r   r   r   endswithr   keysgetattrr   loggerwarning)r2   r   r   params_dictr'   loaded_weightrC   
param_nameweight_nameshard_idparamr   r(   r(   r)   load_weights  sF   

zOPTForCausalLM.load_weightsc                 C      | j jS r6   )r   r   r2   r(   r(   r)   r        zOPTForCausalLM.start_layerc                 C   r   r6   )r   r   r   r(   r(   r)   r     r   zOPTForCausalLM.end_layerc                 C   r   r6   )r   ry   r   r(   r(   r)   get_input_embeddings  s   z#OPTForCausalLM.get_input_embeddingsc                 C   sT   | j D ]\}}}}||v r|||d td  |f  S q|d td  dfS )Nz.weight   )r   r   len)r2   r'   r   r   r   	num_shardr(   r(   r)    get_module_name_from_weight_name
  s   z/OPTForCausalLM.get_module_name_from_weight_namec                 C   s   t |  }t|S r6   )r   r   r   )r2   r   r(   r(   r)   get_num_params  s   zOPTForCausalLM.get_num_paramsd   r   r'   truncate_sizer   c              	      s  z|dkr"| j jr"td | jjj t	j
  d| W S |}d}| jD ]\}}}||v r<|||}|} nq)t|  }	|	| }
|dur|dv r| j j| }| j j| }| j j| j j }|dkrnd}|| }n|dkr{|| }|| }n|dkr|| | }|| }|
jd|| n-|d	v r| j j}|| }|dkrd}|}n|d
kr|}|}|
jd|| n|
j n|
j |d
krd|v sd|v r fddt|D }t	j|  t	j|d
d   t	j
  d| W S  ty   td| dt   Y dS w )zGet the weights of the parameter by its name. Similar to `get_parameter` in Hugging Face.

        Only used for unit test with an unoptimized performance.
        For optimized performance, please use torch.save and torch.load.
        zlm_head.weightzTword embedding is tied for this model, return embed_tokens.weight as lm_head.weight.N)r[   r\   r]   r[   r   r\   r]   )r   r   r   rJ   	down_projc                    s   g | ]}t  qS r(   )r$   
zeros_like).0rZ   weightr(   r)   
<listcomp>U  s    z6OPTForCausalLM.get_weights_by_name.<locals>.<listcomp>)rW   zError getting weights by name z in OPTForCausalLM: )rc   r   r   infor   ry   r   cputor$   float32numpytolistr   r   r   r   rj   ri   datanarrowrp   rangedistributed
all_gathercat	Exceptionerrorr   )r2   r'   r   r   mapped_namemapped_shard_idr   r   r   r   r   rB   rL   rM   r/   sizeintermediate_size
slice_sizegathered_weightsr(   r   r)   get_weights_by_name  sx   


"z"OPTForCausalLM.get_weights_by_namec                 C   s   | j jj| jjfS r6   )r   ry   r   r   r   r(   r(   r)   get_embed_and_head`  r8   z!OPTForCausalLM.get_embed_and_headc                 C   s8   | j j`| j`|| j j_|| j_tj  tj  d S r6   )r   ry   r   r   r$   cudaempty_cachesynchronize)r2   embedheadr(   r(   r)   set_embed_and_headc  s   

z!OPTForCausalLM.set_embed_and_headc                 C   s
   | j jjS r6   )r   ry   r   r   r(   r(   r)   	get_embedk  s   
zOPTForCausalLM.get_embedc                 C   sJ   t | jdr| jj| jjkrd S | jj`|| jj_tj	  tj
  d S )Ntarget_hidden_size)r   rc   r   ri   r   ry   r   r$   r   r   r   )r2   r   r(   r(   r)   	set_embedn  s   


zOPTForCausalLM.set_embedr   c                 C   s   | j | d S r6   )r   r   )r2   r   r(   r(   r)   r   z  s   z#OPTForCausalLM.load_kv_cache_scalesr   )NNF)r   r   )#r9   r:   r;   column_parallel_weights_modulesr   r   r   ra   r1   r$   r=   r   r   r`   r   r7   r   tupler   propertyr   r   r   	Embeddingr   r   r   r<   r   r   r   r   r   r   r>   r(   r(   r3   r)   r     sh    %
 "4

	
Ir   )r   );__doc__loggingcollections.abcr   typingr   r   r$   r   transformersr   sglang.srt.distributedr   r   r	   sglang.srt.layers.linearr
   r   r   r   "sglang.srt.layers.logits_processorr   r   sglang.srt.layers.poolerr   r   *sglang.srt.layers.quantization.base_configr   !sglang.srt.layers.radix_attentionr   sglang.srt.layers.utilsr   *sglang.srt.layers.vocab_parallel_embeddingr   r   ,sglang.srt.model_executor.forward_batch_infor   r   $sglang.srt.model_loader.weight_utilsr   r   sglang.srt.utilsr   r   sglang.utilsr   	getLoggerr9   r   r*   r  r+   Moduler?   rb   rx   r   r   
EntryClassr(   r(   r(   r)   <module>   s:   

:Mh= 
{