o
    پi2                     @   sZ  d Z ddlmZ ddlmZmZmZ ddlZddlmZ ddl	m
Z
 ddlmZ ddlmZ dd	lmZ dd
lmZmZmZ ddlmZmZ ddlmZ ddlmZ ddlmZ ddlmZ ddl m!Z!m"Z" ddl#m$Z$m%Z% ddl&m'Z' ddl(m)Z)m*Z* G dd dej+Z,G dd dej+Z-G dd dej+Z.G dd dej+Z/G dd dej+Z0e0Z1dS )zCInference-only Orion-14B model compatible with HuggingFace weights.    )Iterable)AnyOptionalTupleN)nn)PretrainedConfig)$get_tensor_model_parallel_world_size)get_pp_group)
SiluAndMul)MergedColumnParallelLinearQKVParallelLinearRowParallelLinear)LogitsProcessorLogitsProcessorOutput)QuantizationConfig)RadixAttention)get_rope)PPMissingLayer)ParallelLMHeadVocabParallelEmbedding)ForwardBatchPPProxyTensors)default_weight_loader)
add_prefixmake_layersc                       sH   e Zd Z		ddedededee deddf fd	d
Zdd Z  Z	S )OrionMLPN hidden_sizeintermediate_size
hidden_actquant_configprefixreturnc                    sh   t    t||gd d|td|d| _t||d|td|d| _|dkr.td| dt | _	d S )	N   Fgate_up_projbiasr    r!   	down_projsiluzUnsupported activation: z!. Only silu is supported for now.)
super__init__r   r   r$   r   r'   
ValueErrorr
   act_fn)selfr   r   r   r    r!   	__class__ K/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/models/orion.pyr*   )   s(   

zOrionMLP.__init__c                 C   s*   |  |\}}| |}| |\}}|S N)r$   r,   r'   )r-   xgate_up_r0   r0   r1   forwardG   s   
zOrionMLP.forwardNr   )
__name__
__module____qualname__intstrr   r   r*   r6   __classcell__r0   r0   r.   r1   r   (   s"    r   c                       s   e Zd Z						ddededed	ed
eeeef  dedee	 dededdf fddZ
dejdejdedejfddZ  ZS )OrionAttention'  N    r   r   r   	num_headsnum_kv_heads
rope_thetarope_scalingmax_position_embeddingsr    layer_idr!   r"   c
              
      sJ  t    || _t }
|| _| j|
 dksJ | j|
 | _|| _| j|
kr/| j|
 dks.J n	|
| j dks8J td| j|
 | _|| j | _	| j| j	 | _
| j| j	 | _| j	d | _|| _|| _t|| j	| j| jd|td|	d| _t| j| j	 |d|td|	d| _t| j	| j	|||d| _t| j| j	| j| j||td	|	d
| _d S )Nr      g      Fqkv_projr%   o_proj)
rotary_dimmax_positionbaserD   attn)rB   rF   r    r!   )r)   r*   r   r   total_num_headsrA   total_num_kv_headsmaxrB   head_dimq_sizekv_sizescalingrC   rE   r   r   rH   r   rI   r   
rotary_embr   rM   )r-   r   rA   rB   rC   rD   rE   r    rF   r!   tp_sizer.   r0   r1   r*   O   sb   

	
zOrionAttention.__init__	positionshidden_statesforward_batchc                 C   sd   |  |\}}|j| j| j| jgdd\}}}| |||\}}| j||||d}	| |	\}
}|
S )N)dim)rY   )rH   splitrR   rS   rU   rM   rI   )r-   rW   rX   rY   qkvr5   qkvattn_outputoutputr0   r0   r1   r6      s    zOrionAttention.forward)r?   Nr@   Nr   r   )r8   r9   r:   r;   floatr   dictr<   r   r   r*   torchTensorr   r6   r=   r0   r0   r.   r1   r>   N   sJ    	
Ar>   c                       s\   e Zd Z		ddededee deddf
 fdd	Zd
e	j
de	j
dede	j
fddZ  ZS )OrionDecoderLayerNr   configrF   r    r!   r"   c                    s   t    |j| _t|dd}t|dd }t|dd}t| j|j|j||||td||d	| _t	| j|j
|j|td|d	| _tj|j|jd
| _tj|j|jd
| _d S )NrC   r?   rD   rE   r@   	self_attn)	r   rA   rB   rC   rD   rE   r    r!   rF   mlp)r   r   r   r    r!   eps)r)   r*   r   getattrr>   num_attention_headsnum_key_value_headsr   ri   r   r   r   rj   r   	LayerNormrms_norm_epsinput_layernormpost_attention_layernorm)r-   rh   rF   r    r!   rC   rD   rE   r.   r0   r1   r*      s6   
zOrionDecoderLayer.__init__rW   rX   rY   c                 C   sJ   |}|  |}| j|||d}|| }|}| |}| |}|| }|S )N)rW   rX   rY   )rr   ri   rs   rj   )r-   rW   rX   rY   residualr0   r0   r1   r6      s   


zOrionDecoderLayer.forwardr7   )r8   r9   r:   r   r;   r   r   r<   r*   re   rf   r   r6   r=   r0   r0   r.   r1   rg      s.    #rg   c                       sf   e Zd Z		ddedee def fddZ		ddej	d	ej	d
e
deej	 dee f
ddZ  ZS )
OrionModelNr   rh   r    r!   c                    s   t     | _t | _| jjrt j j| _	nt
 | _	t j fdd| jj| jjtd|d\| _| _| _| jjrItj j jd| _d S t
 | _d S )Nc                    s   t  | |dS )N)rF   r    r!   )rg   )idxr!   rh   r    r0   r1   <lambda>   s    z%OrionModel.__init__.<locals>.<lambda>layers)pp_rankpp_sizer!   rk   )r)   r*   rh   r	   pp_groupis_first_rankr   
vocab_sizer   embed_tokensr   r   num_hidden_layersrank_in_group
world_sizer   ry   start_layer	end_layeris_last_rankr   rp   rq   normr-   rh   r    r!   r.   rw   r1   r*      s$   

zOrionModel.__init__	input_idsrW   rY   inputs_embedspp_proxy_tensorsc           	      C   s   | j jr|d ur|}n| |}n
|d usJ |d }t| j| jD ]}| j| }||||}q"| j js:td|iS | 	|}|S )NrX   )
r|   r}   r   ranger   r   ry   r   r   r   )	r-   r   rW   rY   r   r   rX   ilayerr0   r0   r1   r6      s   

zOrionModel.forwardr7   )NN)r8   r9   r:   r   r   r   r<   r*   re   rf   r   r   r6   r=   r0   r0   r.   r1   ru      s.    %ru   c                       s|   e Zd Z		ddedee def fddZ	ddej	d	ej	d
e
deej	 def
ddZdeeeej	f  fddZ  ZS )OrionForCausalLMNr   rh   r    r!   c                    s   t    || _|| _t | _t||td|d| _| jj	rBt
|j|j|td|d| _| jjr;| jjr;| jjj| j_t|| _d S t | _d S )Nmodel)rh   r    r!   lm_head)r    r!   )r)   r*   rh   r    r	   r|   ru   r   r   r   r   r~   r   r   tie_word_embeddingsr}   r   weightr   logits_processorr   r   r.   r0   r1   r*     s$   
zOrionForCausalLM.__init__r   rW   rY   r   r"   c                 C   s4   | j ||||d}| jjr| ||| j|}|S |S )N)r   rW   rY   r   )r   r|   r   r   r   )r-   r   rW   rY   r   rX   logitsr0   r0   r1   r6   3  s   
zOrionForCausalLM.forwardweightsc                 C   s   g d}t |  }|D ]]\}}d|v rqd}|D ].\}}}	||vr#q|||}|dr3||vr3q||vr8q|| }
|
j}||
||	 d} |rKq|drU||vrUq||vrZq|| }
t|
dt}||
| qd S )N))rH   q_projr^   )rH   k_projr_   )rH   v_projr`   )r$   	gate_projr   )r$   up_projrG   zrotary_emb.inv_freqFz.biasTweight_loader)rd   named_parametersreplaceendswithr   rm   r   )r-   r   stacked_params_mappingparams_dictnameloaded_weight	is_packed
param_nameweight_nameshard_idparamr   r0   r0   r1   load_weightsH  s:   zOrionForCausalLM.load_weightsr7   r2   )r8   r9   r:   r   r   r   r<   r*   re   rf   r   r   r6   r   r   r   r=   r0   r0   r.   r1   r     s.     
$r   )2__doc__collections.abcr   typingr   r   r   re   r   transformersr   sglang.srt.distributedr   %sglang.srt.distributed.parallel_stater	   sglang.srt.layers.activationr
   sglang.srt.layers.linearr   r   r   "sglang.srt.layers.logits_processorr   r   sglang.srt.layers.quantizationr   !sglang.srt.layers.radix_attentionr   "sglang.srt.layers.rotary_embeddingr   sglang.srt.layers.utilsr   *sglang.srt.layers.vocab_parallel_embeddingr   r   ,sglang.srt.model_executor.forward_batch_infor   r   $sglang.srt.model_loader.weight_utilsr   sglang.srt.utilsr   r   Moduler   r>   rg   ru   r   
EntryClassr0   r0   r0   r1   <module>   s2   &P<=[