o
    پi}                     @   s   d dl mZmZmZ d dlZd dlmZ d dlmZ d dl	m
Z
 d dlmZmZmZmZ d dlmZmZ dejd	ed
edejfddZG dd deZG dd deZG dd deZG dd deZegZdS )    )AnyDictOptionalN)PretrainedConfig)QuantizationConfig)ForwardBatch)LlamaAttentionLlamaDecoderLayerLlamaForCausalLM
LlamaModel)
add_prefixmake_layerspositions_idsbetamax_position_embeddingsreturnc              	   C   s*   d|t dt | |    }|dS )N   )torchlogfloor	unsqueeze)r   r   r   scaling r   P/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/models/ministral3.py_get_llama_4_attn_scale   s   
r   c                       s   e Zd Zddi dddddfded	ed
ededededeeee	f  de
dedee dede
ddf fddZdejdejdedejfddZ  ZS )Ministral3Attentionr       .ATi    N Fconfighidden_size	num_headsnum_kv_headslayer_id
rope_thetarope_scalingrope_is_neox_styler   quant_configprefixbiasr   c                    sh   t  |||||||||	|
|| d | _t|dr#|jr#|jd| _t|dd | _| jd ur2	 d S d S )Nrope_parametersllama_4_scaling_betasliding_window)super__init__r+   hasattrr*   getgetattrr,   )selfr   r    r!   r"   r#   r$   r%   r&   r   r'   r(   r)   	__class__r   r   r.      s.   
zMinistral3Attention.__init__	positionshidden_statesforward_batchc                 C   s   |  |\}}|j| j| j| jgdd\}}}| |||\}}| jd urKt|| j| j|j	}	|
d| j| j}||	d }|
d| j| j }| ||||}
| |
\}}|S )Nr   )dimr   )qkv_projsplitq_sizekv_size
rotary_embr+   r   r   todtypeviewr!   head_dimr   attno_proj)r2   r5   r6   r7   qkv_qkvscaleattn_outputoutputr   r   r   forwardF   s    

zMinistral3Attention.forward)__name__
__module____qualname__r   intfloatr   r   strr   boolr   r.   r   Tensorr   rL   __classcell__r   r   r3   r   r      sZ    	
+r   c                       s   e Zd Zd fdd	Z  ZS )Ministral3DecoderLayerr   Nr   c                    sv   t  |||| t|| j|j|j|t|di ddt|di t|dd|td|t|ddp5t|d	dd
| _	d S )Nr*   r$   r    original_max_position_embeddingsi @  	self_attnattention_biasFr)   )r   r    r!   r"   r#   r$   r%   r   r'   r(   r)   )
r-   r.   r   r    num_attention_headsnum_key_value_headsr1   r0   r   rX   )r2   r   r#   r'   r(   r3   r   r   r.   f   s*   
zMinistral3DecoderLayer.__init__)r   Nr   )rM   rN   rO   r.   rU   r   r   r3   r   rV   e   s    rV   c                	       s8   e Zd Z		d	dedee deddf fddZ  ZS )
Ministral3ModelNr   r   r'   r(   r   c                    sF   t   | t j fdd| jj| jjdd\| _| _| _	d S )Nc                    s   t  | |dS )N)r   r'   r#   r(   )rV   )idxr(   r   r'   r   r   <lambda>   s    z*Ministral3Model.__init__.<locals>.<lambda>zmodel.layers)pp_rankpp_sizer(   )
r-   r.   r   num_hidden_layerspp_grouprank_in_group
world_sizelayersstart_layer	end_layerr2   r   r'   r(   r3   r^   r   r.      s   zMinistral3Model.__init__Nr   )	rM   rN   rO   r   r   r   rR   r.   rU   r   r   r3   r   r\   ~   s    r\   c                   @   s,   e Zd Z		ddedee defddZdS )	Ministral3ForCausalLMNr   r   r'   r(   c                 C   s   t |||dS )N)r(   )r\   ri   r   r   r   _init_model   s   z!Ministral3ForCausalLM._init_modelrj   )rM   rN   rO   r   r   r   rR   rl   r   r   r   r   rk      s    rk   )typingr   r   r   r   transformersr   *sglang.srt.layers.quantization.base_configr   ,sglang.srt.model_executor.forward_batch_infor   sglang.srt.models.llamar   r	   r
   r   sglang.srt.utilsr   r   rT   rQ   rP   r   r   rV   r\   rk   
EntryClassr   r   r   r   <module>   s*    
	K

