o
    پi                     @   s   d dl mZmZmZmZ d dlZd dlmZ d dlm	Z	 d dl
mZmZ d dlmZmZ G dd deZG d	d
 d
eZeZdS )    )ListOptionalTupleUnionN)LlamaConfig)QuantizationConfig)ForwardBatchPPProxyTensors)LlamaForCausalLM
LlamaModelc                       s   e Zd ZdZ		ddedee deddf fdd	Z		dd
e	j
de	j
dede	j
dee dee	j
ee	j
ee	j
 f ef f fddZ  ZS )TeleFLMModeluM  
    This implementation is based on the µScaling paper presented at
    the ICLR 2025 Workshop:
    NanoLM: An Affordable LLM Study Benchmark     via Accurate Loss Prediction across Scales
    by Yiqun Yao et al.
    Available at: https://openreview.net/forum?id=IwaPYg1SCA
    arXiv preprint: https://arxiv.org/abs/2304.06875
    N configquant_configprefixreturnc                    s:   t  j|||d t| jdd| _| jr| jj| _d S d S Nr   r   use_mupF)super__init__getattrr   r   
input_multselfr   r   r   	__class__ M/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/models/teleflm.pyr   0   s
   zTeleFLMModel.__init__	input_ids	positionsforward_batchinput_embedspp_proxy_tensorsc                    s@   | j jr|d u r| |}| jr|| j }t j|||||dS )N)r   r    r!   r"   r#   )pp_groupis_first_rankembed_tokensr   r   r   forward)r   r   r    r!   r"   r#   r   r   r   r'   ;   s   

zTeleFLMModel.forwardNr   )NN)__name__
__module____qualname____doc__r   r   r   strr   torchTensorr   r	   r   r   r   r'   __classcell__r   r   r   r   r   %   s8    r   c                       sT   e Zd Z		d
dedee def fddZ		d
dedee defdd	Z  Z	S )TeleFLMForCausalLMNr   r   r   r   c                    sT   t  j|||d t| jdd| _| jr(| jj| _| jj| j | _| j| j_d S d S r   )	r   r   r   r   r   mup_scale_factoroutput_multlogits_processorlogit_scaler   r   r   r   r   R   s   
zTeleFLMForCausalLM.__init__c                 C   s   t |||dS )Nr   )r   r   r   r   r   _init_model_   s   zTeleFLMForCausalLM._init_modelr(   )
r)   r*   r+   r   r   r   r-   r   r6   r0   r   r   r   r   r1   Q   s&    r1   )typingr   r   r   r   r.   transformersr   *sglang.srt.layers.quantization.base_configr   ,sglang.srt.model_executor.forward_batch_infor   r	   sglang.srt.models.llamar
   r   r   r1   
EntryClassr   r   r   r   <module>   s   ,