o
    پi.                     @   s  d dl Z d dlmZmZmZ d dlZd dlmZ d dlmZm	Z	m
Z
 d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZ dZG dd dejZG dd dejZG dd dejZ	 dddZG dd dejZG dd dejZ ee gZ!dS )    N)IterableOptionalTuple)nn)CrossEncodingPoolerPoolerPoolingType)QuantizationConfig)SparsePooler)VocabParallelEmbedding)ForwardBatch)default_weight_loader)BertEncoder)download_from_hfc                       s.   e Zd ZdZdef fddZdd Z  ZS )RobertaClassificationHeadz-Head for sentence-level classification tasks.configc                    s2   t    t|j|j| _t|j|j| _d S N)super__init__r   Linearhidden_sizedense
num_labelsout_projselfr   	__class__ M/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/models/roberta.pyr      s   
z"RobertaClassificationHead.__init__c                 K   s2   |dd d f }|  |}t|}| |}|S )Nr   )r   torchtanhr   )r   featureskwargsxr   r   r   forward   s
   


z!RobertaClassificationHead.forward)__name__
__module____qualname____doc__RobertaConfigr   r%   __classcell__r   r   r   r   r      s    r   c                
       sH   e Zd Zdef fddZdejdejdejdedejf
d	d
Z  Z	S )RobertaEmbeddingr   c                    s   t    |j| _t|j|j| _|j| _t	j
|j|j| jd| _t	
|j|j| _t	j|j|jd| _t	td|jf| _|j| _| jdkrMtdd S )N)padding_idx)eps   absolutez4Only 'absolute' position_embedding_type is supported)r   r   r   sizer   
vocab_sizeword_embeddingspad_token_idr-   r   	Embeddingmax_position_embeddingsposition_embeddingstype_vocab_sizetoken_type_embeddings	LayerNormlayer_norm_eps	Parameterr    emptyposition_idsposition_embedding_type
ValueErrorr   r   r   r   r   (   s0   

zRobertaEmbedding.__init__	input_idsseq_lensr>   forward_batchreturnc                 C   s  |  }| |}g }g }d}	|D ]}
|||	|	|
   |||	|	|
   |	|
7 }	qg }t||D ]#\}}tj|  d tj|jd}t||sOJ |t	|| j
 q5t|}| |}|j}|d u rttj|tj|jd}| |}|| | }| |}|S )Nr   )dtypedevice)r1   r3   appendzipr    arangelongrF   equal"create_position_ids_from_input_idsr-   catr7   token_type_idszerosr9   r:   )r   rA   rB   r>   rC   input_shapeinputs_embedspos_list
token_listoffsetseq_lennew_pos_list	positionstokensexpected_posr7   rN   r9   
embeddingsr   r   r   r%   D   s:   







zRobertaEmbedding.forward)
r&   r'   r(   r*   r   r    Tensorr   r%   r+   r   r   r   r   r,   &   s    r,   c                       s   e Zd Zdddddedee dedef fd	d
Ze	
 		dde	jde	jdede	jdede	jfddZdeeee	jf  fddZ  ZS )XLMRobertaBaseModelN F)quant_configprefixadd_pooling_layerr   r^   r_   r`   c                   sL   t    || _t|| _t||dd| _|r!ttj	dd| _
d S d | _
d S )Nr]   r   r^   r_   Tpooling_type	normalize)r   r   r   r,   rZ   r   encoderr   r   CLSpooler)r   r   r^   r_   r`   r   r   r   r   u   s   

zXLMRobertaBaseModel.__init__rA   rW   rC   input_embedsget_embeddingrD   c                 C   s2   |dksJ | j |||j|d}| j||d}|S )NT)rA   r>   rB   rC   )rC   )rZ   rB   re   r   rA   rW   rC   rh   ri   hidden_statesr   r   r   r%      s   	zXLMRobertaBaseModel.forwardweightsc                 C   s   g d}t |  }|D ]X\}}|dd}| jd u r d|v r q|D ](\}}}||vr,q"|||}|dr<||vr<q"|| }	|	j}
|
|	||  n|drU||vrUq|| }	t|	dt}
|
|	| qd S )N))qkv_projqueryq)rm   keyk)rm   valuevr   	self_attnrg   z.biasweight_loader)dictnamed_parametersreplacerg   endswithru   getattrr   )r   rl   stacked_params_mappingparams_dictnameloaded_weight
param_nameweight_nameshard_idparamru   r   r   r   load_weights   s.   
z XLMRobertaBaseModel.load_weightsNF)r&   r'   r(   r*   r   r	   strboolr   r    no_gradr[   r   r%   r   r   r   r+   r   r   r   r   r\   t   s<    $r\   c                 C   s6   |  | }tj|dd|| | }| | S )Nr   )dim)neintr    cumsumtype_asrJ   )rA   r-   past_key_values_lengthmaskincremental_indicesr   r   r   rL      s
   rL   c                       s   e Zd Zddddddedee dedee dee f
 fd	d
Z		ddej	dej	de
dej	dedej	fddZdeeeej	f  fddZedededefddZ  ZS )XLMRobertaModelNr]   )r^   r_   sparse_head
model_pathr   r^   r_   r   r   c                   s   t    t|||d| _|d ur4d| _|| _|| _t|d| _|j	|j
|jg| _dd | jD | _d S d| _ttjdd| _d S )Nra   T)r   c                 S   s   g | ]}|d ur|qS r   r   ).0tr   r   r   
<listcomp>   s    z,XLMRobertaModel.__init__.<locals>.<listcomp>Frb   )r   r   r\   roberta
_is_sparse_model_path_sparse_headr
   rg   bos_token_ideos_token_idr4   _special_tokensr   r   rf   )r   r   r^   r_   r   r   r   r   r   r      s    
	zXLMRobertaModel.__init__FrA   rW   rC   rh   ri   rD   c           	      C   sR   |  |||||}| ||}| jr'| jD ]}d|jd d |f< q|j |_|S )Ng        )r   rg   r   r   rZ   	to_sparse)	r   rA   rW   rC   rh   ri   rk   rZ   token_idr   r   r   r%      s   

zXLMRobertaModel.forwardrl   c                 C   s6   | j | | jrt| j| j}| j| d S d S r   )r   r   r   r   _load_sparse_linearr   r   rg   )r   rl   sparse_dictr   r   r   r      s   zXLMRobertaModel.load_weightsmodel_path_or_dirc                 C   sd   t j| rt j| |}t j|std| d|  nt| |d}t j||}t|}|S )z
        Load sparse_head from local dir or HF Hub.
        Returns a state_dict suitable for nn.Linear.load_state_dict().
        'z' not found in )allow_patterns)	ospathisdirjoinexistsFileNotFoundErrorr   r    load)r   r   r   	local_dir
state_dictr   r   r   r     s   
z#XLMRobertaModel._load_sparse_linearr   )r&   r'   r(   r*   r   r	   r   r   r    r[   r   r   r%   r   r   r   staticmethodrv   r   r+   r   r   r   r   r      sD    #
	 r   c                       s   e Zd Zddddedee def fddZ			dd
ej	dej	de
dej	dedej	fddZdeeeej	f  fddZ  ZS )#XLMRobertaForSequenceClassificationNr]   )r^   r_   r   r^   r_   c                   s<   t    t|||d| _t|| _t|| j| jj| _d S )Nra   )r   r   r\   r   r   
classifierr   rg   )r   r   r^   r_   r   r   r   r     s   

z,XLMRobertaForSequenceClassification.__init__TrA   rW   rC   rh   ri   rD   c                 C   s*   |sJ d|  |||||}| ||S )Nz;XLMRobertaForSequenceClassification is only used for rerank)r   rg   rj   r   r   r   r%   ,  s   	
z+XLMRobertaForSequenceClassification.forwardrl   c                    sf   g   fdd}| j |  t|  } D ]\}}|dr0|| }t|dt}||| qd S )Nc                  3   sD    D ]\} }|  dr| tdd  |fV  q | |f qd S )Nzroberta.)
startswithlenrG   )r}   weightself_weightsrl   r   r   weight_filter@  s   
zGXLMRobertaForSequenceClassification.load_weights.<locals>.weight_filterr   ru   )r   r   rv   rw   r   rz   r   )r   rl   r   r|   r}   r~   r   ru   r   r   r   r   =  s   

z0XLMRobertaForSequenceClassification.load_weights)NT)r&   r'   r(   r*   r   r	   r   r   r    r[   r   r   r%   r   r   r   r+   r   r   r   r   r     s4    
$r   )r   )"r   typingr   r   r   r    r   sglang.srt.layers.poolerr   r   r   *sglang.srt.layers.quantization.base_configr	   sglang.srt.layers.sparse_poolerr
   *sglang.srt.layers.vocab_parallel_embeddingr   ,sglang.srt.model_executor.forward_batch_infor   $sglang.srt.model_loader.weight_utilsr   sglang.srt.models.bertr   &sglang.srt.utils.hf_transformers_utilsr   r*   Moduler   r,   r\   rL   r   r   
EntryClassr   r   r   r   <module>   s*   NO

Q5