o
    ei                     @   s*  d dl Z ddlmZ ddlmZ ddlmZ ddlmZm	Z	m
Z
mZmZ ddlmZmZmZmZmZmZ d	d
lmZ eeZG dd deZG dd deZG dd deZG dd de	ZG dd deZG dd de
ZG dd deZG dd deZ G dd deZ!G dd deZ"g dZ#dS )     N   )CausalLMOutputWithPast)Unpack)logging   )DeepseekV3DecoderLayerDeepseekV3MLPDeepseekV3MoEDeepseekV3PreTrainedModelDeepseekV3TopkRouter)Qwen3AttentionQwen3ForCausalLM
Qwen3ModelQwen3RMSNormQwen3RotaryEmbeddingTransformersKwargs   )Dots1Configc                   @      e Zd ZdS )Dots1RMSNormN__name__
__module____qualname__ r   r   e/home/ubuntu/transcripts/venv/lib/python3.10/site-packages/transformers/models/dots1/modular_dots1.pyr   (       r   c                   @   r   )Dots1RotaryEmbeddingNr   r   r   r   r   r   ,   r   r   c                   @   r   )Dots1AttentionNr   r   r   r   r   r   0   r   r   c                   @   r   )Dots1MLPNr   r   r   r   r   r   4   r   r   c                   @   r   )Dots1TopkRouterNr   r   r   r   r   r    8   r   r    c                   @   s   e Zd Zdd ZdS )Dots1MoEc                 C   s  |  }|| jj }|d| j| j| j jdddd jdd}tj|| j	dddd }t
|}|d|d |dd| j| j| j d| j}||  d}tj|| jdddd }|d|}	| jrx|	jdd	d
d }
|	|
 }	|	| j }	||	fS )Nr   )dimr   F)kr#   sortedr   g        T)r#   keepdimg#B;)sigmoidgatee_score_correction_biasviewn_groupn_routed_expertstopksumtorch
topk_group
zeros_likescatter_	unsqueezeexpandreshapemasked_fillbooltop_kgathernorm_topk_probrouted_scaling_factor)selfrouter_logitsrouter_logits_for_choicegroup_scores	group_idx
group_mask
score_maskscores_for_choicetopk_indicestopk_weightsdenominatorr   r   r   route_tokens_to_experts=   s2   


z Dots1MoE.route_tokens_to_expertsN)r   r   r   rG   r   r   r   r   r!   <   s    r!   c                       s&   e Zd Zdedef fddZ  ZS )Dots1DecoderLayerconfig	layer_idxc                    s   t  || |j| | _d S )N)super__init__layer_typesattention_type)r<   rI   rJ   	__class__r   r   rL   X   s   zDots1DecoderLayer.__init__)r   r   r   r   intrL   __classcell__r   r   rO   r   rH   W   s    rH   c                   @   s   e Zd ZdZdS )Dots1PreTrainedModelN)r   r   r   "_keys_to_ignore_on_load_unexpectedr   r   r   r   rS   ]   s    rS   c                   @   r   )
Dots1ModelNr   r   r   r   r   rU   a   r   rU   c                       s*   e Zd Zdee def fddZ  ZS )Dots1ForCausalLMsuper_kwargsreturnc                    s   t  jdi |S )a~  
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the masked language modeling loss. Indices should either be in `[0, ...,
            config.vocab_size]` or -100 (see `input_ids` docstring). Tokens with indices set to `-100` are ignored
            (masked), the loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`.

        Example:

        ```python
        >>> from transformers import AutoTokenizer, Dots1ForCausalLM

        >>> model = Dots1ForCausalLM.from_pretrained("rednote-hilab/dots1.llm1.inst")
        >>> tokenizer = AutoTokenizer.from_pretrained("rednote-hilab/dots1.llm1.inst")

        >>> prompt = "Hey, are you conscious? Can you talk to me?"
        >>> inputs = tokenizer(prompt, return_tensors="pt")

        >>> # Generate
        >>> generate_ids = model.generate(inputs.input_ids, max_length=30)
        >>> tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
        "Hey, are you conscious? Can you talk to me?\nI'm not conscious, but I can talk to you."
        ```Nr   )rK   forward)r<   rW   rO   r   r   rY   f   s   zDots1ForCausalLM.forward)r   r   r   r   r   r   rY   rR   r   r   rO   r   rV   e   s    rV   )rS   rU   rV   )$r/   modeling_outputsr   processing_utilsr   utilsr    deepseek_v3.modeling_deepseek_v3r   r   r	   r
   r   qwen3.modeling_qwen3r   r   r   r   r   r   configuration_dots1r   
get_loggerr   loggerr   r   r   r   r    r!   rH   rS   rU   rV   __all__r   r   r   r   <module>   s&    
