"""PyTorch Qwen3 model."""

from typing import Callable, Optional

import torch
import torch.utils.checkpoint

from ...cache_utils import Cache
from ...modeling_flash_attention_utils import FlashAttentionKwargs
from ...modeling_outputs import CausalLMOutputWithPast
from ...modeling_utils import ALL_ATTENTION_FUNCTIONS
from ...processing_utils import Unpack
from ...utils import LossKwargs, logging
from ..gemma.modeling_gemma import GemmaMLP
from ..llama.modeling_llama import LlamaAttention
from ..qwen2.modeling_qwen2 import (
    Qwen2DecoderLayer,
    Qwen2ForCausalLM,
    Qwen2ForQuestionAnswering,
    Qwen2ForSequenceClassification,
    Qwen2ForTokenClassification,
    Qwen2Model,
    Qwen2RMSNorm,
    apply_rotary_pos_emb,
    eager_attention_forward,
)
from .configuration_qwen3 import Qwen3Config


logger = logging.get_logger(__name__)

_CHECKPOINT_FOR_DOC = "Qwen/Qwen3-8B"


class Qwen3RMSNorm(Qwen2RMSNorm):
    pass


class Qwen3MLP(GemmaMLP):
    pass


class Qwen3Attention(LlamaAttention):
    def __init__(self, config: Qwen3Config, layer_idx: int):
        super().__init__(config, layer_idx)
        # Unlike Qwen2, Qwen3 RMS-normalizes queries and keys per attention head
        # (over head_dim only), so the normalized tensors need no extra reshape.
        self.q_norm = Qwen3RMSNorm(self.head_dim, eps=config.rms_norm_eps)
        self.k_norm = Qwen3RMSNorm(self.head_dim, eps=config.rms_norm_eps)
        self.sliding_window = config.sliding_window if config.layer_types[layer_idx] == "sliding_attention" else None

    def forward(
        self,
        hidden_states: torch.Tensor,
        position_embeddings: tuple[torch.Tensor, torch.Tensor],
        attention_mask: Optional[torch.Tensor],
        past_key_value: Optional[Cache] = None,
        cache_position: Optional[torch.LongTensor] = None,
        **kwargs: Unpack[FlashAttentionKwargs],
    ) -> tuple[torch.Tensor, Optional[torch.Tensor], Optional[tuple[torch.Tensor]]]:
        input_shape = hidden_states.shape[:-1]
        hidden_shape = (*input_shape, -1, self.head_dim)

        query_states = self.q_norm(self.q_proj(hidden_states).view(hidden_shape)).transpose(1, 2)
        key_states = self.k_norm(self.k_proj(hidden_states).view(hidden_shape)).transpose(1, 2)
        value_states = self.v_proj(hidden_states).view(hidden_shape).transpose(1, 2)

        cos, sin = position_embeddings
        query_states, key_states = apply_rotary_pos_emb(query_states, key_states, cos, sin)

        if past_key_value is not None:
            # sin and cos are specific to RoPE models; cache_position is needed for the static cache
            cache_kwargs = {"sin": sin, "cos": cos, "cache_position": cache_position}
            key_states, value_states = past_key_value.update(key_states, value_states, self.layer_idx, cache_kwargs)

        attention_interface: Callable = eager_attention_forward
        if self.config._attn_implementation != "eager":
            attention_interface = ALL_ATTENTION_FUNCTIONS[self.config._attn_implementation]

        attn_output, attn_weights = attention_interface(
            self,
            query_states,
            key_states,
            value_states,
            attention_mask,
            dropout=0.0 if not self.training else self.attention_dropout,
            scaling=self.scaling,
            sliding_window=self.sliding_window,  # difference with Llama: optional sliding-window layers
            **kwargs,
        )

        attn_output = attn_output.reshape(*input_shape, -1).contiguous()
        attn_output = self.o_proj(attn_output)
        return attn_output, attn_weights


class Qwen3DecoderLayer(Qwen2DecoderLayer):
    pass


class Qwen3Model(Qwen2Model):
    pass


class KwargsForCausalLM(FlashAttentionKwargs, LossKwargs): ...


class Qwen3ForCausalLM(Qwen2ForCausalLM):
    def forward(
        self,
        **super_kwargs: Unpack[KwargsForCausalLM],
    ) -> CausalLMOutputWithPast:
        r"""
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the masked language modeling loss. Indices should either be in `[0, ...,
            config.vocab_size]` or -100 (see `input_ids` docstring). Tokens with indices set to `-100` are ignored
            (masked), the loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`.

        Example:

        ```python
        >>> from transformers import AutoTokenizer, Qwen3ForCausalLM

        >>> model = Qwen3ForCausalLM.from_pretrained("Qwen/Qwen3-8B")
        >>> tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen3-8B")

        >>> prompt = "Hey, are you conscious? Can you talk to me?"
        >>> inputs = tokenizer(prompt, return_tensors="pt")

        >>> # Generate
        >>> generate_ids = model.generate(inputs.input_ids, max_length=30)
        >>> tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
        "Hey, are you conscious? Can you talk to me?\nI'm not conscious, but I can talk to you."
        ```"""
        return super().forward(**super_kwargs)


class Qwen3ForSequenceClassification(Qwen2ForSequenceClassification):
    pass


class Qwen3ForTokenClassification(Qwen2ForTokenClassification):
    pass


class Qwen3ForQuestionAnswering(Qwen2ForQuestionAnswering):
    pass


__all__ = [
    "Qwen3ForCausalLM",
    "Qwen3ForQuestionAnswering",
    "Qwen3Model",
    "Qwen3PreTrainedModel",  # noqa: F822 -- provided by the auto-generated modeling_qwen3.py
    "Qwen3ForSequenceClassification",
    "Qwen3ForTokenClassification",
]