from typing import Optional, Union

import torch

from ...cache_utils import Cache
from ...modeling_flash_attention_utils import FlashAttentionKwargs
from ...modeling_layers import GradientCheckpointingLayer
from ...modeling_outputs import CausalLMOutputWithPast
from ...processing_utils import Unpack
from ...utils import TransformersKwargs, logging
from ...utils.deprecation import deprecate_kwarg
from ..glm.modeling_glm import (
    GlmAttention,
    GlmForCausalLM,
    GlmForSequenceClassification,
    GlmForTokenClassification,
)
from ..phi3.modeling_phi3 import Phi3MLP
from .configuration_glm4 import Glm4Config
from .modeling_glm4 import Glm4RMSNorm


logger = logging.get_logger(__name__)

_CHECKPOINT_FOR_DOC = "THUDM/GLM-4-9B-0414"


class Glm4MLP(Phi3MLP):
    pass

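# A reading of the class hierarchy (hedged, inferred from the imports above): this is a
# "modular" transformers definition that the modular converter expands into the generated
# modeling_glm4.py, so most classes here are thin subclasses of their GLM and Phi3
# counterparts. Glm4MLP, for instance, inherits Phi3MLP's fused gate/up projection
# (a single linear producing both halves of the gated activation) without changes.
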
class Glm4DecoderLayer(GradientCheckpointingLayer):
    def __init__(self, config: Glm4Config, layer_idx: int):
        super().__init__()
        self.hidden_size = config.hidden_size

        self.self_attn = Glm4Attention(config=config, layer_idx=layer_idx)
        self.mlp = Glm4MLP(config)
        self.input_layernorm = Glm4RMSNorm(config.hidden_size, eps=config.rms_norm_eps)
        self.post_attention_layernorm = Glm4RMSNorm(config.hidden_size, eps=config.rms_norm_eps)
        self.post_self_attn_layernorm = Glm4RMSNorm(config.hidden_size, eps=config.rms_norm_eps)
        self.post_mlp_layernorm = Glm4RMSNorm(config.hidden_size, eps=config.rms_norm_eps)

    @deprecate_kwarg("past_key_value", new_name="past_key_values", version="4.58")
    def forward(
        self,
        hidden_states: torch.Tensor,
        attention_mask: Optional[torch.Tensor] = None,
        position_ids: Optional[torch.LongTensor] = None,
        past_key_values: Optional[Cache] = None,
        use_cache: Optional[bool] = False,
        cache_position: Optional[torch.LongTensor] = None,
        position_embeddings: Optional[tuple[torch.Tensor, torch.Tensor]] = None,
        **kwargs: Unpack[FlashAttentionKwargs],
    ) -> tuple[torch.FloatTensor, Optional[tuple[torch.FloatTensor, torch.FloatTensor]]]:
        # Self-attention sub-block: pre-norm, attention, then an extra post-norm
        # before the residual connection is added back.
        residual = hidden_states
        hidden_states = self.input_layernorm(hidden_states)
        hidden_states, _ = self.self_attn(
            hidden_states=hidden_states,
            attention_mask=attention_mask,
            position_ids=position_ids,
            past_key_values=past_key_values,
            use_cache=use_cache,
            cache_position=cache_position,
            position_embeddings=position_embeddings,
            **kwargs,
        )
        hidden_states = self.post_self_attn_layernorm(hidden_states)
        hidden_states = residual + hidden_states

        # MLP sub-block, normalized the same way on both sides.
        residual = hidden_states
        hidden_states = self.post_attention_layernorm(hidden_states)
        hidden_states = self.mlp(hidden_states)
        hidden_states = self.post_mlp_layernorm(hidden_states)
        hidden_states = residual + hidden_states

        return hidden_states

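# A hedged summary of the forward pass above: compared with the parent GLM layer,
# GLM-4 adds two extra RMSNorms ("sandwich" normalization), so each sub-block is
# normalized both before and after it runs. Schematically:
#
#     h = x + post_self_attn_layernorm(self_attn(input_layernorm(x)))
#     y = h + post_mlp_layernorm(mlp(post_attention_layernorm(h)))
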
class Glm4Attention(GlmAttention):
    pass


class Glm4ForCausalLM(GlmForCausalLM):
    def forward(
        self,
        **super_kwargs: Unpack[TransformersKwargs],
    ) -> Union[tuple, CausalLMOutputWithPast]:
        r"""
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the masked language modeling loss. Indices should either be in `[0, ...,
            config.vocab_size]` or -100 (see `input_ids` docstring). Tokens with indices set to `-100` are ignored
            (masked), the loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`.

        Example:

        ```python
        >>> from transformers import AutoTokenizer, Glm4ForCausalLM

        >>> model = Glm4ForCausalLM.from_pretrained("THUDM/GLM-4-9B-0414")
        >>> tokenizer = AutoTokenizer.from_pretrained("THUDM/GLM-4-9B-0414")

        >>> prompt = "Hey, are you conscious? Can you talk to me?"
        >>> inputs = tokenizer(prompt, return_tensors="pt")

        >>> # Generate
        >>> generate_ids = model.generate(inputs.input_ids, max_length=30)
        >>> tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
        "Hey, are you conscious? Can you talk to me?\nI'm not conscious, but I can talk to you."
        ```"""
        return super().forward(**super_kwargs)

class Glm4ForSequenceClassification(GlmForSequenceClassification):
    pass


class Glm4ForTokenClassification(GlmForTokenClassification):
    pass


__all__ = [
    "Glm4PreTrainedModel",
    "Glm4Model",
    "Glm4ForCausalLM",
    "Glm4ForSequenceClassification",
    "Glm4ForTokenClassification",
]