o
    i                     @   s"  d Z ddlmZmZ ddlZddlmZ ddlmZ ddl	m
Z
 ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZmZmZmZmZmZmZ ddlmZ ee Z!G dd deZ"G dd deZ#G dd deZ$G dd deZ%G dd deZ&G dd deZ'g dZ(dS )zPyTorch BitNet model.    )CallableOptionalN   )Cache)FlashAttentionKwargs)CausalLMOutputWithPast)ALL_ATTENTION_FUNCTIONS)Unpack)logging)deprecate_kwarg   )GemmaMLP)LlamaAttentionLlamaDecoderLayerLlamaForCausalLM
LlamaModelLlamaRMSNormapply_rotary_pos_embeager_attention_forward   )BitNetConfigc                   @      e Zd ZdS )BitNetRMSNormN__name__
__module____qualname__ r   r   f/home/ubuntu/veenaModal/venv/lib/python3.10/site-packages/transformers/models/bitnet/modular_bitnet.pyr   +       r   c                       s*   e Zd Zdef fddZdd Z  ZS )	BitNetMLPconfigc                    s"   t  | t|j|jd| _d S N)eps)super__init__r   intermediate_sizerms_norm_epsffn_sub_norm)selfr!   	__class__r   r   r%   0   s   zBitNetMLP.__init__c              	   C   s*   |  | | | || | }|S )N)	down_projr(   act_fn	gate_projup_proj)r)   xr,   r   r   r   forward4   s   &zBitNetMLP.forward)r   r   r   r   r%   r1   __classcell__r   r   r*   r   r    /   s    r    c                       s   e Zd Zdedef fddZedddd				dd
ejde	ejejf de
ej de
e de
ej dee de	eje
ej f fddZ  ZS )BitNetAttentionr!   	layer_idxc                    s$   t  || t|j|jd| _d S r"   )r$   r%   r   hidden_sizer'   attn_sub_norm)r)   r!   r4   r*   r   r   r%   :   s   zBitNetAttention.__init__past_key_valuepast_key_valuesz4.58)new_nameversionNhidden_statesposition_embeddingsattention_maskcache_positionkwargsreturnc                 K   s.  |j d d }g |d| jR }| ||dd}	| ||dd}
| ||dd}|\}}t|	|
||\}	}
|d urW|||d}||
|| j	|\}
}t
}| jjdkret| jj }|| |	|
||f| jsqdn| j| jd|\}}|jg |dR   }| |}| |}||fS )Nr   r   )sincosr>   eagerg        )dropoutscaling)shapehead_dimq_projview	transposek_projv_projr   updater4   r   r!   _attn_implementationr   trainingattention_dropoutrF   reshape
contiguousr6   o_proj)r)   r;   r<   r=   r8   r>   r?   input_shapehidden_shapequery_states
key_statesvalue_statesrC   rB   cache_kwargsattention_interfaceattn_outputattn_weightsr   r   r   r1   >   s:   



zBitNetAttention.forward)NN)r   r   r   r   intr%   r   torchTensortupler   r   
LongTensorr	   r   r1   r2   r   r   r*   r   r3   9   s(    r3   c                   @   r   )BitNetDecoderLayerNr   r   r   r   r   rc   m   r   rc   c                   @   r   )BitNetModelNr   r   r   r   r   rd   q   r   rd   c                       s0   e Zd ZdgZdZdZdef fddZ  ZS )BitNetForCausalLMzlm_head.weightNr@   c                    s   t  jdi |S )a$  
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the masked language modeling loss. Indices should either be in `[0, transformers.,
            config.vocab_size]` or -100 (see `input_ids` docstring). Tokens with indices set to `-100` are ignored
            (masked), the loss is only computed for the tokens with labels in `[0, transformers., config.vocab_size]`.

        Example:

        ```python
        >>> from transformers import AutoTokenizer, BitNetForCausalLM

        >>> model = BitNetForCausalLM.from_pretrained("microsoft/bitnet-b1.58-2B-4T")
        >>> tokenizer = AutoTokenizer.from_pretrained("microsoft/bitnet-b1.58-2B-4T")

        >>> prompt = f'<|begin_of_text|>User: Hey, are you conscious? Can you talk to me?<|eot_id|>Assistant: '
        >>> inputs = tokenizer(prompt, return_tensors="pt")

        >>> # Generate
        >>> generate_ids = model.generate(inputs.input_ids, max_length=100)
        >>> tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
        "User: Hey, are you conscious? Can you talk to me?Assistant: No, I'm not conscious. I'm an artificial intelligence designed to assist with information and tasks. How can I help you today?"
        ```Nr   )r$   r1   )r)   super_kwargsr*   r   r   r1   z   s   zBitNetForCausalLM.forward)	r   r   r   _tied_weights_keys_tp_plan_pp_planr   r1   r2   r   r   r*   r   re   u   s    re   )re   rd   BitNetPreTrainedModel))__doc__typingr   r   r_   cache_utilsr   modeling_flash_attention_utilsr   modeling_outputsr   modeling_utilsr   processing_utilsr	   utilsr
   utils.deprecationr   gemma.modeling_gemmar   llama.modeling_llamar   r   r   r   r   r   r   configuration_bitnetr   
get_loggerr   loggerr   r    r3   rc   rd   re   __all__r   r   r   r   <module>   s*   $	

4"