o
    ei(                     @   s  d Z ddlmZ ddlZddlmZ ddlmZ ddlm	Z	 ddl
mZ dd	lmZ dd
lmZ ddlmZ ddlmZmZmZmZmZmZmZ ddlmZ eeZG dd deZG dd deZ G dd deZ!G dd deZ"G dd deZ#G dd deZ$g dZ%dS )zPyTorch BitNet model.    )CallableN   )Cache)FlashAttentionKwargs)CausalLMOutputWithPast)ALL_ATTENTION_FUNCTIONS)Unpack)logging   )GemmaMLP)LlamaAttentionLlamaDecoderLayerLlamaForCausalLM
LlamaModelLlamaRMSNormapply_rotary_pos_embeager_attention_forward   )BitNetConfigc                   @      e Zd ZdS )BitNetRMSNormN__name__
__module____qualname__ r   r   g/home/ubuntu/transcripts/venv/lib/python3.10/site-packages/transformers/models/bitnet/modular_bitnet.pyr   )       r   c                       s*   e Zd Zdef fddZdd Z  ZS )	BitNetMLPconfigc                    s"   t  | t|j|jd| _d S N)eps)super__init__r   intermediate_sizerms_norm_epsffn_sub_norm)selfr   	__class__r   r   r#   .   s   zBitNetMLP.__init__c              	   C   s*   |  | | | || | }|S )N)	down_projr&   act_fn	gate_projup_proj)r'   xr*   r   r   r   forward2   s   &zBitNetMLP.forward)r   r   r   r   r#   r/   __classcell__r   r   r(   r   r   -   s    r   c                       s   e Zd Zdedef fddZ		ddejdeejejf dejdB d	e	dB d
ej
dB dee deejejdB f fddZ  ZS )BitNetAttentionr   	layer_idxc                    s$   t  || t|j|jd| _d S r    )r"   r#   r   hidden_sizer%   attn_sub_norm)r'   r   r2   r(   r   r   r#   8   s   zBitNetAttention.__init__Nhidden_statesposition_embeddingsattention_maskpast_key_valuescache_positionkwargsreturnc                 K   s"  |j d d }g |d| jR }| ||dd}	| ||dd}
| ||dd}|\}}t|	|
||\}	}
|d urW|||d}||
|| j	|\}
}t
| jjt}|| |	|
||f| jskdn| j| jd|\}}|jg |dR   }| |}| |}||fS )Nr   r
   )sincosr9   g        )dropoutscaling)shapehead_dimq_projview	transposek_projv_projr   updater2   r   get_interfacer   _attn_implementationr   trainingattention_dropoutr@   reshape
contiguousr4   o_proj)r'   r5   r6   r7   r8   r9   r:   input_shapehidden_shapequery_states
key_statesvalue_statesr>   r=   cache_kwargsattention_interfaceattn_outputattn_weightsr   r   r   r/   <   s:   	


zBitNetAttention.forward)NN)r   r   r   r   intr#   torchTensortupler   
LongTensorr   r   r/   r0   r   r   r(   r   r1   7   s&    	r1   c                   @   r   )BitNetDecoderLayerNr   r   r   r   r   r^   i   r   r^   c                   @   r   )BitNetModelNr   r   r   r   r   r_   m   r   r_   c                       s2   e Zd ZddiZdZdZdef fddZ  ZS )BitNetForCausalLMzlm_head.weightzmodel.embed_tokens.weightNr;   c                    s   t  jdi |S )a$  
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the masked language modeling loss. Indices should either be in `[0, transformers.,
            config.vocab_size]` or -100 (see `input_ids` docstring). Tokens with indices set to `-100` are ignored
            (masked), the loss is only computed for the tokens with labels in `[0, transformers., config.vocab_size]`.

        Example:

        ```python
        >>> from transformers import AutoTokenizer, BitNetForCausalLM

        >>> model = BitNetForCausalLM.from_pretrained("microsoft/bitnet-b1.58-2B-4T")
        >>> tokenizer = AutoTokenizer.from_pretrained("microsoft/bitnet-b1.58-2B-4T")

        >>> prompt = f'<|begin_of_text|>User: Hey, are you conscious? Can you talk to me?<|eot_id|>Assistant: '
        >>> inputs = tokenizer(prompt, return_tensors="pt")

        >>> # Generate
        >>> generate_ids = model.generate(inputs.input_ids, max_length=100)
        >>> tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
        "User: Hey, are you conscious? Can you talk to me?Assistant: No, I'm not conscious. I'm an artificial intelligence designed to assist with information and tasks. How can I help you today?"
        ```Nr   )r"   r/   )r'   super_kwargsr(   r   r   r/   v   s   zBitNetForCausalLM.forward)	r   r   r   _tied_weights_keys_tp_plan_pp_planr   r/   r0   r   r   r(   r   r`   q   s    r`   )r`   r_   BitNetPreTrainedModel)&__doc__collections.abcr   rZ   cache_utilsr   modeling_flash_attention_utilsr   modeling_outputsr   modeling_utilsr   processing_utilsr   utilsr	   gemma.modeling_gemmar   llama.modeling_llamar   r   r   r   r   r   r   configuration_bitnetr   
get_loggerr   loggerr   r   r1   r^   r_   r`   __all__r   r   r   r   <module>   s(   $	

2"