from typing import TYPE_CHECKING

import torch
from torch import nn

from ... import initialization as init
from ...cache_utils import Cache, DynamicCache
from ...configuration_utils import PreTrainedConfig
from ...masking_utils import create_causal_mask
from ...modeling_outputs import BaseModelOutputWithPast
from ...modeling_rope_utils import RopeParameters
from ...modeling_utils import PreTrainedModel
from ...processing_utils import Unpack
from ...utils import TransformersKwargs, logging
from ..llama.modeling_llama import (
    LlamaAttention,
    LlamaForCausalLM,
    LlamaForSequenceClassification,
    LlamaForTokenClassification,
    LlamaMLP,
    LlamaModel,
    LlamaPreTrainedModel,
    LlamaRotaryEmbedding,
)


if TYPE_CHECKING:
    pass

VOCAB_FILES_NAMES = {"vocab_file": "tokenizer.model"}

SPIECE_UNDERLINE = "▁"

logger = logging.get_logger(__name__)


class GemmaConfig(PreTrainedConfig):
    r"""
    This is the configuration class to store the configuration of a [`GemmaModel`]. It is used to instantiate a Gemma
    model according to the specified arguments, defining the model architecture. Instantiating a configuration with the
    defaults will yield a similar configuration to that of the Gemma-7B.
    e.g. [google/gemma-7b](https://huggingface.co/google/gemma-7b)
    Configuration objects inherit from [`PreTrainedConfig`] and can be used to control the model outputs. Read the
    documentation from [`PreTrainedConfig`] for more information.

    Args:
        vocab_size (`int`, *optional*, defaults to 256000):
            Vocabulary size of the Gemma model. Defines the number of different tokens that can be represented by the
            `inputs_ids` passed when calling [`GemmaModel`]
        hidden_size (`int`, *optional*, defaults to 3072):
            Dimension of the hidden representations.
        intermediate_size (`int`, *optional*, defaults to 24576):
            Dimension of the MLP representations.
        num_hidden_layers (`int`, *optional*, defaults to 28):
            Number of hidden layers in the Transformer decoder.
        num_attention_heads (`int`, *optional*, defaults to 16):
            Number of attention heads for each attention layer in the Transformer decoder.
        num_key_value_heads (`int`, *optional*, defaults to 16):
            This is the number of key_value heads that should be used to implement Grouped Query Attention. If
            `num_key_value_heads=num_attention_heads`, the model will use Multi Head Attention (MHA), if
            `num_key_value_heads=1` the model will use Multi Query Attention (MQA) otherwise GQA is used. When
            converting a multi-head checkpoint to a GQA checkpoint, each group key and value head should be constructed
            by mean-pooling all the original heads within that group (see the sketch before the usage example below).
            For more details, check out [this paper](https://huggingface.co/papers/2305.13245). If it is not specified,
            it defaults to `num_attention_heads`.
        head_dim (`int`, *optional*, defaults to 256):
            The attention head dimension.
        hidden_act (`str` or `function`, *optional*, defaults to `"gelu_pytorch_tanh"`):
            The legacy activation function. It is overwritten by the `hidden_activation`.
        max_position_embeddings (`int`, *optional*, defaults to 8192):
            The maximum sequence length that this model might ever be used with.
        initializer_range (`float`, *optional*, defaults to 0.02):
            The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
        rms_norm_eps (`float`, *optional*, defaults to 1e-06):
            The epsilon used by the rms normalization layers.
        use_cache (`bool`, *optional*, defaults to `True`):
            Whether or not the model should return the last key/values attentions (not used by all models). Only
            relevant if `config.is_decoder=True`.
        pad_token_id (`int`, *optional*, defaults to 0):
            Padding token id.
        eos_token_id (`int`, *optional*, defaults to 1):
            End of stream token id.
        bos_token_id (`int`, *optional*, defaults to 2):
            Beginning of stream token id.
        tie_word_embeddings (`bool`, *optional*, defaults to `True`):
            Whether to tie weight embeddings
        rope_parameters (`RopeParameters`, *optional*):
            Dictionary containing the configuration parameters for the RoPE embeddings. The dictionary should contain
            a value for `rope_theta` and optionally parameters used for scaling in case you want to use RoPE
            with longer `max_position_embeddings`.
        attention_bias (`bool`, *optional*, defaults to `False`):
            Whether to use a bias in the query, key, value and output projection layers during self-attention.
        attention_dropout (`float`, *optional*, defaults to 0.0):
            The dropout ratio for the attention probabilities.
        use_bidirectional_attention (`bool`, *optional*):
            If True, the model will attend to all text tokens instead of using a causal mask.

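    As a minimal, illustrative sketch of the mean-pooling conversion mentioned for `num_key_value_heads` (the shapes
    below reuse `head_dim=256` and `hidden_size=3072` from the defaults above, together with a hypothetical target of
    4 key/value heads; this helper code is not part of the library):

    ```python
    >>> import torch
    >>> num_attention_heads, num_key_value_heads, head_dim, hidden_size = 16, 4, 256, 3072
    >>> # Original multi-head `k_proj` weight: one key head per attention head
    >>> k_proj = torch.randn(num_attention_heads * head_dim, hidden_size)
    >>> # Group the 16 key heads into 4 groups of 4 heads and mean-pool each group
    >>> grouped = k_proj.view(num_key_value_heads, num_attention_heads // num_key_value_heads, head_dim, hidden_size)
    >>> k_proj_gqa = grouped.mean(dim=1).reshape(num_key_value_heads * head_dim, hidden_size)
    >>> k_proj_gqa.shape
    torch.Size([1024, 3072])
    ```
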
    ```python
    >>> from transformers import GemmaModel, GemmaConfig
    >>> # Initializing a Gemma gemma-7b style configuration
    >>> configuration = GemmaConfig()
    >>> # Initializing a model from the gemma-7b style configuration
    >>> model = GemmaModel(configuration)
    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```"""

    model_type = "gemma"
    keys_to_ignore_at_inference = ["past_key_values"]
    base_model_tp_plan = {
        "layers.*.self_attn.q_proj": "colwise",
        "layers.*.self_attn.k_proj": "colwise",
        "layers.*.self_attn.v_proj": "colwise",
        "layers.*.self_attn.o_proj": "rowwise",
        "layers.*.mlp.gate_proj": "colwise",
        "layers.*.mlp.up_proj": "colwise",
        "layers.*.mlp.down_proj": "rowwise",
    }
    base_model_pp_plan = {
        "embed_tokens": (["input_ids"], ["inputs_embeds"]),
        "layers": (["hidden_states", "attention_mask"], ["hidden_states"]),
        "norm": (["hidden_states"], ["hidden_states"]),
    }

    def __init__(
        self,
        vocab_size: int | None = 256000,
        hidden_size: int | None = 3072,
        intermediate_size: int | None = 24576,
        num_hidden_layers: int | None = 28,
        num_attention_heads: int | None = 16,
        num_key_value_heads: int | None = 16,
        head_dim: int | None = 256,
        hidden_act: str | None = "gelu_pytorch_tanh",
        max_position_embeddings: int | None = 8192,
        initializer_range: float | None = 0.02,
        rms_norm_eps: float | None = 1e-6,
        use_cache: bool | None = True,
        pad_token_id: int | None = 0,
        eos_token_id: int | None = 1,
        bos_token_id: int | None = 2,
        tie_word_embeddings: bool | None = True,
        rope_parameters: RopeParameters | dict[str, RopeParameters] | None = None,
        attention_bias: bool | None = False,
        attention_dropout: float | None = 0.0,
        use_bidirectional_attention: bool | None = None,
        **kwargs,
    ):
        self.vocab_size = vocab_size
        self.max_position_embeddings = max_position_embeddings
        self.hidden_size = hidden_size
        self.intermediate_size = intermediate_size
        self.num_hidden_layers = num_hidden_layers
        self.num_attention_heads = num_attention_heads
        self.head_dim = head_dim
        self.num_key_value_heads = num_key_value_heads
        self.hidden_act = hidden_act
        self.initializer_range = initializer_range
        self.rms_norm_eps = rms_norm_eps
        self.use_cache = use_cache
        self.attention_bias = attention_bias
        self.attention_dropout = attention_dropout
        self.use_bidirectional_attention = use_bidirectional_attention
        self.rope_parameters = rope_parameters

        super().__init__(
            pad_token_id=pad_token_id,
            bos_token_id=bos_token_id,
            eos_token_id=eos_token_id,
            tie_word_embeddings=tie_word_embeddings,
            **kwargs,
        )
zGemmaRMSNorm.__init__c                 C   s$   |t |djddd| j  S )Nr   T)keepdim)r`   rsqrtpowmeanr^   )rI   xrF   rF   rM   _norm   s   $zGemmaRMSNorm._normc                 C   s*   |  | }|d| j   }||S )Ng      ?)ri   rX   rb   type_as)rI   rh   outputrF   rF   rM   forward   s   
zGemmaRMSNorm.forwardc                 C   s   t | jj d| j S )Nz, eps=)tuplerb   shaper^   )rI   rF   rF   rM   
extra_repr   s   zGemmaRMSNorm.extra_repr)r/   )
rN   rO   rP   rV   rX   rH   ri   rl   ro   r[   rF   rF   rK   rM   r\      s
    r\   c                          e Zd Z fddZ  ZS )GemmaMLPc                    sR   t  | tj| j| jdd| _tj| j| jdd| _tj| j| jdd| _d S )NF)bias)	rG   rH   r   Linearr3   r4   	gate_projup_proj	down_proj)rI   configrK   rF   rM   rH      s   zGemmaMLP.__init__)rN   rO   rP   rH   r[   rF   rF   rK   rM   rq          rq   c                   @      e Zd ZdS )GemmaRotaryEmbeddingNrN   rO   rP   rF   rF   rF   rM   rz          rz   c                       s*   e Zd ZdZdedef fddZ  ZS )GemmaAttentionz=Multi-headed attention from 'Attention Is All You Need' paperrw   	layer_idxc                    s   t    t|dd | _d S )NrE   F)rG   rH   getattr	is_causal)rI   rw   r~   rK   rF   rM   rH      s   
zGemmaAttention.__init__)rN   rO   rP   rQ   r   rV   rH   r[   rF   rF   rK   rM   r}      s    r}   c                   @   s   e Zd Ze dd ZdS )GemmaPreTrainedModelc                 C   s,   t | | d|jjv rt|j d S d S )NRMSNorm)r   _init_weightsrL   rN   initzeros_rb   )rI   modulerF   rF   rM   r      s   z"GemmaPreTrainedModel._init_weightsN)rN   rO   rP   r`   no_gradr   rF   rF   rF   rM   r      s    r   c                   @   st   e Zd Z							ddejdB dejdB dejdB dedB dejdB dedB dejdB d	e	e
 d
efddZdS )
GemmaModelNr   r"   position_idsr   r    r=   cache_positionrJ   returnc              
   K   s,  |d u |d uA rt d|d u r| |}|r!|d u r!t| jd}|d u r=|d ur-| nd}	tj|	|	|jd  |jd}|d u rF|	d}t
| j|||||d}
|}| j||d}tj| jjd |jd	}|| }| jd | jj D ]}||f|
|||||d
|}qs| |}t||r|dS d dS )Nz:You must specify exactly one of input_ids or inputs_embeds)rw   r   r0   )device)rw   r    r"   r   r   r   )r   g      ?)dtype)r"   r   r   r=   r   position_embeddings)last_hidden_stater   )
ValueErrorr#   r   rw   get_seq_lengthr`   arangern   r   	unsqueezer	   
rotary_embtensorr3   r   r$   r5   r%   r
   )rI   r   r"   r   r   r    r=   r   rJ   past_seen_tokenscausal_maskr!   r   
normalizerdecoder_layerrF   rF   rM   rl      sZ   




zGemmaModel.forward)NNNNNNN)rN   rO   rP   r`   
LongTensorTensorr   FloatTensorrY   r   r   r
   rl   rF   rF   rF   rM   r      s6    	
r   c                       rp   )GemmaForCausalLMc                     s   t  jdi | S )a|  
        Example:

        ```python
        >>> from transformers import AutoTokenizer, GemmaForCausalLM

        >>> model = GemmaForCausalLM.from_pretrained("google/gemma-7b")
        >>> tokenizer = AutoTokenizer.from_pretrained("google/gemma-7b")

        >>> prompt = "What is your favorite condiment?"
        >>> inputs = tokenizer(prompt, return_tensors="pt")

        >>> # Generate
        >>> generate_ids = model.generate(inputs.input_ids, max_length=30)
        >>> tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
        "What is your favorite condiment?"
        ```"""
        return super().forward(**super_kwargs)


class GemmaForSequenceClassification(LlamaForSequenceClassification):
    pass


class GemmaForTokenClassification(LlamaForTokenClassification):
    pass


__all__ = [
    "GemmaConfig",
    "GemmaModel",
    "GemmaForCausalLM",
    "GemmaForSequenceClassification",
    "GemmaForTokenClassification",
    "GemmaPreTrainedModel",
]