o
    	۷iX.                     @   s  d dl mZ d dlZd dlmZ ddlmZmZ ddlmZ ddl	m
Z
mZ ddlmZ dd	lmZ dd
lmZmZ ddlmZ ddlmZ ddlmZmZmZmZmZmZmZmZm Z m!Z!m"Z" G dd deeZ#G dd deZ$G dd deZ%G dd de!Z&G dd deZ'G dd de Z(G dd de"Z)G dd deZ*G dd  d eZ+G d!d" d"eZ,G d#d$ d$eZ-G d%d& d&eZ.g d'Z/dS )(    )OptionalN)nn   )CacheDynamicCache)PretrainedConfig)create_causal_mask!create_sliding_window_causal_mask)BaseModelOutputWithPast)Unpack)TransformersKwargsauto_docstring)check_model_inputs   )MistralConfig)Qwen2AttentionQwen2DecoderLayerQwen2ForCausalLMQwen2ForQuestionAnsweringQwen2ForSequenceClassificationQwen2ForTokenClassificationQwen2MLP
Qwen2ModelQwen2PreTrainedModelQwen2RMSNormQwen2RotaryEmbeddingc                   @   sF   e Zd ZdZdZ										
											dddZdS )MinistralConfiga  
    This is the configuration class to store the configuration of a [`MinistralModel`]. It is used to instantiate an
    Ministral model according to the specified arguments, defining the model architecture. Instantiating a configuration
    with the defaults will yield a similar configuration to that of the Ministral-8B-Instruct-2410.

    [mistralai/Ministral-8B-Instruct-2410](https://huggingface.co/mistralai/Ministral-8B-Instruct-2410)
    [mistralai/Ministral-8B-Instruct-2410](https://huggingface.co/mistralai/Ministral-8B-Instruct-2410)

    Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
    documentation from [`PretrainedConfig`] for more information.


    Args:
        vocab_size (`int`, *optional*, defaults to 32000):
            Vocabulary size of the Ministral model. Defines the number of different tokens that can be represented by the
            `inputs_ids` passed when calling [`MinistralModel`]
        hidden_size (`int`, *optional*, defaults to 4096):
            Dimension of the hidden representations.
        intermediate_size (`int`, *optional*, defaults to 14336):
            Dimension of the MLP representations.
        num_hidden_layers (`int`, *optional*, defaults to 32):
            Number of hidden layers in the Transformer encoder.
        num_attention_heads (`int`, *optional*, defaults to 32):
            Number of attention heads for each attention layer in the Transformer encoder.
        num_key_value_heads (`int`, *optional*, defaults to 8):
            This is the number of key_value heads that should be used to implement Grouped Query Attention. If
            `num_key_value_heads=num_attention_heads`, the model will use Multi Head Attention (MHA), if
            `num_key_value_heads=1` the model will use Multi Query Attention (MQA) otherwise GQA is used. When
            converting a multi-head checkpoint to a GQA checkpoint, each group key and value head should be constructed
            by meanpooling all the original heads within that group. For more details, check out [this
            paper](https://huggingface.co/papers/2305.13245). If it is not specified, will default to `8`.
        head_dim (`int`, *optional*, defaults to `hidden_size // num_attention_heads`):
            The attention head dimension.
        hidden_act (`str` or `function`, *optional*, defaults to `"silu"`):
            The non-linear activation function (function or string) in the decoder.
        max_position_embeddings (`int`, *optional*, defaults to `4096*32`):
            The maximum sequence length that this model might ever be used with. Ministral's sliding window attention
            allows sequence of up to 4096*32 tokens.
        initializer_range (`float`, *optional*, defaults to 0.02):
            The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
        rms_norm_eps (`float`, *optional*, defaults to 1e-06):
            The epsilon used by the rms normalization layers.
        use_cache (`bool`, *optional*, defaults to `True`):
            Whether or not the model should return the last key/values attentions (not used by all models). Only
            relevant if `config.is_decoder=True`.
        pad_token_id (`int`, *optional*):
            The id of the padding token.
        bos_token_id (`int`, *optional*, defaults to 1):
            The id of the "beginning-of-sequence" token.
        eos_token_id (`int`, *optional*, defaults to 2):
            The id of the "end-of-sequence" token.
        tie_word_embeddings (`bool`, *optional*, defaults to `False`):
            Whether the model's input and output word embeddings should be tied.
        rope_theta (`float`, *optional*, defaults to 10000.0):
            The base period of the RoPE embeddings.
        sliding_window (`int`, *optional*, defaults to 4096):
            Sliding window attention window size. If not specified, will default to `4096`.
        attention_dropout (`float`, *optional*, defaults to 0.0):
            The dropout ratio for the attention probabilities.
        layer_types (`list`, *optional*):
            Attention pattern for each layer.

    ```python
    >>> from transformers import MinistralModel, MinistralConfig

    >>> # Initializing a Ministral 8B style configuration
    >>> configuration = MinistralConfig()

    >>> # Initializing a model from the Ministral 8B style configuration
    >>> model = MinistralModel(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```	ministral }      8         Nsilu   {Gz?ư>T   r   F     @        c                 K   s   t j| f||||d| || _|	| _|| _|| _|| _|| _|| _|| _	|d u r,|}|| _
|| _|
| _|| _|| _|| _|| _|| _| jd u rX| jd urPdndg| | _d S d S )N)pad_token_idbos_token_ideos_token_idtie_word_embeddingssliding_attentionfull_attention)r   __init__
vocab_sizemax_position_embeddingshidden_sizeintermediate_sizenum_hidden_layersnum_attention_headssliding_windowhead_dimnum_key_value_heads
hidden_actinitializer_rangerms_norm_eps	use_cache
rope_thetaattention_dropoutlayer_types)selfr1   r3   r4   r5   r6   r9   r8   r:   r2   r;   r<   r=   r*   r+   r,   r-   r>   r7   r?   r@   kwargs rC   e/home/ubuntu/vllm_env/lib/python3.10/site-packages/transformers/models/ministral/modular_ministral.pyr0   k   sD   

zMinistralConfig.__init__)r   r   r    r!   r!   r"   Nr#   r$   r%   r&   TNr'   r   Fr(   r   r)   N)__name__
__module____qualname____doc__
model_typer0   rC   rC   rC   rD   r      s0    Kr   c                   @      e Zd ZdS )MinistralMLPNrE   rF   rG   rC   rC   rC   rD   rK          rK   c                       s"   e Zd Zdef fddZ  ZS )MinistralAttention	layer_idxc                    sf   t  || tj|j|j| j dd| _tj|j|j| j dd| _	tj|j|j| j dd| _
d S )NF)bias)superr0   r   Linearr3   r6   r8   q_projr9   k_projv_proj)rA   configrO   	__class__rC   rD   r0      s    zMinistralAttention.__init__)rE   rF   rG   intr0   __classcell__rC   rC   rW   rD   rN      s    rN   c                   @   rJ   )MinistralRMSNormNrL   rC   rC   rC   rD   r[      rM   r[   c                   @   rJ   )MinistralDecoderLayerNrL   rC   rC   rC   rD   r\      rM   r\   c                   @   rJ   )MinistralPreTrainedModelNrL   rC   rC   rC   rD   r]      rM   r]   c                   @   rJ   )MinistralRotaryEmbeddingNrL   rC   rC   rC   rD   r^      rM   r^   c                       s   e Zd Zdef fddZe e							ddeej	 deej
 deej	 dee d	eej d
ee deej	 dee defddZ  ZS )MinistralModelrV   c                    s   t  | | `d S )N)rQ   r0   has_sliding_layers)rA   rV   rW   rC   rD   r0      s   zMinistralModel.__init__N	input_idsattention_maskposition_idspast_key_valuesinputs_embedsr=   cache_positionrB   returnc              
   K   s:  |d u |d uA rt d|d u r| |}|r!|d u r!t| jd}|d u r=|d ur-| nd}	tj|	|	|jd  |jd}|d u rF|	d}t
| }
tsf| j|||||d}td
i |td
i |d}
|}| ||}| jd | jj D ]}||f|
|j |||||d|}qw| |}t||r|d	S d d	S )Nz:You must specify exactly one of input_ids or inputs_embeds)rV   r   r'   )device)rV   input_embedsrb   rf   rd   rc   )r/   r.   )rb   rc   rd   r=   rf   position_embeddings)last_hidden_staterd   rC   )
ValueErrorembed_tokensr   rV   get_seq_lengthtorcharangeshaperh   	unsqueeze
isinstancedictr   r	   
rotary_emblayersr5   attention_typenormr
   )rA   ra   rb   rc   rd   re   r=   rf   rB   past_seen_tokenscausal_mask_mappingmask_kwargshidden_statesrj   decoder_layerrC   rC   rD   forward   s\   



zMinistralModel.forward)NNNNNNN)rE   rF   rG   r   r0   r   r   r   ro   
LongTensorTensorr   FloatTensorboolr   r   r
   r~   rZ   rC   rC   rW   rD   r_      s<    	
r_   c                   @   rJ   )MinistralForCausalLMNrL   rC   rC   rC   rD   r     rM   r   c                   @   rJ   )"MinistralForSequenceClassificationNrL   rC   rC   rC   rD   r     rM   r   c                   @   rJ   )MinistralForTokenClassificationNrL   rC   rC   rC   rD   r     rM   r   c                   @   rJ   )MinistralForQuestionAnsweringNrL   rC   rC   rC   rD   r     rM   r   )r   r]   r_   r   r   r   r   )0typingr   ro   r   cache_utilsr   r   configuration_utilsr   masking_utilsr   r	   modeling_outputsr
   processing_utilsr   utilsr   r   utils.genericr   mistral.configuration_mistralr   qwen2.modeling_qwen2r   r   r   r   r   r   r   r   r   r   r   r   rK   rN   r[   r\   r]   r^   r_   r   r   r   r   __all__rC   rC   rC   rD   <module>   s4    4 	M