o
    i                     @   sv  d dl Z d dlmZ d dlmZmZmZ d dlZd dlmZ ddl	m
Z
 ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZmZ ddlmZmZ ddlmZ ddlmZmZmZmZ ddl m!Z! ddl"m#Z#m$Z$ eeddG dd deZ%eeddG dd deZ&edG dd dej'Z(G dd dej'Z)G dd  d ej'Z*	!dGd"ej'd#ej+d$ej+d%ej+d&eej+ d'e,d(e,fd)d*Z-G d+d, d,ej'Z.G d-d. d.ej'Z/G d/d0 d0ej'Z0G d1d2 d2eZ1G d3d4 d4ej'Z2G d5d6 d6ej'Z3G d7d8 d8ej4Z5G d9d: d:eZ6d;ej+d<e7fd=d>Z8G d?d@ d@e6Z9edAdG dBdC dCe6Z:eG dDdE dEe6eZ;g dFZ<dS )H    N)	dataclass)CallableOptionalUnion)nn   )ACT2FN)Cache)GenerationMixin)use_kernel_forward_from_hub)GradientCheckpointingLayer)BaseModelOutputBaseModelOutputWithPast)ALL_ATTENTION_FUNCTIONSPreTrainedModel)Unpack)ModelOutputTransformersKwargsauto_docstringcan_return_tuple   )	AutoModel   )Ovis2ConfigOvis2VisionConfigzJ
    Base class for Llava outputs, with hidden states and attentions.
    )custom_introc                   @   s$   e Zd ZU dZdZeej ed< dS )Ovis2ModelOutputWithPasta  
    past_key_values (`Cache`, *optional*, returned when `use_cache=True` is passed or when `config.use_cache=True`):
        It is a [`~cache_utils.Cache`] instance. For more details, see our [kv cache guide](https://huggingface.co/docs/transformers/en/kv_cache).

        Contains pre-computed hidden-states (key and values in the self-attention blocks) that can be used (see
        `past_key_values` input) to speed up sequential decoding.
    image_hidden_states (`torch.FloatTensor`, *optional*):
        A `torch.FloatTensor` of size `(batch_size, num_images, sequence_length, hidden_size)`.
        image_hidden_states of the model produced by the vision encoder and after projecting the last hidden state.
    Nimage_hidden_states)	__name__
__module____qualname____doc__r   r   torchFloatTensor__annotations__ r%   r%   e/home/ubuntu/veenaModal/venv/lib/python3.10/site-packages/transformers/models/ovis2/modeling_ovis2.pyr   *   s   
 r   zQ
    Base class for Ovis2 causal language model (or autoregressive) outputs.
    c                   @   s   e Zd ZU dZdZeej ed< dZ	eej ed< dZ
ee ed< dZeeej  ed< dZeeej  ed< dZeej ed< dS )	Ovis2CausalLMOutputWithPastaA  
    loss (`torch.FloatTensor` of shape `(1,)`, *optional*, returned when `labels` is provided):
        Language modeling loss (for next-token prediction).
    logits (`torch.FloatTensor` of shape `(batch_size, sequence_length, config.vocab_size)`):
        Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax).
    past_key_values (`Cache`, *optional*, returned when `use_cache=True` is passed or when `config.use_cache=True`):
        It is a [`~cache_utils.Cache`] instance. For more details, see our [kv cache guide](https://huggingface.co/docs/transformers/en/kv_cache).

        Contains pre-computed hidden-states (key and values in the self-attention blocks) that can be used (see
        `past_key_values` input) to speed up sequential decoding.
    image_hidden_states (`torch.FloatTensor`, *optional*):
        A `torch.FloatTensor` of size (batch_size * num_patches, num_images, sequence_length, hidden_size)`.
        image_hidden_states of the model produced by the vision encoder and after projecting the last hidden state.
    Nlosslogitspast_key_valueshidden_states
attentionsr   )r   r   r    r!   r(   r   r"   r#   r$   r)   r*   r	   r+   tupler,   r   r%   r%   r%   r&   r'   ?   s   
 r'   RMSNormc                       s.   e Zd Zd fdd	Zdd Zdd Z  ZS )	Ovis2RMSNormư>c                    s&   t    tt|| _|| _dS )z;
        Ovis2RMSNorm is equivalent to T5LayerNorm
        N)super__init__r   	Parameterr"   onesweightvariance_epsilon)selfhidden_sizeeps	__class__r%   r&   r2   _   s   

zOvis2RMSNorm.__init__c                 C   sJ   |j }|tj}|djddd}|t|| j  }| j|| S )Nr   Tkeepdim)	dtypetor"   float32powmeanrsqrtr6   r5   )r7   r+   input_dtypevariancer%   r%   r&   forwardg   s
   zOvis2RMSNorm.forwardc                 C   s   t | jj d| j S )Nz, eps=)r-   r5   shaper6   r7   r%   r%   r&   
extra_reprn   s   zOvis2RMSNorm.extra_repr)r0   )r   r   r    r2   rG   rJ   __classcell__r%   r%   r:   r&   r/   ]   s    r/   c                       $   e Zd Z fddZdd Z  ZS )Ovis2VisionMLPc                    x   t    || _|j| _|j| _tj| j| j|jd| _tj| j| j|jd| _	tj| j| j|jd| _
t|j | _d S Nbiasr1   r2   configr8   intermediate_sizer   Linearmlp_bias	gate_projup_proj	down_projr   
hidden_actact_fnr7   rS   r:   r%   r&   r2   s      
zOvis2VisionMLP.__init__c                 C   $   |  | | || | }|S NrY   r[   rW   rX   r7   xrY   r%   r%   r&   rG   }       zOvis2VisionMLP.forwardr   r   r    r2   rG   rK   r%   r%   r:   r&   rM   r       
rM   c                       s8   e Zd Zdef fddZdejdejfddZ  Z	S )Ovis2VisionEmbeddingsrS   c                    s   t    || _|j| _|j| _|j| _tj|j	| j| j| jdd| _
| j| j d | _| j| _t| j| j| _| jdt| jddd t|j|j| _d S )Nvalid)in_channelsout_channelskernel_sizestridepaddingr   position_ids)r   r<   F)
persistent)r1   r2   rS   r8   	embed_dim
image_size
patch_sizer   Conv2dnum_channelspatch_embeddingnum_patchesnum_positions	Embeddingposition_embeddingregister_bufferr"   arangeexpandr/   rms_norm_epsrms_normr\   r:   r%   r&   r2      s"   
zOvis2VisionEmbeddings.__init__pixel_valuesreturnc                 C   sL   | j jj}|  |j|d}|ddd}| |}|| | j }|S )Nr?   r   r   )	rt   r5   r?   r@   flatten	transposer}   rx   rm   )r7   r~   target_dtypepatch_embeds
embeddingsr%   r%   r&   rG      s   

zOvis2VisionEmbeddings.forward)
r   r   r    r   r2   r"   r#   TensorrG   rK   r%   r%   r:   r&   rf      s    rf           modulequerykeyvalueattention_maskscalingdropoutc           
      K   s|   t ||dd| }|d ur|| }tjj|dt jd|j}tjj	||| j
d}t ||}	|	dd }	|	|fS )Nr<   )dimr?   )ptrainingr   r   )r"   matmulr   r   
functionalsoftmaxrA   r@   r?   r   r   
contiguous)
r   r   r   r   r   r   r   kwargsattn_weightsattn_outputr%   r%   r&   eager_attention_forward   s   
r   c                
       R   e Zd ZdZ fddZ	d
dejdeej deejeej f fdd	Z	  Z
S )Ovis2VisionAttention=Multi-headed attention from 'Attention Is All You Need' paperc                       t    || _|j| _|j| _| j| j | _| j| j | jkr-td| j d| j d| jd | _	|j
| _d| _tj| j| j|jd| _tj| j| j|jd| _tj| j| j|jd| _tj| j| j|jd| _d S Nz;embed_dim must be divisible by num_heads (got `embed_dim`: z and `num_heads`: z).g      FrP   r1   r2   rS   r8   ro   num_attention_heads	num_headshead_dim
ValueErrorscaleattention_dropoutr   	is_causalr   rU   qkv_biask_projv_projq_projout_projr\   r:   r%   r&   r2      $   

zOvis2VisionAttention.__init__Nr+   r   r   c              
   K      |j \}}}| |}| |}| |}	|||| j| jdd}|||| j| jdd}|	||| j| jdd}	t}
| j	j
dkrMt| j	j
 }
|
| |||	|| j| j| js\dn| jd\}}|||| }| |}||fS z#Input shape: Batch x Time x Channelr   r   eagerr   )r   r   r   rH   r   r   r   viewr   r   r   r   rS   _attn_implementationr   r   r   r   r   reshaper   r   r7   r+   r   r   
batch_size
seq_lengthro   querieskeysvaluesattention_interfacer   r   r%   r%   r&   rG      .   




zOvis2VisionAttention.forwardr_   r   r   r    r!   r2   r"   r   r   r-   rG   rK   r%   r%   r:   r&   r          r   c                       rL   )Ovis2MLPc                    rN   rO   rR   r\   r:   r%   r&   r2      r]   zOvis2MLP.__init__c                 C   r^   r_   r`   ra   r%   r%   r&   rG     rc   zOvis2MLP.forwardrd   r%   r%   r:   r&   r      re   r   c                
       r   )Ovis2Attentionr   c                    r   r   r   r\   r:   r%   r&   r2   
  r   zOvis2Attention.__init__Nr+   r   r   c              
   K   r   r   r   r   r%   r%   r&   rG     r   zOvis2Attention.forwardr_   r   r%   r%   r:   r&   r     r   r   c                	       sN   e Zd Zdef fddZ	ddejdeej dee	 dejfd	d
Z
  ZS )Ovis2VisionEncoderLayerrS   c                    sB   t    t|| _t|| _t|j|j| _	t|j|j| _
d S r_   )r1   r2   r   	attentionr   ffnr/   r8   r|   	rms_norm1	rms_norm2r\   r:   r%   r&   r2   E  s
   


z Ovis2VisionEncoderLayer.__init__Nr+   r   r   r   c                 K   sL   |  |}| jd||d|\}}|| }| |}| |}|| }|S )N)r+   r   r%   )r   r   r   r   )r7   r+   r   r   norm_hidden_statesr   _
mlp_outputr%   r%   r&   rG   L  s   


zOvis2VisionEncoderLayer.forwardr_   )r   r   r    r   r2   r"   r   r   r   r   rG   rK   r%   r%   r:   r&   r   D  s    
r   c                	       sR   e Zd ZdZdef fddZee	ddee	j
 dee defd	d
Z  ZS )Ovis2VisionEncoderz
    Transformer encoder consisting of `config.num_hidden_layers` self attention layers. Each layer is a
    [`Ovis2VisionEncoderLayer`].

    Args:
        config: Ovis2VisionConfig
    rS   c                    s:   t     | _t fddt jD | _d| _d S )Nc                    s   g | ]}t  qS r%   )r   ).0r   rS   r%   r&   
<listcomp>i  s    z/Ovis2VisionEncoder.__init__.<locals>.<listcomp>F)	r1   r2   rS   r   
ModuleListrangenum_hidden_layerslayersgradient_checkpointingr\   r:   r   r&   r2   f  s   
 
zOvis2VisionEncoder.__init__Nr   r   r   c                 K   s,   |}| j D ]}|||fi |}qt|dS )Nlast_hidden_state)r   r   )r7   inputs_embedsr   r   r+   encoder_layerr%   r%   r&   rG   m  s   

zOvis2VisionEncoder.forwardr_   )r   r   r    r!   r   r2   r   r   r   r"   r   r   r   r   rG   rK   r%   r%   r:   r&   r   ]  s    r   c                       s>   e Zd Zdef fddZe	ddeej fddZ	  Z
S )	Ovis2VisionTransformerrS   c                    s>   t    || _t|| _t|| _t|j|j	| _
d| _d S )NF)r1   r2   rS   rf   r   r   encoderr/   r8   r|   r}   r   r\   r:   r%   r&   r2   }  s   



zOvis2VisionTransformer.__init__Nr   c                 K   s:   |  |}| jd||d|}|j}| |}t|dS )N)r   r   r   r%   )r   r   r   r}   r   )r7   r~   r   r   r+   encoder_outputsr   r%   r%   r&   rG     s   


zOvis2VisionTransformer.forwardr_   )r   r   r    r   r2   r   r   r"   r   rG   rK   r%   r%   r:   r&   r   |  s    r   c                       s*   e Zd Zdejdejf fddZ  ZS )Ovis2VisualEmbeddingTablevisual_tokensr   c                    s8   |j tjtjtjtjtjfv rt |S t	|| j
S r_   )r?   r"   int8int16int32int64longr1   rG   r   r5   )r7   r   r:   r%   r&   rG     s   z!Ovis2VisualEmbeddingTable.forward)r   r   r    r"   r   rG   rK   r%   r%   r:   r&   r     s    "r   c                   @   s@   e Zd ZU eed< dZdZdgZdZdZ	dZ
dZdZdZdZdS )Ovis2PreTrainedModelrS   modelTr   r*   N)r   r   r    r   r$   base_model_prefixsupports_gradient_checkpointing_no_split_modules_skip_keys_device_placement_supports_cache_class_supports_flash_attn_supports_flex_attn_supports_sdpa_can_compile_fullgraph_supports_attention_backendr%   r%   r%   r&   r     s   
 r   r)   r   c                 C   sJ   |  |}|j|ddd }tj| tjd||d}||  | }|S )NTr=   r   )memory_formatg      ?)r   maxr"   
zeros_likelegacy_contiguous_formatscatter_detach)r)   r   y_softindexy_hardretr%   r%   r&   hard_softmax  s
   
r   c                       sL   e Zd ZU eed< def fddZdejdeej	ej	f fddZ
  ZS )Ovis2VisionModelrS   c                    sl   t  | || _t|| _|j| _|j| _tj|j	|j
 |j
 | j| j dd| _t| j| j | _d S NFrP   )r1   r2   rS   r   transformernum_visual_indicator_tokens
vocab_sizer   rU   r8   hidden_stridehead_linear	LayerNorm	head_normr\   r:   r%   r&   r2     s   

zOvis2VisionModel.__init__r~   r   c              	   K   sJ  | j |fi |}|d }| jjdkrl|j\}}}| jj}tt|}	|	|	 |kr.td||	|  | }
tj	
|ddd|
d|
fdd}|	|
7 }	|||	| ||	| ||}|dddddd}||d	|| | }| |}| |}| jjd
krtj	j|d	dd}|S | jjdkrt|d	d}|S | jjdkrtj	j|d	d}|S )Nr   r   z.Token sequence length must be a perfect squareconstantr   r         r<   gumbel_argmaxT)r   hard	st_argmaxr   r   )r   rS   r   rH   intmathsqrtr   r   r   padr   permuter   r   tokenize_functiongumbel_softmaxr   r   )r7   r~   r   outputsr   
num_imagesseq_len
hidden_dimr   sqrt_lpad_sizer)   
prob_tokenr%   r%   r&   rG     s:   

zOvis2VisionModel.forward)r   r   r    r   r$   r2   r"   r#   r-   r   rG   rK   r%   r%   r:   r&   r     s   
 (r   zu
    The Ovis2 model which consists of a vision backbone and a language model, without a language modeling head.
    c                !       s.  e Zd Zi Zdef fddZdd Zdd Zdd	 Zd
d Z	de
jde
jfddZde
jde
jde
jfddZee													d#dee
j dee
j dee
j dee
j dee dee
j dee
j dee dee dee dee dee
j d eee
jf deeef fd!d"Z  ZS )$
Ovis2ModelrS   c                    s^   t  | t|j| _t|j| _t	|jj
|j| _|jj
| _|j
| _
|j| _|   d S r_   )r1   r2   r   vision_configvision_towerr   from_configtext_configlanguage_modelr   r   r8   visual_embeddings_tablevisual_vocab_sizevisual_indicator_token_ids	post_initr\   r:   r%   r&   r2     s   
zOvis2Model.__init__c                 C   
   | j  S r_   )r  get_input_embeddingsrI   r%   r%   r&   r        
zOvis2Model.get_input_embeddingsc                 C      | j | d S r_   )r  set_input_embeddingsr7   r   r%   r%   r&   r#       zOvis2Model.set_input_embeddingsc                 C   s
   || _ d S r_   r  r7   decoderr%   r%   r&   set_decoder  r!  zOvis2Model.set_decoderc                 C      | j S r_   r&  rI   r%   r%   r&   get_decoder
     zOvis2Model.get_decoderr~   r   c           	      C   s   |  |}|j\}}}tj||| j jf|j|jd|jd}tj||gdd}| 	|}tj
| j| j j | jtjd|j}| 	|}||fS )a  
        Obtains image last hidden states from the vision tower and apply multimodal projection.

        Args:
            pixel_values (`torch.FloatTensor]` of shape `(batch_size, channels, height, width)`):
               The tensors corresponding to the input images.
            vision_feature_layer (`Union[int, list[int]]`, *optional*):
                The index of the layer to select the vision feature. If multiple indices are provided,
                the vision feature of the corresponding indices will be concatenated to form the
                vision features.
            vision_feature_select_strategy (`str`, *optional*):
                The feature selection strategy used to select the vision feature from the vision backbone.
                Can be one of `"default"` or `"full"`
        Returns:
            image_features (`torch.Tensor`): Image feature tensor of shape `(num_images, image_length, embed_dim)`).
        F)r?   devicerequires_gradlayoutr   r  r   )r  rH   r"   zerosr   r?   r-  r/  catr  rz   r  r   r@   )	r7   r~   image_featuresr   img_seq_lenr   padding_tensorvisual_indicatorvisual_indicator_featuresr%   r%   r&   get_image_features  s(   


zOvis2Model.get_image_features	input_idsr   r2  c                 C   s   |du r||   tj| jjtj|jdk}|d}n|| jjk}| }|	d
||j}|jd |jd  }||  | krPtd| d| |S )z
        Obtains multimodal placeholder mask from `input_ids` or `inputs_embeds`, and checks that the placeholder token count is
        equal to the length of multimodal features. If the lengths are different, an error is raised.
        Nr?   r-  r<   r   r   z6Image features and image tokens do not match: tokens: z, features )r   r"   tensorrS   image_token_idr   r-  allsum	unsqueeze	expand_asr@   rH   numelr   )r7   r8  r   r2  special_image_maskn_image_tokensn_image_featuresr%   r%   r&   get_placeholder_mask6  s   zOvis2Model.get_placeholder_maskNr   r   rm   r*   labels	use_cacheoutput_attentionsoutput_hidden_statesreturn_dictcache_positionlogits_to_keepc                 K   sZ  |	d ur|	n| j j}	|
d ur|
n| j j}
|d u |d uA r td|d u r*|  |}|d ur| j|d\}}| j|||d}|||}t| j	D ];\}}|d u rg||  t
j|t
j|jdk}|d}n||k|j}| r|| || |j|j||< qI| jd	||||||	|
d||d
|}t|j|j|j|j|d ur|dS d dS )
Nz:You must specify exactly one of input_ids or inputs_embedsr~   )r   r2  r9  r<   T)
r   rm   r*   r   rF  rG  rH  rI  rJ  rK  )r   r*   r+   r,   r   r%   )rS   rG  rH  r   r   r7  rD  masked_scatter	enumerater  r"   r:  r   r-  r<  r@   anyr?  r?   r  r   r   r*   r+   r,   )r7   r8  r~   r   rm   r*   r   rE  rF  rG  rH  rI  rJ  rK  r   r2  r6  rA  ivisual_indicator_idmaskr  r%   r%   r&   rG   N  sf   

zOvis2Model.forwardNNNNNNNNNNNNr   )r   r   r    _checkpoint_conversion_mappingr   r2   r   r#  r)  r+  r"   r#   r7  
LongTensorrD  r   r   r   r   r	   boolr   r  r-   r   rG   rK   r%   r%   r:   r&   r    s~    
)
	

r  c                !       s`  e Zd Zi ZdgZdef fddZdd Zdd Zd	e	j
fd
dZdd Zdd ZdejfddZedd Zedd Zedd Zee													d+deej deej deej deej dee deej d eej d!ee d"ee d#ee d$ee d%eej d&eeejf d	eeef fd'd(Z 						d, fd)d*	Z!  Z"S )-Ovis2ForConditionalGenerationzlm_head.weightrS   c                    s8   t  | t|| _tj|j|jdd| _| 	  d S r   )
r1   r2   r  r   r   rU   r8   r   lm_headr  r\   r:   r%   r&   r2     s   
z&Ovis2ForConditionalGeneration.__init__c                 C   r  r_   )r   r   rI   r%   r%   r&   r     r!  z2Ovis2ForConditionalGeneration.get_input_embeddingsc                 C   r"  r_   )r   r#  r$  r%   r%   r&   r#    r%  z2Ovis2ForConditionalGeneration.set_input_embeddingsr   c                 C   r*  r_   )rX  rI   r%   r%   r&   get_output_embeddings  r,  z3Ovis2ForConditionalGeneration.get_output_embeddingsc                 C   r"  r_   )r   r)  r'  r%   r%   r&   r)    r%  z)Ovis2ForConditionalGeneration.set_decoderc                 C   r  r_   )r   r+  rI   r%   r%   r&   r+    r!  z)Ovis2ForConditionalGeneration.get_decoderr~   c                 C   s   | j j|dS )NrL  )r   r7  )r7   r~   r%   r%   r&   r7    s   z0Ovis2ForConditionalGeneration.get_image_featuresc                 C      | j jS r_   )r   r  rI   r%   r%   r&   r       z,Ovis2ForConditionalGeneration.language_modelc                 C   rZ  r_   )r   r  rI   r%   r%   r&   r    r[  z*Ovis2ForConditionalGeneration.vision_towerc                 C   s   t d)NzNot needed for Ovis2)AttributeErrorrI   r%   r%   r&   multi_modal_projector  r[  z3Ovis2ForConditionalGeneration.multi_modal_projectorNr   r8  r   rm   r*   r   rE  rF  rG  rH  rI  rJ  rK  c                 K   s   |	dur|	n| j j}	|
dur|
n| j j}
| jd||||||||	|
d|d|}|d }t|tr7t| dn|}| |dd|ddf }d}|dur\| jd||| j j	j
d|}t|||j|j|j|jdS )a  
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the masked language modeling loss. Indices should either be in `[0, ...,
            config.vocab_size]` or -100 (see `input_ids` docstring). Tokens with indices set to `-100` are ignored
            (masked), the loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`.

        Example:

        ```python
        >>> from PIL import Image
        >>> import requests
        >>> from transformers import AutoProcessor, Ovis2ForConditionalGeneration

        >>> model = Ovis2ForConditionalGeneration.from_pretrained("thisisiron/Ovis2-2B-hf")
        >>> processor = AutoProcessor.from_pretrained("thisisiron/Ovis2-2B-hf")

        >>> prompt = "<|im_start|>user\n<image>\nDescribe the image.<|im_end|>\n<|im_start|>assistant\n"
        >>> url = "http://images.cocodataset.org/val2014/COCO_val2014_000000537955.jpg"
        >>> image = Image.open(requests.get(url, stream=True).raw)

        >>> inputs = processor(images=image, text=prompt, return_tensors="pt")

        >>> # Generate
        >>> generate_ids = model.generate(**inputs, max_new_tokens=15)
        >>> processor.batch_decode(generate_ids, skip_special_tokens=True)[0]
        "user\n\nDescribe the image.\nassistant\nThe image features a brown dog standing on a wooden floor, looking up with"
        ```NT)r8  r~   r   rm   r*   r   rF  rG  rH  rI  rJ  r   )r)   rE  r   )r(   r)   r*   r+   r,   r   r%   )rS   rG  rH  r   
isinstancer  slicerX  loss_functionr  r   r'   r*   r+   r,   r   )r7   r8  r~   r   rm   r*   r   rE  rF  rG  rH  rI  rJ  rK  r   r  r+   slice_indicesr)   r(   r%   r%   r&   rG     sH   .z%Ovis2ForConditionalGeneration.forwardc           
         s8   t  j|f|||||d|}	|d dkr||	d< |	S )N)r*   r   r   rJ  rK  r   r~   )r1   prepare_inputs_for_generation)
r7   r8  r*   r   r~   r   rJ  rK  r   model_inputsr:   r%   r&   rb    s   
z;Ovis2ForConditionalGeneration.prepare_inputs_for_generationrS  )NNNNNN)#r   r   r    rT  _tied_weights_keysr   r2   r   r#  r   ModulerY  r)  r+  r"   r#   r7  propertyr  r  r]  r   r   r   rU  r   r	   rV  r   r  r-   r'   rG   rb  rK   r%   r%   r:   r&   rW    s    


	

WrW  )r   r  rW  )r   )=r  dataclassesr   typingr   r   r   r"   r   activationsr   cache_utilsr	   
generationr
   integrationsr   modeling_layersr   modeling_outputsr   r   modeling_utilsr   r   processing_utilsr   utilsr   r   r   r   autor   configuration_ovis2r   r   r   r'   re  r/   rM   rf   r   floatr   r   r   r   r   r   r   rw   r   r   r  r   r   r  rW  __all__r%   r%   r%   r&   <module>   s   (
==
4 + 