o
    eiq                     @   s   d dl mZ d dlZddlmZ ddlmZ ddlmZ ddl	m
Z
mZ dd	lmZ d
dlmZ eeZeG dd deZe
G dd deZdgZdS )    )	dataclassN   )Cache)$ImageClassifierOutputWithNoAttention)PreTrainedModel)auto_docstringlogging   )AutoModelForImageTextToText   )ShieldGemma2Configc                   @   s$   e Zd ZU dZdZejdB ed< dS )0ShieldGemma2ImageClassifierOutputWithNoAttentionz^ShieldGemma2 classifies imags as violative or not relative to a specific policy
    Args:
    Nprobabilities)__name__
__module____qualname____doc__r   torchTensor__annotations__ r   r   t/home/ubuntu/transcripts/venv/lib/python3.10/site-packages/transformers/models/shieldgemma2/modeling_shieldgemma2.pyr   !   s   
 r   c                        s  e Zd ZU eed< dZdddddZdef fdd	Zd
d Zdd Z	dd Z
dd Ze														d%dejdB dejdB dejdB dejdB dedB dejdB dejdB dejdB dejdB dedB dedB dedB d edB d!eejB d"efd#d$Z  ZS )&"ShieldGemma2ForImageClassificationconfig)imagetextzmodel.model.language_modelzmodel.model.vision_towerz!model.model.multi_modal_projectorzmodel.lm_head)zmodel.language_model.modelzmodel.vision_towerzmodel.multi_modal_projectorzmodel.language_model.lm_headc                    sD   t  j|d t|dd| _t|dd| _tj|d| _|   d S )N)r   yes_token_indexi *  no_token_indexi  )	super__init__getattrr   r   r
   from_configmodel	post_init)selfr   	__class__r   r   r   5   s
   z+ShieldGemma2ForImageClassification.__init__c                 C      | j   S N)r"   get_decoderget_input_embeddingsr$   r   r   r   r*   <      z7ShieldGemma2ForImageClassification.get_input_embeddingsc                 C      | j  | d S r(   )r"   r)   set_input_embeddings)r$   valuer   r   r   r.   ?      z7ShieldGemma2ForImageClassification.set_input_embeddingsc                 C   r'   r(   )r"   r)   get_output_embeddingsr+   r   r   r   r1   B   r,   z8ShieldGemma2ForImageClassification.get_output_embeddingsc                 C   r-   r(   )r"   r)   set_output_embeddings)r$   new_embeddingsr   r   r   r2   E   r0   z8ShieldGemma2ForImageClassification.set_output_embeddingsNr   	input_idspixel_valuesattention_maskposition_idspast_key_valuestoken_type_idscache_positioninputs_embedslabels	use_cacheoutput_attentionsoutput_hidden_statesreturn_dictlogits_to_keepreturnc                 K   sh   | j d|||||||||	|
||||d|}|j}|ddd| j| jgf }tj|dd}t||dS )aY  
        Returns:
            A `ShieldGemma2ImageClassifierOutputWithNoAttention` instance containing the logits and probabilities
            associated with the model predicting the `Yes` or `No` token as the response to that prompt, captured in the
            following properties.

                *   `logits` (`torch.Tensor` of shape `(batch_size, 2)`):
                    The first position along dim=1 is the logits for the `Yes` token and the second position along dim=1 is
                    the logits for the `No` token.
                *   `probabilities` (`torch.Tensor` of shape `(batch_size, 2)`):
                    The first position along dim=1 is the probability of predicting the `Yes` token and the second position
                    along dim=1 is the probability of predicting the `No` token.

            ShieldGemma prompts are constructed such that predicting the `Yes` token means the content *does violate* the
            policy as described. If you are only interested in the violative condition, use
            `violated = outputs.probabilities[:, 1]` to extract that slice from the output tensors.

            When used with the `ShieldGemma2Processor`, the `batch_size` will be equal to `len(images) * len(policies)`,
            and the order within the batch will be img1_policy1, ... img1_policyN, ... imgM_policyN.
        )r4   r5   r6   r7   r8   r9   r:   r;   r<   r=   r>   r?   r@   rA   N)dim)logitsr   r   )r"   rE   r   r   r   softmaxr   )r$   r4   r5   r6   r7   r8   r9   r:   r;   r<   r=   r>   r?   r@   rA   	lm_kwargsoutputsrE   selected_logitsr   r   r   r   forwardH   s2   'z*ShieldGemma2ForImageClassification.forward)NNNNNNNNNNNNNr   )r   r   r   r   r   input_modalities_checkpoint_conversion_mappingr   r*   r.   r1   r2   r   r   
LongTensorFloatTensorr   r   boolintr   rJ   __classcell__r   r   r%   r   r   *   sv   
 	
r   )dataclassesr   r   cache_utilsr   modeling_outputsr   modeling_utilsr   utilsr   r   autor
   configuration_shieldgemma2r   
get_loggerr   loggerr   r   __all__r   r   r   r   <module>   s   
_