o
    ei                     @   s8  d Z ddlZddlmZ ddlZddlmZ ddlmZ ddlm	Z
 ddlmZ dd	lmZ dd
lmZmZmZ ddlmZ ddlmZ ddlmZmZmZmZ ddlmZ eeZ eeddG dd deZ!G dd dej"Z#G dd dej"Z$G dd dej"Z%G dd dej"Z&G dd dej"Z'G dd dej"Z(G d d! d!ej"Z)G d"d# d#ej"Z*G d$d% d%ej"Z+G d&d' d'eZ,G d(d) d)ej"Z-G d*d+ d+ej"Z.G d,d- d-ej"Z/eG d.d/ d/eZ0eG d0d1 d1e0Z1eG d2d3 d3e0Z2ed4dG d5d6 d6e0Z3ed7dG d8d9 d9e0Z4g d:Z5dS );zPyTorch Bros model.    N)	dataclass)nn)CrossEntropyLoss   )initialization)ACT2FN)GradientCheckpointingLayer)"BaseModelOutputWithCrossAttentions,BaseModelOutputWithPoolingAndCrossAttentionsTokenClassifierOutput)PreTrainedModel)apply_chunking_to_forward)ModelOutputauto_docstringcan_return_tuplelogging   )
BrosConfigz@
    Base class for outputs of token classification models.
    )custom_introc                   @   st   e Zd ZU dZdZejdB ed< dZejdB ed< dZ	ejdB ed< dZ
eej dB ed< dZeej dB ed< dS )BrosSpadeOutputa  
    loss (`torch.FloatTensor` of shape `(1,)`, *optional*, returned when `labels` is provided):
        Classification loss.
    initial_token_logits (`torch.FloatTensor` of shape `(batch_size, sequence_length, config.num_labels)`):
        Classification scores for entity initial tokens (before SoftMax).
    subsequent_token_logits (`torch.FloatTensor` of shape `(batch_size, sequence_length, sequence_length+1)`):
        Classification scores for entity sequence tokens (before SoftMax).
    Nlossinitial_token_logitssubsequent_token_logitshidden_states
attentions)__name__
__module____qualname____doc__r   torchFloatTensor__annotations__r   r   r   tupler    r#   r#   d/home/ubuntu/transcripts/venv/lib/python3.10/site-packages/transformers/models/bros/modeling_bros.pyr   (   s   
 	r   c                       2   e Zd Z fddZdejdejfddZ  ZS )BrosPositionalEmbedding1Dc                    s@   t    |j| _ddtd| jd| j   }| d| d S )Nr   '                 @inv_freq)super__init__dim_bbox_sinusoid_emb_1dr   arangeregister_buffer)selfconfigr*   	__class__r#   r$   r,   B   s   
z"BrosPositionalEmbedding1D.__init__pos_seqreturnc                 C   sX   |  }|\}}}||||d| jddd| jd  }tj| | gdd}|S )Nr      dim)sizeviewr*   r-   r   catsincos)r0   r4   seq_sizeb1b2b3sinusoid_inppos_embr#   r#   r$   forwardL   s
   
(z!BrosPositionalEmbedding1D.forwardr   r   r   r,   r   TensorrE   __classcell__r#   r#   r2   r$   r&   ?   s    
r&   c                       r%   )BrosPositionalEmbedding2Dc                    s*   t    |j| _t|| _t|| _d S N)r+   r,   dim_bboxr&   	x_pos_emb	y_pos_embr0   r1   r2   r#   r$   r,   U   s   

z"BrosPositionalEmbedding2D.__init__bboxr5   c                 C   sd   g }t | jD ]!}|d dkr|| |d|f  q|| |d|f  qtj|dd}|S )Nr6   r   .r7   r8   )rangerK   appendrL   rM   r   r<   )r0   rO   stackibbox_pos_embr#   r#   r$   rE   \   s   z!BrosPositionalEmbedding2D.forwardrF   r#   r#   r2   r$   rI   T   s    rI   c                       s,   e Zd Z fddZdejfddZ  ZS )BrosBboxEmbeddingsc                    s.   t    t|| _tj|j|jdd| _d S )NF)bias)	r+   r,   rI   bbox_sinusoid_embr   Lineardim_bbox_sinusoid_emb_2ddim_bbox_projectionbbox_projectionrN   r2   r#   r$   r,   h   s   

zBrosBboxEmbeddings.__init__rO   c                 C   s\   | dd}|d d d d d d d f |d d d d d d d f  }| |}| |}|S )Nr   r   )	transposerW   r[   )r0   rO   bbox_tbbox_posrT   r#   r#   r$   rE   m   s
   8

zBrosBboxEmbeddings.forwardrF   r#   r#   r2   r$   rU   g   s    rU   c                       sb   e Zd ZdZ fddZ				ddejdB dejdB dejdB dejdB d	ejf
d
dZ  ZS )BrosTextEmbeddingszGConstruct the embeddings from word, position and token_type embeddings.c                    s   t    tj|j|j|jd| _t|j|j| _	t|j
|j| _tj|j|jd| _t|j| _| dt|jd | jdtj| j tj| jjddd d S )	N)padding_idxepsposition_idsr   r7   token_type_idsdtypedeviceF)
persistent)r+   r,   r   	Embedding
vocab_sizehidden_sizepad_token_idword_embeddingsmax_position_embeddingsposition_embeddingstype_vocab_sizetoken_type_embeddings	LayerNormlayer_norm_epsDropouthidden_dropout_probdropoutr/   r   r.   expandzerosrc   r:   longrh   rN   r2   r#   r$   r,   y   s    

zBrosTextEmbeddings.__init__N	input_idsre   rc   inputs_embedsr5   c                 C   s   |d ur	|  }n|  d d }|d }|d u r$| jd d d |f }|d u rNt| drC| jd d d |f }||d |}|}ntj|tj| jjd}|d u rW| 	|}| 
|}	||	 }
| |}|
|7 }
| |
}
| |
}
|
S )Nr7   r   re   r   rf   )r:   rc   hasattrre   rx   r   ry   rz   rh   rn   rr   rp   rs   rw   )r0   r{   re   rc   r|   input_shape
seq_lengthbuffered_token_type_ids buffered_token_type_ids_expandedrr   
embeddingsrp   r#   r#   r$   rE      s*   






zBrosTextEmbeddings.forward)NNNN)	r   r   r   r   r,   r   rG   rE   rH   r#   r#   r2   r$   r_   v   s$    r_   c                       sn   e Zd Z fddZ				ddejdejdejdB dejdB d	ejdB d
ejdB deej fddZ  ZS )BrosSelfAttentionc                    s   t    |j|j dkrt|dstd|j d|j d|j| _t|j|j | _| j| j | _t	
|j| j| _t	
|j| j| _t	
|j| j| _t	|j| _|j| _d S )Nr   embedding_sizezThe hidden size (z6) is not a multiple of the number of attention heads ())r+   r,   rl   num_attention_headsr}   
ValueErrorintattention_head_sizeall_head_sizer   rX   querykeyvalueru   attention_probs_dropout_probrw   
is_decoderrN   r2   r#   r$   r,      s   

zBrosSelfAttention.__init__NFr   rT   attention_maskencoder_hidden_statesencoder_attention_maskoutput_attentionsr5   c                 C   s  |j d d| j| jf}| ||dd}|d u}	|	r8| ||dd}
| ||dd}|}n| ||dd}
| ||dd}t	||
dd}|j \}}}}|||||}|
g d}td||f}|| }|t| j }|d ur|| }tjdd|}| |}t	||}|
dddd	 }| d d | jf }|j| }|r||fn|f}| jr|d
 }|S )Nr   r7   r   r6   )r6   r   r   r   zbnid,bijd->bnijr8   r   rJ   )shaper   r   r   r;   r\   r   r   r   matmulpermuteeinsummathsqrtr   Softmaxrw   
contiguousr:   r   r   )r0   r   rT   r   r   r   r   hidden_shapequery_layeris_cross_attention	key_layervalue_layerattention_scores
batch_sizen_headr   d_headbbox_pos_scoresattention_probscontext_layernew_context_layer_shapeoutputsr#   r#   r$   rE      s8   	

zBrosSelfAttention.forwardNNNF)	r   r   r   r,   r   rG   r"   rE   rH   r#   r#   r2   r$   r      s*    r   c                       8   e Zd Z fddZdejdejdejfddZ  ZS )BrosSelfOutputc                    sB   t    t|j|j| _tj|j|jd| _t|j	| _
d S Nra   )r+   r,   r   rX   rl   densers   rt   ru   rv   rw   rN   r2   r#   r$   r,        
zBrosSelfOutput.__init__r   input_tensorr5   c                 C   &   |  |}| |}| || }|S rJ   r   rw   rs   r0   r   r   r#   r#   r$   rE        

zBrosSelfOutput.forwardrF   r#   r#   r2   r$   r         $r   c                       sl   e Zd Z fddZ				ddejdejdejdB dejdB d	ejdB d
edB deej fddZ  Z	S )BrosAttentionc                    s"   t    t|| _t|| _d S rJ   )r+   r,   r   r0   r   outputrN   r2   r#   r$   r,     s   

zBrosAttention.__init__NFr   rT   r   r   r   r   r5   c           
      C   s<   | j ||||||d}| |d |}|f|dd   }	|	S )Nr   rT   r   r   r   r   r   r   )r0   r   )
r0   r   rT   r   r   r   r   self_outputsattention_outputr   r#   r#   r$   rE     s   	zBrosAttention.forwardr   )
r   r   r   r,   r   rG   boolr"   rE   rH   r#   r#   r2   r$   r     s*    	r   c                       r%   )BrosIntermediatec                    sD   t    t|j|j| _t|jt	rt
|j | _d S |j| _d S rJ   )r+   r,   r   rX   rl   intermediate_sizer   
isinstance
hidden_actstrr   intermediate_act_fnrN   r2   r#   r$   r,   3  s
   
zBrosIntermediate.__init__r   r5   c                 C   s   |  |}| |}|S rJ   )r   r   )r0   r   r#   r#   r$   rE   ;  s   

zBrosIntermediate.forwardrF   r#   r#   r2   r$   r   2  s    r   c                       r   )
BrosOutputc                    sB   t    t|j|j| _tj|j|jd| _t	|j
| _d S r   )r+   r,   r   rX   r   rl   r   rs   rt   ru   rv   rw   rN   r2   r#   r$   r,   B  r   zBrosOutput.__init__r   r   r5   c                 C   r   rJ   r   r   r#   r#   r$   rE   H  r   zBrosOutput.forwardrF   r#   r#   r2   r$   r   A  r   r   c                       st   e Zd Z fddZ				ddejdejdejdB dejdB d	ejdB d
edB deej fddZ	dd Z
  ZS )	BrosLayerc                    sn   t    |j| _d| _t|| _|j| _|j| _| jr+| js&t|  dt|| _	t
|| _t|| _d S )Nr   z> should be used as a decoder model if cross attention is added)r+   r,   chunk_size_feed_forwardseq_len_dimr   	attentionr   add_cross_attention	Exceptioncrossattentionr   intermediater   r   rN   r2   r#   r$   r,   P  s   



zBrosLayer.__init__NFr   rT   r   r   r   r   r5   c                 C   s   | j ||||d}|d }| jr|dd }	n|dd  }	| jrG|d urGt| dr1td|  d| j|||||d}
|
d }|	|
dd  }	t| j| j| j|}|f|	 }	| jr]|	d	 }	|	S )
N)rT   r   r   r   r   r7   r   z'If `encoder_hidden_states` are passed, z` has to be instantiated with cross-attention layers by setting `config.add_cross_attention=True`)r   r   r   r   rJ   )	r   r   r}   r   r   r   feed_forward_chunkr   r   )r0   r   rT   r   r   r   r   self_attention_outputsr   r   cross_attention_outputslayer_outputr#   r#   r$   rE   ^  sD   	


zBrosLayer.forwardc                 C   s   |  |}| ||}|S rJ   )r   r   )r0   r   intermediate_outputr   r#   r#   r$   r     s   
zBrosLayer.feed_forward_chunkr   )r   r   r   r,   r   rG   r    r   r"   rE   r   rH   r#   r#   r2   r$   r   O  s,    
5r   c                       s   e Zd Z fddZe						ddejdejdejdB d	ejdB d
ejdB dedB dedB dedB de	ej e
B fddZ  ZS )BrosEncoderc                    s4   t     | _t fddt jD | _d S )Nc                    s   g | ]}t  qS r#   )r   ).0_r1   r#   r$   
<listcomp>  s    z(BrosEncoder.__init__.<locals>.<listcomp>)r+   r,   r1   r   
ModuleListrP   num_hidden_layerslayerrN   r2   r   r$   r,     s   
$zBrosEncoder.__init__NFTr   rT   r   r   r   r   output_hidden_statesreturn_dictr5   c	              	   C   s   |rdnd }	|r
dnd }
|r| j jrdnd }t| jD ]-\}}|r&|	|f }	|||||||d}|d }|rH|
|d f }
| j jrH||d f }q|rP|	|f }	t||	|
|dS )Nr#   r   r   r   r6   )last_hidden_stater   r   cross_attentions)r1   r   	enumerater   r	   )r0   r   rT   r   r   r   r   r   r   all_hidden_statesall_self_attentionsall_cross_attentionsrS   layer_modulelayer_outputsr#   r#   r$   rE     s8   
	
zBrosEncoder.forward)NNNFFT)r   r   r   r,   r   r   rG   r    r   r"   r	   rE   rH   r#   r#   r2   r$   r     s8    	
r   c                       r%   )
BrosPoolerc                    s*   t    t|j|j| _t | _d S rJ   )r+   r,   r   rX   rl   r   Tanh
activationrN   r2   r#   r$   r,     s   
zBrosPooler.__init__r   r5   c                 C   s(   |d d df }|  |}| |}|S )Nr   )r   r   )r0   r   first_token_tensorpooled_outputr#   r#   r$   rE     s   

zBrosPooler.forwardrF   r#   r#   r2   r$   r     s    r   c                       r%   )BrosRelationExtractorc                    s   t    |j| _|j| _|j| _|j| _t| j| _	t
| j| j| j | _t
| j| j| j | _ttd| j| _d S )Nr   )r+   r,   n_relationsrl   backbone_hidden_sizehead_hidden_sizeclassifier_dropout_probr   ru   droprX   r   r   	Parameterr   ry   
dummy_noderN   r2   r#   r$   r,     s   
zBrosRelationExtractor.__init__r   r   c              	   C   s   |  | |}| jdd|dd}tj||gdd}| | |}|	|d|d| j
| j}|	|d|d| j
| j}t|dddd|dddd}|S )Nr   r   axisr6   r   )r   r   r   	unsqueezerepeatr:   r   r<   r   r;   r   r   r   r   )r0   r   r   	dummy_vecrelation_scorer#   r#   r$   rE     s    zBrosRelationExtractor.forwardrF   r#   r#   r2   r$   r     s    r   c                       s:   e Zd ZU eed< dZe dej	f fddZ
  ZS )BrosPreTrainedModelr1   brosmodulec                    s   t  | | jj}t|trtj|j|d dS t|t	r7t
|jt|jjd d t|j dS t|trTddtd|jd|j   }t
|j| dS dS )	zInitialize the weights)stdr7   rd   r   r'   r(   r)   N)r+   _init_weightsr1   initializer_ranger   r   initnormal_r   r_   copy_rc   r   r.   r   rx   zeros_re   r&   r-   r*   )r0   r   r   r*   r2   r#   r$   r     s   

"
z!BrosPreTrainedModel._init_weights)r   r   r   r   r!   base_model_prefixr   no_gradr   Moduler   rH   r#   r#   r2   r$   r     s
   
 r   c                       s   e Zd Zd fdd	Zdd Zdd Zee											dd	ej	dB d
ej	dB dej	dB dej	dB dej	dB dej	dB dej	dB dej	dB de
dB de
dB de
dB deej	 eB fddZ  ZS )	BrosModelTc                    sN   t  | || _t|| _t|| _t|| _|rt	|nd| _
|   dS )zv
        add_pooling_layer (bool, *optional*, defaults to `True`):
            Whether to add a pooling layer
        N)r+   r,   r1   r_   r   rU   bbox_embeddingsr   encoderr   pooler	post_init)r0   r1   add_pooling_layerr2   r#   r$   r,     s   


zBrosModel.__init__c                 C   s   | j jS rJ   r   rn   )r0   r#   r#   r$   get_input_embeddings'  s   zBrosModel.get_input_embeddingsc                 C   s   || j _d S rJ   r	  )r0   r   r#   r#   r$   set_input_embeddings*  s   zBrosModel.set_input_embeddingsNr{   rO   r   re   rc   r|   r   r   r   r   r   r5   c              
   K   s  |	dur|	n| j j}	|
dur|
n| j j}
|dur|n| j j}|dur*|dur*td|dur3| }n|dur@| dd }ntd|du rLtd|\}}|durW|jn|j}|du retj||d}|du rt	| j
dr| j
jddd|f }|||}|}n	tj|tj|d}| ||}| j jr|dur| \}}}||f}|du rtj||d}| |}nd}| j
||||d	}|jd d
kr|ddddg df }|| j j }| |}| j||||||	|
dd}|d }| jdur| |nd}t|||j|j|jdS )a  
        bbox ('torch.FloatTensor' of shape '(batch_size, num_boxes, 4)'):
            Bounding box coordinates for each token in the input sequence. Each bounding box is a list of four values
            (x1, y1, x2, y2), where (x1, y1) is the top left corner, and (x2, y2) is the bottom right corner of the
            bounding box.

        Examples:

        ```python
        >>> import torch
        >>> from transformers import BrosProcessor, BrosModel

        >>> processor = BrosProcessor.from_pretrained("jinho8345/bros-base-uncased")

        >>> model = BrosModel.from_pretrained("jinho8345/bros-base-uncased")

        >>> encoding = processor("Hello, my dog is cute", add_special_tokens=False, return_tensors="pt")
        >>> bbox = torch.tensor([[[0, 0, 1, 1]]]).repeat(1, encoding["input_ids"].shape[-1], 1)
        >>> encoding["bbox"] = bbox

        >>> outputs = model(**encoding)
        >>> last_hidden_states = outputs.last_hidden_state
        ```NzDYou cannot specify both input_ids and inputs_embeds at the same timer7   z5You have to specify either input_ids or inputs_embedszYou have to specify bbox)rh   re   rf   )r{   rc   re   r|      )r   r   r6   r   r6   r   r   r   T)rT   r   r   r   r   r   r   r   )r   pooler_outputr   r   r   )r1   r   r   use_return_dictr   r:   rh   r   onesr}   r   re   rx   ry   rz   get_extended_attention_maskr   invert_attention_maskr   
bbox_scaler  r  r  r
   r   r   r   )r0   r{   rO   r   re   rc   r|   r   r   r   r   r   kwargsr~   r   r   rh   r   r   extended_attention_maskencoder_batch_sizeencoder_sequence_lengthr   encoder_hidden_shapeencoder_extended_attention_maskembedding_outputscaled_bboxbbox_position_embeddingsencoder_outputssequence_outputr   r#   r#   r$   rE   -  sx   (


zBrosModel.forward)TNNNNNNNNNNN)r   r   r   r,   r
  r  r   r   r   rG   r   r"   r
   rE   rH   r#   r#   r2   r$   r    sT    	
r  c                          e Zd ZdgZ fddZee											ddejdB dejdB dejdB dejdB d	ejdB d
ejdB dejdB dejdB de	dB de	dB de	dB de
ej eB fddZ  ZS )BrosForTokenClassificationr  c                    s^   t  | |j| _t|| _t|dr|jn|j}t	|| _
t|j|j| _|   d S Nclassifier_dropout)r+   r,   
num_labelsr  r   r}   r"  rv   r   ru   rw   rX   rl   
classifierr  r0   r1   r"  r2   r#   r$   r,     s   
z#BrosForTokenClassification.__init__Nr{   rO   r   bbox_first_token_maskre   rc   r|   labelsr   r   r   r5   c                 K   s   |dur|n| j j}| j|||||||	|
dd	}|d }| |}| |}d}|durWt }|durJ|d}||d| j| |d| }n||d| j|d}t|||j	|j
dS )a  
        bbox ('torch.FloatTensor' of shape '(batch_size, num_boxes, 4)'):
            Bounding box coordinates for each token in the input sequence. Each bounding box is a list of four values
            (x1, y1, x2, y2), where (x1, y1) is the top left corner, and (x2, y2) is the bottom right corner of the
            bounding box.
        bbox_first_token_mask (`torch.FloatTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Mask to indicate the first token of each bounding box. Mask values selected in `[0, 1]`:

            - 1 for tokens that are **not masked**,
            - 0 for tokens that are **masked**.

        Examples:

        ```python
        >>> import torch
        >>> from transformers import BrosProcessor, BrosForTokenClassification

        >>> processor = BrosProcessor.from_pretrained("jinho8345/bros-base-uncased")

        >>> model = BrosForTokenClassification.from_pretrained("jinho8345/bros-base-uncased")

        >>> encoding = processor("Hello, my dog is cute", add_special_tokens=False, return_tensors="pt")
        >>> bbox = torch.tensor([[[0, 0, 1, 1]]]).repeat(1, encoding["input_ids"].shape[-1], 1)
        >>> encoding["bbox"] = bbox

        >>> outputs = model(**encoding)
        ```NT)rO   r   re   rc   r|   r   r   r   r   r7   r   logitsr   r   )r1   r  r   rw   r$  r   r;   r#  r   r   r   )r0   r{   rO   r   r&  re   rc   r|   r'  r   r   r   r  r   r  r)  r   loss_fctr#   r#   r$   rE     s<   -


z"BrosForTokenClassification.forwardr  r   r   r   "_keys_to_ignore_on_load_unexpectedr,   r   r   r   rG   r   r"   r   rE   rH   r#   r#   r2   r$   r     sR    	
r   a  
    Bros Model with a token classification head on top (initial_token_layers and subsequent_token_layer on top of the
    hidden-states output) e.g. for Named-Entity-Recognition (NER) tasks. The initial_token_classifier is used to
    predict the first token of each entity, and the subsequent_token_classifier is used to predict the subsequent
    tokens within an entity. Compared to BrosForTokenClassification, this model is more robust to serialization errors
    since it predicts next token from one token.
    c                       s   e Zd ZdgZ fddZee												ddejdB dejdB dejdB dejdB d	ejdB d
ejdB dejdB dejdB dejdB de	dB de	dB de	dB de
ej eB fddZ  ZS )!BrosSpadeEEForTokenClassificationr  c              	      s   t  | || _|j| _|j| _|j| _t|| _t	|dr"|j
n|j}tt|t|j|jt|t|j|j| _t|| _|   d S r!  )r+   r,   r1   r#  r   rl   r   r  r   r}   r"  rv   r   
Sequentialru   rX   initial_token_classifierr   subsequent_token_classifierr  r%  r2   r#   r$   r,     s    

z*BrosSpadeEEForTokenClassification.__init__Nr{   rO   r   r&  re   rc   r|   initial_token_labelssubsequent_token_labelsr   r   r   r5   c                 K   s  |dur|n| j j}| j|||||||
|dd	}|d }|dd }| |dd }| ||d}d| }|j\}}|j	}t
j|t
|dg|gdd }||dddddf t
|jj}t
||d j|t
jd}||dddddf t
|jj}|d }d}|dur|	durt }|d}|dur|d}||d| j| || }n
||d| j|}|	d}	||d|d | |	| }|| }t||||j|jd	S )
a>  
        bbox ('torch.FloatTensor' of shape '(batch_size, num_boxes, 4)'):
            Bounding box coordinates for each token in the input sequence. Each bounding box is a list of four values
            (x1, y1, x2, y2), where (x1, y1) is the top left corner, and (x2, y2) is the bottom right corner of the
            bounding box.
        bbox_first_token_mask (`torch.FloatTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Mask to indicate the first token of each bounding box. Mask values selected in `[0, 1]`:

            - 1 for tokens that are **not masked**,
            - 0 for tokens that are **masked**.
        initial_token_labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for the initial token classification.
        subsequent_token_labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for the subsequent token classification.

        Examples:

        ```python
        >>> import torch
        >>> from transformers import BrosProcessor, BrosSpadeEEForTokenClassification

        >>> processor = BrosProcessor.from_pretrained("jinho8345/bros-base-uncased")

        >>> model = BrosSpadeEEForTokenClassification.from_pretrained("jinho8345/bros-base-uncased")

        >>> encoding = processor("Hello, my dog is cute", add_special_tokens=False, return_tensors="pt")
        >>> bbox = torch.tensor([[[0, 0, 1, 1]]]).repeat(1, encoding["input_ids"].shape[-1], 1)
        >>> encoding["bbox"] = bbox

        >>> outputs = model(**encoding)
        ```NT	r{   rO   r   re   rc   r|   r   r   r   r   r   r   rh   rg   r7   )r   r   r   r   r   )r1   r  r   r\   r   r/  r0  squeezer   rh   r   r<   ry   tor   masked_fillfinforg   mineyer;   r   r#  r   r   r   )r0   r{   rO   r   r&  re   rc   r|   r1  r2  r   r   r   r  r   last_hidden_statesr   r   inv_attention_maskr   max_seq_lengthrh   invalid_token_maskself_token_masksubsequent_token_maskr   r*  initial_token_losssubsequent_token_lossr#   r#   r$   rE   0  sh   2
&  


z)BrosSpadeEEForTokenClassification.forward)NNNNNNNNNNNN)r   r   r   r,  r,   r   r   r   rG   r   r"   r   rE   rH   r#   r#   r2   r$   r-    sX    
	
r-  z
    Bros Model with a token classification head on top (a entity_linker layer on top of the hidden-states output) e.g.
    for Entity-Linking. The entity_linker is used to predict intra-entity links (one entity to another entity).
    c                       r  )!BrosSpadeELForTokenClassificationr  c                    sx   t  | || _|j| _|j| _|j| _t|| _t	|dr"|j
n|j t|| _|   d S  t|| _|   d S r!  )r+   r,   r1   r#  r   rl   r   r  r   r}   r"  rv   r   entity_linkerr  rN   r2   r#   r$   r,     s   


z*BrosSpadeELForTokenClassification.__init__Nr{   rO   r   r&  re   rc   r|   r'  r   r   r   r5   c                 K   sP  |dur|n| j j}| j|||||||	|
dd	}|d }|dd }| ||d}d}|durt }|j\}}|j	}t
||d j|t
jd}|d}t
j| t
j|dgt
j|dgdd	}||dddddf t
|jj}||dddddf t
|jj}||d|d | |d| }t|||j|jd
S )a  
        bbox ('torch.FloatTensor' of shape '(batch_size, num_boxes, 4)'):
            Bounding box coordinates for each token in the input sequence. Each bounding box is a list of four values
            (x1, y1, x2, y2), where (x1, y1) is the top left corner, and (x2, y2) is the bottom right corner of the
            bounding box.
        bbox_first_token_mask (`torch.FloatTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Mask to indicate the first token of each bounding box. Mask values selected in `[0, 1]`:

            - 1 for tokens that are **not masked**,
            - 0 for tokens that are **masked**.

        Examples:

        ```python
        >>> import torch
        >>> from transformers import BrosProcessor, BrosSpadeELForTokenClassification

        >>> processor = BrosProcessor.from_pretrained("jinho8345/bros-base-uncased")

        >>> model = BrosSpadeELForTokenClassification.from_pretrained("jinho8345/bros-base-uncased")

        >>> encoding = processor("Hello, my dog is cute", add_special_tokens=False, return_tensors="pt")
        >>> bbox = torch.tensor([[[0, 0, 1, 1]]]).repeat(1, encoding["input_ids"].shape[-1], 1)
        >>> encoding["bbox"] = bbox

        >>> outputs = model(**encoding)
        ```NTr3  r   r   r4  r7   rf   r   r(  )r1   r  r   r\   r   rD  r5  r   r   rh   r   r:  r6  r   r;   r<   ry   r7  r8  rg   r9  r   r   r   )r0   r{   rO   r   r&  re   rc   r|   r'  r   r   r   r  r   r;  r)  r   r*  r   r=  rh   r?  maskr#   r#   r$   rE     sJ   ,

(($z)BrosSpadeELForTokenClassification.forwardr  r+  r#   r#   r2   r$   rC    sR    	
rC  )r   r  r   r-  rC  )6r   r   dataclassesr   r   r   torch.nnr    r   r   activationsr   modeling_layersr   modeling_outputsr	   r
   r   modeling_utilsr   pytorch_utilsr   utilsr   r   r   r   configuration_brosr   
get_loggerr   loggerr   r  r&   rI   rU   r_   r   r   r   r   r   r   r   r   r   r   r  r   r-  rC  __all__r#   r#   r#   r$   <module>   sd   
>SJ5" c	 n