"""PyTorch MarkupLM model."""

import math
import os
from typing import Optional, Union

import torch
import torch.utils.checkpoint
from torch import nn
from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss

from ...activations import ACT2FN
from ...modeling_layers import GradientCheckpointingLayer
from ...modeling_outputs import (
    BaseModelOutputWithPastAndCrossAttentions,
    BaseModelOutputWithPoolingAndCrossAttentions,
    MaskedLMOutput,
    QuestionAnsweringModelOutput,
    SequenceClassifierOutput,
    TokenClassifierOutput,
)
from ...modeling_utils import (
    PreTrainedModel,
    apply_chunking_to_forward,
    find_pruneable_heads_and_indices,
    prune_linear_layer,
)
from ...utils import auto_docstring, logging
from .configuration_markuplm import MarkupLMConfig


logger = logging.get_logger(__name__)


class XPathEmbeddings(nn.Module):
    """Construct the embeddings from xpath tags and subscripts.

    We drop tree-id in this version, as its info can be covered by xpath.
    """

    def __init__(self, config):
        super().__init__()
        self.max_depth = config.max_depth

        self.xpath_unitseq2_embeddings = nn.Linear(config.xpath_unit_hidden_size * self.max_depth, config.hidden_size)

        self.dropout = nn.Dropout(config.hidden_dropout_prob)

        self.activation = nn.ReLU()
        self.xpath_unitseq2_inner = nn.Linear(config.xpath_unit_hidden_size * self.max_depth, 4 * config.hidden_size)
        self.inner2emb = nn.Linear(4 * config.hidden_size, config.hidden_size)

        self.xpath_tag_sub_embeddings = nn.ModuleList(
            [
                nn.Embedding(config.max_xpath_tag_unit_embeddings, config.xpath_unit_hidden_size)
                for _ in range(self.max_depth)
            ]
        )

        self.xpath_subs_sub_embeddings = nn.ModuleList(
            [
                nn.Embedding(config.max_xpath_subs_unit_embeddings, config.xpath_unit_hidden_size)
                for _ in range(self.max_depth)
            ]
        )

    def forward(self, xpath_tags_seq=None, xpath_subs_seq=None):
        xpath_tags_embeddings = []
        xpath_subs_embeddings = []

        # Embed each depth level of the xpath separately, then concatenate
        # along the feature dimension.
        for i in range(self.max_depth):
            xpath_tags_embeddings.append(self.xpath_tag_sub_embeddings[i](xpath_tags_seq[:, :, i]))
            xpath_subs_embeddings.append(self.xpath_subs_sub_embeddings[i](xpath_subs_seq[:, :, i]))

        xpath_tags_embeddings = torch.cat(xpath_tags_embeddings, dim=-1)
        xpath_subs_embeddings = torch.cat(xpath_subs_embeddings, dim=-1)

        xpath_embeddings = xpath_tags_embeddings + xpath_subs_embeddings

        xpath_embeddings = self.inner2emb(self.dropout(self.activation(self.xpath_unitseq2_inner(xpath_embeddings))))

        return xpath_embeddings


def create_position_ids_from_input_ids(input_ids, padding_idx, past_key_values_length=0):
    """
    Replace non-padding symbols with their position numbers. Position numbers begin at padding_idx+1. Padding symbols
    are ignored. This is modified from fairseq's `utils.make_positions`.

    Args:
        x: torch.Tensor x:

    Returns: torch.Tensor
    """
    mask = input_ids.ne(padding_idx).int()
    incremental_indices = (torch.cumsum(mask, dim=1).type_as(mask) + past_key_values_length) * mask
    return incremental_indices.long() + padding_idx
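

# A worked toy example (an editorial sketch, not part of the original module):
# with padding_idx = 1, padding positions keep id 1 and real tokens count up
# from padding_idx + 1, so position ids stay aligned regardless of padding:
#
#     >>> ids = torch.tensor([[5, 6, 7, 1, 1]])
#     >>> create_position_ids_from_input_ids(ids, padding_idx=1)
#     tensor([[2, 3, 4, 1, 1]])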


class MarkupLMEmbeddings(nn.Module):
    """Construct the embeddings from word, position and token_type embeddings."""

    def __init__(self, config):
        super().__init__()
        self.config = config
        self.word_embeddings = nn.Embedding(config.vocab_size, config.hidden_size, padding_idx=config.pad_token_id)
        self.position_embeddings = nn.Embedding(config.max_position_embeddings, config.hidden_size)

        self.max_depth = config.max_depth

        self.xpath_embeddings = XPathEmbeddings(config)

        self.token_type_embeddings = nn.Embedding(config.type_vocab_size, config.hidden_size)

        self.LayerNorm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)

        self.register_buffer(
            "position_ids", torch.arange(config.max_position_embeddings).expand((1, -1)), persistent=False
        )

        self.padding_idx = config.pad_token_id
        self.position_embeddings = nn.Embedding(
            config.max_position_embeddings, config.hidden_size, padding_idx=self.padding_idx
        )

    def create_position_ids_from_inputs_embeds(self, inputs_embeds):
        """
        We are provided embeddings directly. We cannot infer which are padded so just generate sequential position ids.

        Args:
            inputs_embeds: torch.Tensor

        Returns: torch.Tensor
        """
        input_shape = inputs_embeds.size()[:-1]
        sequence_length = input_shape[1]

        position_ids = torch.arange(
            self.padding_idx + 1, sequence_length + self.padding_idx + 1, dtype=torch.long, device=inputs_embeds.device
        )
        return position_ids.unsqueeze(0).expand(input_shape)
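
    # Editorial note (not original code): forward() below builds four tensors
    # of identical shape (batch, seq_len, hidden_size) -- word, position,
    # token-type and xpath embeddings -- and simply sums them before LayerNorm
    # and dropout, exactly the BERT recipe plus the extra xpath term.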
    def forward(
        self,
        input_ids=None,
        xpath_tags_seq=None,
        xpath_subs_seq=None,
        token_type_ids=None,
        position_ids=None,
        inputs_embeds=None,
        past_key_values_length=0,
    ):
        if input_ids is not None:
            input_shape = input_ids.size()
        else:
            input_shape = inputs_embeds.size()[:-1]

        device = input_ids.device if input_ids is not None else inputs_embeds.device

        if position_ids is None:
            if input_ids is not None:
                position_ids = create_position_ids_from_input_ids(input_ids, self.padding_idx, past_key_values_length)
            else:
                position_ids = self.create_position_ids_from_inputs_embeds(inputs_embeds)

        if token_type_ids is None:
            token_type_ids = torch.zeros(input_shape, dtype=torch.long, device=device)

        if inputs_embeds is None:
            inputs_embeds = self.word_embeddings(input_ids)

        # prepare xpath seq: fall back to all-pad xpath ids when none are given
        if xpath_tags_seq is None:
            xpath_tags_seq = self.config.tag_pad_id * torch.ones(
                tuple(list(input_shape) + [self.max_depth]), dtype=torch.long, device=device
            )
        if xpath_subs_seq is None:
            xpath_subs_seq = self.config.subs_pad_id * torch.ones(
                tuple(list(input_shape) + [self.max_depth]), dtype=torch.long, device=device
            )

        words_embeddings = inputs_embeds
        position_embeddings = self.position_embeddings(position_ids)

        token_type_embeddings = self.token_type_embeddings(token_type_ids)

        xpath_embeddings = self.xpath_embeddings(xpath_tags_seq, xpath_subs_seq)
        embeddings = words_embeddings + position_embeddings + token_type_embeddings + xpath_embeddings

        embeddings = self.LayerNorm(embeddings)
        embeddings = self.dropout(embeddings)
        return embeddings
d S Nr\   )r)   r*   r   r,   r-   denserg   rh   r/   r0   r1   r:   r<   r   r%   r*         
zMarkupLMSelfOutput.__init__hidden_statesinput_tensorreturnc                 C   &   |  |}| |}| || }|S Nr   r1   rg   r;   r   r   r   r   r%   rJ         

zMarkupLMSelfOutput.forwardrK   rL   rM   r*   rB   TensorrJ   rO   r   r   r<   r%   r          $r   c                       2   e Zd Z fddZdejdejfddZ  ZS )MarkupLMIntermediatec                    sD   t    t|j|j| _t|jt	rt
|j | _d S |j| _d S r   )r)   r*   r   r,   r-   intermediate_sizer   
isinstance
hidden_actstrr	   intermediate_act_fnr:   r<   r   r%   r*      s
   
zMarkupLMIntermediate.__init__r   r   c                 C      |  |}| |}|S r   )r   r   r;   r   r   r   r%   rJ         

zMarkupLMIntermediate.forwardr   r   r   r<   r%   r      s    r   c                       r~   )MarkupLMOutputc                    sB   t    t|j|j| _tj|j|jd| _t	|j
| _d S r   )r)   r*   r   r,   r   r-   r   rg   rh   r/   r0   r1   r:   r<   r   r%   r*      r   zMarkupLMOutput.__init__r   r   r   c                 C   r   r   r   r   r   r   r%   rJ      r   zMarkupLMOutput.forwardr   r   r   r<   r%   r      r   r   c                       r   )MarkupLMPoolerc                    s*   t    t|j|j| _t | _d S r   )r)   r*   r   r,   r-   r   Tanhr3   r:   r<   r   r%   r*     s   
zMarkupLMPooler.__init__r   r   c                 C   s(   |d d df }|  |}| |}|S )Nr   )r   r3   )r;   r   first_token_tensorpooled_outputr   r   r%   rJ     s   

zMarkupLMPooler.forwardr   r   r   r<   r%   r     s    r   c                       r   )MarkupLMPredictionHeadTransformc                    sV   t    t|j|j| _t|jtrt	|j | _
n|j| _
tj|j|jd| _d S r   )r)   r*   r   r,   r-   r   r   r   r   r	   transform_act_fnrg   rh   r:   r<   r   r%   r*     s   
z(MarkupLMPredictionHeadTransform.__init__r   r   c                 C   s"   |  |}| |}| |}|S r   )r   r   rg   r   r   r   r%   rJ     s   


z'MarkupLMPredictionHeadTransform.forwardr   r   r   r<   r%   r     s    	r   c                       s,   e Zd Z fddZdd Zdd Z  ZS )MarkupLMLMPredictionHeadc                    sL   t    t|| _tj|j|jdd| _t	t
|j| _| j| j_d S )NF)bias)r)   r*   r   	transformr   r,   r-   r`   decoder	ParameterrB   ru   r   r:   r<   r   r%   r*   (  s
   

z!MarkupLMLMPredictionHead.__init__c                 C   s   | j | j_ d S r   )r   r   r;   r   r   r%   _tie_weights5  s   z%MarkupLMLMPredictionHead._tie_weightsc                 C   r   r   )r   r   r   r   r   r%   rJ   8  r   z MarkupLMLMPredictionHead.forward)rK   rL   rM   r*   r   rJ   rO   r   r   r<   r%   r   '  s    r   c                       r   )MarkupLMOnlyMLMHeadc                    s   t    t|| _d S r   )r)   r*   r   predictionsr:   r<   r   r%   r*   @  s   
zMarkupLMOnlyMLMHead.__init__sequence_outputr   c                 C   s   |  |}|S r   )r   )r;   r   prediction_scoresr   r   r%   rJ   D  s   
zMarkupLMOnlyMLMHead.forwardr   r   r   r<   r%   r   ?  s    r   c                       s   e Zd Zd fdd	ZdejdejfddZ						dd	ejd
eej deej deej deej dee	e	ej   dee
 de	ej fddZ  ZS )MarkupLMSelfAttentionNc                    s   t    |j|j dkrt|dstd|j d|j d|j| _t|j|j | _| j| j | _t	
|j| j| _t	
|j| j| _t	
|j| j| _t	|j| _|p\t|dd| _| jdksh| jd	kry|j| _t	d
|j d | j| _|j| _d S )Nr   embedding_sizezThe hidden size (z6) is not a multiple of the number of attention heads ()position_embedding_typeabsoluterelative_keyrelative_key_query   r   )r)   r*   r-   num_attention_headshasattr
            raise ValueError(
                f"The hidden size ({config.hidden_size}) is not a multiple of the number of attention heads "
                f"({config.num_attention_heads})"
            )

        self.num_attention_heads = config.num_attention_heads
        self.attention_head_size = int(config.hidden_size / config.num_attention_heads)
        self.all_head_size = self.num_attention_heads * self.attention_head_size

        self.query = nn.Linear(config.hidden_size, self.all_head_size)
        self.key = nn.Linear(config.hidden_size, self.all_head_size)
        self.value = nn.Linear(config.hidden_size, self.all_head_size)

        self.dropout = nn.Dropout(config.attention_probs_dropout_prob)
        self.position_embedding_type = position_embedding_type or getattr(
            config, "position_embedding_type", "absolute"
        )
        if self.position_embedding_type == "relative_key" or self.position_embedding_type == "relative_key_query":
            self.max_position_embeddings = config.max_position_embeddings
            self.distance_embedding = nn.Embedding(2 * config.max_position_embeddings - 1, self.attention_head_size)

        self.is_decoder = config.is_decoder

    def transpose_for_scores(self, x: torch.Tensor) -> torch.Tensor:
        new_x_shape = x.size()[:-1] + (self.num_attention_heads, self.attention_head_size)
        x = x.view(new_x_shape)
        return x.permute(0, 2, 1, 3)

    def forward(
        self,
        hidden_states: torch.Tensor,
        attention_mask: Optional[torch.FloatTensor] = None,
        head_mask: Optional[torch.FloatTensor] = None,
        encoder_hidden_states: Optional[torch.FloatTensor] = None,
        encoder_attention_mask: Optional[torch.FloatTensor] = None,
        past_key_value: Optional[tuple[tuple[torch.FloatTensor]]] = None,
        output_attentions: Optional[bool] = False,
    ) -> tuple[torch.Tensor]:
        mixed_query_layer = self.query(hidden_states)

        # If this is instantiated as a cross-attention module, the keys and
        # values come from an encoder; the attention mask needs to be such that
        # the encoder's padding tokens are not attended to.
        is_cross_attention = encoder_hidden_states is not None

        if is_cross_attention and past_key_value is not None:
            # reuse k,v, cross_attentions
            key_layer = past_key_value[0]
            value_layer = past_key_value[1]
            attention_mask = encoder_attention_mask
        elif is_cross_attention:
            key_layer = self.transpose_for_scores(self.key(encoder_hidden_states))
            value_layer = self.transpose_for_scores(self.value(encoder_hidden_states))
            attention_mask = encoder_attention_mask
        elif past_key_value is not None:
            key_layer = self.transpose_for_scores(self.key(hidden_states))
            value_layer = self.transpose_for_scores(self.value(hidden_states))
            key_layer = torch.cat([past_key_value[0], key_layer], dim=2)
            value_layer = torch.cat([past_key_value[1], value_layer], dim=2)
        else:
            key_layer = self.transpose_for_scores(self.key(hidden_states))
            value_layer = self.transpose_for_scores(self.value(hidden_states))

        query_layer = self.transpose_for_scores(mixed_query_layer)

        use_cache = past_key_value is not None
        if self.is_decoder:
            past_key_value = (key_layer, value_layer)

        # Take the dot product between "query" and "key" to get the raw attention scores.
        attention_scores = torch.matmul(query_layer, key_layer.transpose(-1, -2))

        if self.position_embedding_type == "relative_key" or self.position_embedding_type == "relative_key_query":
            query_length, key_length = query_layer.shape[2], key_layer.shape[2]
            if use_cache:
                position_ids_l = torch.tensor(key_length - 1, dtype=torch.long, device=hidden_states.device).view(
                    -1, 1
                )
            else:
                position_ids_l = torch.arange(query_length, dtype=torch.long, device=hidden_states.device).view(-1, 1)
            position_ids_r = torch.arange(key_length, dtype=torch.long, device=hidden_states.device).view(1, -1)
            distance = position_ids_l - position_ids_r

            positional_embedding = self.distance_embedding(distance + self.max_position_embeddings - 1)
            positional_embedding = positional_embedding.to(dtype=query_layer.dtype)  # fp16 compatibility

            if self.position_embedding_type == "relative_key":
                relative_position_scores = torch.einsum("bhld,lrd->bhlr", query_layer, positional_embedding)
                attention_scores = attention_scores + relative_position_scores
            elif self.position_embedding_type == "relative_key_query":
                relative_position_scores_query = torch.einsum("bhld,lrd->bhlr", query_layer, positional_embedding)
                relative_position_scores_key = torch.einsum("bhrd,lrd->bhlr", key_layer, positional_embedding)
                attention_scores = attention_scores + relative_position_scores_query + relative_position_scores_key

        attention_scores = attention_scores / math.sqrt(self.attention_head_size)
        if attention_mask is not None:
            # Apply the attention mask (precomputed for all layers in MarkupLMModel.forward)
            attention_scores = attention_scores + attention_mask

        # Normalize the attention scores to probabilities.
        attention_probs = nn.functional.softmax(attention_scores, dim=-1)

        # This is actually dropping out entire tokens to attend to, which might
        # seem a bit unusual, but is taken from the original Transformer paper.
        attention_probs = self.dropout(attention_probs)

        # Mask heads if we want to
        if head_mask is not None:
            attention_probs = attention_probs * head_mask

        context_layer = torch.matmul(attention_probs, value_layer)

        context_layer = context_layer.permute(0, 2, 1, 3).contiguous()
        new_context_layer_shape = context_layer.size()[:-2] + (self.all_head_size,)
        context_layer = context_layer.view(new_context_layer_shape)

        outputs = (context_layer, attention_probs) if output_attentions else (context_layer,)

        if self.is_decoder:
            outputs = outputs + (past_key_value,)
        return outputs


MARKUPLM_SELF_ATTENTION_CLASSES = {
    "eager": MarkupLMSelfAttention,
}


class MarkupLMAttention(nn.Module):
    def __init__(self, config, position_embedding_type=None):
        super().__init__()
        self.self = MARKUPLM_SELF_ATTENTION_CLASSES[config._attn_implementation](
            config, position_embedding_type=position_embedding_type
        )
        self.output = MarkupLMSelfOutput(config)
        self.pruned_heads = set()

    def prune_heads(self, heads):
        if len(heads) == 0:
            return
        heads, index = find_pruneable_heads_and_indices(
            heads, self.self.num_attention_heads, self.self.attention_head_size, self.pruned_heads
        )

        # Prune linear layers
        self.self.query = prune_linear_layer(self.self.query, index)
        self.self.key = prune_linear_layer(self.self.key, index)
        self.self.value = prune_linear_layer(self.self.value, index)
        self.output.dense = prune_linear_layer(self.output.dense, index, dim=1)

        # Update hyper params and store pruned heads
        self.self.num_attention_heads = self.self.num_attention_heads - len(heads)
        self.self.all_head_size = self.self.attention_head_size * self.self.num_attention_heads
        self.pruned_heads = self.pruned_heads.union(heads)

    def forward(
        self,
        hidden_states: torch.Tensor,
        attention_mask: Optional[torch.FloatTensor] = None,
        head_mask: Optional[torch.FloatTensor] = None,
        encoder_hidden_states: Optional[torch.FloatTensor] = None,
        encoder_attention_mask: Optional[torch.FloatTensor] = None,
        past_key_value: Optional[tuple[tuple[torch.FloatTensor]]] = None,
        output_attentions: Optional[bool] = False,
    ) -> tuple[torch.Tensor]:
        self_outputs = self.self(
            hidden_states,
            attention_mask,
            head_mask,
            encoder_hidden_states,
            encoder_attention_mask,
            past_key_value,
            output_attentions,
        )
        attention_output = self.output(self_outputs[0], hidden_states)
        outputs = (attention_output,) + self_outputs[1:]  # add attentions if we output them
        return outputs


class MarkupLMLayer(GradientCheckpointingLayer):
    def __init__(self, config):
        super().__init__()
        self.chunk_size_feed_forward = config.chunk_size_feed_forward
        self.seq_len_dim = 1
        self.attention = MarkupLMAttention(config)
        self.is_decoder = config.is_decoder
        self.add_cross_attention = config.add_cross_attention
        if self.add_cross_attention:
            if not self.is_decoder:
                raise ValueError(f"{self} should be used as a decoder model if cross attention is added")
            self.crossattention = MarkupLMAttention(config, position_embedding_type="absolute")
        self.intermediate = MarkupLMIntermediate(config)
        self.output = MarkupLMOutput(config)

    def forward(
        self,
        hidden_states: torch.Tensor,
        attention_mask: Optional[torch.FloatTensor] = None,
        head_mask: Optional[torch.FloatTensor] = None,
        encoder_hidden_states: Optional[torch.FloatTensor] = None,
        encoder_attention_mask: Optional[torch.FloatTensor] = None,
        past_key_value: Optional[tuple[tuple[torch.FloatTensor]]] = None,
        output_attentions: Optional[bool] = False,
    ) -> tuple[torch.Tensor]:
        # decoder uni-directional self-attention cached key/values tuple is at positions 1,2
        self_attn_past_key_value = past_key_value[:2] if past_key_value is not None else None
        self_attention_outputs = self.attention(
            hidden_states,
            attention_mask,
            head_mask,
            output_attentions=output_attentions,
            past_key_value=self_attn_past_key_value,
        )
        attention_output = self_attention_outputs[0]

        # if decoder, the last output is a tuple of self-attn cache
        if self.is_decoder:
            outputs = self_attention_outputs[1:-1]
            present_key_value = self_attention_outputs[-1]
        else:
            outputs = self_attention_outputs[1:]  # add self attentions if we output attention weights

        cross_attn_present_key_value = None
        if self.is_decoder and encoder_hidden_states is not None:
            if not hasattr(self, "crossattention"):
                raise ValueError(
                    f"If `encoder_hidden_states` are passed, {self} has to be instantiated with cross-attention"
                    " layers by setting `config.add_cross_attention=True`"
                )

            # cross_attn cached key/values tuple is at positions 3,4 of past_key_value tuple
            cross_attn_past_key_value = past_key_value[-2:] if past_key_value is not None else None
            cross_attention_outputs = self.crossattention(
                attention_output,
                attention_mask,
                head_mask,
                encoder_hidden_states,
                encoder_attention_mask,
                cross_attn_past_key_value,
                output_attentions,
            )
            attention_output = cross_attention_outputs[0]
            outputs = outputs + cross_attention_outputs[1:-1]  # add cross attentions if we output attention weights

            # add cross-attn cache to positions 3,4 of present_key_value tuple
            cross_attn_present_key_value = cross_attention_outputs[-1]
            present_key_value = present_key_value + cross_attn_present_key_value

        layer_output = apply_chunking_to_forward(
            self.feed_forward_chunk, self.chunk_size_feed_forward, self.seq_len_dim, attention_output
        )
        outputs = (layer_output,) + outputs

        # if decoder, return the attn key/values as the last output
        if self.is_decoder:
            outputs = outputs + (present_key_value,)

        return outputs

    def feed_forward_chunk(self, attention_output):
        intermediate_output = self.intermediate(attention_output)
        layer_output = self.output(attention_output, intermediate_output)
        return layer_output


class MarkupLMEncoder(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.config = config
        self.layer = nn.ModuleList([MarkupLMLayer(config) for _ in range(config.num_hidden_layers)])
        self.gradient_checkpointing = False

    def forward(
        self,
        hidden_states: torch.Tensor,
        attention_mask: Optional[torch.FloatTensor] = None,
        head_mask: Optional[torch.FloatTensor] = None,
        encoder_hidden_states: Optional[torch.FloatTensor] = None,
        encoder_attention_mask: Optional[torch.FloatTensor] = None,
        past_key_values: Optional[tuple[tuple[torch.FloatTensor]]] = None,
        use_cache: Optional[bool] = None,
        output_attentions: Optional[bool] = False,
        output_hidden_states: Optional[bool] = False,
        return_dict: Optional[bool] = True,
    ) -> Union[tuple[torch.Tensor], BaseModelOutputWithPastAndCrossAttentions]:
        all_hidden_states = () if output_hidden_states else None
        all_self_attentions = () if output_attentions else None
        all_cross_attentions = () if output_attentions and self.config.add_cross_attention else None

        if self.gradient_checkpointing and self.training:
            if use_cache:
                logger.warning_once(
                    "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
                )
                use_cache = False

        next_decoder_cache = () if use_cache else None
        for i, layer_module in enumerate(self.layer):
            if output_hidden_states:
                all_hidden_states = all_hidden_states + (hidden_states,)

            layer_head_mask = head_mask[i] if head_mask is not None else None
            past_key_value = past_key_values[i] if past_key_values is not None else None

            layer_outputs = layer_module(
                hidden_states,
                attention_mask,
                layer_head_mask,
                encoder_hidden_states,
                encoder_attention_mask,
                past_key_value,
                output_attentions,
            )

            hidden_states = layer_outputs[0]
            if use_cache:
                next_decoder_cache += (layer_outputs[-1],)
            if output_attentions:
                all_self_attentions = all_self_attentions + (layer_outputs[1],)
                if self.config.add_cross_attention:
                    all_cross_attentions = all_cross_attentions + (layer_outputs[2],)

        if output_hidden_states:
            all_hidden_states = all_hidden_states + (hidden_states,)

        if not return_dict:
            return tuple(
                v
                for v in [
                    hidden_states,
                    next_decoder_cache,
                    all_hidden_states,
                    all_self_attentions,
                    all_cross_attentions,
                ]
                if v is not None
            )
        return BaseModelOutputWithPastAndCrossAttentions(
            last_hidden_state=hidden_states,
            past_key_values=next_decoder_cache,
            hidden_states=all_hidden_states,
            attentions=all_self_attentions,
            cross_attentions=all_cross_attentions,
        )


@auto_docstring
class MarkupLMPreTrainedModel(PreTrainedModel):
    config_class = MarkupLMConfig
    base_model_prefix = "markuplm"

    def _init_weights(self, module):
        """Initialize the weights"""
        if isinstance(module, nn.Linear):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            if module.bias is not None:
                module.bias.data.zero_()
        elif isinstance(module, nn.Embedding):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            if module.padding_idx is not None:
                module.weight.data[module.padding_idx].zero_()
        elif isinstance(module, nn.LayerNorm):
            module.bias.data.zero_()
            module.weight.data.fill_(1.0)
        elif isinstance(module, MarkupLMLMPredictionHead):
            module.bias.data.zero_()

    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.PathLike]], *model_args, **kwargs):
        return super().from_pretrained(pretrained_model_name_or_path, *model_args, **kwargs)


@auto_docstring
class MarkupLMModel(MarkupLMPreTrainedModel):
    def __init__(self, config, add_pooling_layer=True):
        r"""
        add_pooling_layer (bool, *optional*, defaults to `True`):
            Whether to add a pooling layer
        """
        super().__init__(config)
        self.config = config

        self.embeddings = MarkupLMEmbeddings(config)
        self.encoder = MarkupLMEncoder(config)

        self.pooler = MarkupLMPooler(config) if add_pooling_layer else None

        # Initialize weights and apply final processing
        self.post_init()

    def get_input_embeddings(self):
        return self.embeddings.word_embeddings

    def set_input_embeddings(self, value):
        self.embeddings.word_embeddings = value

    def _prune_heads(self, heads_to_prune):
        """
        Prunes heads of the model. heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base
        class PreTrainedModel
        """
        for layer, heads in heads_to_prune.items():
            self.encoder.layer[layer].attention.prune_heads(heads)

    @auto_docstring
    def forward(
        self,
        input_ids: Optional[torch.LongTensor] = None,
        xpath_tags_seq: Optional[torch.LongTensor] = None,
        xpath_subs_seq: Optional[torch.LongTensor] = None,
        attention_mask: Optional[torch.FloatTensor] = None,
        token_type_ids: Optional[torch.LongTensor] = None,
        position_ids: Optional[torch.LongTensor] = None,
        head_mask: Optional[torch.FloatTensor] = None,
        inputs_embeds: Optional[torch.FloatTensor] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ) -> Union[tuple, BaseModelOutputWithPoolingAndCrossAttentions]:
        r"""
        xpath_tags_seq (`torch.LongTensor` of shape `(batch_size, sequence_length, config.max_depth)`, *optional*):
            Tag IDs for each token in the input sequence, padded up to config.max_depth.
        xpath_subs_seq (`torch.LongTensor` of shape `(batch_size, sequence_length, config.max_depth)`, *optional*):
            Subscript IDs for each token in the input sequence, padded up to config.max_depth.

        Examples:

        ```python
        >>> from transformers import AutoProcessor, MarkupLMModel

        >>> processor = AutoProcessor.from_pretrained("microsoft/markuplm-base")
        >>> model = MarkupLMModel.from_pretrained("microsoft/markuplm-base")

        >>> html_string = "<html> <head> <title>Page Title</title> </head> </html>"

        >>> encoding = processor(html_string, return_tensors="pt")

        >>> outputs = model(**encoding)
        >>> last_hidden_states = outputs.last_hidden_state
        >>> list(last_hidden_states.shape)
        [1, 4, 768]
        ```"""
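        # Editorial note (not original code): the 2D attention_mask is expanded
        # below to (batch, 1, 1, seq_len) and mapped {1 -> 0.0, 0 -> -10000.0},
        # so it can simply be *added* to the raw attention scores; e.g. a mask
        # row [1, 1, 0] becomes [[[0.0, 0.0, -10000.0]]] and the padding
        # position vanishes after the softmax.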
        output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
        output_hidden_states = (
            output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
        )
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        if input_ids is not None and inputs_embeds is not None:
            raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time")
        elif input_ids is not None:
            self.warn_if_padding_and_no_attention_mask(input_ids, attention_mask)
            input_shape = input_ids.size()
        elif inputs_embeds is not None:
            input_shape = inputs_embeds.size()[:-1]
        else:
            raise ValueError("You have to specify either input_ids or inputs_embeds")

        device = input_ids.device if input_ids is not None else inputs_embeds.device

        if attention_mask is None:
            attention_mask = torch.ones(input_shape, device=device)

        if token_type_ids is None:
            token_type_ids = torch.zeros(input_shape, dtype=torch.long, device=device)

        extended_attention_mask = attention_mask.unsqueeze(1).unsqueeze(2)
        extended_attention_mask = extended_attention_mask.to(dtype=self.dtype)
        extended_attention_mask = (1.0 - extended_attention_mask) * -10000.0

        if head_mask is not None:
            if head_mask.dim() == 1:
                head_mask = head_mask.unsqueeze(0).unsqueeze(0).unsqueeze(-1).unsqueeze(-1)
                head_mask = head_mask.expand(self.config.num_hidden_layers, -1, -1, -1, -1)
            elif head_mask.dim() == 2:
                head_mask = head_mask.unsqueeze(1).unsqueeze(-1).unsqueeze(-1)
            head_mask = head_mask.to(dtype=next(self.parameters()).dtype)
        else:
            head_mask = [None] * self.config.num_hidden_layers

        embedding_output = self.embeddings(
            input_ids=input_ids,
            xpath_tags_seq=xpath_tags_seq,
            xpath_subs_seq=xpath_subs_seq,
            position_ids=position_ids,
            token_type_ids=token_type_ids,
            inputs_embeds=inputs_embeds,
        )
        encoder_outputs = self.encoder(
            embedding_output,
            extended_attention_mask,
            head_mask=head_mask,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )
        sequence_output = encoder_outputs[0]

        pooled_output = self.pooler(sequence_output) if self.pooler is not None else None

        if not return_dict:
            return (sequence_output, pooled_output) + encoder_outputs[1:]

        return BaseModelOutputWithPoolingAndCrossAttentions(
            last_hidden_state=sequence_output,
            pooler_output=pooled_output,
            hidden_states=encoder_outputs.hidden_states,
            attentions=encoder_outputs.attentions,
            cross_attentions=encoder_outputs.cross_attentions,
        )

    def _reorder_cache(self, past_key_values, beam_idx):
        reordered_past = ()
        for layer_past in past_key_values:
            reordered_past += (
                tuple(past_state.index_select(0, beam_idx.to(past_state.device)) for past_state in layer_past),
            )
        return reordered_past


@auto_docstring
class MarkupLMForQuestionAnswering(MarkupLMPreTrainedModel):
    def __init__(self, config):
        super().__init__(config)
        self.num_labels = config.num_labels

        self.markuplm = MarkupLMModel(config, add_pooling_layer=False)
        self.qa_outputs = nn.Linear(config.hidden_size, config.num_labels)

        # Initialize weights and apply final processing
        self.post_init()

    @auto_docstring
    def forward(
        self,
        input_ids: Optional[torch.Tensor] = None,
        xpath_tags_seq: Optional[torch.Tensor] = None,
        xpath_subs_seq: Optional[torch.Tensor] = None,
        attention_mask: Optional[torch.Tensor] = None,
        token_type_ids: Optional[torch.Tensor] = None,
        position_ids: Optional[torch.Tensor] = None,
        head_mask: Optional[torch.Tensor] = None,
        inputs_embeds: Optional[torch.Tensor] = None,
        start_positions: Optional[torch.Tensor] = None,
        end_positions: Optional[torch.Tensor] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ) -> Union[tuple[torch.Tensor], QuestionAnsweringModelOutput]:
        r"""
        xpath_tags_seq (`torch.LongTensor` of shape `(batch_size, sequence_length, config.max_depth)`, *optional*):
            Tag IDs for each token in the input sequence, padded up to config.max_depth.
        xpath_subs_seq (`torch.LongTensor` of shape `(batch_size, sequence_length, config.max_depth)`, *optional*):
            Subscript IDs for each token in the input sequence, padded up to config.max_depth.

        Examples:

        ```python
        >>> from transformers import AutoProcessor, MarkupLMForQuestionAnswering
        >>> import torch

        >>> processor = AutoProcessor.from_pretrained("microsoft/markuplm-base-finetuned-websrc")
        >>> model = MarkupLMForQuestionAnswering.from_pretrained("microsoft/markuplm-base-finetuned-websrc")

        >>> html_string = "<html> <head> <title>My name is Niels</title> </head> </html>"
        >>> question = "What's his name?"

        >>> encoding = processor(html_string, questions=question, return_tensors="pt")

        >>> with torch.no_grad():
        ...     outputs = model(**encoding)

        >>> answer_start_index = outputs.start_logits.argmax()
        >>> answer_end_index = outputs.end_logits.argmax()

        >>> predict_answer_tokens = encoding.input_ids[0, answer_start_index : answer_end_index + 1]
        >>> processor.decode(predict_answer_tokens).strip()
        'Niels'
        ```"""
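        # Editorial note (not original code): qa_outputs maps each token to two
        # scores; split(1, dim=-1) yields start and end logits of shape
        # (batch, seq_len). Targets are clamped to ignored_index = seq_len so
        # that answers falling outside the (possibly truncated) input
        # contribute no loss via CrossEntropyLoss(ignore_index=ignored_index).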
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        outputs = self.markuplm(
            input_ids,
            xpath_tags_seq=xpath_tags_seq,
            xpath_subs_seq=xpath_subs_seq,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            head_mask=head_mask,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )

        sequence_output = outputs[0]

        logits = self.qa_outputs(sequence_output)
        start_logits, end_logits = logits.split(1, dim=-1)
        start_logits = start_logits.squeeze(-1).contiguous()
        end_logits = end_logits.squeeze(-1).contiguous()

        total_loss = None
        if start_positions is not None and end_positions is not None:
            # If we are on multi-GPU, split adds a dimension
            if len(start_positions.size()) > 1:
                start_positions = start_positions.squeeze(-1)
            if len(end_positions.size()) > 1:
                end_positions = end_positions.squeeze(-1)
            # sometimes the start/end positions are outside our model inputs, we ignore these terms
            ignored_index = start_logits.size(1)
            start_positions = start_positions.clamp_(0, ignored_index)
            end_positions = end_positions.clamp_(0, ignored_index)

            loss_fct = CrossEntropyLoss(ignore_index=ignored_index)
            start_loss = loss_fct(start_logits, start_positions)
            end_loss = loss_fct(end_logits, end_positions)
            total_loss = (start_loss + end_loss) / 2

        if not return_dict:
            output = (start_logits, end_logits) + outputs[2:]
            return ((total_loss,) + output) if total_loss is not None else output

        return QuestionAnsweringModelOutput(
            loss=total_loss,
            start_logits=start_logits,
            end_logits=end_logits,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
        )


@auto_docstring(
    custom_intro="""
    MarkupLM Model with a `token_classification` head on top.
    """
)
class MarkupLMForTokenClassification(MarkupLMPreTrainedModel):
    def __init__(self, config):
        super().__init__(config)
        self.num_labels = config.num_labels
        self.markuplm = MarkupLMModel(config, add_pooling_layer=False)
        classifier_dropout = (
            config.classifier_dropout if config.classifier_dropout is not None else config.hidden_dropout_prob
        )
        self.dropout = nn.Dropout(classifier_dropout)
        self.classifier = nn.Linear(config.hidden_size, config.num_labels)

        # Initialize weights and apply final processing
        self.post_init()

    @auto_docstring
    def forward(
        self,
        input_ids: Optional[torch.Tensor] = None,
        xpath_tags_seq: Optional[torch.Tensor] = None,
        xpath_subs_seq: Optional[torch.Tensor] = None,
        attention_mask: Optional[torch.Tensor] = None,
        token_type_ids: Optional[torch.Tensor] = None,
        position_ids: Optional[torch.Tensor] = None,
        head_mask: Optional[torch.Tensor] = None,
        inputs_embeds: Optional[torch.Tensor] = None,
        labels: Optional[torch.Tensor] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ) -> Union[tuple[torch.Tensor], TokenClassifierOutput]:
        r"""
        xpath_tags_seq (`torch.LongTensor` of shape `(batch_size, sequence_length, config.max_depth)`, *optional*):
            Tag IDs for each token in the input sequence, padded up to config.max_depth.
        xpath_subs_seq (`torch.LongTensor` of shape `(batch_size, sequence_length, config.max_depth)`, *optional*):
            Subscript IDs for each token in the input sequence, padded up to config.max_depth.
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.

        Examples:

        ```python
        >>> from transformers import AutoProcessor, AutoModelForTokenClassification
        >>> import torch

        >>> processor = AutoProcessor.from_pretrained("microsoft/markuplm-base")
        >>> processor.parse_html = False
        >>> model = AutoModelForTokenClassification.from_pretrained("microsoft/markuplm-base", num_labels=7)

        >>> nodes = ["hello", "world"]
        >>> xpaths = ["/html/body/div/li[1]/div/span", "/html/body/div/li[1]/div/span"]
        >>> node_labels = [1, 2]
        >>> encoding = processor(nodes=nodes, xpaths=xpaths, node_labels=node_labels, return_tensors="pt")

        >>> with torch.no_grad():
        ...     outputs = model(**encoding)

        >>> loss = outputs.loss
        >>> logits = outputs.logits
        ```"""
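        # Editorial note (not original code): token classification flattens the
        # (batch, seq_len, num_labels) scores to (batch * seq_len, num_labels)
        # and the labels to (batch * seq_len,) before CrossEntropyLoss -- the
        # standard per-token loss layout.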
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        outputs = self.markuplm(
            input_ids,
            xpath_tags_seq=xpath_tags_seq,
            xpath_subs_seq=xpath_subs_seq,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            head_mask=head_mask,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )

        sequence_output = outputs[0]
        prediction_scores = self.classifier(sequence_output)  # (batch_size, seq_length, node_type_size)

        loss = None
        if labels is not None:
            loss_fct = CrossEntropyLoss()
            loss = loss_fct(
                prediction_scores.view(-1, self.config.num_labels),
                labels.view(-1),
            )

        if not return_dict:
            output = (prediction_scores,) + outputs[2:]
            return ((loss,) + output) if loss is not None else output

        return TokenClassifierOutput(
            loss=loss,
            logits=prediction_scores,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
        )


@auto_docstring(
    custom_intro="""
    MarkupLM Model transformer with a sequence classification/regression head on top (a linear layer on top of the
    pooled output) e.g. for GLUE tasks.
    c                       rk  )!MarkupLMForSequenceClassificationc                    sd   t  | |j| _|| _t|| _|jd ur|jn|j}t	|| _
t|j|j| _|   d S r   )r)   r*   rX  r$   r;  r%  rm  r0   r   r/   r1   r,   r-   rn  r>  ro  r<   r   r%   r*   G  s   
z*MarkupLMForSequenceClassification.__init__NrU   rD   rE   r   r{   r^   r   rq   rp  r   r  r  r   c                 C   sv  |dur|n| j j}| j|||||||||
||d}|d }| |}| |}d}|	dur| j jdu rX| jdkr>d| j _n| jdkrT|	jtj	ksO|	jtj
krTd| j _nd| j _| j jdkrvt }| jdkrp|| |	 }n+|||	}n%| j jdkrt }||d| j|	d}n| j jdkrt }|||	}|s|f|dd  }|dur|f| S |S t|||j|jd	S )
a  
        xpath_tags_seq (`torch.LongTensor` of shape `(batch_size, sequence_length, config.max_depth)`, *optional*):
            Tag IDs for each token in the input sequence, padded up to config.max_depth.
        xpath_subs_seq (`torch.LongTensor` of shape `(batch_size, sequence_length, config.max_depth)`, *optional*):
            Subscript IDs for each token in the input sequence, padded up to config.max_depth.
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).

        Examples:

        ```python
        >>> from transformers import AutoProcessor, AutoModelForSequenceClassification
        >>> import torch

        >>> processor = AutoProcessor.from_pretrained("microsoft/markuplm-base")
        >>> model = AutoModelForSequenceClassification.from_pretrained("microsoft/markuplm-base", num_labels=7)

        >>> html_string = "<html> <head> <title>Page Title</title> </head> </html>"
        >>> encoding = processor(html_string, return_tensors="pt")

        >>> with torch.no_grad():
        ...     outputs = model(**encoding)

        >>> loss = outputs.loss
        >>> logits = outputs.logits
        ```"""
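        # Editorial note (not original code): when config.problem_type is unset
        # it is inferred from the labels -- a single label -> "regression"
        # (MSELoss), integer labels -> "single_label_classification"
        # (CrossEntropyLoss), otherwise "multi_label_classification"
        # (BCEWithLogitsLoss).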
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        outputs = self.markuplm(
            input_ids,
            xpath_tags_seq=xpath_tags_seq,
            xpath_subs_seq=xpath_subs_seq,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            head_mask=head_mask,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )

        pooled_output = outputs[1]

        pooled_output = self.dropout(pooled_output)
        logits = self.classifier(pooled_output)

        loss = None
        if labels is not None:
            if self.config.problem_type is None:
                if self.num_labels == 1:
                    self.config.problem_type = "regression"
                elif self.num_labels > 1 and (labels.dtype == torch.long or labels.dtype == torch.int):
                    self.config.problem_type = "single_label_classification"
                else:
                    self.config.problem_type = "multi_label_classification"

            if self.config.problem_type == "regression":
                loss_fct = MSELoss()
                if self.num_labels == 1:
                    loss = loss_fct(logits.squeeze(), labels.squeeze())
                else:
                    loss = loss_fct(logits, labels)
            elif self.config.problem_type == "single_label_classification":
                loss_fct = CrossEntropyLoss()
                loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
            elif self.config.problem_type == "multi_label_classification":
                loss_fct = BCEWithLogitsLoss()
                loss = loss_fct(logits, labels)
        if not return_dict:
            output = (logits,) + outputs[2:]
            return ((loss,) + output) if loss is not None else output

        return SequenceClassifierOutput(
            loss=loss,
            logits=logits,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
        )


__all__ = [
    "MarkupLMForQuestionAnswering",
    "MarkupLMForSequenceClassification",
    "MarkupLMForTokenClassification",
    "MarkupLMModel",
    "MarkupLMPreTrainedModel",
]