o
    ߥi|(                     @   s   d dl mZmZmZ d dlZd dlZd dlmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZ d dlmZ d d	lmZ d
dlmZmZ ejejejdG dd deZG dd dejZG dd dejZG dd dejZdS )    )OptionalTupleUnionN)nn)CrossEntropyLoss)ACT2FN)Models)MODELS)AttentionFillMaskModelOutput)Tasks   )DebertaV2ModelDebertaV2PreTrainedModel)module_namec                       s   e Zd ZdZdgZddgZ fddZdd Zd	d
 Z									dde	e
j de	e
j de	e
j de	e
j de	e
j de	e
j de	e de	e de	e deeef fddZ  ZS )DebertaV2ForMaskedLMa  DeBERTa_v2 Model with a `language modeling` head on top.

    The DeBERTa model was proposed in [DeBERTa: Decoding-enhanced BERT with Disentangled
    Attention](https://arxiv.org/abs/2006.03654) by Pengcheng He, Xiaodong Liu, Jianfeng Gao, Weizhu Chen. It's build
    on top of BERT/RoBERTa with two improvements, i.e. disentangled attention and enhanced mask decoder. With those two
    improvements, it out perform BERT/RoBERTa on a majority of tasks with 80GB pretraining data.

    This model is also a PyTorch [torch.nn.Module](https://pytorch.org/docs/stable/nn.html#torch.nn.Module) subclass.
    Use it as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to general usage
    and behavior.

    Preprocessor:
        This is the fill_mask model of Deberta_v2, the preprocessor of this model
        is `modelscope.preprocessors.FillMaskTransformersPreprocessor`.

    Parameters:
        config (`DebertaV2Config`): Model configuration class with all the parameters of the model.
            Initializing with a config file does not load the weights associated with the model, only the
            configuration.
    poolerposition_idszpredictions.decoder.biasc                    s,   t  | t|| _t|| _|   d S N)super__init__r   debertaDebertaV2OnlyMLMHeadcls	post_init)selfconfigkwargs	__class__ ^/home/ubuntu/.local/lib/python3.10/site-packages/modelscope/models/nlp/deberta_v2/fill_mask.pyr   <   s   

zDebertaV2ForMaskedLM.__init__c                 C   s
   | j jjS r   r   predictionsdecoder)r   r   r   r    get_output_embeddingsE   s   
z*DebertaV2ForMaskedLM.get_output_embeddingsc                 C   s   || j j_d S r   r!   )r   new_embeddingsr   r   r    set_output_embeddingsH   s   z*DebertaV2ForMaskedLM.set_output_embeddingsN	input_idsattention_masktoken_type_idsinputs_embedslabelsoutput_attentionsoutput_hidden_statesreturn_dictreturnc
              
   C   s   |	dur|	n| j j}	| j||||||||	d}
|
d }| |}d}|dur7t }||d| j j|d}|	sM|f|
dd  }|durK|f| S |S t||||
j|
j	dS )uD  
        Args:
            input_ids (`torch.LongTensor` of shape `('batch_size, sequence_length')`):
                Indices of input sequence tokens in the vocabulary.

            attention_mask (`torch.FloatTensor` of shape `('batch_size, sequence_length')`, *optional*):
                Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`:

                - 1 for tokens that are **not masked**,
                - 0 for tokens that are **masked**.

            token_type_ids (`torch.LongTensor` of shape `('batch_size, sequence_length')`, *optional*):
                Segment token indices to indicate first and second portions of the inputs. Indices are selected in `[0,
                1]`:

                - 0 corresponds to a *sentence A* token,
                - 1 corresponds to a *sentence B* token.

            position_ids (`torch.LongTensor` of shape `('batch_size, sequence_length')`, *optional*):
                Indices of positions of each input sequence tokens in the position embeddings.
                Selected in the range `[0, config.max_position_embeddings - 1]`.

            inputs_embeds (`torch.FloatTensor` of shape `('batch_size, sequence_length', hidden_size)`, *optional*):
                Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation.
                This is useful if you want more control over how to convert *input_ids* indices into associated
                vectors than the model's internal embedding lookup matrix.
            output_attentions (`bool`, *optional*):
                Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned
                tensors for more detail.
            output_hidden_states (`bool`, *optional*):
                Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
                more detail.
            return_dict (`bool`, *optional*):
                Whether or not to return a dataclass instead of a plain tuple.
            labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
                Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
                config.vocab_size]` (see `input_ids` docstring) Tokens with indices set to `-100` are
                ignored (masked), the loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`

        Returns:
            Returns `modelscope.outputs.AttentionFillMaskModelOutput`

        Examples:
            >>> from modelscope.models import Model
            >>> from modelscope.preprocessors import Preprocessor
            >>> model = Model.from_pretrained('damo/nlp_debertav2_fill-mask_chinese-lite')
            >>> preprocessor = Preprocessor.from_pretrained('damo/nlp_debertav2_fill-mask_chinese-lite')
            >>> # Call the model, return some tensors
            >>> print(model(**preprocessor('你师父差得动你，你师父可[MASK]不动我。')))
            >>> # Call the pipeline
            >>> from modelscope.pipelines import pipeline
            >>> pipeline_ins = pipeline('fill-mask', model=model, preprocessor=preprocessor)
            >>> print(pipeline_ins('你师父差得动你，你师父可[MASK]不动我。'))
        N)r(   r)   r   r*   r,   r-   r.   r   r   )losslogitsr'   
attentionshidden_states)
r   use_return_dictr   r   r   view
vocab_sizer
   r3   r4   )r   r'   r(   r)   r   r*   r+   r,   r-   r.   outputssequence_outputprediction_scoresmasked_lm_lossloss_fctoutputr   r   r    forwardK   sF   C
zDebertaV2ForMaskedLM.forward)	NNNNNNNNN)__name__
__module____qualname____doc__"_keys_to_ignore_on_load_unexpected_keys_to_ignore_on_load_missingr   r$   r&   r   torchTensorboolr   r   r
   r>   __classcell__r   r   r   r    r       sL    		

r   c                       $   e Zd Z fddZdd Z  ZS ) DebertaV2PredictionHeadTransformc                    sV   t    t|j|j| _t|jtrt	|j | _
n|j| _
tj|j|jd| _d S )N)eps)r   r   r   Linearhidden_sizedense
isinstance
hidden_actstrr   transform_act_fn	LayerNormlayer_norm_epsr   r   r   r   r    r      s   
z)DebertaV2PredictionHeadTransform.__init__c                 C   s"   |  |}| |}| |}|S r   )rN   rR   rS   r   r4   r   r   r    r>      s   


z(DebertaV2PredictionHeadTransform.forwardr?   r@   rA   r   r>   rH   r   r   r   r    rJ      s    
rJ   c                       rI   )DebertaV2LMPredictionHeadc                    sL   t    t|| _tj|j|jdd| _t	t
|j| _| j| j_d S )NF)bias)r   r   rJ   	transformr   rL   rM   r7   r#   	ParameterrE   zerosrY   rU   r   r   r    r      s   


z"DebertaV2LMPredictionHead.__init__c                 C   s   |  |}| |}|S r   )rZ   r#   rV   r   r   r    r>      s   

z!DebertaV2LMPredictionHead.forwardrW   r   r   r   r    rX      s    rX   c                       rI   )r   c                    s   t    t|| _d S r   )r   r   rX   r"   rU   r   r   r    r      s   
zDebertaV2OnlyMLMHead.__init__c                 C   s   |  |}|S r   )r"   )r   r9   r:   r   r   r    r>      s   
zDebertaV2OnlyMLMHead.forwardrW   r   r   r   r    r      s    r   )typingr   r   r   rE   torch.utils.checkpointr   torch.nnr   transformers.activationsr   modelscope.metainfor   modelscope.models.builderr	   modelscope.outputsr
   modelscope.utils.constantr   backboner   r   register_module	fill_mask
deberta_v2r   ModulerJ   rX   r   r   r   r   r    <module>   s"    