o
    ߥi#                     @   s4  d dl mZ d dlZd dlm  mZ d dlmZ d dlmZ d dl	m
Z
mZ d dlmZ d dlmZ d dlmZ d d	lmZmZmZ d d
lmZ ejejejdejejejdG dd deZejejejdG dd deZG dd dejZG dd dejZ G dd dejZ!G dd dejZ"dS )    )DictN)nn)CrossEntropyLoss)ACT2FNgelu)Heads)	TorchHead)HEADS)AttentionFillMaskModelOutputModelOutputBase
OutputKeys)Tasks)module_namec                       sP   e Zd Z				d fdd	Z		ddefd	d
ZdejdejfddZ  Z	S )BertFillMaskHead   r   -q=:w  c                    $   t  j||||d t| j| _d S N)hidden_size
hidden_actlayer_norm_eps
vocab_size)super__init__BertOnlyMLMHeadconfigclsselfr   r   r   r   kwargs	__class__ ^/home/ubuntu/.local/lib/python3.10/site-packages/modelscope/models/nlp/heads/fill_mask_head.pyr   &      zBertFillMaskHead.__init__Ninputsc                 K   8   |  |j}d }|d ur| ||}t|||j|jdS N)losslogitshidden_states
attentions)r   last_hidden_statecompute_lossr
   r+   r,   r   r&   attention_masklabelsr    r*   r)   r#   r#   r$   forward3      zBertFillMaskHead.forwardr*   returnc                 C   &   t  }||d| jj|d}|S Nr   viewr   r   r   r*   r1   loss_fctmasked_lm_lossr#   r#   r$   r.   C   
   zBertFillMaskHead.compute_loss)r   r   r   r   NN)
__name__
__module____qualname__r   r   r2   torchTensorr.   __classcell__r#   r#   r!   r$   r   "   s    
r   c                       s`   e Zd ZddgZ				d fdd	Z				dd
efddZdejdejfddZ	dd Z
  ZS )XlmRobertaMaskHeadzlm_head.decoder.weightzlm_head.decoder.bias   r   h㈵>1 c                    r   r   )r   r   XLMRobertaLMHeadr   lm_headr   r!   r#   r$   r   P   r%   zXlmRobertaMaskHead.__init__Nr&   c                 K   r'   r(   )rJ   r-   r.   r
   r+   r,   r/   r#   r#   r$   r2   ]   r3   zXlmRobertaMaskHead.forwardr*   r4   c                 C   r5   r6   r8   r:   r#   r#   r$   r.   m   r=   zXlmRobertaMaskHead.compute_lossc                 C   s   | j jS N)rJ   decoderr   r#   r#   r$   get_output_embeddingss   s   z(XlmRobertaMaskHead.get_output_embeddings)rF   r   rG   rH   r>   )r?   r@   rA   _keys_to_ignore_on_load_missingr   r   r2   rB   rC   r.   rN   rD   r#   r#   r!   r$   rE   J   s    
rE   c                       $   e Zd Z fddZdd Z  ZS )BertPredictionHeadTransformc                    sV   t    t|j|j| _t|jtrt	|j | _
n|j| _
tj|j|jd| _d S N)eps)r   r   r   Linearr   dense
isinstancer   strr   transform_act_fn	LayerNormr   r   r   r!   r#   r$   r   y   s   
z$BertPredictionHeadTransform.__init__c                 C   s"   |  |}| |}| |}|S rK   )rU   rX   rY   r   r+   r#   r#   r$   r2      s   


z#BertPredictionHeadTransform.forwardr?   r@   rA   r   r2   rD   r#   r#   r!   r$   rQ   w   s    
rQ   c                       rP   )BertLMPredictionHeadc                    sL   t    t|| _tj|j|jdd| _t	t
|j| _| j| j_d S )NF)bias)r   r   rQ   	transformr   rT   r   r   rL   	ParameterrB   zerosr^   rZ   r!   r#   r$   r      s   


zBertLMPredictionHead.__init__c                 C   s   |  |}| |}|S rK   )r_   rL   r[   r#   r#   r$   r2      s   

zBertLMPredictionHead.forwardr\   r#   r#   r!   r$   r]      s    r]   c                       s2   e Zd Z fddZdejdejfddZ  ZS )r   c                    s   t    t|| _d S rK   )r   r   r]   predictionsrZ   r!   r#   r$   r      s   
zBertOnlyMLMHead.__init__sequence_outputr4   c                 C   s   |  |}|S rK   )rb   )r   rc   prediction_scoresr#   r#   r$   r2      s   
zBertOnlyMLMHead.forward)r?   r@   rA   r   rB   rC   r2   rD   r#   r#   r!   r$   r      s    r   c                       s0   e Zd ZdZ fddZdd Zdd Z  ZS )rI   z*Roberta Head for masked language modeling.c                    sd   t    t|j|j| _tj|j|jd| _t|j|j	| _
tt|j	| _| j| j
_d S rR   )r   r   r   rT   r   rU   rY   r   
layer_normr   rL   r`   rB   ra   r^   rZ   r!   r#   r$   r      s   
zXLMRobertaLMHead.__init__c                 K   s*   |  |}t|}| |}| |}|S rK   )rU   r   re   rL   )r   featuresr    xr#   r#   r$   r2      s
   


zXLMRobertaLMHead.forwardc                 C   s,   | j jjjdkr| j| j _d S | j j| _d S )Nmeta)rL   r^   devicetyperM   r#   r#   r$   _tie_weights   s   zXLMRobertaLMHead._tie_weights)r?   r@   rA   __doc__r   r2   rk   rD   r#   r#   r!   r$   rI      s
    

rI   )#typingr   rB   torch.nn.functionalr   
functionalFtorch.nnr   transformers.activationsr   r   modelscope.metainfor   modelscope.models.baser   modelscope.models.builderr	   modelscope.outputsr
   r   r   modelscope.utils.constantr   register_module	fill_maskbert_mlmr   xlm_roberta_mlmrE   ModulerQ   r]   r   rI   r#   r#   r#   r$   <module>   s(   &,