o
    ߥi                     @   sP   d dl Z d dl mZ d dlmZ e ZdddZ		dddZ		dd	d
ZdS )    N)nn)
get_loggerc                 C   s   | j d }tjdd}tj|tjdd| tjdd|ddtj|tjdd|tjdd| dd }|durKt|| t| | }|S t|| }|S )a  
    Calclate two logits' the KL div value symmetrically.
    :param logits1: The first logit.
    :param logits2: The second logit.
    :param attention_mask: An optional attention_mask which is used to mask some element out.
    This is usually useful in token_classification tasks.
    If the shape of logits is [N1, N2, ... Nn, D], the shape of attention_mask should be [N1, N2, ... Nn]
    :return: The mean loss.
    none)	reduction)dimN)shaper   	KLDivLosstorchsum
LogSoftmaxSoftmaxmean)logits1logits2attention_mask
labels_numKLDivloss r   ^/home/ubuntu/.local/lib/python3.10/site-packages/modelscope/models/nlp/structbert/adv_utils.py_symmetric_kl_div   s4   

r   h㈵>c                 K   sj  |dur|nd| }| | j |  d| }|d d|v r&|d d|vr,dn|d }	|d }
|	s8d}
d|v rA|d |di |d|i}|j}t|||
}tj	||d j }|j
dd	td
djdd	dd }tt|}|rtd |S ||d  }|||  }t|| |}t|| |}|di |d|i}|j}t|||
}|| S )a2  
    Calculate the adv loss of the model.
    :param embedding: Original sentense embedding
    :param model: The model, or the forward function(including decoder/classifier),
            accept kwargs as input, output logits
    :param ori_logits: The original logits outputed from the model function
    :param ori_loss: The original loss
    :param adv_grad_factor: This factor will be multipled by the KL loss grad and then the result will be added to
            the original embedding.
            More details please check:https://arxiv.org/abs/1908.04577
            The range of this value always be 1e-3~1e-7
    :param adv_bound: adv_bound is used to cut the top and the bottom bound of the produced embedding.
            If not proveded, 2 * sigma will be used as the adv_bound factor
    :param sigma: The std factor used to produce a 0 mean normal distribution.
            If adv_bound not proveded, 2 * sigma will be used as the adv_bound factor
    :param kwargs: the input param used in model function
    :return: The original loss adds the adv loss
    N   r   	input_idsinputs_embedswith_attention_maskFr   Tinfr   keepdimp   r   z'Nan occurred when calculating adv loss.gư>r   datanewsizenormal_poplogitsr   r
   autogradgradnormfloatmaxanyisnanloggerwarningmin)	embeddingmodel
ori_logitsori_lossadv_grad_factor	adv_boundsigmakwargsembedding_1r   r   outputs	v1_logitsr   emb_grademb_grad_normis_nanembedding_2
adv_logitsadv_lossr   r   r   compute_adv_loss3   sN   




rE   c                 K   sP  |dur|nd| }| | j |  d| }	|d d|v r&|d |di |d|	i}
|
j\}}t||t|| }|d }tj	||	d j }|j
ddtddjd	dd
d }tt|}|rptd |S || }|	||  }t|	| |}t|	| |}|di |d|i}
|
j\}}t||t|| }|| S )a  
    Calculate the adv loss of the model. This function is used in the pair logits scenerio.
    :param embedding: Original sentense embedding
    :param model: The model, or the forward function(including decoder/classifier),
            accept kwargs as input, output logits
    :param start_logits: The original start logits outputed from the model function
    :param end_logits: The original end logits outputed from the model function
    :param ori_loss: The original loss
    :param adv_grad_factor: This factor will be multipled by the KL loss grad and then the result will be added to
            the original embedding.
            More details please check:https://arxiv.org/abs/1908.04577
            The range of this value always be 1e-3~1e-7
    :param adv_bound: adv_bound is used to cut the top and the bottom bound of the produced embedding.
            If not proveded, 2 * sigma will be used as the adv_bound factor
    :param sigma: The std factor used to produce a 0 mean normal distribution.
            If adv_bound not proveded, 2 * sigma will be used as the adv_bound factor
    :param kwargs: the input param used in model function
    :return: The original loss adds the adv loss
    Nr   r   r   r   Tr   r   r!   r"   z,Nan occurred when calculating pair adv loss.r   r#   )r4   r5   start_logits
end_logitsr7   r8   r9   r:   r;   r<   r=   v1_logits_startv1_logits_endr   r?   r@   rA   rB   adv_logits_startadv_logits_endrD   r   r   r   compute_adv_loss_pairo   sX   





rL   )N)Nr   )r
   r   modelscope.utils.loggerr   r1   r   rE   rL   r   r   r   r   <module>   s   
 
B