o
    ߥi_H                  	   @   s  d Z ddlZddlmZ ddlmZ ddlmZmZm	Z	m
Z
mZ ddlZddlZddlZddlmZ ddlmZmZmZmZmZmZ ddlmZ dd	lmZ dd
lmZmZ ddl m!Z! ddl"m#Z# ddl$m%Z% ddl&m'Z' ddl(m)Z) ddl*m+Z+ ddl,m-Z- ddl.m/Z/ ddl0m1Z1 e1 Z2dgZ3dd Z4G dd deZ5G dd deZ6e'j7e/j8e#j9dG dd de%Z:			d*d eeej;  d!e<d"e<d#e<fd$d%Z=		d+d eeej;  d!e<d"e<fd&d'Z>		d+d eeej;  d!e<d"e<fd(d)Z?dS ),zPyTorch UniTE model.    N)	dataclass)ceil)DictListOptionalTupleUnion)version)DropoutLinearModule	ParameterParameterList
Sequential)softmax)pad_sequence)XLMRobertaConfigXLMRobertaModelACT2FN)Models)
TorchModel)MODELS)InputFormat)TranslationEvaluationOutput)compatible_position_ids)Tasks)
get_loggerUniTEForTranslationEvaluationc                 C   st   |j dd}| | d }| | }|jg ddd| }|| | d jg ddd| }| | t|d  S )Ndim)r   T)keepdim   g-q=)	unsqueezesumsizetorchsqrt)tensor
mask_floatbroadcast_masknum_elements_not_maskedtensor_maskedmeanvariance r2   f/home/ubuntu/.local/lib/python3.10/site-packages/modelscope/models/nlp/unite/translation_evaluation.py_layer_norm_all#   s   
r4   c                	       sV   e Zd Z	ddedededdf fddZ	ddeej d	ejdejfd
dZ	  Z
S )LayerwiseAttentionN
num_layers	model_dimdropoutreturnc                    s   t t|   || _|| _|| _ttj|fdd| _	tt
dgdd| _| jrJtt| j	}tt| j	d}| d| | d| d S d S )NT)requires_gradg      ?g@xdropout_maskdropout_fill)superr5   __init__r6   r7   r8   r   r)   zerosscalar_parametersFloatTensorgammalenemptyfill_register_buffer)selfr6   r7   r8   r;   r<   	__class__r2   r3   r>   2   s$   zLayerwiseAttention.__init__tensorsmaskc                 C   s   t jtdd |D dd}| jr(| jr(tt | j | jk| j	| j
dd}nt| j	dd}|dddd}| }|t|| jdd}|d d dd d f }| j| S )Nc                 s   s    | ]	}|j d dV  qdS r   r    N)r&   .0xr2   r2   r3   	<genexpr>M   s    z-LayerwiseAttention.forward.<locals>.<genexpr>r   r    r      )r)   catlisttrainingr8   r   wherer;   uniform_r@   r<   viewfloatr4   r'   rB   )rG   rJ   rK   normed_weightsr,   weighted_sumr2   r2   r3   forwardH   s$   
zLayerwiseAttention.forwardN)__name__
__module____qualname__intrX   r>   r   r)   Tensorr[   __classcell__r2   r2   rH   r3   r5   0   s&    r5   c                       sz   e Zd Zdddgdddfdeded	ee d
edee deddf fddZdede	fddZ
dejdejfddZ  ZS )FeedForwardrQ      i   SigmoidN皙?in_dimout_dimhidden_sizesactivationsfinal_activationr8   r9   c           	         s   t    g }|t||d  || | |t| tdt|D ]}|t||d  ||  || | |t| q'|t|d t| |dur_|| | t	| | _
dS )a  
        Feed Forward Neural Network.

        Args:
            in_dim (:obj:`int`):
                Number of input features.
            out_dim (:obj:`int`, defaults to 1):
                Number of output features. Default is 1 -- a single scalar.
            hidden_sizes (:obj:`List[int]`, defaults to `[3072, 768]`):
                List with hidden layer sizes.
            activations (:obj:`str`, defaults to `Sigmoid`):
                Name of the activation function to be used in the hidden layers.
            final_activation (:obj:`str`, Optional, defaults to `None`):
                Name of the final activation function if any.
            dropout (:obj:`float`, defaults to 0.1):
                Dropout ratio to be used in the hidden layers.
        r   rQ   r   N)r=   r>   appendr   build_activationr
   rangerC   r`   r   ff)	rG   rg   rh   ri   rj   rk   r8   modulesirH   r2   r3   r>   c   s   
zFeedForward.__init__
activationc                 C   s   t | S r\   r   )rG   rr   r2   r2   r3   rm      s   zFeedForward.build_activationin_featuresc                 C   s
   |  |S r\   )ro   )rG   rs   r2   r2   r3   r[      s   
zFeedForward.forward)r]   r^   r_   r`   r   strr   rX   r>   r   rm   r)   ra   r[   rb   r2   r2   rH   r3   rc   a   s.    +rc   )module_namec                )       s   e Zd Zdddddddddd	d
dddddddgdddfdedededededededededededededed ed!ed"ee d#ed$ee d%ef( fd&d'Z			d3d(e
jd)eee  d*ee
j d+efd,d-Zd.ed/e
jd0efd1d2Z  ZS )4r   rf   r   r%   rQ   gelui   g{Gz?i   gh㈵>         Ti rd   tanhNattention_probs_dropout_probbos_token_ideos_token_idpad_token_id
hidden_acthidden_dropout_probhidden_sizeinitializer_rangeintermediate_sizelayer_norm_epsmax_position_embeddingsnum_attention_headsnum_hidden_layerstype_vocab_size	use_cache
vocab_sizemlp_hidden_sizesmlp_actmlp_final_actmlp_dropoutc                    sd  t  jdi | || _|| _|| _|| _|| _|| _|| _|| _	|	| _
|
| _|| _|| _|| _|| _|| _|| _|| _|| _|| _|| _tdi d| jd| jd| jd| jd| jd| jd| jd| j
d	| jd
| jd| jd| jd| jd| j	d| jd| j| _t| jdd| _t| jd | j| jd| _t| jd| j| j| j| jd| _dS )a  The UniTE Model which outputs the scalar to describe the corresponding
            translation quality of hypothesis. The model architecture includes two
            modules: a pre-trained language model (PLM) to derive representations,
            and a multi-layer perceptron (MLP) to give predicted score.

            Args:
                attention_probs_dropout_prob (:obj:`float`, defaults to 0.1):
                    The dropout ratio for attention weights inside PLM.
                bos_token_id (:obj:`int`, defaults to 0):
                    The numeric id representing beginning-of-sentence symbol.
                eos_token_id (:obj:`int`, defaults to 2):
                    The numeric id representing ending-of-sentence symbol.
                pad_token_id (:obj:`int`, defaults to 1):
                    The numeric id representing padding symbol.
                hidden_act (:obj:`str`, defaults to :obj:`"gelu"`):
                    Activation inside PLM.
                hidden_dropout_prob (:obj:`float`, defaults to 0.1):
                    The dropout ratio for activation states inside PLM.
                hidden_size (:obj:`int`, defaults to 1024):
                    The dimensionality of PLM.
                initializer_range (:obj:`float`, defaults to 0.02):
                    The hyper-parameter for initializing PLM.
                intermediate_size (:obj:`int`, defaults to 4096):
                    The dimensionality of PLM inside feed-forward block.
                layer_norm_eps (:obj:`float`, defaults to 1e-5):
                    The value for setting epsilon to avoid zero-division inside
                        layer normalization.
                max_position_embeddings: (:obj:`int`, defaults to 512):
                    The maximum value for identifying the length of input sequence.
                num_attention_heads (:obj:`int`, defaults to 16):
                    The number of attention heads inside multi-head attention layer.
                num_hidden_layers (:obj:`int`, defaults to 24):
                    The number of layers inside PLM.
                type_vocab_size (:obj:`int`, defaults to 1):
                    The number of type embeddings.
                use_cache (:obj:`bool`, defaults to :obj:`True`):
                    Whether to use cached buffer to initialize PLM.
                vocab_size (:obj:`int`, defaults to 250002):
                    The size of vocabulary.
                mlp_hidden_sizes (:obj:`List[int]`, defaults to `[3072, 1024]`):
                    The size of hidden states inside MLP.
                mlp_act (:obj:`str`, defaults to :obj:`"tanh"`):
                    Activation inside MLP.
                mlp_final_act (:obj:`str`, `optional`, defaults to :obj:`None`):
                    Activation at the end of MLP.
                mlp_dropout (:obj:`float`, defaults to 0.1):
                    The dropout ratio for MLP.
            r|   r}   r~   r   r   r   r   r   r   r   r{   r   r   r   r   r   F)add_pooling_layerrQ   )r6   r7   r8   )rg   rh   ri   rj   rk   r8   Nr2   )r=   r>   r{   r|   r}   r~   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   encoder_configr   encoderr5   layerwise_attentionrc   	estimator)rG   r{   r|   r}   r~   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   kwargsrH   r2   r3   r>      s   F	
z&UniTEForTranslationEvaluation.__init__	input_idsinput_formatscorer9   c                 K   s~   | | j }| j||ddd}| |d |}| |jdd}t| 	 |d}	|d ur=|| 
d }
|
|	d< |	S )	NT)r   attention_maskoutput_hidden_statesreturn_dicthidden_statesr   r    )r   r   r%   loss)ner~   longr   r   r   squeezer   cputolistpowr0   )rG   r   r   r   r   r   outputs
mix_statespredoutputr   r2   r2   r3   r[     s$   
z%UniTEForTranslationEvaluation.forwardpathdeviceplm_onlyc                 C   sV   |r| j ||| _ d | j _ntj||d}t|d | | t	d|  d S )N)map_locationzencoder.embeddings.position_idsz%Loading checkpoint parameters from %s)
r   from_pretrainedtopoolerr)   loadr   load_state_dictloggerinfo)rG   r   r   r   
state_dictr2   r2   r3   load_checkpoint0  s   

z-UniTEForTranslationEvaluation.load_checkpoint)NN)r]   r^   r_   rX   r`   rt   boolr   r   r>   r)   ra   r   r   r[   r   r   rb   r2   r2   rH   r3   r      s    	
 

 rw   rQ   r%   all_input_concatmaximum_lengthpad_idxeos_idxc                 C   sF   | dd  D ]
}||d d df< qt | dkrt| ||S t| ||S )NrQ   r      )rC   cut_long_sequences3cut_long_sequences2)r   r   r   r   groupr2   r2   r3   combine_input_sentences=  s
   r   c                    sz  t t|  } t  }| D ]}tfdd|D }tdd |D }t||krtt| t dd t  dd ddD }tt 	 | d	 t
||d	 krgt
|krgtfd
d  D  n| |d    |d < t  fddtdt|D }tdd t||D }t||D ]
\}	}
|
d |	d< q|| q|| qt dd |D }t|dd}|S )Nc                 3        | ]}| | V  qd S r\   masked_selectr   rM   r   r2   r3   rP   P      
z&cut_long_sequences2.<locals>.<genexpr>c                 s       | ]}t |V  qd S r\   rC   rM   r2   r2   r3   rP   R      c                 s       | ]}|d  V  qdS r   Nr2   rM   r2   r2   r3   rP   V  r   c                 S      | d S NrQ   r2   dr2   r2   r3   <lambda>W      z%cut_long_sequences2.<locals>.<lambda>Tkeyreverser%   c                 3        | ]\}}||  fV  qd S r\   r2   rN   kvoffsetr2   r3   rP   ]      rQ   r   c                 3       | ]} | V  qd S r\   r2   rN   r   lengthsr2   r3   rP   b  r   c                 s        | ]\}}|d | V  qd S r\   r2   rN   rO   yr2   r2   r3   rP   c      r   c                 s       | ]
}t j|d dV  qdS rL   r)   rR   rM   r2   r2   r3   rP   k      batch_firstpadding_valuerS   ziptupler'   dict	enumeratesorteditemsr   valuesminrn   rC   rl   r   r   r   r   collected_tuplestensor_tupleall_lenslengths_sorted_idxesnew_lensnew_tensor_tuplerO   r   concat_tensorall_input_concat_paddedr2   r   r   r   r3   r   J  sD   
 r   c                    s  t t|  } t  }| D ]6}tfdd|D }tdd |D }t||kr=tt| t dd t  dd ddD }tt 	 | d	 t
||d	 krit
|kritfd
d  D  nt 	 |kr
 |d   |d  kr| |d    |d    |d  kr |d < ng |d   |d < n\ |d   |d    kr |d  krn n0| |d   d  |d  kr݈  |d <  |d < n% |d    |d <  |d < n|d	   |d <   |d <  |d < t 	 |ksrt  fddtdt D }tdd t||D }t||D ]\}	}
|
d |	d< q+|| q|| qt dd |D }t|dd}|S )Nc                 3   r   r\   r   rM   r   r2   r3   rP   w  r   z&cut_long_sequences3.<locals>.<genexpr>c                 s   r   r\   r   rM   r2   r2   r3   rP   y  r   c                 s   r   r   r2   rM   r2   r2   r3   rP   }  r   c                 S   r   r   r2   r   r2   r2   r3   r   ~  r   z%cut_long_sequences3.<locals>.<lambda>Tr   r   c                 3   r   r\   r2   r   r   r2   r3   rP     r   r   rQ   r%   c                 3   r   r\   r2   r   r   r2   r3   rP     r   c                 s   r   r\   r2   r   r2   r2   r3   rP     r   r   c                 s   r   rL   r   rM   r2   r2   r3   rP     r   r   r   r   r2   r   r3   r   q  s   





 r   )rw   rQ   r%   )rw   rQ   )@__doc__warningsdataclassesr   mathr   typingr   r   r   r   r   numpynpr)   torch.utils.checkpoint	packagingr	   torch.nnr
   r   r   r   r   r   torch.nn.functionalr   torch.nn.utils.rnnr   transformersr   r   transformers.activationsr   modelscope.metainfor   modelscope.models.baser   modelscope.models.builderr   )modelscope.models.nlp.unite.configurationr   modelscope.outputs.nlp_outputsr   -modelscope.utils.compatible_with_transformersr   modelscope.utils.constantr   modelscope.utils.loggerr   r   __all__r4   r5   rc   register_moduletranslation_evaluationuniter   ra   r`   r   r   r   r2   r2   r2   r3   <module>   sl    14 )

(