o
    ߥi                     @   s@  d dl Z d dlZd dlZd dlZd dlZd dlmZ d dlmZm	Z	m
Z
mZmZ d dlZd dlZd dlZd dlm  mZ d dlmZmZ d dlmZ d dlmZmZmZmZmZmZ d dlm Z  d dl!m"Z" d d	l#m$Z$ d d
l%m&Z& d dl'm(Z( d dl)m*Z* d dl+m,Z,m-Z- d dl.m/Z0 d dl1m2Z2 ddl3m4Z4 ddl5m6Z6m7Z7 dZ8dZ9G dd dej:Z;G dd dej:Z<G dd dej:Z=G dd dej:Z>G dd dZ?G dd  d ej:Z@G d!d" d"ej:ZAG d#d$ d$e(e"ZBG d%d& d&eBZCG d'd( d(ej:ZDG d)d* d*ej:ZEG d+d, d,eFZGe*jHe2jIe$jJd-G d.d/ d/eBZKdS )0    N)	dataclass)AnyDictListOptionalUnion)Tensornn)xavier_uniform_)
BertConfig	BertModelBertTokenizerRobertaConfigRobertaModelRobertaTokenizer)ACT2FN)PreTrainedModel)Models)Model)
TorchModel)MODELS)TextGenerationModelOutputTokenGeneratorOutput)logger)Tasks   )
PalmConfig)compute_bleu_rouge	normalizezconfig.jsonzpytorch_model.binc                       s:   e Zd ZdZ		d
 fdd	Z					ddd	Z  ZS )MultiHeadedAttentiona  
    Multi-Head Attention module from
    "Attention is All You Need"
    :cite:`DBLP:journals/corr/VaswaniSPUJGKP17`.

    Similar to standard `dot` attention but uses
    multiple attention distributions simulataneously
    to select relevant items.

    .. mermaid::

       graph BT
          A[key]
          B[value]
          C[query]
          O[output]
          subgraph Attn
            D[Attn 1]
            E[Attn 2]
            F[Attn N]
          end
          A --> D
          C --> D
          A --> E
          C --> E
          A --> F
          C --> F
          D --> O
          E --> O
          F --> O
          B --> O

    Also includes several additional tricks.

    Args:
       head_count (int): number of parallel heads
       model_dim (int): the dimension of keys/values/queries,
           must be divisible by head_count
       dropout (float): dropout parameter
    皙?Tc                    s   || dksJ || | _ || _t   || _t||| j  | _t||| j  | _t||| j  | _	tj
dd| _t|| _|| _| jrRt||| _d S d S )Nr   dim)dim_per_head	model_dimsuper__init__
head_countr	   Linearlinear_keyslinear_valueslinear_querySoftmaxsoftmaxDropoutdropoutuse_final_linearfinal_linear)selfr(   r%   r0   r1   	__class__ a/home/ubuntu/.local/lib/python3.10/site-packages/modelscope/models/nlp/palm_v2/text_generation.pyr'   \   s$   

zMultiHeadedAttention.__init__NFc	                    s  | d | j| j fdd}	 fdd}
|dur|dkrp| || || |}}}|	|}|	|}|j}|d durStj|d 	||fd	d
}|d durgtj|d 	||fd	d
}||d< ||d< nL|dkr| |}|d du r| || |}}|	|}|	|}n	|d |d }}||d< ||d< n| |}| |}| |}|	|}|	|}|	|}|t
 }t||d	d}|dur|d|}||d}| |}|dur|dddf | }|t|d	d	d  }t|ddddf |dgd}| |}| jr6|
t||}| |}|r4||fS |S t||}|rC||fS |S )a  
        Compute the context vector and the attention vectors.

        Args:
           key (`FloatTensor`): set of `key_len`
                key vectors `[batch, key_len, dim]`
           value (`FloatTensor`): set of `key_len`
                value vectors `[batch, key_len, dim]`
           query (`FloatTensor`): set of `query_len`
                 query vectors  `[batch, query_len, dim]`
           mask: binary mask indicating which keys have
                 non-zero attention `[batch, query_len, key_len]`
        Returns:
           (`FloatTensor`, `FloatTensor`) :

           * output context vectors `[batch, query_len, dim]`
           * one of the attention vectors `[batch, query_len, key_len]`
        r   c                    s   |   dddS )z  projection r!   r      )view	transposex
batch_sizer$   r(   r6   r7   shape   s   z+MultiHeadedAttention.forward.<locals>.shapec                    s   |  dd  d S )z  compute context r   r8   r!   )r:   
contiguousr9   r;   r=   r6   r7   unshape   s   z-MultiHeadedAttention.forward.<locals>.unshapeNr3   	self_keysr8   r"   self_valuescontextmemory_keysmemory_values   r   g Ngmr!   g&.>)sizer$   r(   r,   r*   r+   devicetorchcattomathsqrtmatmulr:   	unsqueeze	expand_asmasked_fillr.   sumr0   r1   r2   )r3   keyvaluequerymasklayer_cachetypepredefined_graph_1return_attnr?   rA   rI   scoresattnattn_masked	drop_attnrD   outputr6   r=   r7   forwards   s   









&

zMultiHeadedAttention.forward)r    T)NNNNF__name__
__module____qualname____doc__r'   ra   __classcell__r6   r6   r4   r7   r   2   s    ,r   c                       *   e Zd ZdZd fdd	Zdd Z  ZS )PositionwiseFeedForwarda*   A two-layer Feed-Forward-Network with residual layer norm.

    Args:
        d_model (int): the size of input for the first-layer of the FFN.
        d_ff (int): the hidden layer size of the second-layer
            of the FNN.
        dropout (float): dropout probability in :math:`[0, 1)`.
    r    c                    s\   t    tj|dd| _t||| _td | _t	|| _
t||| _t	|| _d S )Nư>epsgelu_new)r&   r'   r	   	LayerNorm
layer_normr)   w_1r   actvr/   	dropout_1w_2	dropout_2)r3   d_modeld_ffr0   r4   r6   r7   r'      s   

z PositionwiseFeedForward.__init__c              	   C   s4   |  | | | |}| | |}|| S N)rr   rq   rp   ro   rt   rs   )r3   r<   interr`   r6   r6   r7   ra      s   zPositionwiseFeedForward.forward)r    rb   r6   r6   r4   r7   ri      s    		ri   c                       s<   e Zd ZdZdZ fddZ			d
ddZdd	 Z  ZS )TransformerDecoderLayera  
    Args:
      d_model (int): the dimension of keys/values/queries in
                       MultiHeadedAttention, also the input size of
                       the first-layer of the PositionwiseFeedForward.
      heads (int): the number of heads for MultiHeadedAttention.
      d_ff (int): the second-layer of the PositionwiseFeedForward.
      dropout (float): dropout probability(0-1.0).
      self_attn_type (string): type of self-attention scaled-dot, average
      c                    s   t    t|||d| _t|||d| _t|||| _tj|dd| _	tj|dd| _
t|| _| | j}| d| d S )N)r0   rj   rk   rW   )r&   r'   r   	self_attncontext_attnri   feed_forwardr	   rn   layer_norm_1layer_norm_2r/   drop_get_attn_subsequent_maskMAX_SIZEregister_buffer)r3   ru   headsrv   r0   rW   r4   r6   r7   r'     s   
z TransformerDecoderLayer.__init__Nc              
   C   s   t |t j| jddd|dd|df t j d}| |}	|	}
|dur8t j||	fdd}
d}| j|
|
|	||dd}| 	|| }| 
|}| j|||||ddd	\}}| | 	|| }|||
fS )
a#  
        Args:
            inputs (`FloatTensor`): `[batch_size x 1 x model_dim]`
            memory_bank (`FloatTensor`): `[batch_size x src_len x model_dim]`
            src_pad_mask (`LongTensor`): `[batch_size x 1 x src_len]`
            tgt_pad_mask (`LongTensor`): `[batch_size x 1 x 1]`

        Returns:
            (`FloatTensor`, `FloatTensor`, `FloatTensor`):

            * output `[batch_size x 1 x model_dim]`
            * attn `[batch_size x 1 x src_len]`
            * all_input `[batch_size x current_step x model_dim]`

        Nr   r   r"   r3   )rW   rX   rY   rD   T)rW   rX   rY   r[   )rJ   gtrY   uint8rW   rH   r~   rK   r{   r   r   r|   r}   )r3   inputsmemory_banksrc_pad_masktgt_pad_maskprevious_inputrX   stepdec_mask
input_norm	all_inputrV   
query_normmidr]   r`   r6   r6   r7   ra     sD   
(



zTransformerDecoderLayer.forwardc                 C   s2   d||f}t jt |ddd}t|}|S )z
        Get an attention mask to avoid using the subsequent info.

        Args:
            size: int

        Returns:
            (`LongTensor`):

            * subsequent_mask `[1 x size x size]`
        r   )kr   )nptriuonesastyperJ   
from_numpy)r3   rH   
attn_shapesubsequent_maskr6   r6   r7   r   U  s   

z1TransformerDecoderLayer._get_attn_subsequent_mask)NNN)	rc   rd   re   rf   r   r'   ra   r   rg   r6   r6   r4   r7   ry      s    

8ry   c                       s0   e Zd Zd	 fdd	Zd
ddZdd Z  ZS )PositionalEncodingrz   c                    s   t    t||}td|d}ttjd|dtjdt	d|   }t
| | |d d dd df< t| | |d d dd df< |d}| d| t|| _|| _d S )Nr   r   r8   )dtypeg     @pe)r&   r'   rJ   zerosarangerP   expfloatrM   logsincosr   r	   r/   r0   r#   )r3   r0   r#   max_lenr   positiondiv_termr4   r6   r7   r'   i  s   
$$

zPositionalEncoding.__init__Nc                 C   sl   |t | j }|r|| jd d |f d d d d d f  }n|| jd d d |df  }| |}|S Nr   )rM   rN   r#   r   rH   r0   )r3   embr   r6   r6   r7   ra   v  s   * 
zPositionalEncoding.forwardc                 C   s   | j d d d |df S r   )r   rH   )r3   r   r6   r6   r7   get_emb  s   zPositionalEncoding.get_emb)rz   rw   )rc   rd   re   r'   ra   r   rg   r6   r6   r4   r7   r   g  s    

r   c                   @   s8   e Zd ZddedefddZdd Zdd	 Zd
d ZdS )TransformerDecoderStater!   srccache_num_layersc                 C   s2   || _ d | _d | _d | _|dkr| | d S d S Nr!   )r   r   previous_layer_inputscache_init_cache)r3   r   r   r6   r6   r7   r'     s   z TransformerDecoderState.__init__c                 C   s   || _ || _d | _d S rw   )r   r   r   )r3   	new_inputr   r6   r6   r7   update_state  s   
z$TransformerDecoderState.update_statec                 C   sB   i | _ t|D ]}d d d}d |d< d |d< || j d|< qd S )N)rE   rF   rB   rC   layer_{})r   rangeformat)r3   
num_layersnumrX   r6   r6   r7   r     s   
z#TransformerDecoderState._init_cachec                    s:   d fdd	 | j d| _ | jd ur | j d S d S )Nr   c                    s@   |   D ]\}}|d urt|tr | q||| |< qd S rw   )items
isinstancedict)struct	batch_dimr   v_recursive_mapfnr6   r7   r     s   

z<TransformerDecoderState.map_batch_fn.<locals>._recursive_mapr   )r   r   )r3   r   r6   r   r7   map_batch_fn  s
   
z$TransformerDecoderState.map_batch_fnN)r!   )	rc   rd   re   r   intr'   r   r   r   r6   r6   r6   r7   r     s
    r   c                       sH   e Zd ZdZdZ fddZ		ddededed	ed
ef
ddZ	  Z
S )TransformerDecodera  
    The Transformer decoder from "Attention is All You Need".


    .. mermaid::

       graph BT
          A[input]
          B[multi-head self-attn]
          BB[multi-head src-attn]
          C[feed forward]
          O[output]
          A --> B
          B --> BB
          BB --> C
          C --> O


    Args:
       num_layers (int): number of encoder layers.
       d_model (int): size of the model
       heads (int): number of heads
       d_ff (int): size of the inner FF layer
       dropout (float): dropout parameters
       embeddings (:obj:`onmt.modules.Embeddings`):
          embeddings to use, should have positional encodings
       attn_type (str): if using a seperate copy attention
    transformerc                    sd   t    || _|| _t| jj| _t fddt	|D | _
tjdd| _d | _d S )Nc                    s   g | ]	}t  qS r6   )ry   .0_rv   ru   r0   r   r6   r7   
<listcomp>  s    z/TransformerDecoder.__init__.<locals>.<listcomp>rj   rk   )r&   r'   r   
embeddingsr   embedding_dimpos_embr	   
ModuleListr   transformer_layersrn   ro   state)r3   r   ru   r   rv   r0   r   r4   r   r7   r'     s   


zTransformerDecoder.__init__Nr   tgtr   r   memory_masksc                 C   s  |j }|}| \}}	| \}
}| |}| dksJ | ||}|}| jj}|j|d	|
||}|d urI|d}	|	|||	}n|j|d	|||	}|j
d u r^g }g }t| jD ]@}d }|j
d u rx|jd urx|j| }| j| ||||||j
d ur|j
d| nd |d\}}}|j
d u r|| || qe|j
d u rt|}| |}|j
d u r||| |||fS )NrG   r   r!   r   )r   rX   r   )r   rH   r   r#   r   padding_idxdataeqrP   expandr   r   r   r   r   r   r   appendrJ   stackro   r   )r3   r   r   r   r   r   	src_words	tgt_words	src_batchsrc_len	tgt_batchtgt_lenr   r`   src_memory_bankr   r   r   saved_inputsattnsiprev_layer_inputr]   r   r6   r6   r7   ra     sZ   















zTransformerDecoder.forward)NN)rc   rd   re   rf   decoder_typer'   r   r   r   ra   rg   r6   r6   r4   r7   r     s"    r   c                       s$   e Zd Z fddZdd Z  ZS )PalmPointerGeneratorc                    s(   t    t||| _td| _d S r   )r&   r'   r	   r)   dense
LogSoftmaxgen_func)r3   hidden_size
vocab_sizer4   r6   r7   r'     s   
zPalmPointerGenerator.__init__c                 C   s   |  |}| |}|S rw   )r   r   )r3   r<   r6   r6   r7   ra     s   

zPalmPointerGenerator.forward)rc   rd   re   r'   ra   rg   r6   r6   r4   r7   r     s    r   c                       sT   e Zd ZdZeZdZ fddZede	e
eejf  fddZedd	 Z  ZS )
PalmPreTrainedModelz
    An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
    models.
    palmc                    s*   t  j|jfi | t t| | d S rw   )r&   r'   name_or_pathr   )r3   configkwargsr4   r6   r7   r'   -  s   zPalmPreTrainedModel.__init__pretrained_model_name_or_pathc                 K   sv   t j|t}t j|rt|nt }t j||j|_t j|t}t j|r0t	
|nd }| ||fi |S rw   )ospathjoinCONFIG_NAMEisfiler   from_json_fileencoder_pthWEIGHTS_NAMErJ   load)clsr   r   config_filer   checkpoint_file
checkpointr6   r6   r7   _from_pretrained1  s$   z$PalmPreTrainedModel._from_pretrainedc                 K   s(   | d}| jdd|i|}||_|S )ah  Instantiate the model.

        Args:
            kwargs: Input args.
                    model_dir: The model dir used to load the checkpoint and the label information.
                    num_labels: An optional arg to tell the model how many classes to initialize.
                                    Method will call utils.parse_label_mapping if num_labels not supplied.
                                    If num_labels is not found, the model will use the default setting (2 classes).

        Returns:
            The loaded model, which is initialized by transformers.PreTrainedModel.from_pretrained
        	model_dirr   Nr6   )popr   r   )r   r   r   modelr6   r6   r7   _instantiateA  s   
z PalmPreTrainedModel._instantiate)rc   rd   re   rf   r   config_classbase_model_prefixr'   classmethodr   r   strr   PathLiker   r  rg   r6   r6   r4   r7   r   $  s    r   c                       s8   e Zd Zd	 fdd	ZedefddZdd Z  ZS )
AbsSummarizerNc                    s  t  j|fi | || _|jdks|jdkr!tt|j| _n|jdkr/t	t
|j| _|jdkrnt|j| jjjj}| jjjjj|jjd d< | jjjjjd d d d f |jd d|jjdd < || jjj_| jjj| _tj| j| jjj|jdkrdndd}|jrt| jjjjj|_t|j|j|j|j|j|d	| _ t!|j| j| _"| j jj| j"j#_|d ur| $|}| j%|d
d d S | j & D ];}t'|tj(tjfr|jjj)ddd nt'|tj*r|j+j,  |jj-d t'|tj(r|j+d ur|j+j,  q| j". D ]}|/ dkrt0| q|j,  q|j1rT|jdkr:tj| j| jjjdd}ntj| j| jjjdd}t| jjjj|_|| j _| j jj| j"j#_d S )Nbertzh_bertrobertai   r!   r   r   )r   )r   rv   r0   r   F)strict        g{Gz?)meanstd      ?)2r&   r'   r   encoderr   r   from_pretrainedr   r	  r   r   max_posr	   	Embeddingr  r   r   position_embeddingsweightr   repeatr   	share_embcopydeepcopyword_embeddingsr   
dec_layersdec_hidden_size	dec_headsdec_ff_sizedec_dropoutdecoderr   	generatorr   _unwrap_checkpointload_state_dictmodulesr   r)   normal_rn   biaszero_fill_
parametersr#   r
   use_bert_emb)r3   r   r   r   my_pos_embeddingstgt_embeddingsmodulepr4   r6   r7   r'   Y  s   






zAbsSummarizer.__init__r   c                    sB   d}|D ]
  | v r|   } q|D ]  fdd|   D } q| S )N)r  r   c                    s4   i | ]\}}|  r|t d  d n||qS )r   N)
startswithlen)r   r   r   namer6   r7   
<dictcomp>  s    (z4AbsSummarizer._unwrap_checkpoint.<locals>.<dictcomp>)r   )r   
wrap_namesr6   r2  r7   r#    s   
z AbsSummarizer._unwrap_checkpointc           	      C   sN   | j ||dd\}}t|}| ||d d d df |\}}}||d |fS )NFreturn_dictr!   )r	  r   r!  )	r3   r   r   mask_srctop_vecr   r   decoder_outputsr   r6   r6   r7   ra     s   $zAbsSummarizer.forwardrw   )	rc   rd   re   r'   staticmethodr   r#  ra   rg   r6   r6   r4   r7   r  W  s
    H
r  c                       rh   )LabelSmoothingLossz
    With label smoothing,
    KL-divergence between q_{smoothed ground truth prob.}(w)
    and p_{prob. computed by model}(w) is minimized.
    c                    st   d|  k rdksJ  J || _ tt|   ||d  }t|f|}d|| j < | d|d d| | _d S )Nr  r  r8   r   one_hot)	r   r&   r<  r'   rJ   fullr   rP   
confidence)r3   label_smoothingtgt_vocab_sizeignore_indexsmoothing_valuer>  r4   r6   r7   r'     s   
zLabelSmoothingLoss.__init__c                 C   sR   | j |dd}|d|d| j ||| jkdd tj	||ddS )zf
        output (FloatTensor): batch_size x n_classes
        target (LongTensor): batch_size
        r   r   rS   )	reduction)
r>  r  rH   scatter_rP   r@  masked_fill_r   Fkl_div)r3   r`   target
model_probr6   r6   r7   ra     s   zLabelSmoothingLoss.forward)r=  rb   r6   r6   r4   r7   r<    s    r<  c                       s:   e Zd ZdZd fdd	Zdd Zdd Zd	d
 Z  ZS )NMTLossComputez(
    Standard NMT Loss Computation.
    r  c                    sN   t    || _|d | _|dkrt||| jd| _d S tj| jdd| _d S )NPADr   )rC  rS   )rC  rE  )r&   r'   r"  r   r<  	criterionr	   NLLLoss)r3   r"  symbolsr   rA  r4   r6   r7   r'     s   

zNMTLossCompute.__init__c                 C   s   | d|dS )Nr!   r8   r9   rH   )r3   _vr6   r6   r7   _bottle  s   zNMTLossCompute._bottlec                 C   s   | d||dS )Nr!   r   rQ  )r3   rR  r>   r6   r6   r7   	_unbottle  s   zNMTLossCompute._unbottlec           	      C   sd   |d d dd f }| | j }| |}| |}| d}| ||}|t	| |S )Nr   r!   )
ner   rS   rS  r"  r@   r9   rN  divr   )	r3   r   r`   rJ  normalizationbottled_outputr\   gtruthlossr6   r6   r7   ra     s   

zNMTLossCompute.forward)r  )	rc   rd   re   rf   r'   rS  rT  ra   rg   r6   r6   r4   r7   rL    s    rL  c                       s   e Zd ZdZeG dd dZd!def fddZdd	 Zd
d Z	d"ddde
fddZd#ddZdded dfddZd$ddZdejdejdeeejf fdd Z  ZS )%
Translatorz9
    Uses a model to translate a batch of sentences.
    c                   @   sh   e Zd ZU eed< ejed< ejed< ejed< dZed ed< dZ	eee
  ed< dZee
 ed< dS )	zTranslator.Batchr>   r   r   r8  Nquery_idsrc_strtgt_str)rc   rd   re   r   __annotations__rJ   r   r\  r   r]  r  r^  r6   r6   r6   r7   Batch  s   
 


r`  cnndatasetc                    sv   t    t | _|j| _|| j_|j| _	| j	j
| _
|j| _|j| _| jd | _| jd | _| jj| _| jj| _d S )NBOSEOS)r&   r'   logging
get_loggerr   r   argsrb  r   r  r"  	tokenizervocabrP  start_token	end_tokenalpha	beam_size)r3   r  rb  r4   r6   r7   r'     s   



zTranslator.__init__c                    st  |d }t |d t |d ksJ |j}|d |d |j|j|jf\}}}}}|j}		 g }
t|D ]} jjdkre jj	dksG jj	dkrS fdd	|| D }n jj	d
krd fdd	|| D }nz jj	d
kr j
dd	 || d D dddd}n\ jj	dkr j
dd	 || d D }d|dd}n= jj	dkr jjdkr fdd	|| D }dd	 |D }n jj	dkr߈ j
dd	 || d D }d|dd}|| } jj	d
kr j
dd	 || D }d|| }n fdd	|| D d d }d|} jjdkr'|||| |	| || f}n||||	| || f}|
| q2|
S )Nbatch
gold_scorepredictionsr\   qg_ranking_testr	  r
  c                    s0   g | ]}d   jdd |D ddqS ) c                 S      g | ]}t |qS r6   r   r   nr6   r6   r7   r   &      4Translator.from_batch.<locals>.<listcomp>.<listcomp> ## )r   ri  convert_ids_to_tokensreplacer   eachr3   r6   r7   r   #  s    z)Translator.from_batch.<locals>.<listcomp>r  c                    s2   g | ]} j d d |D ddddqS )c                 S   rs  r6   rt  ru  r6   r6   r7   r   +  rw  rx  <s>rz  </s>)ri  decoder|  r}  r  r6   r7   r   *  s    c                 S   rs  r6   rt  ru  r6   r6   r7   r   1  s    r   r  rz  r  c                 S   rs  r6   rt  ru  r6   r6   r7   r   7  rw  rr  ry  
paraphrasec                    s"   g | ]} j d d |D qS )c                 S   rs  r6   rt  ru  r6   r6   r7   r   ;  rw  rx  )ri  r{  r   predr  r6   r7   r   :  s    c                 S   s   g | ]}d  |dd qS )rz  ry  )r   r|  r  r6   r6   r7   r   >  s    c                 S   rs  r6   rt  ru  r6   r6   r7   r   C  rw  ##c                 S   rs  r6   rt  r   tr6   r6   r7   r   H  rw  c                    s   g | ]
} j jt| qS r6   )ri  ids_to_tokensr   r  r  r6   r7   r   K  s    i  faq)r1  r>   r^  r   r]  r\  r   rg  rb  r  ri  r  r|  r{  r   r   )r3   translation_batchrn  r>   preds
pred_scorer^  r   r]  r\  translationsb
pred_sents	gold_sentraw_srctranslationr6   r  r7   
from_batch  s   






zTranslator.from_batchc           (      C   s  | j jd|  }| j jd|  }t|dd| _t|dd| _t|d dd| _| j jdkrE| j jdkrEd	g d	d
 }| j
| | j jd|  }t|dd| _g g }}d}	i i }
}t|D ]\}}| jd|d  dt|  | |}| |}|D ]S}|\}}}}}|dddd }| j jdkrd	dd |D }nD|dddddddddddddddddd }|dddd ddd!dd"dd#d }|dd  dddddddddddd d!dd"dd#d }| j jrNd}|d D ] }|d  |  }t| t| d$ krJ|} n|}q-| j jd%ksc| j jd&ksc| j jd'kr|d d}|d ur||gd(}||gd(}||g|d    d)}n|	|gd(}|	|gd(}|	|g|d    d)}t|| j | j
d
 t|| j | j
d
 t|| j | j
d
 | j
| d
  n| j jd*kr| j
|d
  | j
|d
  | j
| d
  n| j jd+kr1|d u r	t|	}|t|g |t|g | j
|d
  | j
d	|d |gd
  n| j jdkro|d u rAt|	}| j jdkrK|g}t|d g|
|< t|g||< | j
d	t||||d gd
  ng| j jd,kr|d    d-k rq| j
d	t|||gd
  | j
d	t|||gd
  | j
d	t||||t|d    gd
  n| j jdkr| j
t|d | d
  |	d7 }	q| j  | j  | j  qe| jd.|	  | j  | j  | j  |d/kr| j jd%ks| j jd&ks| j jd'kr-t !d0||f }| j| d S | j jd*kre| jd1 dd2l"m#} d3d t|dd4D } d5d t|dd4D }!| j$| |!d6d7}"t%|" d S | j jd+kss| j jdkrd8d9 }#|#||\}}d:d; t|D }
d<d; t|D }$t&|
|$}%t%|% d S | j jd+ks| j jdkr|#||\}}t'||}&dd2l"m#} | }'|'j$||d6d7}"t%d=(|&|" d S d S d S )>Nz.%d.goldz.%d.candidatewzutf-8z.sampler  r  	)r\  source_querytarget_querypredict_query
z.%d.raw_srcr   zdata: r   z / <pad>rz  r  rq  c                 S   st   g | ]6}| d d dd dd dd dd dd dd dd	 d
d dd dd dd qS )	[unused0]rz  [PAD]	[unused1] +rr  [SEP]	[unused2]<mask><q>r  r  r  <unk>)r|  stripr}  r6   r6   r7   r   u  s2    




z(Translator.translate.<locals>.<listcomp>r  r  r  r  rr  r  r  [CLS]z[UNK]r  r  r  r  r  
   marcosquad
qg_ranking)r\  answers)r\  r  r\   ra  dureaderr  g      zcnt: %sr!   z./run.sh %s %szCalculating Rouge)Rougec                 S      g | ]}|  qS r6   r  r   liner6   r6   r7   r         )encodingc                 S   r  r6   r  r  r6   r6   r7   r     r  T)avgc                 S   sF   dd | D } dd |D }d| v r|  d}d| |< d| v s| |fS )Nc                 S   s   g | ]
}|  d dqS ).rz  )r  r|  r  r6   r6   r7   r     s    zBTranslator.translate.<locals>.postprocess_text.<locals>.<listcomp>c                 S   r  r6   r  )r   labelr6   r6   r7   r     rw  rz  u   。)index)r  labelsidxr6   r6   r7   postprocess_text   s   
z.Translator.translate.<locals>.postprocess_textc                 S      i | ]	\}}t ||qS r6   r  r   r   tmpr6   r6   r7   r4  
      z(Translator.translate.<locals>.<dictcomp>c                 S   r  r6   r  r  r6   r6   r7   r4    r  z'Dev eval result: Bleu-4={}, {}))rg  result_pathcodecsopengold_out_filecan_out_filepred_json_score_out_filerb  r  r   writesrc_out_file	enumerater   infor1  translate_batchr  r|  r  recall_evalsplitcpunumpytolistjsondumpr  extendr   flushclose
subprocess	getoutputrouger  
get_scoresprintr   cal_bleur   )(r3   	data_iterr   	gold_pathcan_pathoutraw_src_pathpred_resultsgold_resultscnt	pred_dictref_dictr   rn  
batch_datar  transr  goldr   r\  r  pred_strgold_str	_pred_strsentcan_pred_str	pred_json	gold_jsonpred_json_scorecnn_resultsr  
candidates
referencesrouge_scorer  	gold_dict
bleu_rouge
bleu_scorer  r6   r6   r7   	translateY  s  

 









*










*





#zTranslator.translateFrn  fastc                 C   s@   | j   t  | |W  d   S 1 sw   Y  dS )aq  
        Translate a batch of sentences.

        Mostly a wrapper around :obj:`Beam`.

        Args:
           batch (:obj:`Batch`): a batch from a dataset object
           data (:obj:`Dataset`): the dataset object
           fast (bool): enables fast beam search (may not support all features)

        Todo:
           Shouldn't need the original dataset.
        N)r  evalrJ   no_grad_fast_translate_batch)r3   rn  r  r6   r6   r7   r    s   

$zTranslator.translate_batchr   c                 C   s   t tt| }|dkr"|| |d |d< ||< || }t | }|d  |9  < |d}||ddd|ddd j| }|dkrW|| }|S )Nr   r!   r   )	listr   r1  rH   permuter@   r9   r:   r  )r3   r<   countr#   permout_sizern  r6   r6   r7   _tile,  s"   

zTranslator._tiler  r  Infr   c                 C   s   |dkrt t|||d}|t||d d k }|||< |dk rgtj|dd\}}tjtj|dddd}	|	|k}
|dkrHd|
d	d |f< |
d	d df 	 |
d	dd f< d|
d
< |

d||
}|||< |S )Nr   r!   ).r!   Nr  T)
descendingr"   r   .).r   )minmaxrH   rJ   topksortcumsumrH  r.   clonescatter)r3   logitstop_ktop_pfilter_valuemin_tokens_to_keepindices_to_removesorted_logitssorted_indicescumulative_probssorted_indices_to_remover6   r6   r7   _top_k_top_p_filtering>  s2   

z!Translator._top_k_top_p_filteringc           /         s<  j j}j j}j |j}|j}|j}jj||dd\}}t	|jj
j}	|j}
|	 fdd j| dd}tj|tj|
d}tjd|   tj|
d}tj|  d	gjtj|
d}tjd
gtdg d	   |
d|}dd t|D }i }dd t|D |d< dd t|D |d< dg| |d< ||d< t|D ]u}|d d df d	d}|dd	}jj
|	|||d\}}}	j|dd	d}|d}||k rd|d d jf< d|d	  d j }j j r@j j!}|| }j"|j j#j j$d	d}tj%t&j'|ddd	d}t&j(|d	d}||d)d	7 }|| }t*|d|}|d }|d }n||d)d	7 }|| }|+d | }|j, dd\}}j j-r|d	}|dkrt|dD ]e}d}dd || D j j.dkrj/01 2 nfddD d 34d!d"2 t5dkrqufd#dtd	t5d	 D } t6| d }!|!| d d v rd$}|rd%||< qu|| }|| }"|7|}|"|d |"d )d	 }#|#dt8|9d|dd	gd}|:j}$|d	 |kr|$;j |$d d df :d	}%|$< r|d |d}&t|$dD ]}|| }'|%| rR|$| ;j |$| = d}(|(D ]})||' >|||)f |&||)d	d f f q]|%| rt?||' d&d d$d'}*j j@d(ksj j@d)krj j s|*d   D ]}+|+\},}-|d |' >|, |d |' >|- qq?|*d \},}-|d |' >|, |d |' >|- q?|%:d= d}.t5|.dkr |S |9d|.}|#9d|.}#|9d|.}|&9d|.d|d}|#d|9d}|	fd*d q|S )+NFr6  c                    s   j |  |dS )Nr"   )r  r   r#   )rm  r3   r6   r7   <lambda>s      z2Translator._fast_translate_batch.<locals>.<lambda>r   r"   )r   rI   )r   r   rI   r   r  z-inf)rI   c                 S      g | ]}g qS r6   r6   r   r6   r6   r7   r     r  z4Translator._fast_translate_batch.<locals>.<listcomp>c                 S   r  r6   r6   r   r6   r6   r7   r     r  rp  c                 S   r  r6   r6   r   r6   r6   r7   r     r  r\   ro  rn  r!   )r   g@xg      @g      @)r  r  r  )num_samplesrG   c                 S   rs  r6   rt  r   r  r6   r6   r7   r     rw  r  c                    s   g | ]} j j| qS r6   )ri  r  r  r  r6   r7   r     s    rr  ry  rz  c                    s*   g | ]} |d    |  |d   fqS )r   r6   )r   r   )wordsr6   r7   r     s    "TgPKc                 S   s   | d S )Nr   r6   r;   r6   r6   r7   r    s    )rT   reverserq  r  c                    s   |  | S rw   )index_selectr  )select_indicesr6   r7   r    s    )Arg  
max_length
min_lengthrm  r>   r   r8  r  r	  r   r!  r   rI   r   r  rJ   r   longr?  rj  tensorr   r  r   r9   r:   r"  ra   squeezerH   rk  rl  sample_topktemperaturer  r  r  multinomialrH  r.   log_softmaxrP   gatherreshaper  block_trigramr  ri  r  r  r  r   r|  r1  tuplefmodrK   r  r   r)  anynonzeror   sortedrb  )/r3   rn  r   r!  r>   r   r8  src_featuresr   r   rI   batch_offsetbeam_offset	alive_seqtopk_log_probs
hypothesesresultsr   decoder_inputdec_outr   	log_probsr   length_penaltyr&  _scorestopk_idstopk_scorescurr_scorescur_lenr   failtrigramstrigramtopk_beam_indexbatch_indexis_finishedend_conditionrp  r  finished_hypjbest_hypr~  scorer  non_finishedr6   )rm  r  r3   r  r7   r  a  sV  

















z Translator._fast_translate_batch	input_idsattention_maskreturnc                 K   s4   | j | d |d |d}| |}|d }d|iS )Nr   )r>   r   r   r8  rp  )r`  rH   r  )r3   rM  rN  r   rn  r  r  r6   r6   r7   __call__!  s   

zTranslator.__call__)ra  )Fr   )rn  r`  )rc   rd   re   rf   r   r`  r  r'   r  r  boolr  r  r   r  r  rJ   r   r   rP  rg   r6   r6   r4   r7   r[    s(    	J B


# Ar[  )module_namec                       s@   e Zd Zd
 fdd	Zdd Zdeeef defdd	Z	  Z
S )PalmForTextGenerationNc                    s   t  j|fi | || _|jdkr&tj|jdd}|j|j|j	|j
d}n%|jdks0|jdkrKtj|jdd}|jd |jd	 |jd
 |jd d}|| _|| _t||| _t| jj|| jj|j| _t| | _d S )Nr  F)do_lower_case)rc  rd  rM  EOQr	  r
  Tr  r  r  r  )r&   r'   r   r  r   r  r   cls_token_idsep_token_idpad_token_idunk_token_idr   ri  rh  rP  r  r   rL  r"  r   rA  rZ  r[  )r3   r   r   r   rh  rP  r4   r6   r7   r'   1  s8   

zPalmForTextGeneration.__init__c                 C   s0   | j |||d}| ||d }t||d dS )N)r   r   r8  r   )rZ  r  )r   rZ  r   )r3   rM  rN  r  r`   rZ  r6   r6   r7   ra   N  s   zPalmForTextGeneration.forwardinputrO  c                 K   sN   |  D ]\}}t| jj|| q| jdi |}|d }tdd |D dS )Nrp  c                 S   s   g | ]}|d  qS r   r6   r  r6   r6   r7   r   \  rw  z2PalmForTextGeneration.generate.<locals>.<listcomp>)	sequencesr6   )r   setattrr"  rg  r   )r3   rZ  r   r   r   outputsr  r6   r6   r7   generateV  s
   zPalmForTextGeneration.generaterw   )rc   rd   re   r'   ra   r   r  r   r   r^  rg   r6   r6   r4   r7   rS  .  s    rS  )Lr  r  rM   r   r  dataclassesr   typingr   r   r   r   r   r  r  r   rJ   torch.nn.functionalr	   
functionalrH  r   torch.nn.initr
   transformersr   r   r   r   r   r   transformers.activationsr   transformers.modeling_utilsr   modelscope.metainfor   modelscope.modelsr   modelscope.models.baser   modelscope.models.builderr   modelscope.outputsr   r   modelscope.utilsr   re  modelscope.utils.constantr   configurationr   dureader_evalr   r   r   r   Moduler   ri   ry   r   r   r   r   r   r  r<  rL  objectr[  register_moduletext_generationr   rS  r6   r6   r6   r7   <module>   s\     6g&m3\!    @