o
    ߥiUS                     @   sv   d dl mZ d dlZd dlm  mZ dddZG dd deZ	dde
d	 d
fddZG dd deZdddZdS )    )print_functionNc                 C   s   d }t | |||||d}|S )N)global_scorerlogger)TextGenerator)args	tokenizersymbolsmodelr   scorer
translator r   a/home/ubuntu/.local/lib/python3.10/site-packages/modelscope/models/multi_modal/mplug/predictor.pybuild_predictor   s
   r   c                   @   s\   e Zd ZdZ					dddZdd Zdd
dZ			dddZ				dddZdd Z	dS )r   a{  
    Uses a model to translate a batch of sentences.


    Args:
       model (:obj:`onmt.modules.NMTModel`):
          NMT model to use for translation
       fields (dict of Fields): data fields
       beam_size (int): size of beam to use
       n_best (int): number of translations produced
       max_length (int): maximum length output to produce
       global_scores (:obj:`GlobalScorer`):
         object to rescore final translations
       copy_attn (bool): use copy attention during translation
       cuda (bool): use cuda
       beam_trace (bool): trace beam search for debugging
       logger(logging.Logger): logger.
    N c                 C   s   d| _ || _tj dk| _|| _|| _|| _|| _d| _	d| _
|| _|j| _|j| _|j| _|| _| jdk| _d | _| jrHg g g g d| _d S d S )Ng333333?r   e   f   r   )predicted_idsbeam_parent_idsscores	log_probs)alphar   torchcudadevice_countr   r	   vocabr   start_token	end_tokenr   	beam_size
min_length
max_length	dump_beam
beam_trace
beam_accum)selfr   r	   r   r   r   r   r    r   r   r   __init__3   s.   zTextGenerator.__init__c                    sf   g }|D ]}t |}|| |d  jkr|d d } nq fdd|D } j|d}|S )Nc                    s   g | ]}|t  jk r|qS r   )lenr   ).0tr#   r   r   
<listcomp>c   s    z6TextGenerator._build_target_tokens.<locals>.<listcomp> )intappendr   r   	DecodeIdssplit)r#   predtokenstokr   r)   r   _build_target_tokens[   s   
z"TextGenerator._build_target_tokensF   c                 C   s`   |r| j || j| j||dS t  | j || j| j||dW  d   S 1 s)w   Y  dS )aq  
        Translate a batch of sentences.

        Mostly a wrapper around :obj:`Beam`.

        Args:
           batch (:obj:`Batch`): a batch from a dataset object
           data (:obj:`Dataset`): the dataset object
           fast (bool): enables fast beam search (may not support all features)

        Todo:
           Shouldn't need the original dataset.
        r   	do_sampleout_sizeN)_fast_translate_batchr   r   r   no_gradr#   encoder_inputsr6   r7   r   r   r   translate_batchg   s"   
$zTextGenerator.translate_batchc                 C   s   | j || j| j||dS )Nr5   )r8   r   r   r:   r   r   r   translate_batch_scst   s   z"TextGenerator.translate_batch_scstr   c           .      C   s8  | j rJ |r
d}n| j}t|dkr|\}}}	nt|dkr%|\}}d }	|j}
|d}t||dd}t||dd}tj|tj|
d}tjd|| |tj|
d}|	d ur]t|	|dd}ntj	|| dg| j
tj|
d}tjdgtd	g|d   |
d
|}dd t|D }i }dd t|D |d< dd t|D |d< dg| |d< g |d< t|D ]}| j|||ddd}|jd d dd d f }|d}ttj|d|dd}||k rd|d d | jf< | j}|rd}n
d|d  d | }|rA|| jj }t|| jj| jjdd}tjtj|dddd}tj|dd}||dd7 }t|d|}|d|}|d|}n#||dd7 }|| }| d|| }|j!|dd\}}|| }tj"||dd}|#|}||d |d d }|d}t$|%d||ddgd}|&| j} |d |kr| 'd | d d df &d}!| ( r`|d||d}"t| dD ]f}#||# }$|!|# r| |# 'd | |# ) d}%|%D ]}&||$ *||#|&f |"|#|&dd f f q|!|# r,t+||$ dd  dd!}'|'d | D ]}(|(\})}*|d |$ *|) |d |$ *|* qq|!&d) d}+t|+dkrA n1|%d|+}|%d|+}|%d|+}|"%d|+d|d}|d}|%d|}|%d|}qg },g }-|d D ]}(|-*|(d |  qz|d D ]}(|,*|(d |  q|,|-fS )"Nr4         r   dim)dtypedevice)steprB   rC   g        z-inf)rC   c                 S      g | ]}g qS r   r   r'   _r   r   r   r*          z7TextGenerator._fast_translate_batch.<locals>.<listcomp>c                 S   rE   r   r   rF   r   r   r   r*      rH   predictionsc                 S   rE   r   r   rF   r   r   r   r*      rH   r   
gold_scorebatchTnone)encoder_hidden_statesencoder_attention_maskreturn_dict	reductionr%   g@x      ?g      @g      @)top_ktop_pmin_tokens_to_keepnum_samplesfloor)rounding_modec                 S   s   | d S )Nr   r   )xr   r   r   <lambda>$  s    z5TextGenerator._fast_translate_batch.<locals>.<lambda>)keyreverse),r    r   r&   rC   sizetiler   arangelongfullr   tensorfloatrepeatranger	   logitslogsoftmaxviewr   r   r   temperaturetop_k_top_p_filteringrR   rS   multinomialFlog_softmax	unsqueezegatherreshapetopkdivfmodcatindex_selecteqfill_anynonzeror-   sorted).r#   r;   r   r   r6   r7   r   src_featurespadding_mask	input_idsrC   
batch_sizeattention_maskbatch_offsetbeam_offset	alive_seqtopk_log_probs
hypothesesresultsrD   dec_feat_seq
vocab_sizer   r   length_penalty_scorestopk_idstopk_scorescurr_scorestopk_beam_indexbatch_indexselect_indicesis_finishedend_conditionrI   ibfinished_hypjbest_hypeachscorer0   non_finishedpred_idsr   r   r   r   r8      s*  












z#TextGenerator._fast_translate_batchc                 C   s  | j dks	J dg }||d}g }d}||k r| j||d}| di |}|dkr6d| j }d}n|dks<J | |sK|d | j }|}nd}d}|d jd |ksZJ |d dd|ddf }| |rp|d }|dkrt|D ]*}t|| 	 D ]}|||f dk r|||f  |9  < q|||f  |  < qqx|r|dkr|| }t
|||d}tjtj|d	d
ddd}ntj|d	d
}tj|d	d
}t|d	|d	}|| || || |	d|   }tj||d	gd	d
}|
D ]}||| }q|d }| dkrn||k s||kr3|ddd	f |jtjd|
d  tj|dd
}tj|dd
 }|| jdd
}||jdd
 }||jd  }|dkrq||||	}tj||gdd
}|d|dfS )z Generate sequences for each example without beam search (num_beams == 1).
            All returned sequence are generated independantly.
        r4   z-cannot generate >1 sentences in greedy searchN)pastr?   r   rQ   )rR   rS   r%   r@   rU   )rB   r   ) num_keep_bestnewrx   prepare_inputs_for_generationod_labels_len_do_output_pastshapere   settolistrk   r   rl   rm   rh   squeezeargmaxrn   rp   ro   r-   ru   mulner`   maxmasked_fill_toboolstackrc   sum)r#   r~   cur_lenr   r6   rj   rR   rS   repetition_penaltypad_token_ideos_token_idsr   unfinished_sentscur_unfinishedlogprobsr   model_inputsoutputs	token_lennext_token_idxnext_token_logitsr   previous_token
next_tokenr   tokens_to_addeos_token_idsum_logprobspad_lenpadding_idsr   r   r   _generate_no_beam_searchA  s   







K
z&TextGenerator._generate_no_beam_search)NNNNr   )Fr4   )r   Fr4   )
__name__
__module____qualname____doc__r$   r3   r<   r=   r8   r   r   r   r   r   r      s&    
(
 

 2r   
   rQ   Infr4   c           
      C   s   |dkrt t||| d}| t| |d d k }|| |< |dk rgtj| dd\}}tjtj|dddd}||k}	|dkrHd|	d	d |f< |	d	d df 	 |	d	dd f< d|	d
< |	
d||	}|| |< | S )Nr   r%   ).r%   NrQ   T)
descendingr@   r4   .).r   )minr   r]   r   rr   sortcumsumrm   rh   clonescatter)
rf   rR   rS   filter_valuerT   indices_to_removesorted_logitssorted_indicescumulative_probssorted_indices_to_remover   r   r   rk     s2   

rk   c                   @   s    e Zd ZdZdd Zdd ZdS )Translationa  
    Container for a translated sentence.

    Attributes:
        src (`LongTensor`): src word ids
        src_raw ([str]): raw src words

        pred_sents ([[str]]): words from the n-best translations
        pred_scores ([[float]]): log-probs of n-best translations
        attns ([`FloatTensor`]) : attention dist for each translation
        gold_sent ([str]): words from gold translation
        gold_score ([float]): log-prob of gold translation

    c	           	      C   s4   || _ || _|| _|| _|| _|| _|| _|| _d S N)fnamesrcsrc_raw
pred_sentsattnspred_scores	gold_sentrJ   )	r#   r   r   r   r   attnr   tgt_sentrJ   r   r   r   r$     s   
zTranslation.__init__c           	      C   s   d || j}| jd }| jd }d|}|d ||7 }|d |7 }| jdur@d| j}|d ||7 }|d | j7 }t| jd	kr_|d
7 }t| j| jD ]\}}|d ||7 }qR|S )z"
        Log translation.
        z
SENT {}: {}
r   r+   zPRED {}: {}
zPRED SCORE: {:.4f}
NzGOLD {}: {}
zGOLD SCORE: {:.4f}
r4   z
BEST HYP:
z[{:.4f}] {}
)	formatr   r   r   joinr   rJ   r&   zip)	r#   sent_numberoutput	best_pred
best_score	pred_sentr   r   sentr   r   r   rg     s   



zTranslation.logN)r   r   r   r   r$   rg   r   r   r   r   r     s    r   c                 C   s   t tt|  }|dkr"|| |d |d< ||< | | } t |  }|d  |9  < | d}| |ddd|ddd j| } |dkrW| | } | S )z/
    Tiles x on dimension dim count times.
    r   r%   r4   )	listre   r&   r]   permute
contiguousri   	transposerd   )rY   countrA   permr7   rK   r   r   r   r^     s"   

r^   r   )r   )
__future__r   r   torch.nn.functionalnn
functionalrm   r   objectr   rc   rk   r   r^   r   r   r   r   <module>   s   
   !
$4