o
    ॵi                     @   s$  d dl Z d dlmZ d dlmZmZ d dlZd dlZd dl	Z
d dlmZmZmZmZmZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZ d dlm Z m!Z! d dl"m#Z#m$Z$ d dl%m&Z& e
j'dkrye
j(j)Z
e
*  e& Z+dgZ,ej-e$j.ej/dG dd deZ0dS )    N)unescape)AnyDict)MosesDetokenizerMosesDetruecaserMosesPunctNormalizerMosesTokenizerMosesTruecaser)SentencePieceProcessor)beam_search_ops)	Pipelines)Model)
OutputKeys)Pipeline)	PIPELINES)ConfigConfigFields)	ModelFileTasks)
get_loggerz2.0AutomaticPostEditingPipeline)module_namec                       s   e Zd Zdef fddZdedeeef fddZdeeef deeef fdd	Zd
eeef deeef fddZ	  Z
S )r   modelc                    s  t  jd*d|i| |}ttj|tj| _	tj|| j	t
j d }tdd tt|dddD | _tdd tt|dddD | _| j	t
j d	d
| _| j	t
ji dd}|rcdn| j	t
ji dd| _| jd
kr}t| jd | _t  tjdd}d|j_tj|d| _tjj| jtj jj!j"g| t# }|$d| _%|$d| _&|$d| _'|$d| _(|$d}|$d}	tj)t*dd |tj+d}
t*dd |	}|
|d| _,t- }t. }| j/||g tjj| jtj jj!j"g| | j	t
j d | _0| j	t
j d | _1| j	t
j d d!t2| j0d"t2| j1d"tj|| j	t
j d# }t3|d$tj|| j	t
j d% }t4 | fd&d| _5fd'd| _6t7 t8| j1d"  fd(d| _9d)S )+zwBuild an automatic post editing pipeline with a model dir.

        @param model: Model path for saved pb file
        r   vocabc                 S   s   g | ]
\}}|  |fqS  strip.0iwr   r   l/home/ubuntu/.local/lib/python3.10/site-packages/modelscope/pipelines/nlp/automatic_post_editing_pipeline.py
<listcomp>0       z9AutomaticPostEditingPipeline.__init__.<locals>.<listcomp>rutf8)encodingc                 S   s   g | ]
\}}||  fqS r   r   r   r   r   r!   r"   2   r#   unk_id	strip_unkT 	unk_tokenz<unk>   )allow_soft_placement)configzPlaceholder:0zPlaceholder_1:0zPlaceholder_2:0zPlaceholder_3:0zenc2enc/decoder/transpose:0zenc2enc/decoder/Minimum:0c                 S      | d S Nr   r   xr   r   r!   <lambda>P       z7AutomaticPostEditingPipeline.__init__.<locals>.<lambda>)dtypec                 S   r/   r0   r   r1   r   r   r!   r3   Q   r4   )
output_idsoutput_lenssrc_langtgt_langtokenize_escapeF)lang	truecaser)	load_fromsentencepiecec              
      s&   d  jj| ddddS N T)
return_strescaperA   joinencode_as_piecestruecasetokenizer1   )spsrc_tokenizer
tok_escaper<   r   r!   r3   i       c              
      s&   d j j| ddddS r?   rD   r1   )mt_tokenizerrI   rK   r<   r   r!   r3   o   rL   c                    s<     j| dddddd dd  dd S )Nu    ▁z@@r@   r*   r,   TrC   )
detokenize
detruecasereplacer   splitr1   )detokenizerdetruecaserr   r!   r3   y   s    
Nr   ):super__init__r   	from_fileospathrE   r   CONFIGURATIONcfgr   preprocessordict	enumerateopenr   vocab_reversegetr'   postprocessorr+   lentfreset_default_graphConfigProtogpu_optionsallow_growthSession_sessionsaved_modelloaderloadpythontag_constantsSERVINGget_default_graphget_tensor_by_nameinput_src_id_placeholderinput_src_len_placeholderinput_mt_id_placeholderinput_mt_len_placeholdercastmap_fnint64outputglobal_variables_initializerlocal_variables_initializerrun	_src_lang	_tgt_langr   r	   r
   src_preprocessmt_preprocessr   r   postprocess_fun)selfr   kwargs
export_dirjoint_vocab_filer)   	tf_configdefault_graphoutput_id_beamoutput_len_beam	output_id
output_leninit
local_inittruecase_modelsp_model	__class__)rR   rS   rM   rI   rJ   rK   r<   r!   rU   %   s   





z%AutomaticPostEditingPipeline.__init__inputreturnc                    s   | dd\}} | |}}t fdd|   D g}t fdd|   D g}dd |D }dd |D }	 j| j| j| j	|	i}
|
S )Nr,   c                       g | ]
} j | jqS r   r   r`   r'   r   r    r   r   r!   r"      r#   z;AutomaticPostEditingPipeline.preprocess.<locals>.<listcomp>c                    r   r   r   r   r   r   r!   r"      r#   c                 S      g | ]}t |qS r   rb   r   r2   r   r   r!   r"          c                 S   r   r   r   r   r   r   r!   r"      r   )
rQ   r   r   nparrayr   rr   rt   rs   ru   )r   r   srcmtsrc_spmt_spinput_src_idsinput_mt_idsinput_src_lensinput_mt_lens	feed_dictr   r   r!   
preprocess   s    z'AutomaticPostEditingPipeline.preprocessc                 C   sD   | j   | j j| j|d}|W  d    S 1 sw   Y  d S )N)r   )ri   
as_defaultr|   ry   )r   r   sess_outputsr   r   r!   forward   s   $z$AutomaticPostEditingPipeline.forwardinputsc                    sZ   |d d |d d }}|d |d  }d  fdd|D } |}tj|i}|S )Nr6   r   r7   r,   r@   c                    r   r   )r_   r`   r+   )r   widr   r   r!   r"      s    z<AutomaticPostEditingPipeline.postprocess.<locals>.<listcomp>)rE   r   r   TRANSLATION)r   r   r6   r   output_tokenspost_editing_outputresultr   r   r!   postprocess   s   

z(AutomaticPostEditingPipeline.postprocess)__name__
__module____qualname__strrU   r   r   r   r   r   __classcell__r   r   r   r!   r   !   s
    Z"*)1rW   htmlr   typingr   r   jiebanumpyr   
tensorflowrc   
sacremosesr   r   r   r   r	   r>   r
   %tensorflow.contrib.seq2seq.python.opsr   modelscope.metainfor   modelscope.models.baser   modelscope.outputsr   modelscope.pipelines.baser   modelscope.pipelines.builderr   modelscope.utils.configr   r   modelscope.utils.constantr   r   modelscope.utils.loggerr   __version__compatv1disable_eager_executionlogger__all__register_moduletranslationautomatic_post_editingr   r   r   r   r!   <module>   s4   
