o
    ॵi                     @   s  d dl mZ d dlmZmZ d dlZd dlZd dl	Z
d dlmZmZmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZmZ d dl m!Z! e
j"dkrie
j#j$Z
e
%  e! Z&dgZ'ej(ej)ej*dG dd deZ+dS )    N)AnyDict)MosesDetokenizerMosesPunctNormalizerMosesTokenizer)	apply_bpe)	Pipelines)Model)
OutputKeys)Pipeline)	PIPELINES)Config)	ModelFileTasks)
get_loggerz2.0TranslationPipeline)module_namec                       s   e Zd Zdef fddZdedeeef fddZdeeef deeef fdd	Z	d
eeef deeef fddZ
  ZS )r   modelc                    s8  t  jdd|i| t| jtsJ dtj | jj}t	  t
t
|tjd| _tt
|tj| _t
|| jd d d | _tdd tt| jd	d
D | _t
|| jd d d | _tdd tt| jd	d
D | _tjdd}d|j_tj|d| _tjtjddgdd| _i | _ | jd d | _!| jd d | _"t
|| jd d d | _#| j!dkrt$| _%nt&| j!d| _'t(| j!d| _%t)| j"d| _*t+,t| j#d	d
| _-| | j}| j .| | j/ #}t01d| j  tj23t4 | _5| j56|| j W d   dS 1 sw   Y  dS )zBuild a translation pipeline with a model dir or a model id in the model hub.

        Args:
            model: A Model instance.
        r   z,please check whether model config exists in zckpt-0dataset	src_vocabfilec                 S   s   g | ]
\}}|  |fqS  strip.0iwr   r   a/home/ubuntu/.local/lib/python3.10/site-packages/modelscope/pipelines/nlp/translation_pipeline.py
<listcomp>6       z0TranslationPipeline.__init__.<locals>.<listcomp>zutf-8)encoding	trg_vocabc                 S   s   g | ]
\}}||  fqS r   r   r   r   r   r   r   :   r    T)allow_soft_placement)configN
input_wids)dtypeshapenamepreprocessorsrc_langtgt_langsrc_bpezh)langzloading model from r   )7super__init__
isinstancer   r	   r   CONFIGURATION	model_dirtfreset_default_graphospjoinTF_CHECKPOINT_FOLDER
model_pathr   	from_filecfg_src_vocab_pathdict	enumerateopen
_src_vocab_trg_vocab_path_trg_rvocabConfigProtogpu_optionsallow_growthSession_sessionplaceholderint64r%   output	_src_lang	_tgt_lang_src_bpe_pathjieba_tokr   _punct_normalizerr   r   _detokr   BPE_bpeupdate
as_defaultloggerinfotrainSaverglobal_variablesmodel_loaderrestore)selfr   kwargs	tf_configrJ   sess	__class__r   r   r0   "   s^   



$zTranslationPipeline.__init__inputreturnc                    s   | d}jdkrfdd|D }dd |D }n+fdd|D }djdv r0jd	ks:jd	kr<jdv r<d
fdd|D }fdd|D }tdd |D  t fdd|D }d|i}|S )N<SENT_SPLIT>r-   c                       g | ]} j |qS r   )rO   cutr   itemr]   r   r   r   b       z2TranslationPipeline.preprocess.<locals>.<listcomp>c                 S   s   g | ]	}d  t|qS ) )r7   listrh   r   r   r   r   c   s    c                    rf   r   )rP   	normalizerh   rj   r   r   r   e   rk   T)esfrenFc                    s   g | ]}j j|d  dqS )T)
return_straggressive_dash_splits)rO   tokenizerh   )rs   r]   r   r   r   j   s    c                    s    g | ]} j |  qS r   )rS   process_liner   splitrh   rj   r   r   r   r   s    c                 S   s   g | ]}t |qS r   lenrh   r   r   r   r   u   s    c                    s0   g | ]}fd d|D dg t |   qS )c                    s2   g | ]}| j v r j | n jd  d d qS )r   src_vocab_size   )r@   r;   )r   r   rj   r   r   r   v   s
    z=TranslationPipeline.preprocess.<locals>.<listcomp>.<listcomp>r   rw   rh   )
MAX_LENGTHr]   r   r   r   v   s    
	input_ids)rv   rK   rL   maxnparray)r]   rc   	input_tok	input_bper|   resultr   )r{   rs   r]   r   
preprocess^   s*   


zTranslationPipeline.preprocessc                 C   sR   | j   | j|d i}| j j| j|d}|W  d    S 1 s"w   Y  d S )Nr|   )	feed_dict)rG   rU   r%   runrJ   )r]   rc   r   sess_outputsr   r   r   forward}   s
   $zTranslationPipeline.forwardinputsc                    s   |d j \}}}g }t|D ]9}|d | }t|d dg }|d |d }d fdd|D dddd}	| j|		  qd	|}t
j|i}
|
S )
Noutput_seqsr   rl   c                    s$   g | ]}| j v r j | nd qS )z<unk>)rB   )r   widrj   r   r   r      s    z3TranslationPipeline.postprocess.<locals>.<listcomp>z@@  z@@re   )r'   rangerm   indexr7   replaceappendrQ   
detokenizerv   r
   TRANSLATION)r]   r   xyztranslation_outr   r   widstranslationr   r   rj   r   postprocess   s   

zTranslationPipeline.postprocess)__name__
__module____qualname__r	   r0   strr   r   r   r   r   __classcell__r   r   ra   r   r      s
    <"*),os.pathpathr6   typingr   r   rN   numpyr~   
tensorflowr4   
sacremosesr   r   r   subword_nmtr   modelscope.metainfor   modelscope.models.baser	   modelscope.outputsr
   modelscope.pipelines.baser   modelscope.pipelines.builderr   modelscope.utils.configr   modelscope.utils.constantr   r   modelscope.utils.loggerr   __version__compatv1disable_eager_executionrV   __all__register_moduler   csanmt_translationr   r   r   r   r   <module>   s0   
