o
    ॵi'                     @   s   d dl Z d dlmZmZmZmZ d dlZd dlmZ d dl	m
Z
 d dlmZ d dlmZmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZmZ dZdZdZdZdZdgZ ej!ej"ej"dG dd deZ#dS )    N)AnyDictOptionalUnion)	Pipelines)Model)T5Chat)
OutputKeysTokenGeneratorOutput)Pipeline)	PIPELINES)Preprocessor)"remove_space_between_chinese_chars)	ModelFileTasksu~   假设我和你正在进行对话，请你给我得体、准确、友好的回复。以下是我们的对话内容。{context}u   假设我和你正在进行对话，请你给我得体、准确、友好的回复。以下是我们的对话内容。{context}#以下是在此之前我们的对话内容，可作为回复时的参考。{history}u   假设我和你正在进行对话，请你给我得体、准确、友好的回复。以下是我们的对话内容。{context}#以下是和对话相关的知识，请你参考该知识进行回复。{knowledge}u  假设我和你正在进行对话，请你给我得体、准确、友好的回复。以下是我们的对话内容。{context}#假设以下是你对我所了解的信息，请你参考该信息并避免你的回复和该信息矛盾，信息如下：{user_profile}u   假设我和你正在进行对话，请你给我得体、准确、友好的回复。以下是我们的对话内容。{context}#假设以下是你的人物设定，请你参考该信息并避免你的回复和该信息矛盾，信息如下：{bot_profile}FidDialoguePipeline)module_namec                	       s   e Zd Z				ddeeef dee dedef fdd	Zd
d Z	de
eef fddZde
eef de
eef fddZdd Zdd Zdede
eef fddZ  ZS )r   NgpuTmodelpreprocessorconfig_filedevicec           	         s   t  jd
|||||d| t| jt| _|du r&tj| jjfi || _	| jsFg }t
ddD ]}|d| d q0| j	jjd|i t| jtsTJ dtj | j| j| _| j  d	| _dS )u  Use `model` and `preprocessor` to create a fid-dialogue pipeline for prediction.

        Args:
            model (str or Model): Supply either a local model dir which supported the text generation task,
            or a model id from the model hub, or a torch model instance.
            preprocessor (Preprocessor): An optional preprocessor instance, please make sure the preprocessor fits for
            the model if supplied.
            kwargs (dict, `optional`):
                Extra kwargs passed into the preprocessor's constructor.
            Examples:
                >>> from modelscope.pipelines import pipeline
                >>> from modelscope.utils.constant import Tasks
                >>> pipeline_ins = pipeline(Tasks.fid_dialogue, model='damo/plug-dialogue', model_revision='v1.0.1')
                >>> input = {
                >>>    "history": "你好[SEP]你好，我是小达，很高兴认识你！[SEP]李白是谁",
                >>>    "bot_profile": "我是小达;我是女生;我是单身;我今年21岁;我生日是2001年11月11日",
                >>>    "knowledge": "唐代诗人李白（701年—762年12月）,字太白,号青莲居士,又号“谪仙人”[SEP]李白（公元701年—公元762年），字太白",
                >>>    "user_profile": "你是小明"
                >>> }
                >>> result = pipeline_ins(input)
                >>> print(result)
        )r   r   r   r   auto_collateN   d   z[unused]additional_special_tokensz,please check whether model config exists in [SEP] )super__init__
isinstancer   r   is_t5r   from_pretrained	model_dirpreprocessor_tokenizerrangeappendnlp_tokenizer	tokenizeradd_special_tokensr   r   CONFIGURATIONtor   evalSEP)	selfr   r   r   r   r   kwargsunused_listi	__class__r   b/home/ubuntu/.local/lib/python3.10/site-packages/modelscope/pipelines/nlp/fid_dialogue_pipeline.pyr    #   s:   



zFidDialoguePipeline.__init__c                 K   s.   | di }| di }| di }|||fS )Npreprocess_paramsforward_paramspostprocess_params)get)r/   pipeline_parametersr6   r7   r8   r   r   r5   _sanitize_parameters[   s   
z(FidDialoguePipeline._sanitize_parametersinputsc                 K   s@   t   | jj|fi |W  d    S 1 sw   Y  d S )N)torchno_gradr   generate)r/   r<   r7   r   r   r5   forwarda   s   
$zFidDialoguePipeline.forwardreturnc                 K   s4  d}d}d|v r| d}d|v r| d}d|v r|d nd}t|dkr*tdd	|v r2|d	 nd}d
|v r<|d
 nd}d|v rF|d nd}|| j}|| d  }	| |	}	|d |  }| |}|| j}g }
|rt|dkr|
tj	|	|d |rt|dkr|D ]}|
t
j	|	|d q|rt|dkr|
tj	|	|d |rt|dkr|
tj	|	|d |
s|
tj	|	d tt|
D ]*}| jr|
| dddddd|
|< q|
| dddddd|
|< q| jd|
idd|ddd d| j}d|tj| ji}|S )Ni,     max_encoder_lengthcontext_turnhistory r   zhistory is necessary!	knowledgeuser_profilebot_profile)contextrE   )rJ   rG   )rJ   rH   )rJ   rI   )rJ   
u   ▁<extra_id_22>	u   ▁<extra_id_33>  u   ▁<extra_id_23>
[unused22]
[unused33]
[unused23]src_txtTpt)padding
truncation
max_lengthreturn_tensors	input_ids)poplen	Exceptionsplitr.   process_contextprocess_historyr'   history_templateformatknowledge_templateuser_profile_templatebot_profile_templatecontext_templater&   r"   replacer%   	unsqueezer,   r   r=   int64)r/   r<   r6   rC   rD   rE   rG   rH   rI   rJ   model_inputknowr2   rW   
input_dictr   r   r5   
preprocesse   s   




zFidDialoguePipeline.preprocessc                 C   s|   d}t t|d ddD ],}t|| dkr&|| d dvr&|| d ||< |d ||  ||< |dkr6dnd}qd	|S )
N   我r   r   Y   !"#$%&'()*+,-./:;<=>?@[\]^_`{|}~、。，？！；：“”（）【】《》〈〉……   。   ：   你rF   r&   rY   join)r/   context_listsubjectr2   r   r   r5   r\         
z#FidDialoguePipeline.process_contextc                 C   s|   d}t t|d ddD ],}t|| dkr&|| d dvr&|| d ||< |d ||  ||< |dkr6dnd}qd	|S )
Nrp   r   rl   r   rm   rn   ro   rk   rF   rq   )r/   history_listrt   r2   r   r   r5   r]      ru   z#FidDialoguePipeline.process_historyc                 K   sx   |j    }| jj|d | jd}ddddddddddd
}| D ]
\}}|||}q%| js7t	|}t
j|iS )Nr   )skip_special_tokensrK   rL   rM   rF   )
z<extra_id_22>z<extra_id_33>z<extra_id_23>rN   rO   rP   r   z[CLS]z[PAD]z[UNK])	sequencesdetachcputolistr%   decoder"   itemsrd   r   r	   TEXT)r/   r<   r8   
hypothesesresponsetoken_mappingstr   r   r5   postprocess   s(   

zFidDialoguePipeline.postprocess)NNr   T)__name__
__module____qualname__r   r   strr   r   r    r;   r   r   r@   rj   r\   r]   r
   r   __classcell__r   r   r3   r5   r      s0    
8

F


)$retypingr   r   r   r   r=   modelscope.metainfor   modelscope.models.baser   ,modelscope.models.nlp.fid_T5.text_generationr   modelscope.outputsr	   r
   modelscope.pipelines.baser   modelscope.pipelines.builderr   modelscope.preprocessorsr   modelscope.utils.chinese_utilsr   modelscope.utils.constantr   r   rc   r^   r`   ra   rb   __all__register_modulefid_dialoguer   r   r   r   r5   <module>   s,   