o
    ॵij                     @   sZ  d dl Z d dlmZmZmZmZmZ d dlZd dlm	Z	 d dl
mZ d dlmZ d dlmZ d dlmZmZmZ d dlmZmZ d d	lmZ d d
lmZ d dlmZ d dlmZmZ d dl m!Z!m"Z" d dl#m$Z$ d dl%m&Z& d dl'm(Z( e$ Z)g dZ*ej+ej,ej,dG dd dee&Z-ej+ej.ej/dej+ej.ej0dej+ej.ej1dej+ej.ej.dG dd de-Z2ej+ej3ddG dd deZ4ej+ej3ddG dd deZ5ej+ej3ddG dd  d eZ6ej+ej,d!dG d"d# d#eZ7ej+ej,d$dG d%d& d&eZ8ej+ej,ej9dG d'd( d(e-Z:ej+ej3ej;dG d)d* d*eZ<dS )+    N)AnyDictListOptionalUnion)GenerationConfig)snapshot_download)	Pipelines)Model)ModelOutputBase
OutputKeysTokenGeneratorOutput)PipelineTensor)	PIPELINES)Preprocessor)"remove_space_between_chinese_chars)	ModelFileTasks)Configread_config)
get_logger)PipelineStreamingOutputMixin)is_on_same_device)TextGenerationPipelineTextGenerationT5PipelineChatGLM6bTextGenerationPipeline!ChatGLM6bV2TextGenerationPipelineQWenChatPipelineQWenTextGenerationPipelineSeqGPTPipelineLlama2TaskPipeline)module_namec                	       s   e Zd Z					ddeeef dee dedef fd	d
Zdd Z	dee
eef ef de
eef fddZdefddZdefddZdefddZdee
eef ef de
eef fddZ  ZS )r   NgpuTsentencemodelpreprocessorconfig_filedevicec           	         s   t  jd||||||dd|di d| t| jts'J dtj |du r9tj	| jj
fd|i|| _| j  |dd| _| jdu r\t| jd	r\t| jj
}|d| _| jdu rdd
| _d| _dS )uB  Use `model` and `preprocessor` to create a generation pipeline for prediction.

        Args:
            model (str or Model): Supply either a local model dir which supported the text generation task,
            or a model id from the model hub, or a torch model instance.
            preprocessor (Preprocessor): An optional preprocessor instance, please make sure the preprocessor fits for
            the model if supplied.
            kwargs (dict, `optional`):
                Extra kwargs passed into the preprocessor's constructor.

        Examples:
            >>> from modelscope.pipelines import pipeline
            >>> pipeline_ins = pipeline(task='text-generation',
            >>>    model='damo/nlp_palm2.0_text-generation_chinese-base')
            >>> sentence1 = '本文总结了十个可穿戴产品的设计原则，而这些原则，同样也是笔者认为是这个行业最吸引人的地方：'
            >>>     '1.为人们解决重复性问题；2.从人开始，而不是从机器开始；3.要引起注意，但不要刻意；4.提升用户能力，而不是取代'
            >>> print(pipeline_ins(sentence1))
            >>> # Or use the dict input:
            >>> print(pipeline_ins({'sentence': sentence1}))

            To view other examples plese check tests/pipelines/test_text_generation.py.
        compileFcompile_options)r%   r&   r'   r(   auto_collater)   r*   z,please check whether model config exists in Nfirst_sequencepostprocessor	model_dirdecode )super__init__pop
isinstancer%   r
   r   CONFIGURATIONr   from_pretrainedr.   r&   evalr-   hasattrr   get
has_logged)	selfr%   r&   r'   r(   r+   r,   kwargscfg	__class__r0   e/home/ubuntu/.local/lib/python3.10/site-packages/modelscope/pipelines/nlp/text_generation_pipeline.pyr2   '   s<   






zTextGenerationPipeline.__init__c                 K   
   i |i fS Nr0   r;   pipeline_parametersr0   r0   r@   _sanitize_parameters^      
z+TextGenerationPipeline._sanitize_parametersinputsreturnc                 K   s   t  G z| jj|fi |W W  d    S  tyJ } z'| js.td| d d| _| jjdi ||W  Y d }~W  d    S d }~ww 1 sNw   Y  d S )Nz.When inputs are passed directly, the error is z,, which can be ignored if it runs correctly.Tr0   )torchno_gradr%   generateAttributeErrorr:   loggerwarning)r;   rG   forward_paramser0   r0   r@   forwarda   s"   
 zTextGenerationPipeline.forwardc                 C   s   | j j| ddS )NTskip_special_tokensr&   r/   tolistr;   rG   r0   r0   r@   r/   o   s   zTextGenerationPipeline.decodec                 C   s   | j | S rB   rT   rV   r0   r0   r@   sentence_pieces   s   z%TextGenerationPipeline.sentence_piecec                 C   s,   | j | }|ddddddS )Nz<q>z. z<mask>z</s> )r&   r/   rU   replace)r;   rG   decodedr0   r0   r@   robertav   s   zTextGenerationPipeline.robertac                 K   sX   t |ttfr|d }t |tst|jdkr|d }t| | j|}t|}t	j
|iS )zprocess the prediction results

        Args:
            inputs (Dict[str, Any]): _description_

        Returns:
            Dict[str, str]: the prediction results
        	sequences   r   )r4   dictr   listlenshapegetattrr-   r   r   TEXT)r;   rG   postprocess_paramsrZ   textr0   r0   r@   postprocess{   s   
z"TextGenerationPipeline.postprocess)NNr#   Tr$   )__name__
__module____qualname__r   r
   strr   r   r2   rE   r   r   r   rQ   r/   rW   r[   r   rf   __classcell__r0   r0   r>   r@   r   #   s6    
7


r   c                       s|   e Zd Z		ddeeef dee f fddZdd Z	de
eef f fd	d
Zde
eef de
eef fddZ  ZS )r   Nr%   r&   c                    sh   t  j||fi | || _| t| jdd d| _| t| jdd d| _| t| jdd d| _d S )Nr.   task_specific_params
min_length
max_length)	r1   r2   sub_task_parse_specific_model_paramsrb   r%   rl   rm   rn   )r;   r%   r&   ro   r<   r>   r0   r@   r2      s   
z!TextGenerationT5Pipeline.__init__c                 C   sL   |d u rd S t |}|d| }|d u r$t tj|d}||}|S )Nzmodel.zconfig.json)r   safe_getospathjoin)r;   r.   keyr=   paramsr0   r0   r@   rp      s   
z5TextGenerationT5Pipeline._parse_specific_model_paramsrH   c                    s   t |tstdt| | jd ur9| jp| jjj}|| jv r9| jj	| j|  d| j| v r9| j| j
| }t j|fi |S )NzNot supported input type: prefix)r4   rj   
ValueErrortyperl   ro   r%   pipelineconfigupdaterw   r1   
preprocess)r;   rG   preprocess_paramsro   r>   r0   r@   r}      s   


z#TextGenerationT5Pipeline.preprocessrG   c                 K   s~   | d| j}| d| j}|d ur||d< |d ur||d< t  | jjdi ||W  d    S 1 s8w   Y  d S )Nrm   rn   r0   )r9   rm   rn   rI   rJ   r%   rK   )r;   rG   rO   rm   rn   r0   r0   r@   rQ      s   
$z TextGenerationT5Pipeline.forward)NN)rg   rh   ri   r   r
   rj   r   r   r2   rp   r   r   r}   rQ   rk   r0   r0   r>   r@   r      s    

r   zchatglm6b-text-generation)	group_keyr"   c                       ~   e Zd Z		ddeeef f fddZdd Zdeee	f fd	d
Z
dedeee	f fddZdeee	f fddZ  ZS )r   NFr%   c                    s   ddl m} t|tr'tj|st|n|}||	 }t
j r'| }|d ur0||}|r6| }|| _| j  t jdd|i| d S )Nr   )ChatGLMForConditionalGenerationr%   r0   )-modelscope.models.nlp.chatglm.text_generationr   r4   rj   rr   rs   existsr   r6   halfrI   cudais_availablequantizebfloat16r%   r7   r1   r2   )r;   r%   quantization_bituse_bf16r<   r   r.   r>   r0   r@   r2      s,   




z(ChatGLM6bTextGenerationPipeline.__init__c                 K   rA   rB   r0   rC   r0   r0   r@   rE      rF   z4ChatGLM6bTextGenerationPipeline._sanitize_parametersrH   c                 K      |S rB   r0   r;   rG   r~   r0   r0   r@   r}         z*ChatGLM6bTextGenerationPipeline.preprocessrG   c                 K   s   | | | j|S rB   )r|   r%   chatr;   rG   rO   r0   r0   r@   rQ      s   
z'ChatGLM6bTextGenerationPipeline.forwardc                 K   r   rB   r0   r;   inputr<   r0   r0   r@   rf      r   z+ChatGLM6bTextGenerationPipeline.postprocessNFrg   rh   ri   r   r
   rj   r2   rE   r   r   r}   rQ   rf   rk   r0   r0   r>   r@   r      s    
r   zchatglm2_6b-text-generationc                       r   )r   NFr%   c                    s$  ddl m} |dd}t|trR|dd }tj|s#t||dn|}d }	|	ds1|	dr5ddi}	|d	|	}
d }|rBt
j}|d
|}tj|d|
|d}n|	ds\|	drdt|rd|  |rj|  |d urs||}|| _| j  |j| jjdd| _t jdd|i| d S )Nr   AutoTokenizerr(   r#   revision)r   r   rX   
device_maptorch_dtypeT)trust_remote_coder   r   r   r%   r0   )
modelscoper   r9   r4   rj   rr   rs   r   r   
startswithrI   r   r
   r6   r   r   r   r%   r7   r.   	tokenizerr1   r2   )r;   r%   r   r   r<   r   r(   r   r.   default_device_mapr   default_torch_dtyper   r>   r0   r@   r2      sN   



z*ChatGLM6bV2TextGenerationPipeline.__init__c                 K   rA   rB   r0   rC   r0   r0   r@   rE   %  rF   z6ChatGLM6bV2TextGenerationPipeline._sanitize_parametersrH   c                 K   r   rB   r0   r   r0   r0   r@   r}   (  r   z,ChatGLM6bV2TextGenerationPipeline.preprocessrG   c                 K   s   | | | j|| jS rB   )r|   r%   r   r   r   r0   r0   r@   rQ   ,  s   
z)ChatGLM6bV2TextGenerationPipeline.forwardc                 K   r   rB   r0   r   r0   r0   r@   rf   1  r   z-ChatGLM6bV2TextGenerationPipeline.postprocessr   r   r0   r0   r>   r@   r      s    
)r   z	qwen-chatc                       s   e Zd Zdeeef f fddZdd Zdeee	f fddZ
d	eeef deee	f fd
dZdeee	f fddZ  ZS )r   r%   c                    s  ddl m}m} |dtj}|dd}|dd}|dd	}|rDttj d d
 d  d tj	 }	 fddt
|	D  nd  |dksO|tjkrRd}
nd}
t|trx|j||dd| _|j|||d|
d | _tj|dd| j_t jdd| ji| d| _d S )Nr   AutoModelForCausalLMr   r   r   autouse_max_memoryFmodel_revisionzv.1.0.5   @   GBc                       i | ]}| qS r0   r0   .0i
max_memoryr0   r@   
<dictcomp>B      z-QWenChatPipeline.__init__.<locals>.<dictcomp>bf16Tr   r   )r   r   r   fp16r   r%   r0   )r   r   r   r9   rI   r   intr   mem_get_infodevice_countranger4   rj   r6   r   r7   r%   r   generation_configr1   r2   _model_preparer;   r%   r<   r   r   r   r   r   r   n_gpusr   r>   r   r@   r2   8  s>    



zQWenChatPipeline.__init__c                 K   rA   rB   r0   rC   r0   r0   r@   rE   Z  rF   z%QWenChatPipeline._sanitize_parametersrH   c                 K   r   rB   r0   r   r0   r0   r@   r}   ]  r   zQWenChatPipeline.preprocessrG   c                 K   st   t |tr|dd }|dd }n|}|dd }|dd}|dd}| j| j||||}|d |d d	S )
Nre   historysystemzYou are a helpful assistant.append_historyTr   r]   )responser   )r4   r   r9   r%   r   r   )r;   rG   rO   re   r   r   r   resr0   r0   r@   rQ   a  s   
zQWenChatPipeline.forwardc                 K   r   rB   r0   r   r0   r0   r@   rf   p  r   zQWenChatPipeline.postprocessr   r0   r0   r>   r@   r   5  s    "

r   zqwen-text-generationc                       x   e Zd Zdeeef f fddZdd Zdeee	f fddZ
d	edeee	f fd
dZdeee	f fddZ  ZS )r   r%   c                    s$  ddl m}m} |dtj}|dd}|dd}|dd	}|rDttj d d
 d  d tj	 }	 fddt
|	D  nd  |dksO|tjkrRd}
nd}
t|trw|j|||d|
d | _|j||dd| _t|| j_n
|| _|dd | _t jdd| ji| d| _d S )Nr   r   r   r   r   r   Fr   zv.1.0.4r   r   r   c                    r   r0   r0   r   r   r0   r@   r     r   z7QWenTextGenerationPipeline.__init__.<locals>.<dictcomp>r   T)r   r   r   r   r   r   r%   r0   )r   r   r   r9   rI   r   r   r   r   r   r   r4   rj   r6   r7   r%   r   r   r   r1   r2   r   r   r>   r   r@   r2   x  sB    



z#QWenTextGenerationPipeline.__init__c                 K   rA   rB   r0   rC   r0   r0   r@   rE     rF   z/QWenTextGenerationPipeline._sanitize_parametersrH   c                 K   r   rB   r0   r   r0   r0   r@   r}     r   z%QWenTextGenerationPipeline.preprocessrG   c                 K   s@   | j |ddd}tj| j j| jjdi | d ddiS )Npt)return_tensorszcuda:0r   TrR   r0   )r   tor   rc   r/   r%   rK   cpur   r0   r0   r@   rQ     s   z"QWenTextGenerationPipeline.forwardc                 K   r   rB   r0   r   r0   r0   r@   rf     r   z&QWenTextGenerationPipeline.postprocessr   r0   r0   r>   r@   r   t  s    %
r   seqgptc                       r   )r    r%   c                    sn   ddl m} t|trtj|st|n|}t	|}|| _
| j
  |	|| _t jdd|i| d S )Nr   r   r%   r0   )modelscope.utils.hf_utilr   r4   rj   rr   rs   r   r   r
   r6   r%   r7   r   r1   r2   )r;   r%   r<   r   r.   r>   r0   r@   r2     s   



zSeqGPTPipeline.__init__c                 K   rA   rB   r0   rC   r0   r0   r@   rE     rF   z#SeqGPTPipeline._sanitize_parametersrH   c                 K   r   rB   r0   r   r0   r0   r@   r}     r   zSeqGPTPipeline.preprocesspromptc                 K   sv   | j ||dd ddddd}|j| jj}| jj|ddd	d
}| j j|dd}|d }|t|d  }t	j
|iS )N	gen_tokenrX   r   Ti   )r   padding
truncationrn      F   )	num_beams	do_samplemax_new_tokensrR   r   )r   r9   	input_idsr   r%   r(   rK   batch_decoder`   r   rc   )r;   r   rO   r   outputsdecoded_sentencesdecoded_sentencer0   r0   r@   rQ     s"   
zSeqGPTPipeline.forwardc                 K   r   rB   r0   r   r0   r0   r@   rf     r   zSeqGPTPipeline.postprocessr   r0   r0   r>   r@   r      s    r    c                       s   e Zd Z				d$deeef dededef fdd	Zd
eee	f fddZ
dd Z								d%dededededededededed
eee	f fd d!Zd
eee	f fd"d#Z  ZS )&r!   Nr#   Tr%   r&   r'   r(   c                    sH   t j|dtjd| _ddlm} ||| _t j	dd| ji| dS )u  Use `model` and `preprocessor` to create a generation pipeline for prediction.

        Args:
            model (str or Model): Supply either a local model dir which supported the text generation task,
            or a model id from the model hub, or a torch model instance.
            preprocessor (Preprocessor): An optional preprocessor instance, please make sure the preprocessor fits for
            the model if supplied.
            kwargs (dict, `optional`):
                Extra kwargs passed into the preprocessor's constructor.
        Examples:
            >>> from modelscope.utils.constant import Tasks
            >>> import torch
            >>> from modelscope.pipelines import pipeline
            >>> from modelscope import snapshot_download, Model
            >>> model_dir = snapshot_download("modelscope/Llama-2-13b-chat-ms",
            >>>     ignore_file_pattern = [r'\w+\.safetensors'])
            >>> pipe = pipeline(task=Tasks.text_generation, model=model_dir, device_map='auto',
            >>>     torch_dtype=torch.float16)
            >>> inputs="咖啡的作用是什么？"
            >>> result = pipe(inputs,max_length=200, do_sample=True, top_p=0.85,
            >>>     temperature=1.0, repetition_penalty=1., eos_token_id=2, bos_token_id=1, pad_token_id=0)
            >>> print(result['text'])

            To view other examples plese check tests/pipelines/test_llama2_text_generation_pipeline.py.
        r   r   r   r   Llama2Tokenizerr%   Nr0   )
r
   r6   rI   float16r%   modelscope.models.nlp.llama2r   r   r1   r2   )r;   r%   r&   r'   r(   r+   r<   r   r>   r0   r@   r2     s    zLlama2TaskPipeline.__init__rH   c                 K   r   rB   r0   r   r0   r0   r@   r}     r   zLlama2TaskPipeline.preprocessc                 K   rA   rB   r0   rC   r0   r0   r@   rE     rF   z'Llama2TaskPipeline._sanitize_parameters   F?333333?      ?r   r]   r   rG   rn   r   top_ptemperaturerepetition_penaltyeos_token_idbos_token_idpad_token_idc
                 K   sd   i }| j |ddd}| jj|jdf||||||||	d|
}| j j|dddd }||d	< |S )
NFr   )add_special_tokensr   r   )rn   r   r   r   r   r   r   r   T)rS   clean_up_tokenization_spacesr   re   )r   r%   rK   r   r   r   )r;   rG   rn   r   r   r   r   r   r   r   rO   outputgenerate_idsoutr0   r0   r@   rQ     s6   

zLlama2TaskPipeline.forwardc                 K   r   rB   r0   r   r0   r0   r@   rf   3  r   zLlama2TaskPipeline.postprocessNNr#   T)r   Fr   r   r   r   r]   r   )rg   rh   ri   r   r
   rj   r   r2   r   r   r}   rE   r   boolfloatrQ   rf   rk   r0   r0   r>   r@   r!     s\    
&	


!r!   c                       s   e Zd ZdZ				d)deeef dededed	ed
e	ee
f f fddZd
e	ee
f fddZdd Zdddddddddg f
dedededededed ed!ed"ed#ed$ed
e	ee
f fd%d&Zd
e	ee
f fd'd(Z  ZS )*Llama2chatTaskPipelinea  Use `model` and `preprocessor` to create a generation pipeline for prediction.

        Args:
            model (str or Model): Supply either a local model dir which supported the text generation task,
            or a model id from the model hub, or a torch model instance.
            preprocessor (Preprocessor): An optional preprocessor instance, please make sure the preprocessor fits for
            the model if supplied.
            kwargs (dict, `optional`):
                Extra kwargs passed into the preprocessor's constructor.
        Examples:
            >>> from modelscope.utils.constant import Tasks
            >>> import torch
            >>> from modelscope.pipelines import pipeline
            >>> from modelscope import Model
            >>> pipe = pipeline(task=Tasks.chat, model="modelscope/Llama-2-7b-chat-ms", device_map='auto',
            >>> torch_dtype=torch.float16, ignore_file_pattern = [r'.+\.bin$'], model_revision='v1.0.5')
            >>> inputs = 'Where is the capital of Zhejiang?'
            >>> result = pipe(inputs,max_length=512, do_sample=False, top_p=0.9,
            >>> temperature=0.6, repetition_penalty=1., eos_token_id=2, bos_token_id=1, pad_token_id=0)
            >>> print(result['response'])
            >>> inputs = 'What are the interesting places there?'
            >>> result = pipe(inputs,max_length=512, do_sample=False, top_p=0.9,
            >>> temperature=0.6, repetition_penalty=1., eos_token_id=2, bos_token_id=1,
            >>> pad_token_id=0, history=result['history'])
            >>> print(result['response'])
            >>> inputs = 'What are the company there?'
            >>> history_demo = [('Where is the capital of Zhejiang?',
            >>> 'Thank you for asking! The capital of Zhejiang Province is Hangzhou.')]
            >>> result = pipe(inputs,max_length=512, do_sample=False, top_p=0.9,
            >>> temperature=0.6, repetition_penalty=1., eos_token_id=2, bos_token_id=1,
            >>> pad_token_id=0, history=history_demo)
            >>> print(result['response'])

            To view other examples plese check tests/pipelines/test_llama2_text_generation_pipeline.py.
        Nr#   Tr%   r&   r'   r(   r+   rH   c           
         s^   | dd }| dd }tj|||d| _ddlm}	 |	|| _t jdd| ji| d S )Nr   r   r   r   r   r%   r0   )	r9   r
   r6   r%   r   r   r   r1   r2   )
r;   r%   r&   r'   r(   r+   r<   r   r   r   r>   r0   r@   r2   ^  s   zLlama2chatTaskPipeline.__init__c                 K   r   rB   r0   r   r0   r0   r@   r}   m  r   z!Llama2chatTaskPipeline.preprocessc                 K   rA   rB   r0   rC   r0   r0   r@   rE   p  rF   z+Llama2chatTaskPipeline._sanitize_parametersr   Fr   r   r   r   r]   r   zyou are a helpful assistant!rG   rn   r   r   r   r   r   r   r   r   r   c                 K   sp   |}||d< ||d< ||d< ||d< ||d< ||d< ||d< ||d< |	|d	< |
|d
< ||d< | j || j}|S )Nre   rn   r   r   r   r   r   r   r   r   r   )r%   r   r   )r;   rG   rn   r   r   r   r   r   r   r   r   r   rO   inputs_dictr   r0   r0   r@   rQ   s  s   zLlama2chatTaskPipeline.forwardc                 K   r   rB   r0   r   r0   r0   r@   rf     r   z"Llama2chatTaskPipeline.postprocessr   )rg   rh   ri   __doc__r   r
   rj   r   r   r   r   r2   r}   rE   r   r   r   rQ   rf   rk   r0   r0   r>   r@   r   7  sr    &

	


r   )=rr   typingr   r   r   r   r   rI   transformersr   r   r   modelscope.metainfor	   modelscope.models.baser
   modelscope.outputsr   r   r   modelscope.pipelines.baser   r   modelscope.pipelines.builderr   modelscope.preprocessorsr   modelscope.utils.chinese_utilsr   modelscope.utils.constantr   r   modelscope.utils.hubr   r   modelscope.utils.loggerr   !modelscope.utils.streaming_outputr   modelscope.utils.torch_utilsr   rM   __all__register_moduletext_generationr   text2text_generationtranslation_en_to_detranslation_en_to_rotranslation_en_to_frr   r   r   r   r   r   r    llama2_text_generation_pipeliner!   $llama2_text_generation_chat_pipeliner   r0   r0   r0   r@   <module>   s~   j6);><-S