o
    ॵiWu                     @   sd  d dl Z d dlmZ d dlmZ d dlmZmZmZm	Z	m
Z
mZmZmZ d dlZd dlZd dlZd dlmZmZ d dlmZmZmZmZ d dlmZ d dlmZ d d	lmZm Z  d d
l!m"Z" d dl#m$Z$ d dl%m&Z& d dl'm(Z(m)Z) d dl*m+Z+ d dl,m-Z-m.Z.m/Z/m0Z0 d dl1m2Z2m3Z3 d dl4m5Z5 d dl6m7Z7 d dl8m9Z9m:Z:m;Z; e5 Z<i a=G dd dZ>e&j?e0j@dde&j?e0jAddG dd dee9ZBe>Cddd ZDe>Ede>Eddd ZFe>Cd e>Cd!d"d# ZGe>Cd$e>Cd%d&d' ZHe>Cd(d)d* ZIe>Cd+d,d- ZJe>Cdd.d/ ZKe>Cd0d1d2 ZLe>Mde e>Md e  e>Md!e  dS )3    N)contextmanager)Lock)AnyCallableDict	GeneratorIteratorListTupleUnion)PreTrainedModelPreTrainedTokenizer)AutoModelForCausalLMAutoTokenizerPipelinesnapshot_download)model_file_download)Model)ChatGLM2TokenizerLlama2Tokenizer)
OutputKeys)Input)	PIPELINES)is_modelis_official_hub_path)Config)
FrameworksInvoke	ModelFileTasks)create_devicedevice_placement)
get_logger)ModelTypeHelper)PipelineStreamingOutputMixinStreamingOutputMixinadd_stream_generatec                   @   s   e Zd Zdg diZeddedefddZeddedefd	d
ZeddefddZ	eddefddZ
eddefddZededefddZededefddZdS )LLMAdapterRegistryqwenNNNr   N
model_typevalue_indexc                 C   s\   |st jsJ |d u rt j}|| jvrg d| j|< | j| | d u s%J || j| |< |S )Nr)   )r#   current_model_typellm_format_map)clsr*   r+   member r0   Y/home/ubuntu/.local/lib/python3.10/site-packages/modelscope/pipelines/nlp/llm_pipeline.py_add_to_map'   s   
zLLMAdapterRegistry._add_to_mapc                    s*   |d ur  |S  fdd}|S )Nc                    s     | S Nr2   )r/   r.   r*   r+   r0   r1   	_register7   s   z.LLMAdapterRegistry._wrapper.<locals>._registerr4   )r.   r*   r+   r/   r6   r0   r5   r1   _wrapper2   s   zLLMAdapterRegistry._wrapperc                 C      |  |d|S )Nr   r7   r.   r*   functionr0   r0   r1   register_format_messages<      z+LLMAdapterRegistry.register_format_messagesc                 C   r8   )N   r9   r:   r0   r0   r1   register_format_output@   r=   z)LLMAdapterRegistry.register_format_outputc                 C   r8   )N   r9   )r.   r*   tokenizer_classr0   r0   r1   register_tokenizerD   r=   z%LLMAdapterRegistry.register_tokenizer
model_namereturnc                 C   s
   || j v S r3   r-   r.   rC   r0   r0   r1   containsH      
zLLMAdapterRegistry.containsc                 C   s
   | j | S r3   rE   rF   r0   r0   r1   getL   rH   zLLMAdapterRegistry.get)r   N)NN)__name__
__module____qualname__r-   classmethodstrintr2   r7   r<   r?   rB   boolrG   rI   r0   r0   r0   r1   r'   #   s     
	r'   llm)module_namec                       s<  e Zd Zdd Zdeeef defddZd;dd	Z		
	
	
	
d<dee
ef de
dedef fddZd=ddZedeeef fddZdeeef fddZdeeee f defddZdeeef deeef deeef defddZdeeef fd d!Zd"d# Zd$d% Zd>d&d'Zed(eeeeeef  f dedeeejf fd)d*Zed+efd,d-Zed.eeeeeef  f de e!eef  fd/d0Z"ed1ee# d2ed3eeeeeef  f dedee# f
d4d5Z$eded3eeeeeef  f fd6d7Z%ed1ee# d8ee#ee# f dee# fd9d:Z&  Z'S )?LLMPipelinec                 C   s  ddl m} t|trtd|  | |rS| jd ur'td| j d | j	
d}|d us5J d| j	
dd	}tj||tj| j| jd
d}|j||d}|S t|trt|rtd| d | jrtj|rp|nt|}z| || j}td| j d |W S  ty } ztd| d| j d|  d | _W Y d }~nd }~ww t|rtj|tj| j| j| jdS tj|r|nt|}tj|| jd
d}||_|S |S )Nr   )Swiftzinitiate model from z0Cannot using swift with llm_framework, ignoring .zadapter_cfg.model_id_or_pathzECannot get adapter_cfg.model_id_or_path from configuration.json file.zadapter_cfg.model_revisionmasterT)
invoked_by
device_maptorch_dtypetrust_remote_code)model_idzinitiate model from location zinitiate model with z Cannot using llm_framework with z, ignoring llm_framework=z : )rW   rX   rY   ignore_file_pattern)rX   rZ   )swiftrT   
isinstancerN   loggerinfo_is_swift_modelllm_frameworkwarningcfgsafe_getr   from_pretrainedr   PIPELINErX   rY   r   ospathexistsr   _wrap_infer_framework	Exceptionr   r\   r   	model_dir)selfmodelrT   
base_modelrevisionswift_modelrm   er0   r0   r1   initiate_single_modelU   s   


z!LLMPipeline.initiate_single_modelro   rD   c                 C   sn   t |tsdS tj|rtj|tj}nzt|tj}W n
 t	y(   Y dS w t
|| _| jddkS )NFzadapter_cfg.tuner_backendr]   )r^   rN   rh   ri   rj   joinr   CONFIGURATIONr   rl   r   	from_filerd   re   )rn   ro   cfg_filer0   r0   r1   ra      s   
zLLMPipeline._is_swift_modelvllmc                 C   s   ddl m} |||S )Nr   )InferFramework)$modelscope.pipelines.accelerate.baserz   rf   )rn   rm   	frameworkrz   r0   r0   r1   rk      s   z!LLMPipeline._wrap_infer_frameworkNformat_messagesformat_output	tokenizerrb   c           	         sn  | dd | _|| _| jsd|d  v rd| _| dd | _| dd | _|dkr8| |d |dd	 d S | | t	 j
|i | W d    n1 sQw   Y  t| jtrbt| j| _d }t|tr~t|svJ d
| dt|\}}}|d u rtj| jjdd}t|rt|\}}}|d ur|| _|d ur|| _|d u r| || _d S || _d S )NrX   r(   ro   cudarY   r\   r]   devicegpuzCan not find function for `z`!-)split)poprX   rb   lowerrY   r\   _init_swiftrI   _temp_configuration_filesuper__init__r^   ro   r   r&   rN   r'   rG   r#   rm   r}   r~   _get_tokenizerr   )	rn   r}   r~   r   rb   argskwargsrA   r*   	__class__r0   r1   r      sH   



zLLMPipeline.__init__c           
         s,  ddl m} ddlm}m} tsdd | D adttt	tttf  f dt
dtttjf f fd	d
}dttt	tttf  f dtttf fdd |tv sUJ d|t| d}||jd\}}	t|_j|	_|	_|	j_|_d_tj_|_t|_d_t _d_d_d S )Nr   )prepare_model_template)MODEL_MAPPINGInferArgumentsc                 S   s   i | ]	\}}|d  |qS )model_id_or_pathr0   .0kvr0   r0   r1   
<dictcomp>   s    z+LLMPipeline._init_swift.<locals>.<dictcomp>messagesr   rD   c                    s   j  | \}}|dd  d|v r&t|d d  }||d< |jd }d|v r9|d d  }||d< |jd }t|d  |d< d|v rQt|d d  |d< |S )Nlabels	input_idsr>   inputs_embedsattention_masktoken_type_ids)templateencoder   torchtensorshapeones)r   r   r   inputs_r   	token_lenr   get_examplern   r0   r1   r}      s$   

z0LLMPipeline._init_swift.<locals>.format_messagesc                 S   s   | d } t | dksJ dd }| d d dkr$| d d }| dd  } t | d dks0J d	d
d | D }|d }tt|d d d |dd d }t|||dS )Nr   r   zmessages cannot be empty!rolesystemcontentr>   r@   zUnsupported messages format!c                 S   s   g | ]}|d  qS )r   r0   )r   messager0   r0   r1   
<listcomp>   s    z@LLMPipeline._init_swift.<locals>.get_example.<locals>.<listcomp>)r   prompthistory)lenlistzipdict)r   r   contentsr   r   r0   r0   r1   r      s   "z,LLMPipeline._init_swift.<locals>.get_examplez/Swift framework does not support current model!)r*   )rX   FT)	swift.llmr   swift.llm.utilsr   r   SWIFT_MODEL_ID_MAPPINGitemsr   rN   r	   r   r   TensorrX   r&   ro   r   r   r}   has_multiple_modelsr   r|   device_namer    r   _model_preparer   _model_prepare_lock_auto_collate_compile)
rn   r[   r   r   r   r   r}   r   ro   r   r0   r   r1   r      sD   





zLLMPipeline._init_swiftr   c                 c   s    |  |d  |d< }t|tr|n|j}tj|tj}tj	|r)d V  d S t
|d}tddd| W d    n1 sBw   Y  d V  t| d S )Nro   wpytorchchat)r|   task)rt   r^   rN   rm   rh   ri   ru   r   rv   rj   openjsondumpremove)rn   r   ro   rm   configuration_pathfr0   r0   r1   r     s   
z$LLMPipeline._temp_configuration_filec           
      O   s*  | di }| di }| di }t|tod|v  |d< |d< | j|fi |}| jdv r_t| jdrA| jjdi ||}n4t| jdr[t| jjdr[| jjjdi ||}ntd	t	|d
 
  g}| j|fi |d }| jd u r| d t|d
 d d  }| j|fi |}	|	S )Npreprocess_paramsforward_paramspostprocess_paramsr   is_messages)Nr]   generatero   z"model does not support `generate`!r   r   r0   )rI   r^   r   
preprocessrb   hasattrro   r   
ValueErrorr   flattennumpytolistr   postprocess)
rn   r   r   r   r   r   r   tokensoutputsresponser0   r0   r1   _process_single  s(   

 zLLMPipeline._process_singler   c           
         s   t jts
J djsjrjd rjs  jdi |\ }}t |to/d|v   d< |d< t |t	rX fdd|D }g }|D ]}|
||| qI|S | }	|	||}|S )Nz,pipeline.model must be StreamingOutputMixin!r   r   r   c                    s   g | ]} | qS r0   )_preprocess_with_check)r   ir   rn   r0   r1   r   E  s    
z/LLMPipeline.stream_generate.<locals>.<listcomp>r0   )r^   ro   r%   r   modelsr   prepare_model_sanitize_parametersr   r   append_stream_singler   )
rn   r   r   r   r   r   model_input_listoutputelemodel_inputr0   r   r1   stream_generate7  s<   
zLLMPipeline.stream_generater   r   r   c              	   c   s    t | j| jh | jtjkr7t  | jr| |}| jj	di ||}W d    n1 s1w   Y  n| jj	di ||}|D ]#}|
 d t|d d d  }| j|fi |}| | |V  qDW d    d S 1 ssw   Y  d S )Nr   r   r0   )r!   r|   r   r   r   no_gradr   _collate_fnro   r   r   r   r   _check_output)rn   r   r   r   streamoutr0   r0   r1   r   U  s.   


 
"zLLMPipeline._stream_singlec                    s   | d}|r| j|| jfi |}n| j|fddi|}| d|d< t| jdr0| jj n t| jdrCt| jjdrC| jjj nt| jdrLd	 ntd
 fdd| D S )Nr   return_tensorsptr   r   r   ro   rb   cpuz'model does not have `device` attribute!c                    s*   i | ]\}}|t |r| n|qS r0   )r   	is_tensortor   r   r0   r1   r   |  s    z*LLMPipeline.preprocess.<locals>.<dictcomp>)r   r}   r   r   ro   r   r   r   )rn   r   r   r   r   r0   r   r1   r   j  s"   


zLLMPipeline.preprocessc                 K   s   | d}t|ts-tjtjf}t||r t|jdkr |d }| j	j
|fddi|}n|}|r<| j|fi |}|S tj|i}|S )Nr   r>   r   skip_special_tokensT)r   r^   rN   r   r   npndarrayr   r   r   decoder~   r   TEXT)rn   r   r   r   
shape_typer   r0   r0   r1   r     s$   


zLLMPipeline.postprocessc                 K   s
   i |i fS )a  
        this method should sanitize the keyword args to preprocessor params,
        forward params and postprocess params on '__call__' or '_process_single' method
        considered to be a normal classmethod with default implementation / output

        Default Returns:
            Dict[str, str]:  preprocess_params = {}
            Dict[str, str]:  forward_params = {}
            Dict[str, str]:  postprocess_params = pipeline_parameters
        r0   )rn   generate_parameterr0   r0   r1   r     s   
z LLMPipeline._sanitize_parametersc                 C   s6   t | jtr
| j}n| jj}|d u rt}|j|ddS )NT)rZ   )r^   ro   rN   rm   r   rf   )rn   rA   rm   r0   r0   r1   r     s   zLLMPipeline._get_tokenizerr   c                 K   s>   g }t | D ]\}}t ||||}qdtj|gtjdiS )Nr   )dtype)rS   _message_iter_concat_with_special_tokensr   r   int64)r   r   r   r   r   r   r0   r0   r1   r}     s   zLLMPipeline.format_messagesr   c                 K   s   |   } dd| di}|S )Nr   	assistantr   r   )strip)r   r   r   r0   r0   r1   r~     s   zLLMPipeline.format_outputdatac                 c   s&    | d D ]}|d |d fV  qd S )Nr   r   r   r0   )r  pairr0   r0   r1   r     s   zLLMPipeline._message_iteridsr   r   c              	   C   sF   |j }|j}|d}|| }t||}t| ||||||S )N
)im_start_id	im_end_idr   r  rS   _encode_concat)r  r   r   r   im_startim_endnl_tokenr0   r0   r1   r     s   
z'LLMPipeline._concat_with_special_tokensc                 C   s   t |tr| | S g }|D ]K}| \\}}|dkrSt| dd}| j}|d }|d }	tt|dd}
t	|
|ks@J d|	g|t	|
  }t
|||
||}q|| | q|S )	Nimageimg_token_span   r>   r@   zutf-8)encodingzImage url is too long.)r^   rN   r   rstripr   getattrimg_start_idr   bytesr   rS   r	  extend)r   r   encodedr  modalvaluer  r  
img_end_id
img_pad_idlist_int_urlpad_idsr0   r0   r1   r    s2   
zLLMPipeline._encoder   c                 G   s.   |D ]}t |tr| | q| | q| S r3   )r^   r   r  r   )r  r   itemr0   r0   r1   r	    s
   
zLLMPipeline._concat)ry   )NNNN)rD   Nr3   )(rJ   rK   rL   rt   r   rN   r   rP   ra   rk   r   r   r   r   r   r   r   r   r   r	   r   r   r   r   r   r   r   staticmethodr   r   r}   r~   r   r
   r   rO   r   r  r	  __classcell__r0   r0   r   r1   rS   Q   s    =


+?










4rS   chatglm2c                 K   s&   dd }|| fi |}||dddS )Nc              	   [   s   d}| d } | d d dksJ dt dt| d dD ]}|d	|d d | | d
 | |d  d
 7 }q|dt| d d | d d
 7 }|S )N r   r   r   userz&chatglm2 does not have system messagesr>   r@   u    [Round {}]

问：{}

答：{}

r   u   [Round {}]

问：{}

答：r   )ranger   format)r   r   r   r   r0   r0   r1   build_chatglm2_prompt  s"   "z7chatglm2_format_messages.<locals>.build_chatglm2_promptFr   return_token_type_idsr   r0   )r   r   r   r%  r   r0   r0   r1   chatglm2_format_messages  s   r(  chatglmc                 K   s*   |   } | dd} d| d}d|i}|S )Nu   [[训练时间]]u   2023年r   r   r   )r  replace)r   r   r   r   r0   r0   r1   chatglm2_format_output  s   
r+  llamallama2c                 K   s6   ddl m} dd }|| |fi |\}}|d|iS )Nr   BatchEncodingc                 [   s  | dd}d}| d } | d d dkrd|dg|  } | d d	 }d
| d}||ddj}| d d	 }|  d}	||	ddj}
|jd |
jd  }||kr[td| d| d}g }tt| d ddD ]?}| | d	 | |d  d	 }}|  d|  d}||ddj}||jd  |kr n|| }|g| }||jd 7 }qi|||	g}|g| |
g }d|tj	|ddfS )N
max_length   zyou are a helpful assistant!r   r   r   r   r   r   z<s>[INST] <<SYS>>
z
<</SYS>>

r   r   r   z [/INST]zprepend prompt length z is bigger than max_length r!  r@   r>   z	 [/INST] z </s><s>[INST] )dim)
rI   r   r  r   RuntimeErrorr#  r   ru   r   cat)r   r   r   r0  default_system_messager   system_prompt
system_idstexttext_prompttext_idsprompt_lengthhistory_prompthistory_ids_listr   r"  r   round_prompt	round_idsprompt_listprompt_ids_listr0   r0   r1   build_llama2_prompt  sL   

z3llama2_format_messages.<locals>.build_llama2_promptr   )transformersr/  )r   r   r   r/  rD  r   r   r0   r0   r1   llama2_format_messages  s   ,rF  baichuan	baichuan2c                 K   sB  ddl m} ddd}| d } d}d}|d	d pd
}d}|| }	|| dd\}
}||
}|	t| }g }|d d d D ]?}g }|D ]}|d dkrQ|| n|| |||d  qCt|dksqt|t| |kr||| }t||k r|q= || }| d d dkr|| ||	 d  }t|g}|d|iS )Nr   r.  r"  c                 S   s   dg }}g }t | D ]+\}}|d dkr"|dksJ d|d }q|d |kr1|r1|| g }|| q|r>|| ||fS )Nr!  r   r   r   'first message should be system message.r   )	enumerater   )r   
split_roler   roundsroundr   r   r0   r0   r1   _parse_messagesK  s   


z1baichuan_format_messages.<locals>._parse_messagesr         max_new_tokensr1  i   )rK  r   r   r   r   r   )r"  )	rE  r/  rI   r   r   r   r  r   
LongTensor)r   r   r   r/  rN  assistant_token_iduser_token_idrQ  model_max_lengthmax_input_tokensr   rL  system_tokensmax_history_tokenshistory_tokensrM  round_tokensr   input_tokensr0   r0   r1   baichuan_format_messagesF  sD   



r\  wizardlmc                 K   s(   dd }|| |fi |}||dddS )Nc           
      [   s   d}	 | d } | d d dkrd|dg|  } | d d }|g}t | dd  D ]*\}}|d d	kr?|d }|d
|  q(|d dkrR|d }|d| d q(d|}	|	S )NzGA chat between a curious user and an artificial intelligence assistant.r   r   r   r   r   r   r>   r"  zUSER: r   zASSISTANT: z</s> )rJ  r   ru   )
r   r   r   r7  r8  rB  r   r   user_promptpromptsr0   r0   r1   build_wizardlm_prompt  s*   
z7wizardlm_format_messages.<locals>.build_wizardlm_promptFr   r&  r0   )r   r   r   ra  r`  r0   r0   r1   wizardlm_format_messages}  s   rb  
wizardcodec           	      K   s   | d } t | dksJ dd\}}t| D ](\}}|d dkr,|dks(J d|d	 }|d d
kr>|dks:J d|d	 }q|d | d }||ddddd}|S )Nr   r@   z&wizard code only support two messages.)r!  r!  r   r   r   rI  r   r"  r>   z&second message should be user message.z

### Instruction:
z

### Response:Fr   )r'  paddingadd_special_tokensr   )r   rJ  )	r   r   r   r   r"  r   r   r   r   r0   r0   r1   wizardcode_format_messages  s(   rf  c                 K   sT   | d } | d d | d d }}|j ||d}|j|d|dg}||d< |S )Nr   r   r   )r   z<|user|>z<|observation|>eos_token_id)build_chat_inputrg  get_command)r   r   r   queryr   r   rg  r0   r0   r1   chatglm3_format_messages  s   rk  qwen2c                 K   s&   | d } |j | ddd}||gddS )Nr   FT)tokenizeadd_generation_promptr   r2  )apply_chat_template)r   r   r   r:  r0   r0   r1   qwen2_format_messages  s
   rp  )Nrh   
contextlibr   	threadingr   typingr   r   r   r   r   r	   r
   r   r   r   r   r   rE  r   r   
modelscoper   r   r   r   modelscope.hub.file_downloadr   modelscope.models.baser   modelscope.models.nlpr   r   modelscope.outputsr   modelscope.pipelines.baser   modelscope.pipelines.builderr   modelscope.pipelines.utilr   r   modelscope.utils.configr   modelscope.utils.constantr   r   r   r   modelscope.utils.devicer    r!   modelscope.utils.loggerr"   "modelscope.utils.model_type_helperr#   !modelscope.utils.streaming_outputr$   r%   r&   r_   r   r'   register_moduler   text_generationrS   r<   r(  r?   r+  rF  r\  rb  rf  rk  rp  rB   r0   r0   r0   r1   <module>   sj   (.    

35



