o
    ߥi4                  	   @   s  d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlmZ d dl	m
Z
mZmZmZ d dlZd dlmZ d dlmZmZ d dlmZmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlm Z  d dl!m"Z"m#Z# d dl$m%Z% ddl&m'Z'm(Z( ddl)m)Z)m*Z* e+d e% Z,dedfddZ-dd Z.dd Z/	d#ddZ0de1deee1 ee1 eee1  f fddZ2ej3e#j4ej5d G d!d" d"eZ6dS )$    N)partial)AnyDictListTuple)mpu)"get_masks_and_position_ids_defaultupdate_mems)generate_continually
timed_name)Models)
TorchModel)MODELS)
OutputKeys)Config)	ModelFileTasks)
get_logger   )BaseStrategyBeamSearchStrategy)
initializeinitialize_model_and_tokenizer   i c              	   K   sx  t |jdks	J |j\}}	||\}}
}|dd|	f }|
jtjkr,|
t|  }
|	d }|du r6dn|jd }d}||jd d k r6||| d}|durf||jd || |jd |jd nd}| |dd|df |d||d f |
d||d d|d f fd|i|^}}d	d
 |D }t	|||d}||	d kr|t
||d f }n|dddf }|d7 }|}|||d}|||d}||jd |||jd |jd }||||\}}t |jdkr(|dkr(|jd }|d||d|| d}|
jdd }|
d||dddj|| g|R  }
|jr-n	||jd d k sG|||S )aj  
        seq: [2, 3, 5, ..., -1(to be generated), -1, ...]
        mems: [num_layers, batch_size, len_mems(index), mem_hidden_size]
            cache, should be first mems.shape[1] parts of context_tokens.
            mems are the first-level citizens here, but we don't assume what is memorized.
            input mems are used when multi-phase generation.
       .Nr   r   memsc                 S   s   g | ]}|d  qS )mem_kv ).0or   r   b/home/ubuntu/.local/lib/python3.10/site-packages/modelscope/models/nlp/glm_130b/text_generation.py
<listcomp>P   s    z*batch_filling_sequence.<locals>.<listcomp>)max_memory_length   )lenshapedtypetorchbooltype_asnext
parametersreshaper	   arangeforward	unsqueezeexpandis_donefinalize)modelseqscontext_lengthsstrategyr$   get_masks_and_position_idsr   kw_args
batch_sizecontext_lengthattention_maskposition_idstokenscounterindex	num_beamslogitsoutput_per_layersr   attention_mask_shaper   r   r"   batch_filling_sequence"   s   




(rG   c                 C   s8   | j dtddd | j dtddd | j dd	d
d d S )Nz--sampling-strategyr   zType of sampling strategy.)typedefaulthelpz--min-gen-lengthr   z.The minimum length each blank should generate.z--print-all-beams
store_truez3Print all output generated by beam search strategy.)actionrJ   )add_argumentstrint)parserr   r   r"   add_generation_specific_argsl   s"   
rQ   c                 C   s.   z| j ddd W dS  ty   Y dS w )Nzutf-8)encodingasciiFT)encodedecodeUnicodeDecodeError)sr   r   r"   	isEnglish}   s   rX   Fc                 C   s   | j d }tjjj| d|fddd}tjd|j d |j d f|jd}|  d|dd |d f< |d |dk 	 }tj
|j d tj|jd	}|sS|||d d < |d}|||fS )
Nr   r   constantr   )modevaluedevice.g      ?)r)   r]   )r(   r*   nn
functionalpadonesr]   tril_
unsqueeze_r+   r0   longr2   )seqmask_positionmax_gen_lengthgmaskr=   r@   r>   r?   r   r   r"   r:      s$   



r:   raw_textreturnc                 C   s  d}d|v r	d}nd|v rd}d|vod|v}d}t ||}t ||}	g }
tt|	D ]}|	| }|| }|
|| |
|	| q/|
||d  d|vrf|
|	|g7 }
|d| 7 }|
dss|
|	dg }
t d	krtd
| t|
| jkrtdt|}|
g}| jdkr| jnd}d	g| dd t|D dd t|D dd t|D f\}}}}	 |d	 }
|	|}||
vrn|
|}g }tjj|
|	dg g| jd}t||tjj|jd g| jd|tt|| j|jd  |dd\}}t|tj r|! }|d	 }|| tt|D ]}t|| tj r2|| ! n|| }z|d}W n tyK   t|}Y nw ||d  |j"v rZ|d8 }||	d}|#||| | }|#||d | }||  ||rdnd |rdnd | |rdnd |rdnd 7  < || | || |d  ||< |d | ||d |  ||d |  ||< q!qt$|D ]-\}}|d |	dkr|d d }||  |#||| d  7  < |#|||< q|||fS )Nz[gMASK]z[MASK]z[sMASK]z\[[sg]?MASK\]r   zMASK] eosr   z
Input: {}
ztext too long.r   r   c                 S      g | ]}d qS  r   r    _r   r   r"   r#          zfill_blanks.<locals>.<listcomp>c                 S   rm   rn   r   rp   r   r   r"   r#      rr   c                 S   s   g | ]}g qS r   r   rp   r   r   r"   r#      rr   Tsopr\   )rf   rg   rh   )r9   r:   ro   z[4mz[0;32m[4mz[0mz[0m[0m)%resplitcompilefindallranger'   extendtokenizeappendget_commandendswithr   get_model_parallel_rankloggerinfoformatmax_sequence_length
ValueErrorrX   sampling_strategyrC   rB   r*   cuda
LongTensorr]   rG   r(   r   r:   out_seq_length
isinstanceTensortolist
end_tokens
detokenize	enumerate)argsri   r6   	tokenizerr9   generation_mask	use_gmaskmask_pattern	text_listpattern_listre   ipatternsub_text
is_englishoutput_list
num_outputlast_posanswersanswers_with_styleblanks
mask_tokenrf   	input_seqoutputrq   
unfinishedbogprefixblankr   r   r"   fill_blanks   s   






8"
r   )module_namec                       sD   e Zd Zdef fddZdd Zdedeeef fdd	Z  ZS )
GLM130bForTextGeneration	model_dirc              	      s^  t  j|g|R i | t|d tj | _ttd}t	
dtjd |_| jjj|_| jjj|_| jjj|_| jjj|_| jjj|_| jjj|_| jjj|_| jjj|_| jjj|_||_td t|\| _| _| jd| jdg}|jdkrtd|j|j|j|d| _n|jd	krt d|j|jd
||j|jd| _nt!d|j || _"d S )N/)extra_args_providerr   zLoading model and tokenizer ...eoprl   r   )r<   temperaturetop_ktop_pr   r   T)length_penaltyconsider_endr   no_repeat_ngram_sizemin_gen_lengthzunknown strategy )#super__init__r   	from_filer   CONFIGURATIONcfgr   rQ   randomrandintsysmaxsizeseedr6   r   r   r   rC   r   r   r   r   r   loadr   r   r   r   r|   r   r9   r   r   r   )selfr   r   kwargsr   	__class__r   r"   r     sN   









z!GLM130bForTextGeneration.__init__c                 C   sL   t | j|| j| j| j\}}}t dkr tdt	|d   t	|d S )Nr   zOutput:)
r   r   r6   r   r9   r   r~   r   r   rN   )r   ri   r   r   r   r   r   r"   funcA  s   
zGLM130bForTextGeneration.funcinputrj   c              
   C   s   d\}}t j dkr#|}|stjdiS |dkrd}t j||g n||g}t j| |\}}|r5d S zt }| |}t j dkrRt	d
t |  W n ttfyn } ztjt|iW  Y d }~S d }~ww t	d tj|iS )N)ro   Fr   zQuery should not be empty!stopTz
Taken time {:.2f}
zGeneration finished.)r*   distributedget_rankr   TEXTbroadcast_object_listtimer   r   r   r   r   FileNotFoundErrorrN   )r   r   ri   is_stopr   
start_timereser   r   r"   r1   J  s6   



z GLM130bForTextGeneration.forward)	__name__
__module____qualname__rN   r   r   r   r1   __classcell__r   r   r   r"   r     s    3"	r   )F)7copyosr   rt   statr   r   	functoolsr   typingr   r   r   r   r*   SwissArmyTransformerr   7SwissArmyTransformer.generation.autoregressive_samplingr   r	   %SwissArmyTransformer.generation.utilsr
   r   modelscope.metainfor   modelscope.models.baser   modelscope.models.builderr   modelscope.outputsr   modelscope.utils.configr   modelscope.utils.constantr   r   modelscope.utils.loggerr   
generationr   r   r   r   set_num_threadsr   rG   rQ   rX   r:   rN   r   register_moduletext_generationglm130br   r   r   r   r"   <module>   sJ   

J

m