o
    ॵi7                     @   sN  d dl Z d dlZd dlZd dlmZ d dlmZ d dlmZ d dlm	Z	m
Z
mZmZmZmZmZ d dlZd dlZd dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZ d dlmZ d dlm Z  d dl!m"Z" d dl#m$Z$m%Z% d dl&m'Z'm(Z( ee)e*eddf Z+e ,e-Z.dej/d< dgZ0e"j1e(j2ej2dG dd de Z3dS )    N)deepcopy)ceil)time)AnyDict	GeneratorListMappingOptionalUnion)softmax)autocast)tqdm)	Pipelines)Model)	MsDataset)
OutputKeys)Pipeline)	PIPELINES)PreprocessorSiameseUiePreprocessor)	ModelFileTaskszImage.Imageznumpy.ndarraytrueTOKENIZERS_PARALLELISMSiameseUiePipeline)module_namec                	       s   e Zd Z				ddeeef dee dedef fdd	Zd
e	ee
f de	ee
f fddZdeeee f dee	ee
f ef fddZdd Zdd Zdd Zdd Zdd Zdd Z  ZS )r   NcpuTmodelpreprocessorconfig_filedevicec                    s   t  j||||||dd|di d t| jts#J dtj | jdu r4t	j
| jjfi || _| j  d| _d| _d	| _d
| _d| _dS )uu  Use `model` and `preprocessor` to create a generation pipeline for prediction.

        Args:
            model (str or Model): Supply either a local model dir which supported the text generation task,
            or a model id from the model hub, or a torch model instance.
            preprocessor (Preprocessor): An optional preprocessor instance, please make sure the preprocessor fits for
            the model if supplied.
            kwargs (dict, `optional`):
                Extra kwargs passed into the preprocessor's constructor.

        Examples:
            >>> from modelscope.pipelines import pipeline
            >>> pipeline_ins = pipeline(Tasks.siamese_uie,
            >>>    model='damo/nlp_structbert_siamese-uie_chinese-base')
            >>> sentence = '1944年毕业于北大的名古屋铁道会长谷口清太郎等人在日本积极筹资，共筹款2.7亿日元，参加捐款的日本企业有69家。'
            >>> print(pipeline_ins(sentence, schema={'人物': None, '地理位置': None, '组织机构': None}))

            To view other examples plese check tests/pipelines/test_siamese_uie.py.
        compileFcompile_options)r   r   r    r!   auto_collater"   r#   z,please check whether model config exists in Ni`  i        g      ?)super__init__pop
isinstancer   r   r   CONFIGURATIONr   r   from_pretrained	model_direval	slide_lenmax_lenhint_max_leninference_batch_size	threshold)selfr   r   r    r!   r$   kwargs	__class__ a/home/ubuntu/.local/lib/python3.10/site-packages/modelscope/pipelines/nlp/siamese_uie_pipeline.pyr(   '   s0   

	



zSiameseUiePipeline.__init__inputsreturnc                 C   s   d S )Nr8   )r4   r:   r8   r8   r9   postprocessW   s   zSiameseUiePipeline.postprocessinputc                 O   s   d|v r| d}|r|dkrtd| jr| js|   |}| d}t|tkr/t|}| dd}| 	|gd }g }	g }
| 
|||
||	| d|	iS )	uU  
        Args:
            input(str): sentence to extract
            schema: (dict or str) schema of uie task
        Default Returns:
            List[List]:  predicted info list i.e.
            [[{'type': '人物', 'span': '谷口清太郎', 'offset': [18, 23]}],
            [{'type': '地理位置', 'span': '日本', 'offset': [26, 28]}],
            [{'type': '地理位置', 'span': '日本', 'offset': [48, 50]}],
            [{'type': '组织机构', 'span': '北大', 'offset': [8, 10]}],
            [{'type': '组织机构', 'span': '名古屋铁道', 'offset': [11, 16]}]]
        
batch_size   z,This pipeline do not support batch inferenceschemaoutput_all_prefixFr   output)r)   	Exceptionr   _model_prepareprepare_modeltypestrjsonloadsr   forward)r4   r=   argsr5   r>   textr@   rA   tokenized_textpred_info_listprefix_infor8   r8   r9   __call__Z   s&   


zSiameseUiePipeline.__call__c                 C   s(   |d  |g| j t|d   7  < |S )N)r0   len)r4   	input_idspad_token_idr8   r8   r9   _pad   s   $zSiameseUiePipeline._padc                    s   j |dd jd}g }t jkr!tt j  j d nd} fddt|D } fddt|D }|dkrK |d} |d}tj	|tj
 jd}tj	|tj
 jd}|d j d }	t||	}
t||	}g }t 1 t  t|
|D ]\}} j||}|| qW d    n1 sw   Y  W d    n1 sw   Y  tj|dd	}tj|dd	}tt|D ]0}|| }|| }t|D ]!}| j }|d
 | ||||| |j|| |jd}|| qq|S )NT)padding
truncation
max_lengthr?   c                    ,   g | ]}j | j | j  j  qS r8   )idsr/   r0   .0jr4   rM   r8   r9   
<listcomp>   s    z6SiameseUiePipeline.tokenize_sample.<locals>.<listcomp>c                    rY   r8   )attention_maskr/   r0   r[   r^   r8   r9   r_      s    r   dtyper!   )dimz--)idhintrL   shiftsequence_outputhint_token_idsattention_maskscross_attention_masks)r   r1   rR   r0   r   r/   rangerU   torchtensorlongr!   sizer2   tensor_splitno_gradr   zipr   get_plm_sequence_outputappendcatrZ   r`   )r4   rL   rM   hintstokenized_hintstokenized_data	split_num	token_idsri   	batch_numall_token_idsall_attention_masksall_sequence_outputrg   ire   tokenized_hintr]   aitemr8   r^   r9   tokenize_sample   s   






z"SiameseUiePipeline.tokenize_samplec           
      C   s   |  |||}tdd |D }tdd |D }tjdd |D tj| jd}tjdd |D tj| jd}|d| j d }	t||	}t||	}t||	}t||	}|||||ffS )	Nc                 S      g | ]}|d  qS )rg   r8   r\   r   r8   r8   r9   r_          zISiameseUiePipeline.get_tokenized_data_and_data_loader.<locals>.<listcomp>c                 S   r   )ri   r8   r   r8   r8   r9   r_      r   c                 S   r   )rh   r8   r   r8   r8   r9   r_      r   ra   c                 S   r   )rj   r8   r   r8   r8   r9   r_      r   r   r?   )	r   rl   stackrm   rn   r!   ro   r2   rp   )
r4   rL   rM   rv   rx   rg   ri   rh   rj   r{   r8   r8   r9   "get_tokenized_data_and_data_loader   s6   z5SiameseUiePipeline.get_tokenized_data_and_data_loaderc                    s   g } fddt t D }|D ]1}t |t|D ]'}|| jkrB|| d }	|| d }
|	|
g||	|
 d}||  nqqt|dd d}|S )	Nc                    s   g | ]} | j kr|qS r8   )r3   r[   
head_probsr4   r8   r9   r_      s    z3SiameseUiePipeline.get_entities.<locals>.<listcomp>r   r?   )offsetspanc                 S   s   t | d S )Nr   )tuple)xr8   r8   r9   <lambda>   s    z1SiameseUiePipeline.get_entities.<locals>.<lambda>)key)rk   rR   r3   rt   sorted)r4   rL   offsetsr   
tail_probssample_entitiespotential_headsphpt	char_head	char_tailer8   r   r9   get_entities   s(   


	zSiameseUiePipeline.get_entitiesc           $   	   C   s  g }|D ]#}d}|D ]}||d  d|d  d7 }q
|| d7 }| | q| |||\}	}
g }d }g }g }g }t ; t ( t|
 D ]}| jj| \}}| | }}||7 }||7 }qHW d    n1 snw   Y  W d    n1 s}w   Y  |	 ddi | d  | d  t|	||D ]\}}}|d }|	dd	||d
}|d ur3||kr3t
|j}dg| }dg| }|D ]Y}|d }|d }|d }t
|}t|D ]B}|| |k r|||  dkr|| n|||  ||  d ||| < |||  dkr|| n|||  ||  d ||| < qq|j}| ||||} | |  g }| | |}qg }!t||D ]#\}} | D ]}"t|}#||"d |"d d}|# | |! |# qHqB|!S )N rF   z: r   z, rd   WhatADifferentUUiDrf   r   )rf   headtailrQ   r   r      r   )rF   r   r   )rt   r   rl   rq   r   rr   r   fast_inferencetolistgetrR   r   rk   r   r   )$r4   rL   rM   rO   schema_typesrv   stre   r   all_valid_tokenized_dataall_tensor_dataprobs	last_uuidall_pred_entitiesall_head_probsall_tail_probs
batch_databatch_head_probsbatch_tail_probstokenized_sampler   r   uuidprob
len_tokensprob_tmprf   r   r   len_subr]   r   pred_entitiesnext_prefix_infosr   pir8   r8   r9   get_prefix_infos   s   

	














z#SiameseUiePipeline.get_prefix_infosc           	   	   C   sd   |  ||||}|D ]%}||d d  }|d u r|| q
|r%|| | |||||| q
d S )NrQ   rF   )r   rt   rJ   )	r4   rL   rM   rO   curr_schema_dictrN   rA   r   next_schema_dictr8   r8   r9   rJ   /  s   

zSiameseUiePipeline.forward)NNr   T)__name__
__module____qualname__r   r   rG   r
   r   r(   r   r   r<   Inputr   r   rP   rU   r   r   r   r   rJ   __classcell__r8   r8   r6   r9   r   #   s0    
"0
%7I)4loggingospathlibcopyr   mathr   r   typingr   r   r   r   r	   r
   r   rH   rl   scipy.specialr   torch.cuda.ampr   r   modelscope.metainfor   modelscope.modelsr   modelscope.msdatasetsr   modelscope.outputsr   modelscope.pipelines.baser   modelscope.pipelines.builderr   modelscope.preprocessorsr   r   modelscope.utils.constantr   r   rG   r   r   	getLoggerr   loggerenviron__all__register_modulesiamese_uier   r8   r8   r8   r9   <module>   s8   $

