o
    ॵis                     @   s   d dl Z d dlmZmZmZmZ d dlZd dlZd dl	m
Z
 d dlmZ d dlmZ d dlmZ d dlmZmZ d dlmZ d d	lmZ d d
lmZ d dlmZ e ZdgZejejejdG dd deZ dS )    N)AnyDictListUnion)Dataset)	Pipelines)Model)
OutputKeys)PipelineTensor)	PIPELINES),DocumentSegmentationTransformersPreprocessor)Tasks)
get_loggerExtractiveSummarizationPipeline)module_namec                	       s   e Zd Z				ddeeef dededef fdd	Zd
eee ef de	ee
f fddZd
eee ef de	ee
f fddZde	eef de	eef fddZdeee ef fddZdd Z  ZS )r   NgpuTmodelpreprocessorconfig_filedevicec                    sv   t  jd|||||d| |dd  |dd  | jj| _| jj| _|d u r9t| j| jjjfi || _	d S d S )N)r   r   r   r   auto_collatecompilecompile_options )
super__init__popr   	model_dir	model_cfgr   configmax_position_embeddingsr   )selfr   r   r   r   r   kwargs	__class__r   n/home/ubuntu/.local/lib/python3.10/site-packages/modelscope/pipelines/nlp/extractive_summarization_pipeline.pyr      s(   	

z(ExtractiveSummarizationPipeline.__init__	documentsreturnc                 C   s   |  |}| |}|S )N)predictpostprocess)r"   r'   outputr   r   r&   __call__:   s   

z(ExtractiveSummarizationPipeline.__call__c                    s    |}t|} | j}t| jj }t| jj }|d}|d}| jj}	t	
  dd | D }
 jjdi |
j}W d    n1 sTw   Y  tj|dd}t|t|ksuJ d|t|t| fdd	t||D } fd
d	t||D }g }t|D ]}|g g g d qt||||	D ][\}}}}t|t|k r|d |d t|t|ksJ dt|t|t|t|ksJ dt|t||| d | || d | || d | q|S )Nlabels	sentencesc                 S   s   i | ]
\}}|t |qS r   )torchtensor).0keyvalr   r   r&   
<dictcomp>P   s    
z;ExtractiveSummarizationPipeline.predict.<locals>.<dictcomp>   )axisz(sample {}  infer_sample {} prediction {}c                    (   g | ]\}} fd dt ||D qS )c                    s$   g | ]\}}|d kr j j| qS ir   
label_listr1   plr"   r   r&   
<listcomp>]       
FExtractiveSummarizationPipeline.predict.<locals>.<listcomp>.<listcomp>zipr1   
predictionlabelr>   r   r&   r?   \       
z;ExtractiveSummarizationPipeline.predict.<locals>.<listcomp>c                    r7   )c                    s$   g | ]\}}|d kr j j| qS r8   r9   r;   r>   r   r&   r?   d   r@   rA   rB   rD   r>   r   r&   r?   c   rG   )r.   r-   predictionsOz{} {}rH   r   )cut_documentsr   	from_dictr   r   lencontext_column_namer   example_id_column_namer/   no_graditemsr   forwardlogitsnpargmaxformatrC   rangeappendextend)r"   r'   pred_samplespredict_examplespredict_datasetnum_examplesnum_samplesr-   r.   example_idsinputrR   rH   true_predictionstrue_labelsoutirE   sentence_listrF   
example_idr   r>   r&   r)   ?   sj   










z'ExtractiveSummarizationPipeline.predictinputsc                 C   s   g }t |}t|D ]+}g }t|| d || d D ]\}}| }|dkr,|| q|d| q
|dkrAtj|d iS tj|iS )zprocess the prediction results

        Args:
            inputs (Dict[str, Any]): _description_

        Returns:
            Dict[str, str]: the prediction results
        r.   rH   B-EOP
   r   )rL   rV   rC   striprW   joinr	   TEXT)r"   rf   result
list_countnumressr<   r   r   r&   r*   ~   s   	


z+ExtractiveSummarizationPipeline.postprocessparac           
      C   s   t |tr	|g}n|}g }g }g }d}|D ]&}| |}dgt|d  dg }	|| ||	 || |d7 }q|||dS )Nr   rI   ri   rg   )re   r.   r-   )
isinstancestrcut_sentencerL   rW   )
r"   rr   document_listr.   r-   re   iddocumentsentencerF   r   r   r&   rJ      s$   





z-ExtractiveSummarizationPipeline.cut_documentsc                 C   sT   t dd|}t dd|}t dd|}t dd|}| }dd |dD S )	Nu   ([。！.!？\?])([^”’])z\1\n\2u   (\.{6})([^”’])u   (\…{2})([^”’])u*   ([。！？\?][”’])([^，。！？\?])c                 S   s   g | ]}|r|qS r   r   )r1   _r   r   r&   r?      s    z@ExtractiveSummarizationPipeline.cut_sentence.<locals>.<listcomp>rh   )resubrstripsplit)r"   rr   r   r   r&   ru      s   z,ExtractiveSummarizationPipeline.cut_sentence)NNr   T)__name__
__module____qualname__r   r   rt   r   r   r   r   r   r,   r)   r   r*   rJ   ru   __classcell__r   r   r$   r&   r      s&    
&&"?)!r{   typingr   r   r   r   numpyrS   r/   datasetsr   modelscope.metainfor   modelscope.modelsr   modelscope.outputsr	   modelscope.pipelines.baser
   r   modelscope.pipelines.builderr   modelscope.preprocessorsr   modelscope.utils.constantr   modelscope.utils.loggerr   logger__all__register_moduleextractive_summarizationr   r   r   r   r&   <module>   s(   