o
    ॵi                     @   s  d dl mZmZmZmZ d dlZd dlmZ d dlm	Z	 d dl
mZ d dlmZ d dlmZ d dlmZ d d	lmZmZmZ d d
lmZmZ d dlmZmZ ddgZejejejdG dd deZejejej dG dd deZ!ejejej"dG dd de!Z#dS )    )AnyDictOptionalUnionN)	Pipelines)Model)
OutputKeys)Pipeline)	PIPELINES)TokenClassificationPipeline)Preprocessor+TokenClassificationTransformersPreprocessor WordSegmentationPreprocessorThai)	ModelFileTasks)torch_nested_detachtorch_nested_numpifyWordSegmentationPipelineWordSegmentationThaiPipeline)module_namec                   @   s6   e Zd ZdZ	ddeeef deeef fddZdS )	r   u  Use `model` and `preprocessor` to create a nlp word segment pipeline for prediction.

    NOTE: The preprocessor will first split the sentence into single characters,
    then feed them into the tokenizer with the parameter is_split_into_words=True.

    Examples:
        >>> from modelscope.pipelines import pipeline
        >>> pipeline_ins = pipeline(task='word-segmentation',
        >>>    model='damo/nlp_structbert_word-segmentation_chinese-base')
        >>> sentence1 = '今天天气不错，适合出去游玩'
        >>> print(pipeline_ins(sentence1))

    To view other examples plese check tests/pipelines/test_word_segmentation.py.
    Tinputsreturnc                 K   sN   | j |fi |}|r dd |D }dd |D }tj|i}|S tj|i}|S )at  Process the prediction results

        Args:
            inputs (Dict[str, Any]): should be tensors from model
            output_final_sentence (bool): Output the cut sentence splitted by blanks or not.
                If False, the pipeline will output the original token-label information.

        Returns:
            Dict[str, Any]: The prediction results.
        c                 S   s    g | ]}|d    r|d  qS span)strip).0chunk r   g/home/ubuntu/.local/lib/python3.10/site-packages/modelscope/pipelines/nlp/word_segmentation_pipeline.py
<listcomp>;   s
    z8WordSegmentationPipeline.postprocess.<locals>.<listcomp>c                 S   s   g | ]}|qS r   r   )r   r   r   r   r   r   >   s    _chunk_processr   OUTPUT)selfr   output_final_sentencepostprocess_paramschunksspans
seg_resultoutputsr   r   r   postprocess)   s   

z$WordSegmentationPipeline.postprocessNT)__name__
__module____qualname____doc__r   strr   r*   r   r   r   r   r      s    

c                   @   s2   e Zd Z	ddeeef deeef fddZdS )$MultilingualWordSegmentationPipelineTr   r   c                 K   s*   | j |fi |}dd |D }tj|iS )Nc                 S   s   g | ]}|d  qS r   r   r   entityr   r   r   r   Q   s    zDMultilingualWordSegmentationPipeline.postprocess.<locals>.<listcomp>r    )r#   r   r$   r%   r&   word_segmentsr   r   r   r*   L   s   
z0MultilingualWordSegmentationPipeline.postprocessNr+   )r,   r-   r.   r   r0   r   r*   r   r   r   r   r1   G   s    

r1   c                	       sh   e Zd Z					ddeeef dee dedef fd	d
Zde	ee
f de	eef fddZ  ZS )r   NgpuT   modelpreprocessorconfig_filedevicec                    s\   t  j|||||d t| jtsJ dtj |d u r,t| jjfd|i|| _	d S d S )N)r7   r8   r9   r:   auto_collatez,please check whether model config exists in sequence_length)
super__init__
isinstancer7   r   r   CONFIGURATIONr   	model_dirr8   )r#   r7   r8   r9   r:   r;   r<   kwargs	__class__r   r   r>   Y   s&   
z%WordSegmentationThaiPipeline.__init__r   r   c                 K   s*   | j |fi |}dd |D }tj|iS )Nc                 S   s   g | ]
}|d   ddqS )r     )replacer2   r   r   r   r   t   s    z<WordSegmentationThaiPipeline.postprocess.<locals>.<listcomp>r    )r#   r   r%   r&   r4   r   r   r   r*   q   s   
z(WordSegmentationThaiPipeline.postprocess)NNr5   Tr6   )r,   r-   r.   r   r   r0   r   r   r>   r   r   r*   __classcell__r   r   rC   r   r   U   s$    

)$typingr   r   r   r   torchmodelscope.metainfor   modelscope.modelsr   modelscope.outputsr   modelscope.pipelines.baser	   modelscope.pipelines.builderr
   modelscope.pipelines.nlpr   modelscope.preprocessorsr   r   r   modelscope.utils.constantr   r   modelscope.utils.tensor_utilsr   r   __all__register_moduleword_segmentationr   multilingual_word_segmentationr1   word_segmentation_thair   r   r   r   r   <module>   s2   .