o
    ॵi&                  	   @   s  d dl Z d dlmZmZmZmZmZmZ d dlZ	d dl
Z
d dlmZ d dlmZ d dlmZ d dlmZmZ d dlmZ d dlmZ d d	lmZmZ d d
lmZmZ dgZejej ej dejej ej!dejej ej"dejej ej#dejej!ej!dG dd deZ$dS )    N)AnyDictListOptionalTupleUnion)	Pipelines)Model)
OutputKeys)InputPipeline)	PIPELINES)Preprocessor)	ModelFileTasks)torch_nested_detachtorch_nested_numpifyTokenClassificationPipeline)module_namec                	       s@  e Zd Z					d!deeef dee dedef fd	d
Zde	ee
f de	ee
f fddZde	ee
f de	ee
f fddZde	ee
f defddZdede	ee
f f fddZdee dedee	ee
f  f fddZdee defddZdee	ee
f  de	eeeef f dee	ee
f  fdd Z  ZS )"r   NgpuT   modelpreprocessorconfig_filedevicec                    s   t  j||||||dd|di d t| jts#J dtj |du r5tj	| jj
fd|i|| _| j  || _t| jdsEJ | jj| _dS )	a  use `model` and `preprocessor` to create a token classification pipeline for prediction

        Args:
            model (str or Model): A model instance or a model local dir or a model id in the model hub.
            preprocessor (Preprocessor): a preprocessor instance, must not be None.
            kwargs (dict, `optional`):
                Extra kwargs passed into the preprocessor's constructor.
        compileFcompile_options)r   r   r   r   auto_collater   r   z,please check whether model config exists in Nsequence_lengthid2label)super__init__pop
isinstancer   r	   r   CONFIGURATIONr   from_pretrained	model_dirr   evalr   hasattrr   )selfr   r   r   r   r   r   kwargs	__class__ j/home/ubuntu/.local/lib/python3.10/site-packages/modelscope/pipelines/nlp/token_classification_pipeline.pyr!   "   s0   

	

z$TokenClassificationPipeline.__init__inputsreturnc                 K   sZ   | tj}t  i | jdi ||tj|iW  d    S 1 s&w   Y  d S )Nr-   )r"   r
   TEXTtorchno_gradr   )r)   r/   forward_paramstextr-   r-   r.   forwardI   s   
$z#TokenClassificationPipeline.forwardc                 K   s   | j |fi |}tj|iS )zProcess the prediction results

        Args:
            inputs (Dict[str, Any]): should be tensors from model

        Returns:
            Dict[str, Any]: the prediction results
        )_chunk_processr
   OUTPUT)r)   r/   postprocess_paramschunksr-   r-   r.   postprocessQ   s   

z'TokenClassificationPipeline.postprocessc                    s  |d }t j|vr!|t j }t|jdkr|d }tj|dd}n|t j }t|jdkr1|d }|d }t|jdkr@|d }|d}|d	url|d	 
  }t|dd|}t|jdkrg|d }||}tt|}tt|} fd
d|D }	|dd}
|
rt j|v r|t j }t|jdkr|d }tt|d}nd}
g }i }tt|	|D ]\}\}}|d dv r|r||d |d  |d< || |dd	 |d |d d}|
r|| ||  |d< |d dv r|s|dd	 |d |d d}|
r|| ||  |d< |d dv r8|s8|dd	 |d |d d}|
r8|| ||  |d< |d dv rH|rH|d |d< |d dv re|re||d |d  |d< || i }q|rz||d |d  |d< || |S )zprocess the prediction results and output as chunks

        Args:
            inputs (Dict[str, Any]): should be tensors from model

        Returns:
            List: The output chunks
        r5      r   )dim   offset_mapping
label_maskNc                    s   g | ]} j | qS r-   )r   ).0xr)   r-   r.   
<listcomp>   s    z>TokenClassificationPipeline._chunk_process.<locals>.<listcomp>return_probTFBSstartendspan   )typerH   rI   probIEIESES)r
   PREDICTIONSLOGITSlenshaper2   argmaxgetsumlongcpuitemnarrowmasked_selectr   r   r"   softmax	enumeratezipappend)r)   r/   r9   r5   logitspredictionsr@   rA   masked_lengthslabelsrF   probsr:   chunkilabeloffsetsr-   rD   r.   r7   ^   s   













z*TokenClassificationPipeline._chunk_processinputc           	         s~   | dd}|dkrt j|g|R i |S | |g|\}}g }|D ]}|t j|g|R i | q$| ||d S )Nsplit_max_lengthr   )r"   r    _process_single_auto_splitra   
_auto_join)	r)   rk   argsr*   rl   split_textsindex_mappingoutputsr5   r+   r-   r.   rm      s   "z+TokenClassificationPipeline._process_single
batch_sizec           	         sl   | dd}|dkrt j|g|R d|i|S | ||\}}t j|g|R d|i|}| ||S )Nrl   r   rt   )r"   r    _process_batchrn   ro   )	r)   rk   rt   rp   r*   rl   rq   rr   rs   r+   r-   r.   ru      s4   z*TokenClassificationPipeline._process_batchinput_textsrl   c                 C   s   g }i }d}t |D ]C\}}t||k r$|| |df||< |d7 }q
tt|| }t|D ]}	|	| }
|||
|
|   ||
f||< |d7 }q1q
||fS )Nr   rK   )r_   rT   ra   mathceilrange)r)   rv   rl   rq   rr   new_idxraw_idxr5   n_splitrh   offsetr-   r-   r.   rn      s    


z'TokenClassificationPipeline._auto_splitrs   rr   c           	      C   s   g }t |D ]8\}}|| \}}|t|kr|| q|tj D ]}|d  |7  < |d  |7  < || tj | q!q|S )NrH   rI   )r_   rT   ra   r
   r8   )	r)   rs   rr   joined_outputsidxoutputr{   r}   rg   r-   r-   r.   ro      s   z&TokenClassificationPipeline._auto_join)NNr   Tr   )__name__
__module____qualname__r   r	   strr   r   r!   r   r   r6   r;   r   r7   r   rm   intru   rn   r   ro   __classcell__r-   r-   r+   r.   r      sH    
'




e)%rw   typingr   r   r   r   r   r   numpynpr2   modelscope.metainfor   modelscope.modelsr	   modelscope.outputsr
   modelscope.pipelines.baser   r   modelscope.pipelines.builderr   modelscope.preprocessorsr   modelscope.utils.constantr   r   modelscope.utils.tensor_utilsr   r   __all__register_moduletoken_classificationpart_of_speechword_segmentationnamed_entity_recognitionr   r-   r-   r-   r.   <module>   s:    