o
    5ti-                     @   s   d dl mZmZmZmZmZmZmZmZ d dl	m
Z
mZmZ d dlmZ ddlmZ ddlmZmZ ddlmZmZmZ dd	lmZ erPd d
lmZmZmZmZ dZG dd deZ dS )    )TYPE_CHECKINGAnyCallableDictListOptionalTupleUnion)
ClassLabelDatasetSequence)Literal   )EvaluationModule)add_end_docstringsadd_start_docstrings   )"EVALUATOR_COMPUTE_RETURN_DOCSTRING EVALUTOR_COMPUTE_START_DOCSTRING	Evaluator)DatasetColumn)PipelinePreTrainedModelPreTrainedTokenizerTFPreTrainedModela  
    The dataset input and label columns are expected to be formatted as a list of words and a list of labels respectively, following [conll2003 dataset](https://huggingface.co/datasets/conll2003). Datasets whose inputs are single strings, and labels are a list of offset are not supported.

    Examples:
    ```python
    >>> from evaluate import evaluator
    >>> from datasets import load_dataset
    >>> task_evaluator = evaluator("token-classification")
    >>> data = load_dataset("conll2003", split="validation[:2]")
    >>> results = task_evaluator.compute(
    >>>     model_or_pipeline="elastic/distilbert-base-uncased-finetuned-conll03-english",
    >>>     data=data,
    >>>     metric="seqeval",
    >>> )
    ```

    <Tip>

    For example, the following dataset format is accepted by the evaluator:

    ```python
    dataset = Dataset.from_dict(
        mapping={
            "tokens": [["New", "York", "is", "a", "city", "and", "Felix", "a", "person", "."]],
            "ner_tags": [[1, 2, 0, 0, 0, 0, 3, 0, 0, 0]],
        },
        features=Features({
            "tokens": Sequence(feature=Value(dtype="string")),
            "ner_tags": Sequence(feature=ClassLabel(names=["O", "B-LOC", "I-LOC", "B-PER", "I-PER"])),
            }),
    )
    ```

    </Tip>

    <Tip warning={true}>

    For example, the following dataset format is **not** accepted by the evaluator:

    ```python
    dataset = Dataset.from_dict(
        mapping={
            "tokens": [["New York is a city and Felix a person."]],
            "starts": [[0, 23]],
            "ends": [[7, 27]],
            "ner_tags": [["LOC", "PER"]],
        },
        features=Features({
            "tokens": Value(dtype="string"),
            "starts": Sequence(feature=Value(dtype="int32")),
            "ends": Sequence(feature=Value(dtype="int32")),
            "ner_tags": Sequence(feature=Value(dtype="string")),
        }),
    )
    ```

    </Tip>
c                $       s  e Zd ZdZdg iZd/ fdd	Zdeee  deee  d	efd
dZ	dee d	efddZ
deeef deded	ef fddZ			d0deededdf ded ded def fddZeeeee												 	!	"d1deededdf deeef d#ee d$ed%eeef deeed&f  d'ed( d)ed*edee d+ee deded	ee d,eeeef ef fd-d.Z  ZS )2TokenClassificationEvaluatora(  
    Token classification evaluator.

    This token classification evaluator can currently be loaded from [`evaluator`] using the default task name
    `token-classification`.

    Methods in this class assume a data format compatible with the [`~transformers.TokenClassificationPipeline`].
    ignore_labelstoken-classificationNc                    s   t  j||d d S )N)default_metric_name)super__init__)selftaskr   	__class__ [/home/ubuntu/.local/lib/python3.10/site-packages/evaluate/evaluator/token_classification.pyr    f   s   z%TokenClassificationEvaluator.__init__predictionswordsjoin_byc                 C   s   g }t |D ]U\}}g }| || |}d}	|D ]=}
||	 d |
d k r2|	d7 }	||	 d |
d k s$||	 d |
d krB|d q||	 d |
d krU|||	 d  q|| qd|iS )aW  
        Transform the pipeline predictions into a list of predicted labels of the same length as the true labels.

        Args:
            predictions (`List[List[Dict]]`):
                List of pipeline predictions, where each token has been labeled.
            words (`List[List[str]]`):
                Original input data to the pipeline, used to build predicted labels of the same length.
            join_by (`str`):
                String to use to join two words. In English, it will typically be " ".

        Returns:
            `dict`: a dictionary holding the predictions
        r   startr   Oentityr'   )	enumeratewords_to_offsetsappend)r!   r'   r(   r)   predsi
predictionpred_processedwords_offsetstoken_indexword_offsetr%   r%   r&   predictions_processori   s    z2TokenClassificationEvaluator.predictions_processorc                 C   sD   g }d}|D ]}|t | d }|||f |t | d }q|S )a  
        Convert a list of words to a list of offsets, where word are joined by `join_by`.

        Args:
            words (`List[str]`):
                List of words to get offsets from.
            join_by (`str`):
                String to insert between words.

        Returns:
            `List[Tuple[int, int]]`: List of the characters (start index, end index) for each of the words.
        r   r   )lenr/   )r!   r(   r)   offsetsr*   wordendr%   r%   r&   r.      s   z-TokenClassificationEvaluator.words_to_offsetsdatainput_columnlabel_columnc           
         s   t  || t|j trt|j| tstdt|j| jt}|rC|j| jj}dd t	|D   fdd|| D }n|j| jj
drQtd|| }d|i}|fd	d
}t|}	||	fS )NzYTokenClassificationEvaluator expects the input and label columns to be provided as lists.c                 S   s   i | ]\}}||qS r%   r%   ).0r1   labelr%   r%   r&   
<dictcomp>   s    z=TokenClassificationEvaluator.prepare_data.<locals>.<dictcomp>c                    s   g | ]} fd d|D qS )c                    s   g | ]} | qS r%   r%   )r?   label_idid_to_labelr%   r&   
<listcomp>   s    zHTokenClassificationEvaluator.prepare_data.<locals>.<listcomp>.<listcomp>r%   )r?   	label_idsrC   r%   r&   rE      s    z=TokenClassificationEvaluator.prepare_data.<locals>.<listcomp>intz[References provided as integers, but the reference column is not a Sequence of ClassLabels.
referencesc                    s     |   iS )N)join)x)r=   r)   r%   r&   <lambda>   s    z;TokenClassificationEvaluator.prepare_data.<locals>.<lambda>)r   prepare_data
isinstancefeaturesr   
ValueErrorfeaturer
   namesr-   dtype
startswithNotImplementedErrormapr   )
r!   r<   r=   r>   r)   labels_are_int
label_listrH   metric_inputspipeline_inputsr#   )rD   r=   r)   r&   rL      s*   

z)TokenClassificationEvaluator.prepare_datamodel_or_pipeliner   r   r   	tokenizer)PreTrainedTokenizerBaseFeatureExtractionMixinfeature_extractordevicec                    sF   t  ||||}|dgfi | j}|d d d d u r!td|S )Nz2003 New York Gregoryr   r*   zTokenClassificationEvaluator supports only pipelines giving 'start' index as a pipeline output (got None). Transformers pipelines with a slow tokenizer will raise this error.)r   prepare_pipelinePIPELINE_KWARGSrO   )r!   rZ   r[   r^   r_   pipedummy_outputr#   r%   r&   r`      s   z-TokenClassificationEvaluator.prepare_pipelinesimpleffffff?'  tokensner_tags subsetsplitmetricr   strategy)rd   	bootstrapconfidence_leveln_resamplesrandom_statereturnc                 C   s   i }|  |
| | j|||d}| j||||d\}}| j|||
d}| |}| ||\}}| ||| |}|| | j|||||	|d}|| || |S )ac  
        input_column (`str`, defaults to `"tokens"`):
            The name of the column containing the tokens feature in the dataset specified by `data`.
        label_column (`str`, defaults to `"label"`):
            The name of the column containing the labels in the dataset specified by `data`.
        join_by (`str`, *optional*, defaults to `" "`):
            This evaluator supports dataset whose input column is a list of words. This parameter specifies how to join
            words to generate a string input. This is especially useful for languages that do not separate words by a space.
        )r<   rj   rk   )r<   r=   r>   r)   )rZ   r[   r_   )rl   rX   rm   ro   rp   rq   )	"check_for_mismatch_in_device_setup	load_datarL   r`   prepare_metriccall_pipeliner7   updatecompute_metric)r!   rZ   r<   rj   rk   rl   r[   rm   ro   rp   r_   rq   r=   r>   r)   resultrX   pipe_inputsrb   r'   perf_resultsmetric_resultsr%   r%   r&   compute   s,   



	
z$TokenClassificationEvaluator.compute)r   N)NNN)NNNNNNrd   re   rf   NNrg   rh   ri   )__name__
__module____qualname____doc__ra   r    r   r   strr7   r.   r	   r   rL   r   rG   r`   r   r   r   r   TASK_DOCUMENTATIONr   r   r   floatr   r   r}   __classcell__r%   r%   r#   r&   r   Z   s    	&'&#

	
r   N)!typingr   r   r   r   r   r   r   r	   datasetsr
   r   r   typing_extensionsr   moduler   utils.file_utilsr   r   baser   r   r   utilsr   transformersr   r   r   r   r   r   r%   r%   r%   r&   <module>   s   (<