o
    5ti                     @   s   d dl mZ d dlmZmZmZmZmZmZm	Z	 d dl
mZ d dlmZ ddlmZ ddlmZmZ dd	lmZmZmZ erLd d
lmZmZmZmZmZ dZG dd deZdS )    )Number)TYPE_CHECKINGAnyCallableDictOptionalTupleUnion)Dataset)Literal   )EvaluationModule)add_end_docstringsadd_start_docstrings   )"EVALUATOR_COMPUTE_RETURN_DOCSTRING EVALUTOR_COMPUTE_START_DOCSTRING	Evaluator)FeatureExtractionMixinPipelinePreTrainedModelPreTrainedTokenizerTFPreTrainedModela  
    Examples:

    <Tip>

    Remember that, in order to process audio files, you need ffmpeg installed (https://ffmpeg.org/download.html)

    </Tip>

    ```python
    >>> from evaluate import evaluator
    >>> from datasets import load_dataset

    >>> task_evaluator = evaluator("audio-classification")
    >>> data = load_dataset("superb", 'ks', split="test[:40]")
    >>> results = task_evaluator.compute(
    >>>     model_or_pipeline=""superb/wav2vec2-base-superb-ks"",
    >>>     data=data,
    >>>     label_column="label",
    >>>     input_column="file",
    >>>     metric="accuracy",
    >>>     label_mapping={0: "yes", 1: "no", 2: "up", 3: "down"}
    >>> )
    ```

    <Tip>

    The evaluator supports raw audio data as well, in the form of a numpy array. However, be aware that calling
    the audio column automatically decodes and resamples the audio files, which can be slow for large datasets.

    </Tip>

    ```python
    >>> from evaluate import evaluator
    >>> from datasets import load_dataset

    >>> task_evaluator = evaluator("audio-classification")
    >>> data = load_dataset("superb", 'ks', split="test[:40]")
    >>> data = data.map(lambda example: {"audio": example["audio"]["array"]})
    >>> results = task_evaluator.compute(
    >>>     model_or_pipeline=""superb/wav2vec2-base-superb-ks"",
    >>>     data=data,
    >>>     label_column="label",
    >>>     input_column="audio",
    >>>     metric="accuracy",
    >>>     label_mapping={0: "yes", 1: "no", 2: "up", 3: "down"}
    >>> )
    ```
c                &       s  e Zd ZdZi Zd% fdd	Zdd Zeee	e
e											
					d&deededdf deeef dee dee deeef deeedf  deeedf  ded dedededee ded ed!eeeef  d"eeeef ef f  fd#d$Z  ZS )'AudioClassificationEvaluatora%  
    Audio classification evaluator.
    This audio classification evaluator can currently be loaded from [`evaluator`] using the default task name
    `audio-classification`.
    Methods in this class assume a data format compatible with the [`transformers.AudioClassificationPipeline`].
    audio-classificationNc                    s   t  j||d d S )N)default_metric_name)super__init__)selftaskr   	__class__ [/home/ubuntu/.local/lib/python3.10/site-packages/evaluate/evaluator/audio_classification.pyr   [   s   z%AudioClassificationEvaluator.__init__c                    s(   dd |D } fdd|D }d|iS )Nc                 S   s    g | ]}t |d d dd qS )c                 S   s   | d S )Nscorer"   )xr"   r"   r#   <lambda>_   s    zOAudioClassificationEvaluator.predictions_processor.<locals>.<listcomp>.<lambda>)keylabel)max.0predr"   r"   r#   
<listcomp>_        zFAudioClassificationEvaluator.predictions_processor.<locals>.<listcomp>c                    s    g | ]} d ur | n|qS )Nr"   r*   label_mappingr"   r#   r-   `   r.   predictionsr"   )r   r1   r0   
pred_labelr"   r/   r#   predictions_processor^   s   z2AudioClassificationEvaluator.predictions_processorsimpleffffff?'  filer(   model_or_pipeliner   r   r   datasubsetsplitmetric	tokenizerr   feature_extractorr   strategy)r4   	bootstrapconfidence_leveln_resamplesdevicerandom_stateinput_columnlabel_columnr0   returnc                    s.   t  j|||||||||	|
|||||d}|S )ad  
        input_column (`str`, defaults to `"file"`):
            The name of the column containing either the audio files or a raw waveform, represented as a numpy array, in the dataset specified by `data`.
        label_column (`str`, defaults to `"label"`):
            The name of the column containing the labels in the dataset specified by `data`.
        label_mapping (`Dict[str, Number]`, *optional*, defaults to `None`):
            We want to map class labels defined by the model in the pipeline to values consistent with those
            defined in the `label_column` of the `data` dataset.
        )r8   r9   r:   r;   r<   r=   r>   r?   rA   rB   rC   rD   rE   rF   r0   )r   compute)r   r8   r9   r:   r;   r<   r=   r>   r?   rA   rB   rC   rD   rE   rF   r0   resultr    r"   r#   rH   d   s$   !z$AudioClassificationEvaluator.compute)r   N)NNNNNNNr4   r5   r6   NNr7   r(   N)__name__
__module____qualname____doc__PIPELINE_KWARGSr   r3   r   r   r   r   TASK_DOCUMENTATIONr	   strr   r
   r   r   r   floatintr   r   r   r   rH   __classcell__r"   r"   r    r#   r   Q   sr    

	
r   N)numbersr   typingr   r   r   r   r   r   r	   datasetsr
   typing_extensionsr   moduler   utils.file_utilsr   r   baser   r   r   transformersr   r   r   r   r   rO   r   r"   r"   r"   r#   <module>   s   $3