o
    5ti(                     @   s   d dl mZmZmZmZmZmZmZ d dlm	Z	 d dl
mZ ddlmZ ddlmZmZ ddlmZmZmZ erDd d	lmZmZmZmZ d
ZG dd deZdS )    )TYPE_CHECKINGAnyCallableDictOptionalTupleUnion)Dataset)Literal   )EvaluationModule)add_end_docstringsadd_start_docstrings   )"EVALUATOR_COMPUTE_RETURN_DOCSTRING EVALUTOR_COMPUTE_START_DOCSTRING	Evaluator)PipelinePreTrainedModelPreTrainedTokenizerTFPreTrainedModela  
    Examples:
    ```python
    >>> from evaluate import evaluator
    >>> from datasets import load_dataset
    >>> task_evaluator = evaluator("automatic-speech-recognition")
    >>> data = load_dataset("mozilla-foundation/common_voice_11_0", "en", split="validation[:40]")
    >>> results = task_evaluator.compute(
    >>>     model_or_pipeline="https://huggingface.co/openai/whisper-tiny.en",
    >>>     data=data,
    >>>     input_column="path",
    >>>     label_column="sentence",
    >>>     metric="wer",
    >>> )
    ```
c                $       s   e Zd ZdZddiZd% fdd	Zdd	 Zeee	e
e							
							d&deededdf deeef dee dee deeef deeedf  ded dedededee ded ed!ed"eeeef ef f fd#d$Z  ZS )'#AutomaticSpeechRecognitionEvaluatora7  
    Automatic speech recognition evaluator.
    This automatic speech recognition evaluator can currently be loaded from [`evaluator`] using the default task name
    `automatic-speech-recognition`.
    Methods in this class assume a data format compatible with the [`AutomaticSpeechRecognitionPipeline`].
    
truncationTautomatic-speech-recognitionNc                    s   t  j||d d S )N)default_metric_name)super__init__)selftaskr   	__class__ c/home/ubuntu/.local/lib/python3.10/site-packages/evaluate/evaluator/automatic_speech_recognition.pyr   9   s   z,AutomaticSpeechRecognitionEvaluator.__init__c                 C   s   ddd |D iS )Npredictionsc                 S   s   g | ]}|d  qS )textr!   ).0predr!   r!   r"   
<listcomp>=   s    zMAutomaticSpeechRecognitionEvaluator.predictions_processor.<locals>.<listcomp>r!   )r   r#   label_mappingr!   r!   r"   predictions_processor<   s   z9AutomaticSpeechRecognitionEvaluator.predictions_processorsimpleffffff?'  pathsentencemodel_or_pipeliner   r   r   datasubsetsplitmetric	tokenizerr   strategy)r*   	bootstrapconfidence_leveln_resamplesdevicerandom_stateinput_columnlabel_columngeneration_kwargsreturnc                    s>   |dur
| j | t j|||||||||	|
|||d}|S )a  
        input_column (`str`, defaults to `"path"`):
            the name of the column containing the input audio path in the dataset specified by `data`.
        label_column (`str`, defaults to `"sentence"`):
            the name of the column containing the labels in the dataset specified by `data`.
        generation_kwargs (`Dict`, *optional*, defaults to `None`):
            The generation kwargs are passed to the pipeline and set the text generation strategy.
        N)r/   r0   r1   r2   r3   r4   r5   r7   r8   r9   r:   r;   r<   )PIPELINE_KWARGSupdater   compute)r   r/   r0   r1   r2   r3   r4   r5   r7   r8   r9   r:   r;   r<   r=   resultr   r!   r"   rA   ?   s$   z+AutomaticSpeechRecognitionEvaluator.compute)r   N)NNNNNNr*   r+   r,   NNr-   r.   N)__name__
__module____qualname____doc__r?   r   r)   r   r   r   r   TASK_DOCUMENTATIONr   strr   r	   r   r   r
   floatintdictr   r   r   rA   __classcell__r!   r!   r   r"   r   /   sl    

	
r   N)typingr   r   r   r   r   r   r   datasetsr	   typing_extensionsr
   moduler   utils.file_utilsr   r   baser   r   r   transformersr   r   r   r   rG   r   r!   r!   r!   r"   <module>   s   $