o
    %ݫi5                     @   sP   d Z ddlmZmZmZ ddlZddlmZ G dd deZG dd deZ	dS )	zSProvides a metrics class for the SemDist metric.

Authors
* Sylvain de Langen 2024
    )CallableListLiteralN)MetricStatsc                   @   s^   e Zd ZdZ		ddeee gejf de	de
fddZd	d
 Zdd ZdddZdd ZdS )BaseSemDistStatsuv  
    Base class to implement the SemDist metric, for the variants that estimate a
    single cosine similarity per pair of target and predicted texts.
    The SemDist metrics are described by the paper
    `Evaluating User Perception of Speech Recognition System Quality with Semantic Distance Metric <https://arxiv.org/abs/2110.05376>`_.

    Arguments
    ---------
    embed_function : Callable[[List[str]], torch.Tensor]
        Given a list of sentences, return their summarized embedding using the
        method of your choice (e.g. mean pooling)
    scale : float, optional
        The `α` scale applied to the cosine similarity result for clarity. The
        default is `1000`, in order to match the authors' recommendation.
    batch_size : int, optional
        How many pairs of utterances should be considered at once. Higher is
        faster but may result in OOM.
         @@@   embed_functionscale
batch_sizec                 C   s   |    || _|| _|| _d S N)clearr	   r
   r   )selfr	   r
   r    r   M/home/ubuntu/.local/lib/python3.10/site-packages/speechbrain/utils/semdist.py__init__"   s   
zBaseSemDistStats.__init__c                 C   s"   g | _ g | _g | _g | _i | _dS )zClears the collected metricsN)idspredictionstargetsscoressummary)r   r   r   r   r   -   s
   
zBaseSemDistStats.clearc                 C   s(   | j | | j| | j| dS )aN  
        Appends inputs, predictions and targets to internal
        lists

        Arguments
        ---------
        ids: list
            the string IDs for the samples
        predict: list
            the model's predictions in tokenizable format
        target: list
            the ground truths in tokenizable format
        N)r   extendr   r   )r   r   predicttargetr   r   r   append5   s   zBaseSemDistStats.appendNc                 C   sH   t   |   W d   n1 sw   Y  |dur!| j| S | jS )a  Summarize the SemDist metric scores. Performs the actual embedding
        function call and SemDist calculation.

        Full set of fields:
        - `semdist`: The average SemDist over all utterances, multiplied by
          the scale optionally specified at initialization.

        Additionally, a `scores` list is populated by this function for each
        pair of sentences. Each entry of that list is a dict, with the fields:
        - `key`: the ID of the utterance.
        - `semdist`: The SemDist of the utterance, multiplied by the scale.

        Arguments
        ---------
        field : str, optional
            The field to return, if you are only interested in one of them.
            If specified, a single `float` is returned, otherwise, a dict is.

        Returns
        -------
        dict from str to float, if `field is None`
            A dictionary of the fields documented above.
        float, if `field is not None`
            The single field selected by `field`.
        N)torchno_grad_update_summaryr   )r   fieldr   r   r   	summarizeG   s   


zBaseSemDistStats.summarizec                 C   s   d}t dt| j| jD ][}| j||| j  }| j||| j  }| j||| j  }| | }| | }tj	j
j||dd}d| | j }	t|D ]\}
}| j||	|
  d qO||	 7 }q|t| j  }|| jd< dS )	zPerforms the actual inference and SemDist estimation, updating the
        `summary` field. Automatically called by `summarize`.g        r   dimg      ?)keysemdistr$   N)rangelenr   r   r   r   r	   cpur   nn
functionalcosine_similarityr
   	enumerater   r   itemsumr   )r   semdist_sum	chunk_idxr   ref_texthyp_textref_embhyp_emb
similaritychunk_semdistiutt_idr$   r   r   r   r   j   s$   z BaseSemDistStats._update_summary)r   r   r   )__name__
__module____qualname____doc__r   r   strr   Tensorfloatintr   r   r   r   r   r   r   r   r   r      s    

#r   c                       sF   e Zd ZdZ	dded f fddZdee dej	fd	d
Z
  ZS )SemDistStatsa  Computes the SemDist metric with a provided HuggingFace Transformers text
    encoder.

    Arguments
    ---------
    lm : speechbrain.lobes.models.huggingface_transformers.TextEncoder
        HF Transformers tokenizer and text encoder wrapper to use as a LM.
    method : "meanpool" or "cls"
        - `"meanpool"` (default): Computes the mean of all contextualized
          embeddings, excluding padding tokens.
        - `"cls"`: Exclusively uses the first contextualized embedding, which
          with BERT-like tokenizers is the `[CLS]` token, which is typically
          intended to capture classification information.
    *args
        Extra positional arguments passed to the base constructor.
    **kwargs
        Extra keyword arguments passed to the base constructor.meanpoolmethod)rA   clsc                    s(   t  j|d| ji| || _|| _d S )Nr	   )superr   _embedlmrB   )r   rF   rB   argskwargs	__class__r   r   r      s   
zSemDistStats.__init__	sentencesreturnc                 C   s   dd |D }| j |dd\}}|d  }| jdkr7| |d }tj|dd}tj|d	d|d S | jd
krI|dddddf  S td| j d)ao  Computes the LM embedding of a batch of independent sentences,
        according to the pooling method chosen at initialization.

        Arguments
        ---------
        sentences : list of str
            List of unprocessed sentences to tokenize and encode.

        Returns
        -------
        torch.Tensor
            Embedding of the LM encoder.
        c                 S   s   g | ]}d  |qS ) )join).0sentr   r   r   
<listcomp>   s    z'SemDistStats._embed.<locals>.<listcomp>T)return_tokensattention_maskrA   r    r!   rC   Nr   zSpecified SemDist method z is invalid)rF   r'   rB   	unsqueezer   r-   
ValueError)r   rK   tokenshiddenmaskmasked_hiddennonmasked_countsr   r   r   rE      s    

zSemDistStats._embed)rA   )r8   r9   r:   r;   r   r   r   r<   r   r=   rE   __classcell__r   r   rI   r   r@      s     r@   )
r;   typingr   r   r   r   speechbrain.utils.metric_statsr   r   r@   r   r   r   r   <module>   s    z