o
    yiU                     @   sx  U d dl Z d dlmZ d dlmZmZmZmZmZm	Z	m
Z
mZ d dlZd dlmZmZ d dlmZ d dlmZ ddgiZd	d
ddddddddddZeeeeef f ed< dZdLddZdede	e fddZdedededeeef fd d!Z	"dMd#e	e d$e	e d%edeee	e	e  f fd&d'Zd(e	e	e  d#e	e d$e	e de	e fd)d*Zd+e	e	e  d$e	e de	e fd,d-Z			dNd.ed/ee d0eegef d1eege	e f de	e f
d2d3Z d4e	e d5e	e d6edeeef fd7d8Z!d4e	e d5e	e deeef fd9d:Z"d4e	e	e  d5e	e	e  deeef fd;d<Z#			dNd=e	e d5e	e	e  d>eeeef  d?ed/ee d0eegef d1eege	e f deeeef eeeef  f fd@dAZ$dBeeee f deeef fdCdDZ%	E	"			FdOd=eee	e f d5eee	e e	e	e  f d?ed dGed0eegef d1eege	e f dHeee
edIf f deeef fdJdKZ&dS )P    N)Counter)AnyCallableDictListOptionalSequenceTupleUnion)Tensortensor)Literal)_NLTK_AVAILABLE)rouge_score_rouge_score_updatenltk                        	   LLsum)rouge1rouge2rouge3rouge4rouge5rouge6rouge7rouge8rouge9rougeL	rougeLsumALLOWED_ROUGE_KEYS)avgbestreturnc                  C   s^   ddl } z	| jd W dS  ty.   z| jdddddd W Y dS  ty-   tdw w )	zxCheck whether `nltk` `punkt` is downloaded.

    If not, try to download if a machine is connected to the internet.
    r   Nztokenizers/punkt.zippunktTF)quietforcehalt_on_errorraise_on_errorzz`nltk` resource `punkt` is not available on a disk and cannot be downloaded as a machine is not connected to the internet.)r   datafindLookupErrordownload
ValueErrorOSError)r    r7   V/home/ubuntu/.local/lib/python3.10/site-packages/torchmetrics/functional/text/rouge.py _ensure_nltk_punkt_is_downloaded*   s   r9   xc                 C   s2   t stdddl}t  tdd|  || S )zdThe sentence is split to get rougeLsum scores matching published rougeL scores for BART and PEGASUS.zQROUGE-Lsum calculation requires that `nltk` is installed. Use `pip install nltk`.r   Nz<n> )r   ModuleNotFoundErrorr   r9   resubsent_tokenize)r:   r   r7   r7   r8   _split_sentence=   s   
r@   hits_or_lcspred_len
target_lenc                 C   sp   | | }| | }||  krdkr!n nt tdtdtddS d| | ||  }t t|t|t|dS )ak  This computes precision, recall and F1 score based on hits/lcs, and the length of lists of tokenizer
    predicted and target sentences.

    Args:
        hits_or_lcs: A number of matches or a length of the longest common subsequence.
        pred_len: A length of a tokenized predicted sentence.
        target_len: A length of a tokenized target sentence.
            	precisionrecallfmeasurer   )dictr   )rA   rB   rC   rF   rG   rH   r7   r7   r8   _compute_metricsI   s   	rJ   Fpred_tokenstarget_tokensreturn_full_tablec                    s    fddt t|d D }t dt|d D ]@}t dt d D ]4}||d   |d  krB||d  |d  d || |< q#t||d  | || |d  || |< q#q|r]|S |d d S )zCommon DP algorithm to compute the length of the longest common subsequence.

    Args:
        pred_tokens: A tokenized predicted sentence.
        target_tokens: A tokenized target sentence.
    c                    s   g | ]}d gt  d  qS )r   r   )len).0_rK   r7   r8   
<listcomp>d   s    z_lcs.<locals>.<listcomp>r   )rangerN   max)rK   rL   rM   lcsijr7   rQ   r8   _lcs[   s   	",rY   	lcs_tablec                 C   s   t |}t |}g }|dkrP|dkrP||d  ||d  kr/|d|d  |d8 }|d8 }n| | |d  | |d  | krD|d8 }n|d8 }|dkrP|dks|S )zBacktrack LCS table.

    Args:
        lcs_table: A table containing information for the calculation of the longest common subsequence.
        pred_tokens: A tokenized predicted sentence.
        target_tokens: A tokenized target sentence.
    r   r   )rN   insert)rZ   rK   rL   rW   rX   backtracked_lcsr7   r7   r8   _backtracked_lcsp   s   

 
	r]   pred_tokens_listc                    sn   dt t dt t dt t fdd dt t t  dt t fdd} fd	d
| D }fdd
||D }|S )zFind union LCS between a target sentence and iterable of predicted tokens.

    Args:
        pred_tokens_list: A tokenized predicted sentence split by '
'.
        target_tokens: A tokenized single part of target sentence split by '
'.

    Return:
    rK   rL   r+   c                 S   s   t | |dd}t|| |}|S )zSReturns one of the longest of longest common subsequence via backtracked lcs table.T)rM   )rY   r]   )rK   rL   rZ   backtracked_lcs_tabler7   r7   r8   lcs_ind   s   z_union_lcs.<locals>.lcs_ind
lcs_tablesc                 S   s   t tt j|  S )z#Find union LCS given a list of LCS.)sortedlistsetunion)ra   r7   r7   r8   
find_union   s   z_union_lcs.<locals>.find_unionc                    s   g | ]} |qS r7   r7   )rO   rK   r`   rL   r7   r8   rR      s    z_union_lcs.<locals>.<listcomp>c                    s   g | ]} | qS r7   r7   rO   rW   )rL   r7   r8   rR          )r   strint)r^   rL   rf   ra   	union_lcsr7   rg   r8   
_union_lcs   s
   "
rm   textstemmer
normalizer	tokenizerc                    sf   t |r|| ntdd|  } t |r|| ntd| } r* fdd|D }dd |D }|S )a  Rouge score should be calculated only over lowercased words and digits. Optionally, Porter stemmer can be
    used to strip word suffixes to improve matching. The text normalization follows the implemantion from `Rouge
    score_Text Normalizition`_

    Args:
        text: An input sentence.
        stemmer: Porter stemmer instance to strip word suffixes to improve matching.
        normalizer: A user's own normalizer function.
            If this is ``None``, replacing any non-alpha-numeric characters with spaces is default.
            This function must take a ``str`` and return a ``str``.
        tokenizer:
            A user's own tokenizer function. If this is ``None``, splitting by spaces is default
            This function must take a ``str`` and return ``Sequence[str]``
    z
[^a-z0-9]+ z\s+c                    s&   g | ]}t |d kr |n|qS )r   )rN   stemrO   r:   ro   r7   r8   rR         & z0_normalize_and_tokenize_text.<locals>.<listcomp>c                 S   s&   g | ]}t |trt|d kr|qS )r   )
isinstancerj   rN   rt   r7   r7   r8   rR      rv   )callabler=   r>   lowersplit)rn   ro   rp   rq   tokensr7   ru   r8   _normalize_and_tokenize_text   s   "r|   predtargetn_gramc                    s   dt t dtdtfdd}|| |||| t  t }}d||fv r8ttdtdtddS t fd	d
t D }t	|t
|dt
|dS )zThis computes precision, recall and F1 score for the Rouge-N metric.

    Args:
        pred: A predicted sentence.
        target: A target sentence.
        n_gram: N-gram overlap.
    r{   nr+   c                    sD   t  } fddtt  d D D ]
}||  d7  < q|S )Nc                 3   s$    | ]}t ||   V  qd S N)tuplerh   r   r{   r7   r8   	<genexpr>   s   " z9_rouge_n_score.<locals>._create_ngrams.<locals>.<genexpr>r   )r   rT   rN   )r{   r   ngramsngramr7   r   r8   _create_ngrams   s   (z&_rouge_n_score.<locals>._create_ngramsr   rD   rE   c                 3   s"    | ]}t  | | V  qd S r   )min)rO   wpred_ngramstarget_ngramsr7   r8   r      s     z!_rouge_n_score.<locals>.<genexpr>r   )r   rj   rk   r   sumvaluesrI   r   rd   rJ   rU   )r}   r~   r   r   rB   rC   hitsr7   r   r8   _rouge_n_score   s   	r   c                 C   sN   t | t |}}d||fv rttdtdtddS t| |}t|||S )zThis computes precision, recall and F1 score for the Rouge-L metric.

    Args:
        pred: A predicted sentence.
        target: A target sentence.
    r   rD   rE   )rN   rI   r   rY   rJ   )r}   r~   rB   rC   rV   r7   r7   r8   _rouge_l_score   s
   
r   c                 C   s   t tt| }t tt|}d||fv r!ttdtdtddS dttt  dtfdd}|| }||}d}|D ],}t| |}	|	D ]"}
||
 dkre||
 dkre|d7 }||
  d8  < ||
  d8  < qCq:t	|||S )	a:  This computes precision, recall and F1 score for the Rouge-LSum metric. More information can be found in Section
    3.2 of the referenced paper [1]. This implementation follow the official implementation from:
    https://github.com/google-research/google-research/blob/master/rouge/rouge_scorer.py

    Args:
        pred: An iterable of predicted sentence split by '
'.
        target: An iterable target sentence split by '
'.

    References
        [1] ROUGE: A Package for Automatic Evaluation of Summaries by Chin-Yew Lin. https://aclanthology.org/W04-1013/
    r   rD   rE   	sentencesr+   c                 S   s   t  }| D ]}|| q|S r   )r   update)r   r   sentencer7   r7   r8   _get_token_counts  s   z,_rouge_lsum_score.<locals>._get_token_countsr   )
r   maprN   rI   r   r   rj   r   rm   rJ   )r}   r~   rB   rC   r   pred_tokens_counttarget_tokens_countr   tgtrV   tokenr7   r7   r8   _rouge_lsum_score   s$   
r   predsrouge_keys_values
accumulatec                    s  dd |D }t | |D ]\}}	dd |D }
dd |D }g }t|}d|v r8fddt|D }|	D ]P}t|}d|v rTfddt|D }|D ],}t|trdt|||}n|d	krnt||}n	|dkrwt||}||
|< || | qV||
	  q:|d
kr|d t
fdd|D }tt
| }|D ]}|| || |  qq|dkrdd |D }| D ].\}}i  |D ]}| D ]\}}| vrg  |<  | | qqч fdd D ||< q|D ]}|| ||  qq|S )a
  Update the rouge score with the current set of predicted and target sentences.

    Args:
        preds: An iterable of predicted sentences.
        target: An iterable of iterable of target sentences.
        rouge_keys_values: List of N-grams/'L'/'Lsum' arguments.
        accumulate: Useful incase of multi-reference rouge score.
            ``avg`` takes the avg of all references with respect to predictions
            ``best`` takes the best fmeasure score obtained between prediction and multiple corresponding references.
            Allowed values are ``avg`` and ``best``.
        stemmer: Porter stemmer instance to strip word suffixes to improve matching.
        normalizer:
            A user's own normalizer function.
            If this is ``None``, replacing any non-alpha-numeric characters with spaces is default.
            This function must take a `str` and return a `str`.
        tokenizer:
            A user's own tokenizer function. If this is ``None``, spliting by spaces is default
            This function must take a `str` and return `Sequence[str]`

    Example:
        >>> preds = "My name is John".split()
        >>> target = "Is your name John".split()
        >>> from pprint import pprint
        >>> score = _rouge_score_update(preds, target, rouge_keys_values=[1, 2, 3, 'L'], accumulate='best')
        >>> pprint(score)
        {1: [{'fmeasure': tensor(0.), 'precision': tensor(0.), 'recall': tensor(0.)},
             {'fmeasure': tensor(0.), 'precision': tensor(0.), 'recall': tensor(0.)},
             {'fmeasure': tensor(0.), 'precision': tensor(0.), 'recall': tensor(0.)},
             {'fmeasure': tensor(0.), 'precision': tensor(0.), 'recall': tensor(0.)}],
         2: [{'fmeasure': tensor(0.), 'precision': tensor(0.), 'recall': tensor(0.)},
             {'fmeasure': tensor(0.), 'precision': tensor(0.), 'recall': tensor(0.)},
             {'fmeasure': tensor(0.), 'precision': tensor(0.), 'recall': tensor(0.)},
             {'fmeasure': tensor(0.), 'precision': tensor(0.), 'recall': tensor(0.)}],
         3: [{'fmeasure': tensor(0.), 'precision': tensor(0.), 'recall': tensor(0.)},
             {'fmeasure': tensor(0.), 'precision': tensor(0.), 'recall': tensor(0.)},
             {'fmeasure': tensor(0.), 'precision': tensor(0.), 'recall': tensor(0.)},
             {'fmeasure': tensor(0.), 'precision': tensor(0.), 'recall': tensor(0.)}],
         'L': [{'fmeasure': tensor(0.), 'precision': tensor(0.), 'recall': tensor(0.)},
               {'fmeasure': tensor(0.), 'precision': tensor(0.), 'recall': tensor(0.)},
               {'fmeasure': tensor(0.), 'precision': tensor(0.), 'recall': tensor(0.)},
               {'fmeasure': tensor(0.), 'precision': tensor(0.), 'recall': tensor(0.)}]}
    c                 S      i | ]}|g qS r7   r7   rO   	rouge_keyr7   r7   r8   
<dictcomp>J      z'_rouge_score_update.<locals>.<dictcomp>c                 S      i | ]}|i qS r7   r7   r   r7   r7   r8   r   M  r   c                 S   r   r7   r7   r   r7   r7   r8   r   N  r   r   c                       g | ]	}t | qS r7   r|   )rO   pred_sentencerp   ro   rq   r7   r8   rR   R      z'_rouge_score_update.<locals>.<listcomp>c                    r   r7   r   )rO   tgt_sentencer   r7   r8   rR   [  r   r   r*   r   c                    s   g | ]}|  d  qS )rH   r7   )rO   v)key_currr7   r8   rR   m  s    r)   c                 S   r   r7   r7   r   r7   r7   r8   r   t  s    c                    s    i | ]}|t  |  qS r7   )torchr   mean)rO   _type)_dict_metric_score_batchr7   r8   r     s    )zipr|   r@   rw   rk   r   r   r   appendcopyr   r   argmaxitemitems)r   r~   r   r   ro   rp   rq   resultspred_raw
target_rawresult_inner
result_avglist_resultsr}   	pred_lsumtarget_raw_innerr   target_lsumr   scoreall_fmeasurehighest_idxnew_result_avgmetricsmetricr   valuer7   )r   r   rp   ro   rq   r8   r     sh   3



r   sentence_resultsc                 C   s8   i }| i kr|S |   D ]\}}t| ||< q|S )zCompute the combined ROUGE metric for all the input set of predicted and target sentences.

    Args:
        sentence_results: Rouge-N/Rouge-L/Rouge-LSum metrics calculated for single sentence.
    )r   r   r   r   )r   r   r   scoresr7   r7   r8   _rouge_score_compute  s   r   r*   r   r   r&   r'   use_stemmer
rouge_keys.c              	   C   sj  |rt stdddl}|r|jj nd}t|ts|f}|D ]}	|	t	 vr6t
d|	 dtt	  q dd |D }
t|tr[tdd	 |D r[t| trT|gnd
d |D }t| trc| g} t|trl|gg}t| ||
||||d}i }|
D ]}dD ]}g |d| d| < qq{| D ]\}}|D ]}| D ]\}}|d| d|  | qqqt|S )aw	  Calculate `Calculate Rouge Score`_ , used for automatic summarization.

    Args:
        preds: An iterable of predicted sentences or a single predicted sentence.
        target:
            An iterable of iterables of target sentences or an iterable of target sentences or a single target sentence.
        accumulate:
            Useful incase of multi-reference rouge score.

            - ``avg`` takes the avg of all references with respect to predictions
            - ``best`` takes the best fmeasure score obtained between prediction and multiple corresponding references.

        use_stemmer: Use Porter stemmer to strip word suffixes to improve matching.
        normalizer: A user's own normalizer function.
            If this is ``None``, replacing any non-alpha-numeric characters with spaces is default.
            This function must take a ``str`` and return a ``str``.
        tokenizer: A user's own tokenizer function. If this is ``None``, spliting by spaces is default
            This function must take a ``str`` and return ``Sequence[str]``
        rouge_keys: A list of rouge types to calculate.
            Keys that are allowed are ``rougeL``, ``rougeLsum``, and ``rouge1`` through ``rouge9``.

    Return:
        Python dictionary of rouge scores for each input rouge key.

    Example:
        >>> from torchmetrics.functional.text.rouge import rouge_score
        >>> preds = "My name is John"
        >>> target = "Is your name John"
        >>> from pprint import pprint
        >>> pprint(rouge_score(preds, target))
        {'rouge1_fmeasure': tensor(0.7500),
         'rouge1_precision': tensor(0.7500),
         'rouge1_recall': tensor(0.7500),
         'rouge2_fmeasure': tensor(0.),
         'rouge2_precision': tensor(0.),
         'rouge2_recall': tensor(0.),
         'rougeL_fmeasure': tensor(0.5000),
         'rougeL_precision': tensor(0.5000),
         'rougeL_recall': tensor(0.5000),
         'rougeLsum_fmeasure': tensor(0.5000),
         'rougeLsum_precision': tensor(0.5000),
         'rougeLsum_recall': tensor(0.5000)}


    Raises:
        ModuleNotFoundError:
            If the python package ``nltk`` is not installed.
        ValueError:
            If any of the ``rouge_keys`` does not belong to the allowed set of keys.

    References:
        [1] ROUGE: A Package for Automatic Evaluation of Summaries by Chin-Yew Lin. https://aclanthology.org/W04-1013/
    zBStemmer requires that `nltk` is installed. Use `pip install nltk`.r   NzGot unknown rouge key z. Expected to be one of c                 S   s   g | ]}t | qS r7   )r(   )rO   keyr7   r7   r8   rR     ri   zrouge_score.<locals>.<listcomp>c                 s   s    | ]}t |tV  qd S r   )rw   rj   rO   r   r7   r7   r8   r     s    zrouge_score.<locals>.<genexpr>c                 S   s   g | ]}|gqS r7   r7   r   r7   r7   r8   rR     r   )ro   rp   rq   r   )rH   rF   rG   rougerP   )r   r<   r   rs   porterPorterStemmerrw   r   r(   keysr5   rc   allrj   r   r   r   r   )r   r~   r   r   rp   rq   r   r   ro   r   r   r   outputr   tpr   r   r   r7   r7   r8   r     sN   ?



r   )r+   N)F)NNN)r*   FNNr   )'r=   collectionsr   typingr   r   r   r   r   r   r	   r
   r   r   r   typing_extensionsr   torchmetrics.utilities.importsr   __doctest_requires__r(   rj   rk   __annotations__ALLOWED_ACCUMULATE_VALUESr9   r@   rJ   boolrY   r]   rm   r|   r   r   r   r   r   r   r7   r7   r7   r8   <module>   s   
(

"


&
*%&.-

&r
