o
    5t¾i#	  ã                
   @   sž   d Z ddlmZ ddlmZmZ dedededeeef fdd	„Zd
ee dedefdd„Z	ddedede
defdd„Z	ddedede
dee fdd„ZdS )zCVarious utility functions for word and character n-gram extraction.é    )ÚCounter)ÚListÚTupleÚlineÚ	min_orderÚ	max_orderÚreturnc              	   C   sf   g }|   ¡ }t||d ƒD ]}tdt|ƒ| d ƒD ]}| t|||| … ƒ¡ qqt|ƒt|ƒfS )a  Extracts all ngrams (min_order <= n <= max_order) from a sentence.

    :param line: A string sentence.
    :param min_order: Minimum n-gram order.
    :param max_order: Maximum n-gram order.
    :return: a Counter object with n-grams counts and the sequence length.
    é   r   )ÚsplitÚrangeÚlenÚappendÚtupler   )r   r   r   ÚngramsÚtokensÚnÚi© r   úM/home/ubuntu/.local/lib/python3.10/site-packages/sacrebleu/metrics/helpers.pyÚextract_all_word_ngrams   s   	ÿr   r   r   c                    s(   t ‡ ‡fdd„ttˆƒˆ  d ƒD ƒƒS )z¸Extracts n-grams with order `n` from a list of tokens.

    :param tokens: A list of tokens.
    :param n: The order of n-grams.
    :return: a Counter object with n-grams counts.
    c                    s"   g | ]}d   ˆ||ˆ  … ¡‘qS )ú )Újoin©Ú.0r   ©r   r   r   r   Ú
<listcomp>!   s   " z'extract_word_ngrams.<locals>.<listcomp>r	   )r   r   r   )r   r   r   r   r   Úextract_word_ngrams   s   (r   FÚinclude_whitespacec                    s:   |s	d  ˆ  ¡ ¡‰ t‡ ‡fdd„ttˆ ƒˆ d ƒD ƒƒS )a&  Yields counts of character n-grams from a sentence.

    :param line: A segment containing a sequence of words.
    :param n: The order of the n-grams.
    :param include_whitespace: If given, will not strip whitespaces from the line.
    :return: a dictionary containing ngrams and counts
    Ú c                    ó   g | ]
}ˆ ||ˆ … ‘qS r   r   r   ©r   r   r   r   r   /   ó    z'extract_char_ngrams.<locals>.<listcomp>r	   )r   r
   r   r   r   )r   r   r   r   r    r   Úextract_char_ngrams$   s   (r"   c                    s`   g }|sd  ˆ  ¡ ¡‰ td|d ƒD ]‰t‡ ‡fdd„ttˆ ƒˆ d ƒD ƒƒ}| |¡ q|S )aH  Extracts all character n-grams at once for convenience.

    :param line: A segment containing a sequence of words.
    :param max_order: The maximum order of the n-grams.
    :param include_whitespace: If given, will not strip whitespaces from the line.
    :return: a list of Counter objects containing ngrams and counts.
    r   r	   c                    r   r   r   r   r    r   r   r   B   r!   z+extract_all_char_ngrams.<locals>.<listcomp>)r   r
   r   r   r   r   )r   r   r   Úcountersr   r   r    r   Úextract_all_char_ngrams2   s   
(r$   N)F)Ú__doc__Úcollectionsr   Útypingr   r   ÚstrÚintr   r   Úboolr"   r$   r   r   r   r   Ú<module>   s     "
ÿÿÿÿÿ