o
    5ti?                     @  s  d dl mZ d dlZd dlZd dlZd dlZd dlZd dlmZ d dl	m
Z
mZmZmZ d dlmZ d dlmZmZ eeZedZe
rgd dlmZmZmZmZ d dlZd d	lmZ d d
lm Z  d dl!m"Z" G dd deddZ#dtduddZ$G dd dZ%G dd dZ&dd Z'				dvdwd%d&Z(G d'd( d(Z)	dxdyd.d/Z*dzd5d6Z+d{d9d:Z,d|d@dAZ-	Bd}d~dFdGZ.				H		I	IdddTdUZ/	Vddd\d]Z0	I	V		HdddbdcZ1ddhdiZ2dxddmdnZ3dxddrdsZ4dS )    )annotationsNwraps)TYPE_CHECKINGAnyLiteralTypeVar)	TypedDict)
maybe_warnwarning_onceT)CallableIterableIteratorSequence)Image)PreTrainedTokenizerBase)PretrainedConfigc                   @  s6   e Zd ZU ded< ded< ded< ded< d	ed
< dS )	GenKwargsbool	do_samplefloattemperatureintmax_gen_toks	list[str]untilr   __extra_items__N)__name__
__module____qualname____annotations__ r"   r"   H/home/ubuntu/.local/lib/python3.10/site-packages/lm_eval/models/utils.pyr   !   s   
 r   F)totalnr   c                 c  sX    g }t | D ]\}}|| t||r||| n|kr"|V  g }q|r*|V  dS dS )a  
    Divides an iterable into chunks of specified size or based on a given function.
    Useful for batching

    Parameters:
    - iter: The input iterable to be divided into chunks.
    - n: An integer representing the size of each chunk. Default is 0.
    - fn: A function that takes the current index and the iterable as arguments and returns the size of the chunk. Default is None.

    Returns:
    An iterator that yields chunks of the input iterable.

    Example usage:
    ```
    data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
    for chunk in chunks(data, 3):
        print(chunk)
    ```
    Output:
    ```
    [1, 2, 3]
    [4, 5, 6]
    [7, 8, 9]
    [10]
    ```
    N)	enumerateappendlen)iterr%   fnarrixr"   r"   r#   chunks*   s   

r.   c                   @  s*   e Zd ZdddZdddZdd	d
ZdS )MultiChoicereturnNonec                 C  s
   || _ d S Nchoices)selfr4   r"   r"   r#   __init__Q   s   
zMultiChoice.__init__r   c                 C  s`   | dD ](}tt| j|dkr-td | jD ]
}td|  qtd| dqdS )N,r   zAvailable tasks to choose:z  - 'z' is not in task listT)splitr(   fnmatchfilterr4   eval_loggerinfo
ValueError)r5   valuesvaluechoicer"   r"   r#   __contains__U   s   

zMultiChoice.__contains__r   c                 c  s    | j E d H  d S r2   r3   r5   r"   r"   r#   __iter__^   s   zMultiChoice.__iter__Nr0   r1   )r0   r   )r0   r   )r   r   r    r6   rB   rD   r"   r"   r"   r#   r/   P   s    

	r/   c                   @  s*   e Zd ZdZdddZdd Zdd	 Zd
S )Grouperz
    takes an array `arr` and function `fn` and returns a dictionary
    with keys fn(ob) for each ob in `arr` and with values `self.arr[key]` a list of all
    objects in `arr` satisfying `key == fn(ob)`.
    r0   r1   c                   s@   t || _tt|}dd }|| fdd}|| _d | _d S )Nc                 S  s*   t t}| D ]}||| | q|S r2   )collectionsdefaultdictlistr'   )r+   r*   resobr"   r"   r#   group_return_dictn   s   
z+Grouper.__init__.<locals>.group_return_dictc                       | d S N   r"   r-   r*   r"   r#   <lambda>u       z"Grouper.__init__.<locals>.<lambda>)r(   sizerI   r&   r+   _grouped)r5   r+   r*   rL   r"   rQ   r#   r6   i   s   

zGrouper.__init__c                 C  sB   | j r| j S i }| j D ]}dd | j| D ||< q|| _ |S )Nc                 S     g | ]}|d  qS rO   r"   ).0yr"   r"   r#   
<listcomp>       z'Grouper.get_grouped.<locals>.<listcomp>)rU   r+   keys)r5   groupedkeyr"   r"   r#   get_grouped{   s   zGrouper.get_groupedc                 C  s   d g| j  }dg| j  }| | j ksJ | D ]}t| j| || ddD ]\\}}}|||< d||< q)qt|s?J |S )NFTstrict)rT   r\   r+   zipall)r5   grouped_dictrJ   covr^   ind_vr"   r"   r#   get_original   s   $
zGrouper.get_originalNrE   )r   r   r    __doc__r6   r_   ri   r"   r"   r"   r#   rF   b   s
    
rF   c                 C  s&   dd t jt jdd | D  D S )an  
    Undoes https://more-itertools.readthedocs.io/en/stable/api.html#more_itertools.distribute .

    Re-interleaves results that have been split using more_itertools.distribute:
        >>> group_1, group_2 = distribute(2, [1, 2, 3, 4, 5, 6])
        >>> list(group_1)
        [1, 3, 5]
        >>> list(group_2)
        [2, 4, 6]
        >>> undistribute([group_1, group_2])
        [1, 2, 3, 4, 5, 6]

    Handles non-uniform component lengths:

        >>> children = distribute(3, [1, 2, 3, 4, 5, 6, 7])
        >>> [list(c) for c in children]
        [[1, 4, 7], [2, 5], [3, 6]]
        >>> undistribute(children)
        [1, 2, 3, 4, 5, 6, 7]

    Also handles when some iterables are empty:

        >>> children = distribute(5, [1, 2, 3])
        >>> [list(c) for c in children]
        [[1], [2], [3], [], []]
        >>> undistribute(children)
        [1, 2, 3]

    c                 S  s   g | ]}|d ur|qS r2   r"   rX   r-   r"   r"   r#   rZ      s
    z undistribute.<locals>.<listcomp>c                 S  s   g | ]}t |qS r"   )rI   rk   r"   r"   r#   rZ      r[   )	itertoolschainfrom_iterablezip_longest)iterabler"   r"   r#   undistribute   s
   rq         @      ?on_exceptionslist[type[Exception]]max_retries
int | Nonebackoff_timer   backoff_multiplieron_exception_callback(Callable[[Exception, float], Any] | Nonec                   s   d fdd}|S )a  Retry on an LLM Provider's rate limit error with exponential backoff
    For example, to use for OpenAI, do the following:
    ```
    from openai import RateLimitError

    # Recommend specifying max_retries to avoid infinite loops!
    @retry_on_specific_exceptions([RateLimitError], max_retries=3)
    def completion(...):
        # Wrap OpenAI completion function here
        ...
    ```
    funcr   c                   s"   t   fdd}|S )Nc               
     s   }d}d u s|k rIz| i |W S  t y> } zd ur'|| t| | 9 }|d7 }W Y d }~nd }~ww d u s|k sd S d S )Nr   rO   )tupletimesleep)argskwargs
sleep_timeattempte)ry   rx   r|   rv   rz   rt   r"   r#   wrapper   s   

z@retry_on_specific_exceptions.<locals>.decorator.<locals>.wrapperr   )r|   r   ry   rx   rv   rz   rt   )r|   r#   	decorator   s   z/retry_on_specific_exceptions.<locals>.decoratorN)r|   r   r"   )rt   rv   rx   ry   rz   r   r"   r   r#   retry_on_specific_exceptions   s   r   c                   @  s   e Zd ZdZdd dd dfd<ddZd=ddZd=ddZ	d>d?ddZd@d%d&ZdAd)d*Z	dBd-d.Z
d/d0 Ze	1dCdDd7d8Ze	dEdFd:d;ZdS )GCollatora  
    A class for reordering and batching elements of an array.

    This class allows for sorting an array based on a provided sorting function, grouping elements based on a grouping function, and generating batches from the sorted and grouped data.

    Objects of this class have the group_by attribute which determines the method for grouping
    the data while batching it. Three options include "gen_kwargs", "contexts", or None:
        If group_by == "gen_kwargs" then requests will be grouped by gen_kwargs
        If group_by == "contexts" then requests will be grouped by context + cont[:-1]
        If None then requests will just be reordered by length descending.
    c                 C  s   | S r2   r"   rP   r"   r"   r#   rR      s    zCollator.<lambda>c                 C  s   | d S rN   r"   rP   r"   r"   r#   rR      s    Nr+   Sequence[T]sort_fnCallable[[T], Any]group_fngroup_by(Literal['gen_kwargs', 'contexts'] | Noner0   r1   c                   sp   || _ fdd| _ fdd| _g | _t|| _tt|| _| j dkr+| 	  d S | j dkr6| 
  d S d S )Nc                   rM   rN   r"   rP   )r   r"   r#   rR     rS   z#Collator.__init__.<locals>.<lambda>c                   rM   rN   r"   rP   )r   r"   r#   rR     rS   contexts
gen_kwargs)	_group_by_sort_fn	_group_fn_reorder_indicesr(   _sizer}   r&   _arr_with_indices_group_by_context_group_by_index)r5   r+   r   r   r   r"   )r   r   r#   r6      s   


zCollator.__init__c                 C     | j | j| jdd| _dS )z4Group the elements of a list based on their indices.r   r*   r   Ngroupr   r   rC   r"   r"   r#   r        
zCollator._group_by_indexc                 C  r   )z(Group the array with indices by context.r   r   Nr   rC   r"   r"   r#   r     r   zCollator._group_by_contextrO   r%   r   batch_fn(Callable[[int, Iterable[T]], int] | NoneIterator[T]c                 c  s    | j dkr$| j D ]\}}| |}| j|||d}|E dH  qdS | j dkrE| dd | j D }| j|||d}|E dH  dS | | j}| j|||d}|E dH  dS )a  
        Generates and yields batches from the reordered array. The method of grouping and batching
        depends on the parameter `group_by`.
        If `group_by` is set to "gen_kwargs", it will batch the
        re-ordered values with same gen_kwargs for each batch.
        If `group_by` is "contexts", it caches the requests by context before batching.
        If `group_by` is neither "gen_kwargs" nor "contexts", it yields the reordered array

        Parameters:
        - n (int): The size of each batch. Defaults to 1.
        - batch_fn ([Callable[[int, Iterable], int]] | None): A function to determine the size of
          each batch. Defaults to None.

        Returns:
        Iterator: An iterator over batches of reordered elements grouped as per the `group_by`
                  attribute.

        Yields:
        List of batched elements according to the `group_by` attribute.
        r   )r%   r*   Nr   c                 S  s   g | ]
}t |d d dqS )c                 S  s   t | d d S )NrO   )r(   rP   r"   r"   r#   rR   >  s    z1Collator.get_batched.<locals>.<listcomp>.<lambda>r^   )max)rX   r@   r"   r"   r#   rZ   =  s    z(Collator.get_batched.<locals>.<listcomp>)r   r   items_reorder
get_chunksr?   )r5   r%   r   rg   r?   batchr"   r"   r#   get_batched  s*   


zCollator.get_batchedreq_strtuple[str, str]cxt_toks	list[int]	cont_tokslogitstorch.Tensor9Iterator[tuple[tuple[str, str], list[int], torch.Tensor]]c           	      c  s    | j dkr[| jt||dd  }t| }dkr/| jdd |D  |||fV  dS ||dd|}t	dd |D d	d
i\}}}| j| t	|||d
dE dH  dS |||fV  dS )a=  
        Retrieves cached single-token continuations and their associated arguments, updating indices as necessary.

        The behavior of this function varies depending on how the `group_by` attribute is set:

        - When `group_by` is "contexts":
            The function identifies single-token continuations by checking for keys that equate to
            [context+continuation][-1] and logs the indices for re-ordering.
            In this mode, this function can work in two scenarios:

            1. Cache Hit - Single Match:
                If a single matching context-continuation pair is found in the cache,
                the function yields the original arguments.

            2. Cache Hit - Multiple Matches:
                If multiple matching context-continuation pairs are found in the cache,
                the function expands the logits batch dimension to match the number of cache hits.
                It updates the original requests and continuation tokens.

        - When `group_by` is not set to "contexts":
            This method yields the original arguments, logits and continuation tokens,
            without checking for one-token continuations.

        Parameters:
        - req_str (tuple[str, str]): Original strings used for CachingLM.
        - cxt_toks (list[int]): Full context tokens used for lookup.
        - cont_toks (list[int]): Continuation tokens for which logits were generated.
        - logits (torch.Tensor [1, seq_length, vocab_size]): Logits generated by the model given context and continuation keys.

        Yields:
        - Iterator:
            - req_str (tuple[str, str]): strings used for CachingLM.
            - cont_toks (list[int]) : continuation tokens.
            - logits (torch.Tensor [1, seq_length, vocab_size]): The original logits (repeated cache hit times)
        r   Nr   rO   c                 s  s    | ]}|d  V  qdS r   Nr"   rk   r"   r"   r#   	<genexpr>x  s    z%Collator.get_cache.<locals>.<genexpr>c                 S  s*   g | ]}|d  |d d  |d d fqS )r   rO   r   r"   rk   r"   r"   r#   rZ     s   * z&Collator.get_cache.<locals>.<listcomp>ra   Tr`   )
r   r   popr}   r(   r   extendexpandchunkrb   )	r5   r   r   r   r   	cache_hit
cache_sizemultilogitsindicesr"   r"   r#   	get_cacheI  s    
*zCollator.get_cache"list | tuple[tuple[int, Any], ...]r   c                 c  sH    t || jd}| jdkr| jdd |D  dd |D E dH  dS )z
        Reorders the elements in the array based on the sorting function.

        Parameters:
        - arr (list | tuple[tuple[int, Any], ...]]): The array or iterable to be reordered.

        Yields:
            Iterator
        r   r   c                 S  rV   )r   r"   rk   r"   r"   r#   rZ     r[   z%Collator._reorder.<locals>.<listcomp>c                 S  rV   rW   r"   rk   r"   r"   r#   rZ     r[   N)sortedr   r   r   r   )r5   r+   r"   r"   r#   r     s
   

zCollator._reordernewarrrI   c                 C  sR   dg| j  }dg| j  }t| j|ddD ]\}}|||< d||< qt|s'J |S )z
        Restores the original order of elements from the reordered list.

        Parameters:
        - newarr (list): The reordered array.

        Returns:
        list: The array with elements restored to their original order.
        NFTr`   )r   rb   r   rc   )r5   r   rJ   re   rf   rh   r"   r"   r#   ri     s   

zCollator.get_originalc                 C  s   | j S r2   )r   rC   r"   r"   r#   __len__  s   zCollator.__len__r   Iterable[T]r*   !Callable[[T], Sequence[T] | dict]!Literal['gen_kwargs', 'contexts']dictc              
   C  s   t t}| D ]@}|dkr|t|| | qztdd t|| D }|| | W q ttfyG   |t|| | Y qw |S )aq  
        Groups elements of an iterable based on a provided function.


        The `group_by` parameter determines the method of grouping.
        If `group_by` is "contexts", the elements are grouped by [context + cont][:-1].
        If `group_by` is "gen_kwargs", the elements are grouped based on the gen_kwargs dict.

        Parameters:
        - arr (Iterable): The iterable to be grouped.
        - fn (Callable): The function to determine the grouping.
        - values (bool): If True, returns the values of the group. Defaults to False.

        Returns:
        Iterator: An iterable of grouped elements.
        r   c                 s  s2    | ]\}}|t |tjjrt|n|fV  qd S r2   )
isinstancerG   abcr   r}   )rX   r^   r@   r"   r"   r#   r     s    

z!Collator.group.<locals>.<genexpr>)	rG   rH   rI   r}   r'   r   r   	TypeErrorAttributeError)r+   r*   r   rJ   rK   hashable_dictr"   r"   r#   r     s   
	zCollator.groupr   c                 c  s`    g }t | } t| D ]\}}|| t||r||| n|kr&|V  g }q|r.|V  dS dS )a  
        Divides an iterable into chunks of specified size or based on a given function.
        Useful for batching

        Parameters:
        - iter: The input iterable to be divided into chunks.
        - n: An integer representing the size of each chunk. Default is 0.
        - fn: A function that takes the current index and the iterable as arguments and returns the size of the chunk. Default is None.

        Returns:
        An iterator that yields chunks of the input iterable.

        Example usage:
        ```
        data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
        for chunk in chunks(data, 3):
            print(chunk)
        ```
        Output:
        ```
        [1, 2, 3]
        [4, 5, 6]
        [7, 8, 9]
        [10]
        ```
        N)r}   r&   r'   r(   )_iterr%   r*   r+   r,   r-   r"   r"   r#   r     s   

zCollator.get_chunks)
r+   r   r   r   r   r   r   r   r0   r1   rE   )rO   N)r%   r   r   r   r0   r   )
r   r   r   r   r   r   r   r   r0   r   )r+   r   r0   r   )r   rI   r0   rI   )r   )r+   r   r*   r   r   r   r0   r   r   )r%   r   r*   r   r0   r   )r   r   r    rj   r6   r   r   r   r   r   ri   r   staticmethodr   r   r"   r"   r"   r#   r      s(    


/
=
*r   	tokenizerr   model_configPretrainedConfig | Noner0   c                 C  s   | j r	 | S | jr| j| _| S | jr| j| _| S |r't|dddkr'd| _ | S | jjdks3| jjdkr<| jdks:J | S | 	dd	i | S )
a  
    This function checks if the (Hugging Face) tokenizer has a padding token and sets it if not present.
    Some tokenizers require special handling.

    Args:
        tokenizer: The tokenizer for which the padding token is to be handled.
        model_config: The configuration of the model. Default is None.

    Returns:
        The tokenizer after the padding token has been handled.

    Raises:
        AssertionError: If the tokenizer is of type RWKVWorldTokenizer or Rwkv5Tokenizer and the padding token id is not 0.
    
model_typeNqwenz<|endoftext|>RWKVWorldTokenizerRwkv5Tokenizerr   	pad_tokenz<|pad|>)
r   	unk_tokenunk_token_idpad_token_id	eos_tokeneos_token_idgetattr	__class__r   add_special_tokens)r   r   r"   r"   r#   configure_pad_token  s$   r   stringstrdefault_placeholderimage_token
max_imagesc                 C  st   d}g }|  |}|dd D ]}|| ||k r$|| |d7 }q||kr-|| q||d  d|S )a  
    A utility function used for local multimodal models. It locates all `placeholder` string
    occurrences in the given input `string_` and replaces the first `max_count` instances with
    `replacement`, and all subsequent occurrences with the empty string.

    This is used to replace <image> placeholder tags by model-specific image tokens like <|image_pad|>
    and to allow for only the first `max_count` images to be passed to a model if desired.

    :param string: The original string containing placeholders.
    :param default_placeholder: The placeholder text to be replaced.
    :param image_token: The token to replace the placeholder with.
    :param max_images: The maximum number of replacements to make.
    :return: The string with placeholders replaced.
    r   Nr   rO    )r9   r'   join)r   r   r   r   countresultpartspartr"   r"   r#   replace_placeholders0  s   





r   images
list[list]c                 C  s   dd | D S )aX  
    Takes in a list of lists of images, and returns a single list of all images in order.
    Used for some multimodal models like Llava-1.5 which expects this flattened-list format for its image processor.

    :param images: A list of lists of PIL images.
    :return: a list of PIL images, via concatenating all the sub-lists in order.
    c                 S  s   g | ]	}|D ]}|qqS r"   r"   )rX   
image_listimager"   r"   r#   rZ   Z  s    z&flatten_image_list.<locals>.<listcomp>r"   )r   r"   r"   r#   flatten_image_listR  s   r   r   str | list[str] | Noneeos
str | Noner   c                 C  sV   t | tr	| g} n| du rg } nt | tstd|  |dur)|| vr)| | | S )zZEnsures that the `until` parameter is a list of stop sequences and includes the EOS token.NzAExpected `kwargs['until']` to be of type Union[str,list] but got )r   r   rI   r>   r'   )r   r   r"   r"   r#   handle_stop_sequences]  s   


r      r   r   default_max_gen_toksc           
      C  sZ  ddl }|| }|dg }t|ts|g}|dd|dd|dd|ddd}d	d
 | D }t|dkrFtt	d| d t
tt| |}|d}t|dd}	| du ro |	dkrjdnd|d< n/ du r |	r|	dkrtt	d|d|	d d|d< ndu r|	dkrtt	d|d|	d ||d< ||d< tdi |S )au  Normalize generation kwargs for consistent handling across model backends.

    Model implementations may have different expectations for generation parameters.

    Args:
        gen_kwargs: Raw generation kwargs from the request. Expected keys include:
            - do_sample: Whether to use sampling (vs greedy decoding) - Required
            - until (str | list[str]): Stop sequence(s) for generation.
            - max_gen_toks | max_new_tokens | max_tokens | max_completion_tokens: Maximum tokens to generate
            - temperature: Sampling temperature
            - Other backend-specific kwargs
        default_max_gen_toks: Default max_gen_toks if not specified in gen_kwargs.

    Returns:
        A normalized dict containing:
        - do_sample (bool): Whether to use sampling (bool)
        - until: list[str]: List of stop sequences.
        - max_gen_toks (int): Maximum tokens to generate (int)
        - temperature (float): Sampling temperature (float). Note: will always be set to 0.0 if do_sample=False or do_sample is not specified.
        - All other kwargs passed through unchanged

    Notes:
        - Accepts `max_gen_toks` and other aliases. Priority:
          max_gen_toks > max_new_tokens > max_tokens > max_completion_tokens.
          Output always uses `max_gen_toks`.
        - When `do_sample=False`, temperature is set to 0.0 for greedy decoding.
        - When temperature is 0.0 and `do_sample` is not specified, `do_sample` is set
          to False.
        - Model backends may further modify the returned dict as needed (e.g., vLLM
          removes `do_sample` since it uses temperature directly).
    r   Nr   r   max_new_tokens
max_tokensmax_completion_tokens)r   r   r   r   c                 S  s   i | ]\}}|d ur||qS r2   r"   )rX   krh   r"   r"   r#   
<dictcomp>  s    z(normalize_gen_kwargs.<locals>.<dictcomp>rO   z"Multiple max token args provided: z_. Using first by priority (max_gen_toks > max_new_tokens > max_tokens > max_completion_tokens).r   r   g        TFz
do_sample=z` but temperature=zb; setting `temperature` to 0.0 for greedy decoding. For non-greedy decoding, set `do_sample=True`.z0. For non-greedy sampling, set temperature > 0.0r"   )copydeepcopygetr   rI   r   r   r(   r   r<   r   nextr)   r?   r   r   )
r   r   r   r   r   max_token_aliasesprovidedr   r   r   r"   r"   r#   normalize_gen_kwargsm  sT   $








r   TrO   r   Image.Imagewidthheightmax_dimensionkeep_aspect_ratior   resample_filter	min_width
min_heightc                 C  sP  | j \}}	|du r|du r|du r| S |}
|	}|durF|durF||kr)|	|kr)| S |rAt|| ||	 }t|| }
t|	| }nU|}
|}nP|dur[||krP| S |}
t|	| |
 }n;|durp|	|kre| S |}t||	 | }
n&|durt|	||kr}| S ||	kr|}
t|	| |
 }n
|}t||	 | }
t||
}
t||}| |
|f|S )aY  
    Resizes a PIL Image object with flexible options.

    Args:
        image: The PIL Image object to resize.
        width: Target width in pixels.
        height: Target height in pixels.
        max_dimension: Maximum size for the longer dimension of the image.
        keep_aspect_ratio: If True (default) and both width and height are provided,
                          the image is resized to fit within these dimensions while
                          maintaining its aspect ratio. If False, the image is stretched
                          to the exact width and height.
        resample_filter: The resampling filter to use for resizing.
                        Defaults to Image.BICUBIC.
        min_width: Minimum width for the resized image. Defaults to 1.
        min_height: Minimum height for the resized image. Defaults to 1.

    Returns:
        The resized PIL Image object. If no resize parameters are provided
        or if the image already meets the criteria, the original image is returned.

    Order of precedence for resizing:
    1. If width AND height are provided:
       - If keep_aspect_ratio is True: Fits image within bounds, preserving aspect ratio.
       - If keep_aspect_ratio is False: Resizes to exact dimensions (may distort).
    2. Else if only width is provided: Calculates height proportionally.
    3. Else if only height is provided: Calculates width proportionally.
    4. Else if max_dimension is provided: Resizes the longest side to max_dimension
       and scales the other side proportionally.
    5. If none of the above are provided, returns the original image.
    N)rT   minr   r   resize)r   r  r  r  r  r  r  r  original_widthoriginal_height	new_width
new_heightratior"   r"   r#   resize_image  sF   
)

r  lefttokensr   
max_lengthside"Literal['left', 'middle', 'right']c                 C  sp   | dkr | | d S  dkr | d| S dkr0|d }|| }| d| | | d  S t d|d)zVTruncate a token list to max_length using the given strategy (left, right, or middle).r  Nrightmiddle   zUnknown truncation side=z.. Must be one of 'left', 'middle', or 'right'.)r>   )r  r  r  left_lengthright_lengthr"   r"   r#   truncate_tokens1  s   r  r   max_model_lenmin_gen_tokstuple[list[int], int]c              	   C  s  t | }|| |kr| |fS d| d| d||  d| d	}|s7t| d|d| t| || |d|fS ||  }	|krQt| d	|d
|	 d| | |	fS ||  }
dkrgtd| d| d| dt| d|d
|
 d|d| t| |
|d|fS )a  
    Truncates input tokens and/or reduces max_gen_toks to fit within max_model_len.

    Strategy:
        1. No truncation needed: If len(tokens) + max_gen_toks <= max_model_len, return as-is.
        2. If shrink_gen_toks=False: Truncate context to fit max_model_len - max_gen_toks.
        3. If shrink_gen_toks=True:
                a. First try reducing max_gen_toks (down to min_gen_toks) to fit the context.
                b. If context still doesn't fit, truncate context to reserve space for min_gen_toks.

    Args:
        tokens (list[int]): The input context tokens to potentially truncate.
        max_gen_toks (int): The maximum number of tokens to generate.
        max_model_len (int): The model's maximum context window size (prompt + generation).
        min_gen_toks (int): Lower bound for generation tokens. Defaults to 1.
        side (str): "left" | "right" | "middle". Defaults to "left".
        shrink_gen_toks (bool): Whether to adjust the generation tokens count
            to fit within the maximum length. Defaults to False.
        verbose (bool): Whether to log warnings when truncation or adjustments occur.

    Returns:
        tuple[list[int], int]: A tuple containing:
            - list[int]: The (possibly truncated) context tokens.
            - int: The adjusted maximum generation token count.

    Raises:
        ValueError: when max_model_len <= min_gen_toks.
    zContext length (z) + max_gen_toks (z) = z exceeds model's max length ()z. Truncating context from side=.)r  z. Reducing max_gen_toks=z to z$ to fit within model context window.r   zModel context window (z+) is too small to fit initial context len (z) + minimum generation len (z  tokens to reserve min_gen_toks=z for generation.)r(   r
   r  r>   )r  r   r  r  r  shrink_gen_toksverbosectx_lenwarningnew_maxmax_ctx_lenr"   r"   r#   maybe_truncateD  s<   %"
r&  
generationstoplist[str] | str | Nonethink_end_tokenc                 C  sV   |rt |tr
|gn|}|D ]}t|dkr| |d } q|r)| |d  } | S )aR  
    Post-processes the generated text by stripping stop sequences and optional thinking markers.

    Args:
        generation (str): The generated text to be processed.
        stop (list[str] | None): Stop sequence(s) to remove. Text is truncated
            at the first occurrence of any stop sequence.
        think_end_token (str | None): Token marking end of thinking section. If provided,
            returns only the text after this token (discarding thinking content).

    Returns:
        str: The processed generation - text before stop sequences and after thinking sections.
    r   r   )r   r   r(   r9   lstrip)r'  r(  r*  termr"   r"   r#   postprocess_generated_text  s   r-  sequencebos_strstr | Iterable[str] | Nonec                   s6   |d u rdS t |tr |S t fdd|D S )NFc                 3  s    | ]}  |V  qd S r2   )
startswithrk   r.  r"   r#   r     s    z!has_bos_prefix.<locals>.<genexpr>)r   r   r1  any)r.  r/  r"   r2  r#   has_bos_prefix  s
   

r4  r   bool | Noneadd_bosc                 C  s$   | d urd| iS |d urd|iS i S )Nr   r"   )r   r6  r"   r"   r#   _add_special_kwargs  s
   r7  r   )r%   r   )Nrr   rs   N)
rt   ru   rv   rw   rx   r   ry   r   rz   r{   r2   )r   r   r   r   r0   r   )r   r   r   r   r   r   r   r   )r   r   )r   r   r   r   r0   r   )r   )r   r   r   r   r0   r   )NNNTNrO   rO   )r   r   r  rw   r  rw   r  rw   r  r   r  rw   r  r   r  r   r0   r   )r  )r  r   r  r   r  r  r0   r   )rO   r  FT)r  r   r   r   r  r   r  r   r  r  r0   r  )r'  r   r(  r)  r*  r   r0   r   )r.  r   r/  r0  r0   r   )r   r5  r6  r5  )5
__future__r   rG   r:   rl   loggingr~   	functoolsr   typingr   r   r   r   typing_extensionsr	   lm_eval.utilsr
   r   	getLoggerr   r<   r   collections.abcr   r   r   r   torchPILr   transformersr    transformers.configuration_utilsr   r   r.   r/   rF   rq   r   r   r   r   r   r   r   r  r  r&  r-  r4  r7  r"   r"   r"   r#   <module>   sp    
	&:*(  
-
"
bg
J	