o
    	TÃi£  ã                   @   s2   d dl Z deeeeef   dee fdd„ZdS )é    NÚcompletionsÚreturnc                    s2   d‰ dd„ | D ƒ}‡ fdd„|D ƒ}dd„ |D ƒS )a¢  
    Reward function that checks if the reasoning process is enclosed within `"<think>"` and `"</think>"` tags. The
    function returns a reward of 1.0 if the format is correct, otherwise 0.0.

    Args:
        completions (`list[list[dict[str, str]]]`):
            List of completions to be evaluated. Each completion must be a list of one message, i.e. a dictionary
            containing the key `"content"` with the value being the text of the completion.
        **kwargs:
            Additional keyword arguments. This function does not use them, but they are required in the function
            signature to ensure compatibility with trainers like [`GRPOTrainer`].

    Returns:
        `list[float]`:
            A list of rewards, where each reward is 1.0 if the completion matches the expected format, otherwise 0.0.

    Example:
    ```python
    >>> from trl.rewards import think_format_reward

    >>> completions = [
    ...     [{"content": "<think>\nThis is my reasoning.\n</think>\nThis is my answer."}],
    ...     [{"content": "<think>\nThis is my reasoning.\nThis is my answer."}],
    ... ]
    >>> think_format_reward(completions)
    [1.0, 0.0]
    ```
    z%^<think>(?!.*<think>)(.*?)</think>.*$c                 S   s   g | ]}|d  d ‘qS )r   Úcontent© )Ú.0Ú
completionr   r   úN/home/ubuntu/.local/lib/python3.10/site-packages/trl/rewards/format_rewards.pyÚ
<listcomp>0   ó    z'think_format_reward.<locals>.<listcomp>c                    s"   g | ]}t  ˆ |t jt jB ¡‘qS r   )ÚreÚmatchÚDOTALLÚ	MULTILINE)r   r   ©Úpatternr   r   r	   1   s   " c                 S   s   g | ]}|rd nd‘qS )g      ð?g        r   )r   r   r   r   r   r	   2   r
   r   )r   ÚkwargsÚcompletion_contentsÚmatchesr   r   r   Úthink_format_reward   s   r   )r   ÚlistÚdictÚstrÚfloatr   r   r   r   r   Ú<module>   s   *