o
    ̳i                     @   s   d dl mZ d dlZd dlm  mZ d dlmZ d dl	m
Z
 	 ddejdejdedeejejf fd	d
Z	ddejdejdedejfddZe
dfdejdejdededejf
ddZdejdedejfddZdS )    )TupleN)rlhf)CROSS_ENTROPY_IGNORE_IDX	sequencesstop_tokens
fill_valuereturnc                 C   s@   t | |}t j|dd}|dk|dk| @ B }|| |< || fS )aq  
    Truncates sequence(s) after the first stop token and pads with ``fill_value``.

    Args:
        sequences (torch.Tensor): tensor of shape [batch_size, sequence_length] or [sequence_length].
        stop_tokens (torch.Tensor): tensor containing stop tokens.
        fill_value (int): value to pad the sequence with after the first stop token, usually ``pad_id``.

    Returns:
        Tuple[torch.Tensor, torch.Tensor]: A tuple of two tensors with the same shape as ``sequences``:
            - padding_mask (torch.Tensor): a bool tensor where True indicates the token has been truncated.
            - sequences (torch.Tensor) a tensor of truncated and padded sequences.

    Example:
        >>> stop_token_ids = torch.tensor([2, 869])
        >>> fill_value = 0
        >>> sequences = torch.tensor(
        >>>     [
        >>>         [869, 30, 869],
        >>>         [2, 30, 869],
        >>>         [869, 30, 2],
        >>>         [50, 30, 869],
        >>>         [13, 30, 2],
        >>>         [13, 30, 5],
        >>>         [13, 2, 20],
        >>>         [13, 2, 2],
        >>>         [2, 2, 2],
        >>>     ]
        >>> )
        >>> eos_mask, truncated_sequences = rlhf.truncate_sequence_at_first_stop_token(
        >>>     sequences, stop_token_ids, fill_value
        >>> )
        >>> eos_mask
        >>> torch.tensor([
        >>>         [False, True, True],
        >>>         [False, True, True],
        >>>         [False, True, True],
        >>>         [False, False, False],
        >>>         [False, False, False],
        >>>         [False, False, False],
        >>>         [False, False, True],
        >>>         [False, False, True],
        >>>         [False, True, True],
        >>>     ]
        >>> )
        >>> truncated_sequences
        >>> torch.tensor([
        >>>         [869, 0, 0],
        >>>         [2, 0, 0],
        >>>         [869, 0, 0],
        >>>         [50, 30, 869],
        >>>         [13, 30, 2],
        >>>         [13, 30, 5],
        >>>         [13, 2, 0],
        >>>         [13, 2, 0],
        >>>         [2, 0, 0],
        >>>     ]
        >>> )
       dim)torchisincumsum)r   r   r   eos_maskseq_lenspadding_mask r   V/home/ubuntu/.local/lib/python3.10/site-packages/torchtune/rlhf/sequence_processing.py%truncate_sequence_at_first_stop_token   s
   >r         ?logitstemperaturec                 C   s(   t tj| | ddd|ddS )a  
    Converts logits corresponding to a generated sequence to logprobs over the generated tokens.

    Args:
        logits (torch.Tensor): The logits tensor of shape [b, response_length, vocab_size].
        sequences (torch.Tensor): The corresponding tokens of shape [b, response_length].
        temperature (float): The temperature to scale the logits. Default 1.0
    Returns:
        torch.Tensor: The log probabilities corresponding to each token in ``sequences``. Shape [b, response_length].
    r
      )r   gatherFlog_softmax	unsqueezesqueeze)r   r   r   r   r   r   logits_to_logprobsT   s   r   Flabelslabel_pad_token_idreturn_average_logprobsc                 C   s   | j dd |j krtd|ddddf  }| ddddddf } ||k}d|||k< t| |dd}|rBtj||ddS || dS )	a(  
    Calculate log probabilities based on provided logits and labels.

    Args:
        logits (torch.FloatTensor): direct logits output of the model of shape (b, s, v)
        labels (torch.LongTensor): ground-truth labels to compute log probs with, shape (b, s).
            Label tokens with a value of label_pad_token_id are ignored.
        label_pad_token_id (int): token id to ignore in labels.
        return_average_logprobs (bool): If True, return the average log probs across the sequence. Default
            is False. See https://github.com/eric-mitchell/direct-preference-optimization/blob/f8b8c0f49dc92a430bae41585f9d467d3618fe2f/trainers.py#L96 # noqa

    Returns:
        Calculated log probs of shape (b, )

    Raises:
        ValueError: If logits and labels have different shapes.
    Nr   zKLogits (batch and sequence length dim) and labels must have the same shape.r	   r   r   )r   r
   )shape
ValueErrorcloner   r   masked_meansum)r   r    r!   r"   	loss_maskper_token_log_probsr   r   r   get_batch_log_probsh   s   r*   query_response_logitscontext_lengthc                 C   s   | dd|d df S )a  
    Truncates logits generated over a sequence for estimating logprobs over the tokens in the sequence.
    This assumes the sequence is of the (query, response) format with length (context_length + response_length)
    Args:
        query_response_logits (torch.Tensor): The logits tensor of shape [b, context_length + response_length, vocab_size].
        context_length (int): The length of the context.

    Returns:
        torch.Tensor: The truncated logits for the response with shape [b, response_length, vocab_size].Nr	   r   r   )r+   r,   r   r   r   truncate_sequence_for_logprobs   s   r-   )r   )r   )typingr   r   torch.nn.functionalnn
functionalr   	torchtuner   torchtune.datar   Tensorintr   floatr   FloatTensor
LongTensorboolr*   r-   r   r   r   r   <module>   sZ   
F

+