import torch


def get_unmasked_sequence_lengths(mask: torch.Tensor) -> torch.Tensor:
    """
    Returns, for each batch element, the index of the last unmasked token
    (i.e. the 0-indexed sequence length, excluding masked tokens).

    Args:
        mask (torch.Tensor): Boolean mask with shape [b x s], where True indicates a value to be
            masked out. This is usually a padding mask, where True marks a padding token.

    Returns:
        Tensor: Indices of the last unmasked token per batch element, with shape [b].
            A fully masked row yields index 0.

    Shape notation:
        - b = batch size
        - s = sequence length

    Example:
        >>> input_ids = torch.tensor([
        ...        [2, 4, 0, 0],
        ...        [2, 4, 6, 0],
        ...        [2, 4, 6, 9]
        ...    ])
        >>> mask = input_ids == 0
        >>> mask
        tensor([[False, False,  True,  True],
                [False, False, False,  True],
                [False, False, False, False]])
        >>> get_unmasked_sequence_lengths(mask)
        tensor([1, 2, 3])

    )dim)dtyper      )cumsumargmaxtotorchlongclipshape)r   sequence_lengths r   N/home/ubuntu/.local/lib/python3.10/site-packages/torchtune/training/pooling.pyget_unmasked_sequence_lengths	   s    r   )r   Tensorr   r   r   r   r   <module>   s   