o
    }oi                     @   sF   d dl Z d dlm  mZ da	dde jde jfddZdd	d
ZdS )    Nlogitstargetsc                 C   s   t j|  ||ddS )a  
    Computes the cross-entropy loss between logits and targets.

    Args:
        logits (torch.Tensor): Model predictions of shape (sequence_length, num_classes).
        targets (torch.Tensor): Ground-truth labels of shape (sequence_length,).
        ignore_index (int, optional): Target value that is ignored when computing the loss.
            Defaults to -100.

    Returns:
        torch.Tensor: The sum of cross-entropy losses over the sequence.
    sum)ignore_index	reduction)Fcross_entropyfloat)r   r   r    r   R/home/ubuntu/.local/lib/python3.10/site-packages/nemo/automodel/loss/chunked_ce.pycompute_cross_entropy   s   r       Tc                 C   s   |j | j kr|| j }|dur<t   |j |j kr!||j }||ddk| ~W d   n1 s7w   Y  tdu rGtjtdda| j	d }|| d | }d}t
| j|dd|j|ddD ]\}	}
|t|	|
|7 }qe||k  }|| S )	a  
    Computes cross-entropy loss in chunks to handle long sequences more efficiently.

    Args:
        logits (torch.Tensor): Model output logits of shape (sequence_length, num_classes).
        targets (torch.Tensor): Ground-truth labels of shape (sequence_length,).
        mask (torch.Tensor, optional): Boolean mask indicating valid positions (1) and
            positions to ignore (0). Defaults to None.
        chunk_len (int, optional): The size of each chunk. The sequence will be split
            along the first dimension in chunks of this length. Defaults to 32.
        compile (bool, optional): If True, uses the compiled compute_cross_entropy function.
            Defaults to True.
        ignore_index (int, optional): Target value that is ignored when computing the loss.
            Defaults to -100.

    Returns:
        torch.Tensor: The average cross-entropy loss across the valid tokens in the sequence.
    Nr   T)dynamic   g        )dim)devicetotorchno_gradmasked_fill_view_compiled_compute_cross_entropycompiler   shapezipchunkr   detach)r   r   mask	chunk_lenr   r   seq_len
num_chunkslosslogits_chunktargets_chunk
num_tokensr   r   r   chunked_cross_entropy(   s$   

&r'   )r   )Nr   Tr   )	r   torch.nn.functionalnn
functionalr   r   Tensorr   r'   r   r   r   r   <module>   s   
