o
    }oi                     @   s   d dl Z d dlmZ d dlZd dlmZ dee dedefddZd	ee d
ededefddZdej	dededefddZ
ddej	dedefddZdeej	 deej	 dee fddZd ddZdS )!    N)List)PackedSeqParamsnumberscapacityreturnc                 C   s    t  | |}|dkrdS |d S )zVFinds the index of largest number that fits into the knapsack with the given capacity.r      )bisect)r   r   index r   S/home/ubuntu/.local/lib/python3.10/site-packages/nemo/collections/vlm/data/utils.pysearch_for_fit   s   r   
item_sizessamplesmax_capacityc           
      C   s   t | t |ksJ dg }t | dkr|S ttt| |dd d \}}t|}t|}|d |kr@td|d  d| d	|rng }|}	 t||}|dkrQn||| 8 }|| ||}	||	 qG|| |sB|S )
zGreedy algorithm with binary search for the knapsack problem.

    Pack as many samples as possible given a maximum capacity and capacities of individual samples.
    Used if sequence packing is enabled.
    z5sample lengths and samples must have the same length.r   c                 S   s   | d S )Nr   r   )xr   r   r   <lambda>0   s    z!greedy_knapsack.<locals>.<lambda>)keyr   zknapsack: A sample is larger z than the max_sequence_length .)lenzipsortedlist
ValueErrorr   popappend)
r   r   r   	knapsackssorted_item_sizessorted_samplescurrent_knapsackremaining_capacityidxsampler   r   r   greedy_knapsack"   s4   




r#   instance_tokensnum_image_embeddings_per_tilemedia_token_indexc                 C   s*   t | |k }t| |d |  }|S )an  
    Predict the effective sequence length, accounting for media embeddings.

    Args:
        instance_tokens (torch.Tensor): Token tensor for a single instance.
        num_image_embeddings_per_tile (int): Number of image embeddings per tile.
        media_token_index (int): Token ID representing media.

    Returns:
        int: Effective sequence length.
    r   )torchsumitemr   )r$   r%   r&   
num_imagesseqlenr   r   r   predict_seq_lenN   s   r,   @   pad_to_multiple_ofc                 C   s    t | }|| d | | }|S )zGet seqlen with paddingr   )r   )r$   r.   r+   seqlen_paddedr   r   r   predict_seq_len_with_padding_   s   r0   tokenslabelsseqlensc              	   C   s@  g }g }g }g }dg}dg}t | ||D ]9\}	}
}||	 ||
 |tjt|	tj|	jd t|	}|| ||d |  ||d |  qtj|ddd}tj|ddd}tj|ddd}tj	|tj
|jd}d||dk < t|}t|}t||||tt|tt|dd}|||||fS )aM  
    Convert tokens, labels, and associated inputs into a packed version.

    Args:
        tokens (list[torch.Tensor]): List of token tensors for each instance.
        labels (list[torch.Tensor]): List of label tensors for each instance.
        seqlens (list[int]): List of sequence lengths for each instance before padding.
    r   )dtypedevicer   )dimg        thd)cu_seqlens_qcu_seqlens_kvcu_seqlens_q_paddedcu_seqlens_kv_paddedmax_seqlen_qmax_seqlen_kv
qkv_format)r   r   r'   aranger   intr5   cat	unsqueeze	ones_likefloat	IntTensorr   max)r1   r2   r3   packed_tokenspacked_labelspacked_position_idsseqlens_padded
cu_seqlenscu_seqlens_paddedr$   instance_labelsr+   r/   packed_loss_maskpacked_seq_paramsr   r   r   convert_to_packedf   s>   







rP   Fc                 C   sp   t | }t |}t||| d D ]$}| |||  |k}t|s-|r5t|dd  r5||| f  S qdS )Nr   )r   r   )r   ranger'   all)templatepatternsearch_start_indexallow_first_token_mismatchtemplate_lenpattern_lenimatchr   r   r   _find_pattern_indices   s    r[   )r-   )r   F)r	   typingr   r'   megatron.core.packed_seq_paramsr   r@   r   r#   Tensorr,   r0   rP   r[   r   r   r   r   <module>   s    	,
4