o
    }oi(                     @   s   d dl Z d dlmZ d dlZd dlm  mZ d dlm	Z	 dee
 de
de
fddZd	ee
 d
ede
defddZdejde
de
de
fddZddejde
de
fddZ	ddeej deej de
de
de
de
fddZdS )    N)List)PackedSeqParamsnumberscapacityreturnc                 C   s    t  | |}|dkrdS |d S )zVFinds the index of largest number that fits into the knapsack with the given capacity.r      )bisect)r   r   index r   c/home/ubuntu/.local/lib/python3.10/site-packages/nemo/collections/vlm/neva/data/sequence_packing.pysearch_for_fit   s   r   
item_sizessamplesmax_capacityc           
      C   s   t | t |ksJ dg }t | dkr|S ttt| |dd d \}}t|}t|}|d |kr@td|d  d| d	|rng }|}	 t||}|dkrQn||| 8 }|| ||}	||	 qG|| |sB|S )
zGreedy algorithm with binary search for the knapsack problem.

    Pack as many samples as possible given a maximum capacity and capacities of individual samples.
    Used if sequence packing is enabled.
    z5sample lengths and samples must have the same length.r   c                 S   s   | d S )Nr   r   )xr   r   r   <lambda>1   s    z!greedy_knapsack.<locals>.<lambda>)keyr   zknapsack: A sample is larger z than the max_sequence_length .)lenzipsortedlist
ValueErrorr   popappend)
r   r   r   	knapsackssorted_item_sizessorted_samplescurrent_knapsackremaining_capacityidxsampler   r   r   greedy_knapsack#   s4   




r#   instance_tokensnum_image_embeddings_per_tilemedia_token_indexc                 C   s*   t | |k }t| |d |  }|S )an  
    Predict the effective sequence length, accounting for media embeddings.

    Args:
        instance_tokens (torch.Tensor): Token tensor for a single instance.
        num_image_embeddings_per_tile (int): Number of image embeddings per tile.
        media_token_index (int): Token ID representing media.

    Returns:
        int: Effective sequence length.
    r   )torchsumitemr   )r$   r%   r&   
num_imagesseqlenr   r   r   predict_seq_lenO   s   r,   @   pad_to_multiple_ofc                 C   s    t | }|| d | | }|S )zGet seqlen with paddingr   )r   )r$   r.   r+   seqlen_paddedr   r   r   predict_seq_len_with_padding`   s   r0   tokenslabelsignore_indexc              	   C   s  g }g }g }g }	dg}
dg}t | |D ]`\}}t|||}|| d | | }|| }|dkrCt|d|fdd}t|d|fd|}|| || |tjt|tj|j	d |	| |
|
d |  ||d |  qtj
|ddd}tj
|ddd}tj
|ddd}tj|tj|j	d}d||dk < t|
}
t|}t|
|
||tt|	tt|	dd	}|||||fS )
aN  
    Convert tokens, labels, and associated inputs into a packed version with padded sequence parameters.

    Args:
        tokens (list[torch.Tensor]): List of token tensors for each instance.
        labels (list[torch.Tensor]): List of label tensors for each instance.
        num_image_embeddings_per_tile (int): Number of image embeddings per tile.
        media_token_index (int): Token ID representing media.
        ignore_index (int): Value to use for padding labels.
        pad_to_multiple_of (int): Sequence length will be padded to a multiple of this value. Default is 8.
    r   r   constant)dtypedevicer   )dimg        thd)cu_seqlens_qcu_seqlens_kvcu_seqlens_q_paddedcu_seqlens_kv_paddedmax_seqlen_qmax_seqlen_kv
qkv_format)r   r,   Fpadr   r'   aranger   intr6   cat	unsqueeze	ones_likefloat	IntTensorr   max)r1   r2   r%   r&   r3   r.   packed_tokenspacked_labelspacked_position_idsseqlens_padded
cu_seqlenscu_seqlens_paddedr$   instance_labelsr+   r/   pad_lenpacked_loss_maskpacked_seq_paramsr   r   r   convert_to_packedg   sH   







rT   )r-   )r	   typingr   r'   torch.nn.functionalnn
functionalr@   megatron.core.packed_seq_paramsr   rC   r   r#   Tensorr,   r0   rT   r   r   r   r   <module>   s.   	,