o
    }oi                     @   sl   d dl mZmZmZ d dlZd dlmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZ G dd	 d	eZdS )
    )AnyDictListN)tensor_parallel)AbstractModelInferenceWrapper)InferenceWrapperConfig)InferenceParams)default_collatec                
       s   e Zd ZdZdef fddZ	ddejdee	 fdd	Z
d
e	eef dedede	eef fddZd
e	eef dejfddZ  ZS )LlavaInferenceWrappera3  Constructor for the model inference wrapper

    The wrapper prepares the model for inference, provides the required input
    data, and runs the forward pass

    Args:
        model (NevaModel): The Neva model
        inference_wrapper_config (InferenceWrapperConfig): the config of inference wrapper
    inference_wrapper_configc                    s   t  || | jjj| _d S N)super__init__modelmodule_img_seq_len)selfr   r   	__class__ j/home/ubuntu/.local/lib/python3.10/site-packages/nemo/collections/vlm/inference/llava_inference_wrapper.pyr   &   s   zLlavaInferenceWrapper.__init__Nprompts_tokens
image_dictc                 C   s|   t |d jdd}||dddd}|d}|d}tj|tj|jdd	|}t
||| j | _|||d	S )
Npixel_valuesT)non_blockingr      iP     )dtypedevice	input_idsposition_idsimages)r	   cudareshapesizetorcharangelongr   	unsqueeze	expand_asr   r   inference_params)r   r   r   media
batch_size
seq_lengthr!   r   r   r   prep_inference_input*   s   

z*LlavaInferenceWrapper.prep_inference_inputinference_inputcontext_start_positioncontext_end_positionreturnc                 C   sT   |d d d ||f }|d d d ||f }|dk| j d  | _|||d dS )Nr    r!   r   r   r"   r   )r   img_token_offset)r   r0   r1   r2   
tokens2usepositions2user   r   r   get_batch_for_context_windowC   s   z2LlavaInferenceWrapper.get_batch_for_context_windowc                 C   sF   | j dd| jd|}t|}| j j|d d| j 7  _|S )a  Utility to carry out simple forward pass for TP or no model parallel models

        Runs a very simple forward pass for model. Used  in the case of models without
        any parallelism or only tensor parallelism.

        Args:
            inference_input (List): A list containg the inputs for the neva
                model [input ids, position ids, media]

        Returns:
            torch.Tensor: The output logits of shape [batch_size, seq_len, padded_vocab_size]
        N)attention_maskr+   r    r   r   )r   r+   r   (gather_from_tensor_model_parallel_regionsequence_len_offsetr%   r4   )r   r0   logitsr   r   r   &forward_pass_without_pipeline_parallelT   s   
 z<LlavaInferenceWrapper.forward_pass_without_pipeline_parallelr   )__name__
__module____qualname____doc__r   r   r&   Tensorr   r   r/   strr   intr7   r<   __classcell__r   r   r   r   r
      s&    




$r
   )typingr   r   r   r&   megatron.corer   Qmegatron.core.inference.model_inference_wrappers.abstract_model_inference_wrapperr   Imegatron.core.inference.model_inference_wrappers.inference_wrapper_configr   megatron.core.inference_paramsr   torch.utils.datar	   r
   r   r   r   r   <module>   s   