o
    }oi^                     @   s   d dl mZmZmZ d dlZd dlm  mZ d dl	m
Z
 d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ G d	d
 d
eZdS )    )AnyDictListN)tensor_parallel)AbstractModelInferenceWrapper)InferenceWrapperConfig)InferenceParams)default_collate)create_vision_mask_tensorc                
       s   e Zd ZdZdef fddZ	ddejdee	 fdd	Z
d
e	eef dedede	eef fddZd
e	eef dejfddZ  ZS )MllamaInferenceWrappera"  Constructor for the model inference wrapper

    The wrapper prepares the model for inference, provides the required input
    data, and runs the forward pass

    Args:
        model (MllamaModel): The Mllama model
        args (Namespace): The command line arguments that were passed
    inference_wrapper_configc                    s   t  || d S N)super__init__)selfmodelr   	__class__ k/home/ubuntu/.local/lib/python3.10/site-packages/nemo/collections/vlm/inference/mllama_inference_wrapper.pyr   )   s   zMllamaInferenceWrapper.__init__Nprompts_tokens
image_dictc           
      C   s8  t dd |D }|D ]J}||d jd  }t|d ddddddddd|f
dd|d< t|d dt |d dfdd|d< tt|d dt |d dfdd|d< qt|}|d}|d}tj|tj	|j
d	d|}	t||| _d | j_d | j_d | j_||	|d jd
d|d |d jd
ddS )Nc                 s   s    | ]
}|d  j d V  qdS )pixel_valuesr   N)shape).0instancer   r   r   	<genexpr>2   s    z>MllamaInferenceWrapper.prep_inference_input.<locals>.<genexpr>r   r   constantaspect_ratio_ids   	num_tiles)dtypedeviceT)non_blockingr   position_idsr   r    r   )maxr   Fpadtorchtensorr	   sizearangelongr"   	unsqueeze	expand_asr   inference_paramsxattn_cachescross_attention_masksfull_text_row_masked_out_maskcuda)
r   r   r   max_num_concurrent_mediar   pad_num_imagesbatch
batch_size
seq_lengthr%   r   r   r   prep_inference_input,   s:     


z+MllamaInferenceWrapper.prep_inference_inputinference_inputcontext_start_positioncontext_end_positionreturnc                 C   sL   |d d d ||f }|d d d ||f }|||d |d |d dS )Nr   r%   r   r    r   r$   r   )r   r;   r<   r=   
tokens2usepositions2user   r   r   get_batch_for_context_windowV   s   z3MllamaInferenceWrapper.get_batch_for_context_windowc                 C   s   |d }t |d dg|d }| j|d ||d |d ||d | jj| jj| jj| jd
}t|}| j j	|d	7  _	|S )
a  Utility to carry out simple forward pass for TP or no model parallel models

        Runs a very simple forward pass for model. Used  in the case of models without
        any parallelism or only tensor parallelism.

        Args:
            inference_input (List): A list containg the inputs for the vlm
                model [tokens, position ids]

        Returns:
            torch.Tensor: The output logits of shape [batch_size, seq_len, padded_vocab_size]
        r   r   i  r   r    r   r%   )
batch_imagesbatch_masks
num_chunksr   tokensr%   r1   r2   r3   r0   r   )
r
   r+   r   r0   r1   r2   r3   r   (gather_from_tensor_model_parallel_regionsequence_len_offset)r   r;   r?   rC   logitsr   r   r   &forward_pass_without_pipeline_parallelh   s"   
z=MllamaInferenceWrapper.forward_pass_without_pipeline_parallelr   )__name__
__module____qualname____doc__r   r   r)   Tensorr   r   r:   strr   intrA   rI   __classcell__r   r   r   r   r      s&    

*


$r   )typingr   r   r   r)   torch.nn.functionalnn
functionalr'   megatron.corer   Qmegatron.core.inference.model_inference_wrappers.abstract_model_inference_wrapperr   Imegatron.core.inference.model_inference_wrappers.inference_wrapper_configr   megatron.core.inference_paramsr   torch.utils.datar	   'nemo.collections.vlm.mllama.model.utilsr
   r   r   r   r   r   <module>   s   