o
    wi                     @   sd  d dl mZmZmZ d dlmZ d dlZd dlZd dl	m
Z
 d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d	d
lmZ d	dlmZ d	dlmZ d	dlmZ de dej!dej"fddZ#ej$dfdej%de&fddZ'dej$dfde deej! dej%de&fddZ(			d$dedee  deeeee f  de&dee& d ee
 d!e)fd"d#Z*dS )%    )ListOptionalUnionN)CommonInferenceParams)AbstractModelInferenceWrapper)InferenceWrapperConfig)Image)AutoProcessor)vlm   )LlavaInferenceWrapper)MllamaInferenceWrapper)	VLMEngine)VLMTextGenerationControllerpathtrainermodelc                 C   s.   ddl m} |||  | }|| |}|S )z)Setup trainer and restore model from pathr   )#set_modelopt_spec_if_exists_in_ckpt)nemo.collections.vlm.modeloptr   	to_fabric
load_model)r   r   r   r   fabric r   `/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/nemo/collections/vlm/inference/base.py _setup_trainer_and_restore_model%   s
   
r   i  params_dtype&inference_batch_times_seqlen_thresholdc           	      C   s   | j }| jj }||}|  t|tjrt}|j	j
}nt|tjr,t}|jj
}ntd| ||t||||jd}|S )z&Set up inference wrapper for the modelzUnknown model config: )hidden_sizer   r   padded_vocab_size)configmodulecudatoeval
isinstancer
   MLlamaModelConfigr   language_model_configr   LlavaConfigr   language_transformer_config
ValueErrorr   
vocab_size)	r   	tokenizerr   r   r   mcore_modelwrapper_clsr   inference_wrapped_modelr   r   r   setup_inference_wrapper2   s*   



r/   c           
      C   sN   d}t |}|j}t }tj||d}t| ||d t||||}	|	|fS )zSet up model and tokenizerz(meta-llama/Llama-3.2-11B-Vision-Instruct)r+   )r   r   r   )r	   from_pretrainedr+   r
   MLlamaConfig11BInstructMLlamaModelr   r/   )
r   r   r   r   model_id	processorr+   r   r   r.   r   r   r   setup_model_and_tokenizerU   s   
r5      wrapped_modelpromptsimagesmax_batch_sizerandom_seedinference_paramsreturnc                 C   s>   t | ||d}t|||d}	|ptdd}
|	j|||
d}|S )ae  
    Generates text using a NeMo VLM model.
    Args:
        wrapped_model (AbstractModelInferenceWrapper): The model inference wrapper.
        tokenizer: tokenizer for the input text,
        image_processor: image processor for the input image,
        prompts (list[str]): The list of prompts to generate text for.
        images (list): The list of images to generate text for.
        max_batch_size (int, optional): The maximum batch size. Defaults to 4.
        random_seed (Optional[int], optional): The random seed. Defaults to None.
        inference_params (Optional["CommonInferenceParams"], optional): The inference parameters defined in
            Mcore's CommonInferenceParams. Defaults to None.

    Returns:
        list[Union["InferenceRequest", str]]: A list of generated text,
            either as a string or as an InferenceRequest object.
    )r.   r+   image_processor)text_generation_controllerr:   r;   2   )num_tokens_to_generate)r8   r9   common_inference_params)r   r   r   generate)r7   r+   r>   r8   r9   r:   r;   r<   r?   mcore_enginerB   resultsr   r   r   rC   k   s   rC   )r6   NN)+typingr   r   r   lightning.pytorchpytorchpltorchtorch.distributed/megatron.core.inference.common_inference_paramsr   Qmegatron.core.inference.model_inference_wrappers.abstract_model_inference_wrapperr   Imegatron.core.inference.model_inference_wrappers.inference_wrapper_configr   	PIL.Imager   transformersr	   nemo.lightning	lightningnlnemo.collectionsr
   llava_inference_wrapperr   mllama_inference_wrapperr   
vlm_enginer   vlm_inference_controllerr   strTrainerLightningModuler   bfloat16dtypeintr/   r5   dictrC   r   r   r   r   <module>   sj   
%
	