o
    }oi!                     @   sN   d dl mZ d dlZd dlmZ d dlmZ G dd dZG dd deZdS )	    )OrderedDictN)InferenceRequest)SimpleTextGenerationControllerc                   @   s$   e Zd Zdd Zdd Zdd ZdS )TokenizerWrapperc                 C   s   |j | _d | _|| _d S N)eos_token_ideod
vocab_size
_tokenizer)self	tokenizer r   k/home/ubuntu/.local/lib/python3.10/site-packages/nemo/collections/vlm/inference/vlm_inference_controller.py__init__   s   
zTokenizerWrapper.__init__c                 C      | j j|ddS )NF)skip_special_tokens)r
   decode)r   tokensr   r   r   
detokenize       zTokenizerWrapper.detokenizec                 C   r   )NF)add_special_tokens)r
   encode)r   promptr   r   r   tokenize$   r   zTokenizerWrapper.tokenizeN)__name__
__module____qualname__r   r   r   r   r   r   r   r      s    r   c                       sF   e Zd Z fddZdefddZdejdeee	f fdd	Z
  ZS )
VLMTextGenerationControllerc                    s   t  |t| || _d S r   )superr   r   image_processor)r   inference_wrapped_modelr   r   	__class__r   r   r   +   s   
z$VLMTextGenerationController.__init__r   c              	   C   s   | j |}|d u r,ttddd| jjd | jjd tjdgtjddgd}||fS | jj	|d	d
}dd |
 D }||fS )N         heightwidthr   )dtypepixel_valuesaspect_ratio_ids	num_tilespt)return_tensorsc                 S   s"   i | ]\}}|d v r||d qS )r)   r   r   ).0kvr   r   r   
<dictcomp><   s    z?VLMTextGenerationController.tokenize_prompt.<locals>.<dictcomp>)r   r   dicttorchzerosr   sizetensorlong
preprocessitems)r   r   imager   
image_dictr   r   r   tokenize_prompt/   s   z+VLMTextGenerationController.tokenize_promptprompts_tokensactive_requestsc                 C   s&   t tdd | }| jj||dS )a-  Preparing input data for inference, using respective wrapper's prep_inference_input method

        Args:
            prompts_tokens (torch.Tensor): A tensor of shape [batch_size, max_sequence_length]
            active_requests (OrderedDict[int, InferenceRequest]): The input active requests
        c                 S   s   | j S r   )encoder_prompt)requestr   r   r   <lambda>H   s    zBVLMTextGenerationController.prep_inference_input.<locals>.<lambda>)r>   r<   )listmapvaluesr    prep_inference_input)r   r>   r?   imagesr   r   r   rF   A   s
   z0VLMTextGenerationController.prep_inference_input)r   r   r   r   strr=   r4   Tensorr   r   rF   __classcell__r   r   r!   r   r   )   s    $r   )	typingr   r4   )megatron.core.inference.inference_requestr   Umegatron.core.inference.text_generation_controllers.simple_text_generation_controllerr   r   r   r   r   r   r   <module>   s   