o
    ̳iw                      @   s   d dl Z d dlZd dlZd dlmZmZmZ d dlZd dlm	Z	m
Z
 d dlmZmZmZ d dlmZmZmZ d dlmZ d dlmZ G dd	 d	eZG d
d dZejde	ddfddZedkriee  dS dS )    N)AnyDictList)
DictConfig	OmegaConf)configtrainingutils)
load_imageMessage$padded_collate_tiled_images_and_mask)sample)	Transformc                   @   s.   e Zd ZdZdeeef dee fddZ	dS )SingleTurnYAMLToMessagesa  
    Converts a single turn conversation in YAML format to a list of messages.

    Expects the YAML to look like:
        system: You are a helpful AI assistant.
        user: What is the capital of France?

    or if it includes an image:
        system: You are a helpful AI assistant.
        user:
            image: url or path_to_image
            text: Describe the image in detail.
    promptreturnc                 C   s   g }|  D ]K\}}|d u rqt|trd|dg}n-d| v r6|d }t|}d|dd|d dg}nd| v s@J dd|d dg}|t||d q|tddd |S )Ntext)typecontentimagez4Multiple entries per role expect at least a text key)roler   	assistant )items
isinstancestrkeysr
   appendr   )selfr   messagesr   r   new_content	image_locr    r"   K/home/ubuntu/.local/lib/python3.10/site-packages/recipes/dev/generate_v2.py__call__%   s&   
z!SingleTurnYAMLToMessages.__call__N)
__name__
__module____qualname____doc__r   r   r   r   r   r$   r"   r"   r"   r#   r      s    "r   c                   @   s`   e Zd ZdZdeddfddZdeddfddZd	ed
eddfddZ	e
 defddZdS )InferenceRecipeaE  
    Recipe for generating tokens from a dense Transformer-based LLM.
    This works for text-only generation and image-text generation.

    This *does not* currently support the following features:
        - torch.compile
        - quantization through torchao
        - multi-GPU generation
        - batch generation
    cfgr   Nc                 C   sN   t j|jd| _tj|j| jd| _t |j	| _
tj|j|dd d d S )Ndevice)dtyper,   cudnn_deterministic_mode)seed
debug_mode)r	   
get_devicer,   _devicer   	get_dtyper-   _dtype
get_logger	log_level_loggerset_seedr/   get)r   r*   r"   r"   r#   __init__M   s   
zInferenceRecipe.__init__c              	   C   s   t |j}| }t| j! | j t |j}W d   n1 s%w   Y  W d   n1 s4w   Y  |	|tj
  || _| jd| j d t |j| _t | _dS )zSetup the model and transforms.Nz%Model was initialized with precision .)r   instantiatecheckpointerload_checkpointr   set_default_dtyper4   r2   modelload_state_dict	MODEL_KEYr7   info	tokenizermodel_transformr   to_messages)r   r*   _checkpointer
_ckpt_dictr@   r"   r"   r#   setupU   s    zInferenceRecipe.setup
total_timetokens_per_secondc                 C   s   t dd t| j | j D }| jd|dd|dd | jd|| d dd	 | jj	d
krLt
 }| jd| d dd dS dS )zLogs the following metrics: total time for inference, tokens/sec,
        bandwidth achieved, and max memory allocated.

        Feel free to modify this function to log additional metrics.
        c                 S   s   g | ]
}|  |jj qS r"   )numelr-   itemsize).0pr"   r"   r#   
<listcomp>m   s    z/InferenceRecipe.log_metrics.<locals>.<listcomp>zTime for inference: z.02fz sec total, z tokens/seczBandwidth achieved: i   @z GiB/scpuzMax memory allocated: z GiBN)sum	itertoolschainr@   
parametersbuffersr7   rC   r2   r   r	   get_torch_device_namespacemax_memory_allocated)r   rJ   rK   
model_sizetorch_devicer"   r"   r#   log_metricsf   s"   zInferenceRecipe.log_metricsc                 C   s  |  t|j}tdd |D }| jd|idd}t|d }||j }| j | j	j
d| j|r6| jjnd|d	 W d   n1 sEw   Y  ttj||ftj| jd
}t|}i }	|rt|gdd| jjd}	|	d ddd|f |	d< |	d| j}
ntj|d | jdd}
|dd|f |	d< |dd|f |	d< t|	| j g }t }| j	|
fi |	dddf }t||j|jd}||   |r|	d |	d ddddf |	d< t!|jD ]D}|d|f |	d< |d|dddf |	d< |  | jj"v r n$| j	|fi |	dddf }t||j|jd}||   |d7 }qt | }| j#|}| j$%d| d t|| }| j&||d dS )z9The main entry point for generating tokens from a prompt.c                 S   s   g | ]}|j qS r"   )contains_media)rN   mr"   r"   r#   rP      s    z,InferenceRecipe.generate.<locals>.<listcomp>r   T)	inferencetokens   N)
batch_sizer-   encoder_max_seq_lendecoder_max_seq_len)sizer-   r,   left)pad_directionpad_max_imagespad_max_tilesencoder_maskr+   r   mask	input_pos)temperaturetop_kencoder_inputz


)rJ   rK   )'rF   r   to_containerr   anyrE   lenmax_new_tokensr2   r@   setup_cachesr4   image_seq_lentorchtrilonesboolaranger   max_num_tilespoptotensor	unsqueezer	   batch_to_devicetimeperf_counterr   rm   rn   r   itemrangestop_tokensdecoder7   rC   r[   )r   r*   r   is_multimodal_inputmodel_inputsseq_lentotal_response_lengthcausal_maskrk   batchr   generated_tokenst0logitstokenitdecodedrK   r"   r"   r#   generate~   sz   




zInferenceRecipe.generate)r%   r&   r'   r(   r   r:   rI   intfloatr[   rw   inference_moder   r"   r"   r"   r#   r)   A   s    r)   r*   r   c                 C   s4   t jd| d t| d}|j| d |j| d d S )Nr)   )recipe_namer*   )r*   )r   
log_configr)   rI   r   )r*   reciper"   r"   r#   main   s   
r   __main__)rS   sysr   typingr   r   r   rw   	omegaconfr   r   	torchtuner   r   r	   torchtune.datar
   r   r   torchtune.generationr   torchtune.modules.transformsr   r   r)   parser   r%   exitr"   r"   r"   r#   <module>   s$   + 