o
    ίi                     @   s   d Z ddlZddlmZ ddlmZmZmZmZ eG dd dZ	e	dg dd	 ddddddd
dddZ
e	dg dd	 ddddddddddZe
edZdS )a  
Define conversation format for each training phases and language models.

Modified from LLaVA codebase: https://github.com/haotian-liu/LLaVA/blob/main/llava/conversation.py

NOTE:
- an example of required json format is:
    data = {
        "image": IMAGE_PATH, or "images": LIST of IMAGE_PATH,
        "conversations": [
            {"from": "human", "value": "hello"},
            {"from": "assistant", "value": "Hi, how can I help you today?"},
            {"from": "human", "value": "Who are you?"},
            {"from": "assistant", "value": "I am a multimodal large language model created by FAIR. I can assist you with questions related to images and videos."},
        ]
    }
    N)	dataclass)CallableDictListUnionc                
   @   s   e Zd ZU eed< eed< eed< eed< eed< eed< eed< dZeed	< d
Zeed< d
Z	eed< d
Z
eed< d
Zeed< 	d!dedededee fddZ	d"dededefddZdeeef fddZdd Zd S )#Conversationsystemconversations	bos_token
sep_systemsep_question
sep_answerplace_image_token	<|image|>image_token 
pre_systempre_question
pre_answer	eos_token      image
num_imagesnum_patches
media_typereturnc                 C   s  g }| j | j | j }d}|dkr+| jD ]}|d dkr)|d d| j| |d< qn8| jd d dd	d
d	dd	dd	dd	dd	| jd d< | | jd d | j|| | jd d< | jD ]T}|rq|d dkrqqf|d dkrd	}	|r|	|7 }	|	| j|d  | j 7 }	d|	i}
d}qf|d dkr| j	t
|d  | j }	|
d|	i ||
 qftd|d  d| j |d d  |d d< |d d  | j |d d< |S )zR
        Each turn of conversation is a dict with source and target keys.
        Tmulti_imagefromhumanvalue<image>r   <image>
r   
<image><video>

<video><video>	assistantuserFz1conv['from'] must be human or assistant, but got z.Please fix your jsonl file.)r   r   r   r	   replacer   r   r   r   r   strr   updateappend
ValueErrorr
   r   )selfr   r   r   conv_dict_listsys_textis_firstconversationconv	conv_text	conv_dict r7   J/home/ubuntu/.local/lib/python3.10/site-packages/core/data/conversation.pyget_conversation_dict_list*   s^   



z'Conversation.get_conversation_dict_listpromptc                 C   s   | d|kr|d| j| }n$|dddddddddddd}| || j|| }| j| j | j | j }|| j | | j	 | j
 S )Nr!   r"   r   r#   r$   r%   r&   )countr*   r   r   r
   r   r   r   r   r   r   )r/   r:   r   r   r1   r7   r7   r8   get_generation_prompth   s"   
z"Conversation.get_generation_promptr4   c                 C   sF   t |tr| j| d S t |tr| j| d S tdt| )Nz%conv must be a list or dict, but got )
isinstancelistr	   extenddictr-   r.   type)r/   r4   r7   r7   r8   add_conv   s
   

zConversation.add_convc                 C   s>   t | jt| j| j| j| j| j| j	| j
| j| j| j| jdS )N)r   r	   r   r
   r   r   r   r   r   r   r   r   )r   r   copydeepcopyr	   r   r
   r   r   r   r   r   r   r   r   )r/   r7   r7   r8   rC      s   
zConversation.copyN)r   r   r   )r   r   )__name__
__module____qualname__r+   __annotations__r>   r   r   r   r   r   r   intr   r   r9   r<   r   rB   rC   r7   r7   r7   r8   r      sB   
 
?
r   r   c                 C   s   || S Nr7   textr   num_image_tokensr7   r7   r8   <lambda>   s    rN   
r   )r   r	   r   r
   r   r   r   r   r   r   r   r   zYou are a helpful language and vision assistant. You are able to understand the visual content that the user provides, and assist the user with a variety of tasks using natural language.c                 C   s   || |  S rJ   r7   rK   r7   r7   r8   rN      s   z<|begin_of_text|>z,<|start_header_id|>system<|end_header_id|>

z*<|start_header_id|>user<|end_header_id|>

z/<|start_header_id|>assistant<|end_header_id|>

z
<|eot_id|>z<|end_of_text|>)warmupplm_sft)__doc__rC   dataclassesr   typingr   r   r   r   r   conv_warmupconv_plm_sftREGISTERED_CONVSr7   r7   r7   r8   <module>   sJ   
