o
    }oi                  	   @   s  d dl Z d dlmZmZ d dlmZmZ d dlmZ d dl	m
Z
mZmZmZ G dd deZeeeded	ed
eee jf fddZeeeded	efddZeeeded	efddZdZdZdZdZdZG dd deZeeeded	ed
eee jf fddZdS )    N)CutMixedCut)NeMoSFTExampleSourceTargetTextExample)registered_prompt_format_fn)BOS_SLOTEOS_SLOTModalityPromptFormatterc                   @   sb   e Zd ZdZdZdZde dejejddde d	d
ejidede	 d
ejidiZ
dS )Llama2PromptFormatterz
    This template has been validated to provide identical tokenized results to the official code
    in https://github.com/meta-llama/llama/blob/main/llama/generation.py
    llama2	assistantsystem_and_userz3[INST] <<SYS>>
|system|
<</SYS>>

|message| [/INST]systemmessagetemplateslotsuserz[INST] |message| [/INST]r   z
|message| N)__name__
__module____qualname____doc__NAMEOUTPUT_ROLEr   r	   Textr   TEMPLATE r   r   Y/home/ubuntu/.local/lib/python3.10/site-packages/nemo/collections/common/prompts/llama.pyr      s*    r   cutpromptreturnc                 C   s   t | tr| j} | dr| j}n| dr| j}n| j}g }| dr1|d| j|dd n
|dd|id | j	d	 j
 }d urO|d
d|id ||S )Ncontextquestionsystem_promptr   r   roler   r   r   r   r   
isinstancer   first_non_padding_cut
has_customr#   r$   default_contextappendr%   supervisionstextencode_dialogr    r!   r#   turnsanswerr   r   r   r   7   s   




r   examplec                 C   sT   | j d urd| j j| jjdd}n	dd| jjid}|||jd| jjidgS )Nr   r   r&   r   r   )r$   r/   sourcer0   r   target)r4   r!   	user_turnr   r   r   llama2_src_tgt_text_exampleL   s   

r8   c                    sx   | j d d d }d| j v r| j d rd| j d |dd}ndd	|id} |g fd
d| j d dd  D  S )Nconversationsr   valuer   r   r   r&   r   r   c                    s0   g | ]}|d  dkrdn j d|d idqS )fromUserr   r   r:   r&   )r   ).0turnr!   r   r   
<listcomp>o   s    "z+llama2_sft_text_example.<locals>.<listcomp>   )datar0   )r4   r!   first_user_turn
first_turnr   r?   r   llama2_sft_text_example`   s   
rE   z<|begin_of_text|>z<|start_header_id|>z<|end_header_id|>z
<|eot_id|>z

c                   @   s   e Zd ZdZdZdZe de e Z	dde
ide de e de dejid	d
e d
e e de dejid	ee	 de dejid	iZdS )Llama3PromptFormatterz|
    Implemented following the code at:
     https://github.com/meta-llama/llama3/blob/main/llama/test_tokenizer.py#L56
    llama3r   preambler   r   z	|message|r   r   r   N)r   r   r   r   r   r   LLAMA3_HEADER_BEGINLLAMA3_HEADER_END	LLAMA3_NLINFERENCE_PREFIX
LLAMA3_BOSLLAMA3_END_OF_TURNr	   r   r   r   r   r   r   rF   }   s0    rF   c                 C   s   t | tr| j} | dr| j}n| dr| j}n| j}g }| dr/|dd| jid |dd|id | j	d j
 }d urM|d	d|id ||S )
Nr#   r$   r%   r   r   r&   r   r   r   r(   r1   r   r   r   rG      s   




rG   )torch
lhotse.cutr   r   1nemo.collections.common.data.lhotse.text_adaptersr   r   &nemo.collections.common.data.prompt_fnr   )nemo.collections.common.prompts.formatterr   r   r	   r
   r   dictstrTensorr   r8   rE   rM   rI   rJ   rN   rK   rF   rG   r   r   r   r   <module>   s(   ""&