o
    }oi                     @   s   d dl mZmZ d dlZd dlmZ d dlmZ ejdd Z	ejdd Z
ejd	d
d Zejd	dd Zejd	dd Zejd	dd Zejd	dd Zejd	dd ZdS )    )	MagicMockpatchN)CommonInferenceParamsMegatronLLMDeployableNemo2c                  c   s|    t d/} t }t }t }t |j_d|jj_d|jj_d|jj_|||f| _| V  W d   dS 1 s7w   Y  dS )z.Fixture to mock the model and tokenizer setup.z1nemo.collections.llm.inference.setup_mcore_enginez{{messages}}z<bos>z<eos>N)r   r   	tokenizerchat_template	bos_token	eos_tokenreturn_value)
mock_setupmock_engine
mock_modelmock_tokenizer r   \/home/ubuntu/.local/lib/python3.10/site-packages/tests/deploy/test_megatronllm_deployable.pymock_model_and_tokenizer   s   




"r   c                 C   s$   t dddddddddddddd	d
S )zAFixture to create a deployable instance with mocked dependencies.
dummy.nemo   bfloat16i  i       *   TF)nemo_checkpoint_filepathnum_devices	num_nodestensor_model_parallel_sizepipeline_model_parallel_sizecontext_parallel_sizeexpert_model_parallel_sizeparams_dtype&inference_batch_times_seqlen_thresholdinference_max_seq_lengthmax_batch_sizerandom_seedenable_flash_decodelegacy_ckptr   )r   r   r   r   
deployable&   s    r&   GPUc                 C   s   | j dksJ |  dS )z,Test initialization of the deployable class.r   N)r   assert_called_once)r&   r   r   r   r   test_initialization;   s   r)   c                 C   s   ddg}t dddddd}t| jd	%}t }d
|_|g|_| ||}t|dks-J |	  W d   dS 1 s<w   Y  dS )z#Test text generation functionality.HelloWorldg      ?r   g           F)temperaturetop_ktop_pnum_tokens_to_generatereturn_log_probsgeneratezGenerated textN)
r   r   objectmcore_enginer   generated_textr   r2   lenr(   )r&   promptsinference_paramsmock_generatemock_resultresultsr   r   r   test_generateB   s    	
"r<   c                 C   s<   dddg}|  |}t|tsJ |d d |v sJ dS )zTest chat template application.userr*   )rolecontentr   r?   N)apply_chat_template
isinstancestr)r&   messagestemplater   r   r   test_apply_chat_templateY   s   
rE   c                 C   s&   g d}|  |}|g dksJ dS )zTest EOS token removal.)z
Hello<eos>r+   z	Test<eos>)r*   r+   TestN)remove_eos_token)r&   textscleaned_textsr   r   r   test_remove_eos_tokenb   s   
rJ   c                 C   s0   d}|  |}t|tsJ |d dksJ dS )z%Test string to dictionary conversion.z{"key": "value"}keyvalueN)str_to_dictrA   dict)r&   json_strresultr   r   r   test_str_to_dictj   s   
rQ   c                 C   s   | j }| j}t|dksJ t|dksJ dd |D }d|v s#J d|v s)J d|v s/J d|v s5J d	|v s;J d
|v sAJ d|v sGJ d|v sMJ d|v sSJ d|v sYJ d|v s_J dd |D }d|v slJ d|v srJ d|v sxJ dS )z0Test Triton input and output tensor definitions.      c                 S      g | ]}|j qS r   name.0tensorr   r   r   
<listcomp>}       z,test_triton_input_output.<locals>.<listcomp>r7   
max_lengthr"   r.   r/   r-   r#   compute_logprobr@   n_top_logprobsechoc                 S   rT   r   rU   rW   r   r   r   rZ      r[   	sentences	log_probstop_logprobsN)get_triton_inputget_triton_outputr6   )r&   inputsoutputsinput_namesoutput_namesr   r   r   test_triton_input_outputs   s(   ri   )unittest.mockr   r   pytest/megatron.core.inference.common_inference_paramsr   &nemo.deploy.nlp.megatronllm_deployabler   fixturer   r&   markrun_only_onr)   r<   rE   rJ   rQ   ri   r   r   r   r   <module>   s(   












