o
    wi                     @   s   d dl mZmZ d dlZd dlZd dlZd dlmZm	Z	 d dl
mZ ejdd Zejdd Zejd	d
 Zejdd Zejdd ZG dd dZG dd dZdS )    )	MagicMockpatchN)AutoModelForCausalLMAutoTokenizer)HuggingFaceLLMDeployc                  C   s6   t td} t  | _tg dg| j_t | d| _| S )Nspec         return_value)r   r   generatetorchtensorr   cuda)model r   \/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/tests/deploy/test_hf_deployable.py
mock_model   s
   
r   c                  C   sL   t td} d| _d| _t dgd| _tg dgtg dgd| _| S )	Nr   z[PAD]z[EOS]Generated textr   r	   )r
   r
   r
   )	input_idsattention_mask)r   r   	pad_token	eos_tokenbatch_decoder   r   r   )	tokenizerr   r   r   mock_tokenizer#   s   
$r   c                  c   s@    t d} t | j_| V  W d    d S 1 sw   Y  d S )Nz'nemo.deploy.nlp.hf_deployable.PeftModel)r   r   from_pretrainedr   mockr   r   r   mock_peft_model-   s
   

"r"   c                  c   sb    t d"} d| j_d| j_d| j_ttdgd| _| V  W d    d S 1 s*w   Y  d S )Nztorch.distributedTr   r
   r   r   )	r   is_initializedr   get_world_sizeget_rankr   r   r   	broadcastr    r   r   r   mock_distributed4   s   
"r'   c                	   c   s    t ddd0 t dtg dgd d V  W d    n1 s"w   Y  W d    d S W d    d S 1 s:w   Y  d S )Nztorch.cuda.is_availableFr   ztorch.Tensor.cudar	   )r   r   r   r   r   r   r   mock_torch_cuda>   s   "r(   c                   @   s,   e Zd Zdd Zdd Zdd Zdd Zd	S )
MockRequestc                 C   s   || _ d | _d S N)dataspan)selfr+   r   r   r   __init__F   s   
zMockRequest.__init__c                 C   s
   | j | S r*   )r+   )r-   keyr   r   r   __getitem__J      
zMockRequest.__getitem__c                 C   
   | j  S r*   )r+   keysr-   r   r   r   r3   M   r1   zMockRequest.keysc                 C   r2   r*   )r+   valuesr4   r   r   r   r5   P   r1   zMockRequest.valuesN)__name__
__module____qualname__r.   r0   r3   r5   r   r   r   r   r)   E   s
    r)   c                   @   sd   e Zd Zdd Zdd Zdd Zdd Zd	d
 Zdd Zdd Z	dd Z
dd Zdd Zdd ZdS )TestHuggingFaceLLMDeployc                 C   s<   t t tddd W d    d S 1 sw   Y  d S )N
test/modelzinvalid-taskhf_model_id_pathtask)pytestraisesAssertionErrorr   r4   r   r   r    test_initialization_invalid_taskV   s   "z9TestHuggingFaceLLMDeploy.test_initialization_invalid_taskc                 C   s:   t t tdd W d    d S 1 sw   Y  d S )Ntext-generation)r=   )r>   r?   
ValueErrorr   r4   r   r   r   test_initialization_no_modelZ   s   "z5TestHuggingFaceLLMDeploy.test_initialization_no_modelc                 C   sP   t td}t td}t||dd}|j|ksJ |j|ksJ |jdks&J d S )Nr   rB   r   r   r=   )r   r   r   r   r   r   r=   )r-   r   r   deployerr   r   r   ,test_initialization_with_model_and_tokenizer^   s   

zETestHuggingFaceLLMDeploy.test_initialization_with_model_and_tokenizerc              	   C   s   t d|d; t d|d tddd}|j|ksJ |j|ks"J W d    n1 s,w   Y  W d    d S W d    d S 1 sDw   Y  d S )N1transformers.AutoModelForCausalLM.from_pretrainedr   *transformers.AutoTokenizer.from_pretrainedr:   rB   r;   )r   r   r   r   )r-   r   r   rF   r   r   r   #test_initialization_with_model_pathf   s   

Pz<TestHuggingFaceLLMDeploy.test_initialization_with_model_pathc              	   C   s   t d|d7 t d|d tdddd}|j|jjksJ W d    n1 s(w   Y  W d    d S W d    d S 1 s@w   Y  d S )NrH   r   rI   r:   ztest/peft_modelrB   )r<   hf_peft_model_id_pathr=   )r   r   r   r   r   )r-   r   r   r"   rF   r   r   r   #test_initialization_with_peft_modelo   s   

Pz<TestHuggingFaceLLMDeploy.test_initialization_with_peft_modelc                 C   s   t t t dd}|j}|j}t|dksJ t|dksJ tdd |D s*J tdd |D s5J tdd |D s@J td	d |D sKJ td
d |D sVJ d S )NrB   rE   
   r   c                 s       | ]}|j d kV  qdS )promptsNname.0r   r   r   r   	<genexpr>       zKTestHuggingFaceLLMDeploy.test_triton_input_output_config.<locals>.<genexpr>c                 s   rN   )
max_lengthNrP   rR   r   r   r   rT      rU   c                 s   rN   )	sentencesNrP   rR   r   r   r   rT      rU   c                 s   rN   )logitsNrP   rR   r   r   r   rT      rU   c                 s   rN   )scoresNrP   rR   r   r   r   rT      rU   )r   r   get_triton_inputget_triton_outputlenany)r-   rF   inputsoutputsr   r   r   test_triton_input_output_configy   s   z8TestHuggingFaceLLMDeploy.test_triton_input_output_configc                 C   sV   t t t dd}d |_tt |jdgd W d    d S 1 s$w   Y  d S )NrB   rE   test prompttext_inputs)r   r   r   r>   r?   RuntimeErrorr   )r-   rF   r   r   r   test_generate_without_model   s
   "z4TestHuggingFaceLLMDeploy.test_generate_without_modelc                 C   sB   t ||dd}|jdgd}|dgksJ |j  |j  d S )NrB   rE   ra   rb   r   )r   r   assert_called_oncer   r-   r   r   r(   rF   outputr   r   r   test_generate_with_model   s
   
z1TestHuggingFaceLLMDeploy.test_generate_with_modelc                 C   s   t g dgt dgt dgd|j_t||dd}|jdgdddd	}t|ts.J d
|v s4J d|v s:J d|v s@J d S )Nr	         ?g      ?)	sequencesrX   rY   rB   rE   ra   T)rc   output_logitsoutput_scoresreturn_dict_in_generaterW   rX   rY   )r   r   r   r   r   
isinstancedictrg   r   r   r   +test_generate_with_output_logits_and_scores   s   



zDTestHuggingFaceLLMDeploy.test_generate_with_output_logits_and_scoresc              	   C   s   t ||dd}tdgtdggtdggtdggtdggtdggtdggd	}t|g}||}d
|d v sEJ t|d d
 tjsQJ d S )NrB   rE   ra   rj   r
           rM   FrO   temperaturetop_ktop_prV   rl   rm   rW   r   )r   nparrayr)   triton_infer_fnro   ndarrayr-   r   r   rF   request_datarequestsrh   r   r   r   test_triton_infer_fn   s   

	
z-TestHuggingFaceLLMDeploy.test_triton_infer_fnc              	   C   s   t ||dd}td|j_tdgtdggtdggtdggtdggtd	ggtd	ggd
}t|g}||}d|d v sKJ dt|d d d v sYJ d S )NrB   rE   z
Test errorra   rj   r
   rr   rM   Frs   rW   r   zAn error occurred)	r   	Exceptionr   side_effectrw   rx   r)   ry   strr{   r   r   r   test_triton_infer_fn_with_error   s   

	
 z8TestHuggingFaceLLMDeploy.test_triton_infer_fn_with_errorN)r6   r7   r8   rA   rD   rG   rJ   rL   r`   re   ri   rq   r~   r   r   r   r   r   r9   T   s    	
r9   )unittest.mockr   r   numpyrw   r>   r   transformersr   r   nemo.deploy.nlp.hf_deployabler   fixturer   r   r"   r'   r(   r)   r9   r   r   r   r   <module>   s$   

	

	
