o
    wim(                     @   s   d dl Z d dlZd dlmZ d dlmZ d dlmZ d dlm	Z	m
Z
 d dlmZ edgZej r;eed e jd	d
dd Ze jd	d
dd Ze jd	d
dd ZG dd dZdS )    N)Trainer)pad_sequence)EncDecCTCModelBPE)BoostingTreeModelConfigGPUBoostingTreeModel)ContextGraphcpucudamodule)scopec                  C   sF   g d} g dg ddgg}g d}t ddd}|j|| |dd	 |S )
Nabcabdc)         )r   r      r           r   r         ?context_scoredepth_scalingF	token_idsphrasesscoresuniform_weights)r   build)r   phrases_idsr   context_graph r"   e/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/tests/collections/asr/test_boosting_tree.pytest_context_graph!   s   r$   c                 C   s   t j| dddddd}|S )N   r   TFr!   
vocab_size	unk_scorefinal_eos_score
use_tritonr   )r   from_context_graph)r$   boosting_treer"   r"   r#   test_boosting_tree+   s   r-   c                  C   s*   t jdd} | tddd |  } | S )Nstt_en_conformer_ctc_small)
model_namer   r   )devicesaccelerator)r   from_pretrainedset_trainerr   eval)modelr"   r"   r#   conformer_ctc_bpe_model8   s   r6   c                   @   s   e Zd Zejjdd Zejjejdeejdg ddd Z	ejjejdedd	 Z
ejjejded
d Zejjejjej  dddd Zejjdd Zejjdd ZdS )TestGPUBoostingTreeModelc                 C   s  |}|j dks	J |jjd jd jd jsJ |jjd jd jd js'J |jjd js0J |jjd jd jd jdksAJ |jjd jd jd jdksRJ |jjd jdks]J |jjd jd jd jjdksoJ |jjd jd jd jjd	ksJ |jjd jjd	ksJ t|jjd jd jd jdd
ksJ t|jjd jd jd jdd
ksJ t|jjd jddksJ dS )z'Test initial python-based context graphr%   r   r   r   r   r   r   r   g)\(@r   N)		num_nodesrootnextis_endphrasefailtokenround
node_score)selfr$   r!   r"   r"   r#   test_building_context_graphA   s   ""$$(( z4TestGPUBoostingTreeModel.test_building_context_graphdevice
batch_size)r   r      c                 C   sN   | | |j|dd}||\}}|j|dfksJ |j|dfks%J dS )z.Test advance method with different batch sizesT)rE   bosr%   N)toget_init_statesadvanceshape)rB   r-   rD   rE   init_statesr   next_statesr"   r"   r#   test_advance_methodW   s
   
z,TestGPUBoostingTreeModel.test_advance_methodc                 C   s<   | | tjg dtj|d}||}|jdksJ dS )z%Test get_final method for EOS scoring)r   r   r   )dtyperD   )r   N)rH   torchtensorlong	get_finalrK   )rB   r-   rD   statesfinal_scoresr"   r"   r#   test_get_final_methodd   s   

z.TestGPUBoostingTreeModel.test_get_final_methodc                 C   s   | | g dg dg dg g}|tdd |D dd |tdd |D  |d	d	d
}tjg dg dg dg dg|d}tj||ddsMJ dS )z6Test boosting tree inference with predefined sentencesr   r   r   r   r   r   r   r   r   r   )r   r   r   r   c                 S      g | ]}t |qS r"   rP   
LongTensor.0sentencer"   r"   r#   
<listcomp>w       zITestGPUBoostingTreeModel.test_boosting_tree_inference.<locals>.<listcomp>Tbatch_firstc                 S      g | ]}t |qS r"   lenr\   r"   r"   r#   r_   z       Flabelslabels_lengthsrG   eos)r   绸? @r   r   )r   r   r   rk   rl   )r   r   rk   gr   )r   r   r   r   r   )rD   g-C6?atolN)rH   r   rP   r[   rQ   allclose)rB   r-   rD   sentences_idsboosting_scorescorrect_answerr"   r"   r#   test_boosting_tree_inferenceo   s&   
	z5TestGPUBoostingTreeModel.test_boosting_tree_inferencezCUDA not available)reasonc           
      C   s   t d}tj|ddd|}tj|ddd|}g dg dg}tdd	 |D dd
|}t dd	 |D |}|||ddd}|||ddd}	t j||	ddsXJ dS )z)Compare Triton vs PyTorch implementationsr	   r%   T)r!   r'   r*   FrW   rX   c                 S   rY   r"   rZ   r]   sr"   r"   r#   r_      r`   zOTestGPUBoostingTreeModel.test_triton_vs_pytorch_consistency.<locals>.<listcomp>ra   c                 S   rc   r"   rd   ru   r"   r"   r#   r_      rf   rg   gh㈵>rm   N)rP   rD   r   r+   rH   r   r[   ro   )
rB   r$   rD   boosting_tree_tritonboosting_tree_pytorchrp   rh   lengthsscores_tritonscores_pytorchr"   r"   r#   "test_triton_vs_pytorch_consistency   s"   
z;TestGPUBoostingTreeModel.test_triton_vs_pytorch_consistencyc                 C   sf   t j|dddd}tjddgtjd}|j|dd	\}}t|d
  ddks)J |d dks1J dS )z2Test EOS token handling (important for AED models)r%   r   r   )r!   r'   r(   r)   r   r   )rO   r   )eos_id)r   r   g
ףp=
?)r   r   g       @N)r   r+   rP   rQ   rR   rJ   r@   item)rB   r$   r,   rL   r   rM   r"   r"   r#   test_eos_handling   s   z*TestGPUBoostingTreeModel.test_eos_handlingc                    s:  t  }g d} fdd|D }g d}t|j|jd}|j||||jd tj| jj	|j
|j|j|jd}||d  t |d d	}tj| jd
}	t|d d}
|
d W d   n1 sdw   Y  t |d d}tj| jd
}t g dd}tj| jd
}t|	j|jsJ t|	j|jsJ dS )zyTest that the boosting tree model is built correctly from the config using model_path, key_phrases_file, key_phrases_listr   c                    s   g | ]} j |qS r"   )	tokenizertext_to_ids)r]   r=   r6   r"   r#   r_      s    zQTestGPUBoostingTreeModel.test_boosting_tree_model_from_config.<locals>.<listcomp>r   r   r   r&   ztest_boosting_tree.nemo)
model_path)r   ztest_boosting_tree.txtwz	abc
abd
cN)key_phrases_file)key_phrases_list)r   r   r   r   r   r   r   r+   r   r'   r(   r)   r*   save_tofrom_configopenwriterP   ro   arcs_weights)rB   r6   tmp_pathboosting_tree_cfgr   r    r   r!   r-   boosting_tree_from_model_pathf#boosting_tree_from_key_phrases_file#boosting_tree_from_key_phrases_listr"   r   r#   $test_boosting_tree_model_from_config   sP   
	z=TestGPUBoostingTreeModel.test_boosting_tree_model_from_configN)__name__
__module____qualname__pytestmarkunitrC   parametrizeDEVICESrN   rV   rs   skipifrP   r	   is_availabler|   r   r   r"   r"   r"   r#   r7   @   s(    

	
r7   )r   rP   lightning.pytorchr   torch.nn.utils.rnnr   nemo.collections.asr.modelsr   Anemo.collections.asr.parts.context_biasing.boosting_graph_batchedr   r   Bnemo.collections.asr.parts.context_biasing.context_graph_universalr   rD   r   r	   r   appendfixturer$   r-   r6   r7   r"   r"   r"   r#   <module>   s"   



	


