o
    }oi@                     @   sv   d dl Z d dlZd dlmZmZ d dlmZ d dlmZ d dl	m
Z
mZ e jddde
fd	d
Zdd Zdd ZdS )    N)CutSetSupervisionSegment)DummyManifest)LhotseSpeechToTextBpeDataset)SentencePieceTokenizercreate_spt_modelsession)scopereturnc              
   C   sX   |  d}|d }|dttttdtd t|dddt|d	\}}t	|S )
Nklingon_tokensztext.txt
az    F)
vocab_sizesample_sizedo_lower_case
output_dir)
mktemp
write_textjoinmapchrrangeordr   strr   )tmp_path_factorytmpdir	text_path
model_path
vocab_path r"   a/home/ubuntu/.local/lib/python3.10/site-packages/tests/collections/asr/test_asr_lhotse_dataset.py	tokenizer   s   
$
r$   c                 C   sP  t tdddd}| |d jd j|d jd _td|d jddd	d
td|d jdddd
g|d _t| d}|| }t	|t
sDJ t|dksLJ tdd |D sWJ |\}}}}|jdksdJ | dgd ksoJ |jdksvJ |d  g dksJ |d  |d  ksJ |d  g dksJ | g dksJ d S )Nr      Tbegin_idend_id	with_data   z
cuts2-sup0         ?firstidrecording_idstartdurationtextz
cuts2-sup1second)r$      c                 s   s    | ]	}t |tjV  qd S )N)
isinstancetorchTensor).0tr"   r"   r#   	<genexpr>7   s    z*test_lhotse_asr_dataset.<locals>.<genexpr>)r%   >  r<   )r%      )r*   
      r?      r=   r@      r+         r   r   )r*      r>   r?      rC   r*   rE   r@   r5      rB      )   rH   r=   )r   r   text_to_idssupervisionsr3   tokensr   r0   r   r6   tuplelenallshapetolist)r$   cutsdatasetbatchaudio
audio_lensrK   
token_lensr"   r"   r#   test_lhotse_asr_dataset#   s$   "

rW   c                 C   s\  t tdddd}d|d _d|d _td|d jd	d
ddg|d _td|d jddddg|d _t| dd}|| }t|tsCJ t	|dksKJ |\}}}}}|d jd j
dks^J |d jd j
dksjJ |d jdkssJ |d jdks|J |d jd jd
ksJ |d jd jd	ksJ |d jd jdksJ |d jd jdksJ d S )Nr   r+   Tr&   cuts0cuts1r*   z
cuts0-sup0g?r,   r-   r.   z
cuts1-sup0 )r$   return_cutsrG   g        )r   r   r/   r   r0   rJ   r   r6   rL   rM   r3   r2   r1   )r$   rQ   datasets_metadatarS   _cuts_metadatar"   r"   r#    test_lhotse_asr_dataset_metadataF   s(   



r_   )pytestr7   lhotser   r   lhotse.testing.dummiesr   .nemo.collections.asr.data.audio_to_text_lhotser   :nemo.collections.common.tokenizers.sentencepiece_tokenizerr   r   fixturer$   rW   r_   r"   r"   r"   r#   <module>   s   

#