o
    2wi!                     @   s  d dl mZ d dlmZ d dlZd dlmZ d dlmZ e	 dd Z
e
jdd	ejd
ejddddejdedejde dejddeddejdddeddejdddeddejddddedd gd!d"ejd#d$ddded%d&d
ededed'ed(ee d)ed*ed+efd,d-Ze
 ejd.ejddd/dejd0ejddd/dejd1e dejdd2edd3d4ejd5d6dd7d8	d=d.ed0ed1ed9ee d:ee f
d;d<ZdS )>    )Path)OptionalN)cli)Pathlikec                   C   s   dS )z%Kaldi import/export related commands.N r   r   r   S/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/lhotse/bin/modes/kaldi.pykaldi
   s   r   import)namedata_dirTF)exists	file_okay)typesampling_ratemanifest_dirz-fz--frame-shiftzBFrame shift (in seconds) is required to support reading feats.scp.)r   helpz-uz--map-string-to-underscoreszWhen specified, we will replace all instances of this string
    in SupervisonSegment IDs to underscores. This is to help with handling
    underscores in Kaldi (see 'export_to_kaldi').)defaultr   r   z-jz
--num-jobs   z1Number of jobs for computing recording durations.z-tz--feature-typezkaldi-fbankz
kaldi-mfccz?Feature type when importing precomputed features from feats.scp)r   show_defaultr   r   z-dz--compute-durationszJCompute durations by reading the whole file instead of using reco2dur file)r   is_flagr   r   r   frame_shiftmap_string_to_underscoresnum_jobsfeature_typecompute_durationsc              	   C   s   ddl m} ddlm}	 |	| ||||| |d\}
}}t|}|jddd |
|d  |dur8||d	  |durC||d
  |j|
||dd|d  dS )z
    Convert a Kaldi data dir DATA_DIR into a directory MANIFEST_DIR of lhotse manifests. Ignores feats.scp.
    The SAMPLING_RATE has to be explicitly specified as it is not available to read from DATA_DIR.
    r   )CutSet)load_kaldi_data_dir)pathr   r   r   r   use_reco2durr   T)parentsexist_okzrecordings.jsonl.gzNzsupervisions.jsonl.gzzfeatures.jsonl.gzg?)
recordingssupervisionsfeatures	tolerancezcuts.jsonl.gz)lhotser   lhotse.kaldir   r   mkdirto_filefrom_manifests)r   r   r   r   r   r   r   r   r   r   recording_setmaybe_supervision_setmaybe_feature_setr   r   r   import_   s2   9	r-   r!   )r   dir_okayr"   
output_dirz--map-underscores-tozoOptional string with which we will replace all underscores.This helps avoid issues with Kaldi data dir sorting.)r   r   r   z-pz--prefix-spk-idzZPrefix utterance ids with speaker ids.This helps avoid issues with Kaldi data dir sorting.)r   r   map_underscores_toprefix_spk_idc                 C   s~   ddl m} ddlm} t|}||| |||||d tjdddd tjd	| d
| ddd tjd| dd dS )zp
    Convert a pair of ``RecordingSet`` and ``SupervisionSet`` manifests into a Kaldi-style data directory.
    r   )export_to_kaldi)load_manifest_lazy_or_eager)r!   r"   r/   r0   r1   zFExport completed! You likely need to run the following Kaldi commands:Tyellow)boldfgz  utils/utt2spk_to_spk2utt.pl z/utt2spk > z/spk2utt)r6   z  utils/fix_data_dir.sh N)r&   r2   lhotse.serializationr3   r   clicksecho)r!   r"   r/   r0   r1   r2   r3   r   r   r   exportd   s(   !r:   )F)pathlibr   typingr   r8   lhotse.bin.modes.cli_baser   lhotse.utilsr   groupr   commandargumentintoptionfloatstrChoiceboolr-   r:   r   r   r   r   <module>   s    

		)
