o
    2wi%                     @   s  d dl mZ d dlmZ d dlZd dlmZmZmZm	Z	m
Z
 d dlmZ d dlmZ d dlmZmZmZmZ d dlmZ d d	lmZmZ d d
lmZ d dlmZmZ e dd Zej e!dddej"de dej#dde$e%eddddede&fddZ'ej e!dddej"dejddddej"de dej#ddejdddd d!ej#d"e$e d#d$dej#d%d&e(d'd(dej#d)d*ejddd+dd,dej#d-d.e(d/d0ddeded1ee d2e&d3e(d4ee d5e(fd6d7Z)ej e!dddej"d8ejddddej"d9e dej"d:e dej#ddejdddd d!ej#d"e$e d#d$dej#d-d.e(d/d0dd8ed9ed:ed1ee d2e&d5e(fd;d<Z*ej e!dddej"d8ejdddd=dej"d9ejdd>dej"d:e dej#ddejdddd?d!ej#d"e$e d#d$dej#d-d.e(d@dAdej#dBdCe+dDdEdd8ed9ed:ed1ee d2e&d5e(dFe
fdGdHZ,ej e!dddej"d1ejdddde"dIej"dJe dej#d.d-e(d/dKd1edIe&dJed5e(fdLdMZ-dNedIe&dOefdPdQZ.dS )R    )Path)OptionalN)CutSetFeatures
FeatureSetLilcomURLWriterSeconds)RecordingSet)cli)FbankFeatureExtractorFeatureSetBuilder create_default_feature_extractor)FEATURE_EXTRACTORS)available_storage_backends
get_writer)load_manifest_lazy_or_eager)Pathlikefastcopyc                  C   s    ddl } | d | d dS )z$Feature extraction related commands.r   N   )torchset_num_threadsset_num_interop_threads)r    r   V/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/lhotse/bin/modes/features.pyfeat   s   
r   T)show_default)context_settingsoutput_config)typez-fz--feature-typefbankz$Which feature extractor type to use.)r   defaulthelpfeature_typec                 C   s   t ||  dS )z:Save a default feature extraction config to OUTPUT_CONFIG.N)r   to_yaml)r   r#   r   r   r   write_default_config   s   r%   recording_manifestF)existsdir_okay
output_dirz--feature-manifestz=Optional manifest specifying feature extractor configuration.)r   r"   z--storage-typelilcom_chunkyz2Select a storage backend for the feature matrices.z-tz--lilcom-tick-powerzfDetermines the compression accuracy; the input will be compressed to integer multiples of 2^tick_powerz-rz
--root-dir)r'   	file_okayzCRoot directory - all paths in the manifest will use this as prefix.z-jz
--num-jobsr   zNumber of parallel processes.feature_manifeststorage_typelilcom_tick_powerroot_dirnum_jobsc                 C   s   t | }|dur||}|durt|nt }t|}|jddd d|v r-|d n|d }	t||	|d}
t	||
d}|j
||d	 |d
 W d   dS 1 sUw   Y  dS )z
    Extract features for recordings in a given AUDIO_MANIFEST. The features are stored in OUTPUT_DIR,
    with one file per recording (or segment).
    NT)exist_okparentshdf5zfeats.h5storage)
tick_power)feature_extractorr5   zfeature_manifest.json.gz)
recordingsoutput_manifestr1   )r	   	from_filewith_path_prefixr   	from_yamlr   r   mkdirr   r   process_and_store_recordings)r&   r)   r-   r.   r/   r0   r1   r8   r7   storage_pathr5   feature_set_builderr   r   r   extract-   s2   
.

"rA   cutsetoutput_cutsetr?   c                 C   sZ   t | }|durt|nt }|j|||t|d}t|jj	ddd |
| dS )z
    Extract features for cuts in a given CUTSET manifest.
    The features are stored in STORAGE_PATH, and the output manifest
    with features is stored in OUTPUT_CUTSET.
    N)	extractorr?   r1   r.   Tr3   r2   )r   r:   r   r<   r   compute_and_store_featuresr   r   parentr=   to_file)rB   rC   r?   r-   r.   r1   cutsr7   r   r   r   extract_cutsy   s   
 
rJ   )r'   r(   
allow_dash)rK   zOptional manifest specifying feature extractor configuration. If you want to use CUDA, you should specify the device in this config.   zNumber of dataloader workers.z-bz--batch-durationg     @zCAt most this many seconds of audio will be processed in each batch.batch_durationc           	      C   s\   t | }|durt|nt }|j||||t|d}t|jj	ddd |
| dS )a  
    Extract features for cuts in a given CUTSET manifest.
    The features are stored in STORAGE_PATH, and the output manifest
    with features is stored in OUTPUT_CUTSET.

    This version enables CUDA acceleration for feature extractors
    that support it (e.g., kaldifeat extractors).

    
    Example usage of kaldifeat fbank with CUDA:

        $ pip install kaldifeat  # note: ensure it's compiled with CUDA

        $ lhotse feat write-default-config -f kaldifeat-fbank feat.yml

        $ sed 's/device: cpu/device: cuda/' feat.yml feat-cuda.yml

        $ lhotse feat extract-cuts-batch -f feat-cuda.yml cuts.jsonl cuts_with_feats.jsonl feats.h5
    N)rD   r?   rM   num_workersr.   TrE   )r   r:   r   r<   r    compute_and_store_features_batchr   r   rG   r=   rH   )	rB   rC   r?   r-   r.   r1   rM   rI   r7   r   r   r   extract_cuts_batch   s   
8
rP   urlr9   )r   r!   c              
   C   s   ddl m} ddlm} t|}d|jv sJ dt| }t|M}||0}g }	||ddD ]}
|	|	t
|
| q1||	d| dD ]	}
||
  qGW d	   n1 s[w   Y  W d	   d	S W d	   d	S 1 ssw   Y  d	S )
a  
    Read an existing FEATURE_MANIFEST, upload the feature matrices it contains to a URL location,
    and save a new feature OUTPUT_MANIFEST that refers to the uploaded features.

    The URL can refer to endpoints such as AWS S3, GCP, Azure, etc.
    For example: "s3://my-bucket/my-features" is a valid URL.

    This script does not currently support credentials,
    and assumes that you have the write permissions.
    r   )ProcessPoolExecutor)tqdmz.jsonlz;This mode only supports writing to JSONL feature manifests.z&Submitting parallel uploading tasks...)desczUploading features to N)concurrent.futuresrR   rS   r   suffixesr   r:   open_writerappendsubmit_upload_onewriteresult)r-   rQ   r9   r1   rR   rS   local_featuresmanifest_writerexfuturesitemr   r   r   upload   s(   
Prb   ra   returnc                 C   s2   |   }t|}|j| j|d}t| |||jdS )N)keyvalue)r?   storage_keyr.   )loadr   r[   rf   r   name)ra   rQ   	feats_matfeats_writernew_keyr   r   r   rZ     s   
rZ   )/pathlibr   typingr   clicklhotser   r   r   r   r   lhotse.audior	   lhotse.bin.modes.cli_baser
   lhotse.featuresr   r   r   r   lhotse.features.baser   lhotse.features.ior   r   lhotse.serializationr   lhotse.utilsr   r   groupr   commanddictargumentoptionChoiceliststrr%   intrA   rJ   floatrP   rb   rZ   r   r   r   r   <module>   s<   
	

+



-"