o
    Sie                  	   @   s   d dl Z d dlZd dlZd dlZd dlmZmZ d dlmZ d dl	m
Z
mZmZ d dlmZmZ d dlmZ d dlmZ d dlmZmZmZmZmZmZmZmZ d dlZd dlZd d	l m!Z! d d
l"m#Z# d dl$m%Z% d dl&m'Z'm(Z(m)Z) d dl*m+Z+ d dl,m-Z-m.Z.m/Z/m0Z0 d dl1m2Z2m3Z3m4Z4m5Z5m6Z6m7Z7m8Z8m9Z9m:Z:m;Z;m<Z< G dd dedZ=i Z>de?defddZ@de?ddfddZAdd ZBG dd de=ZCeddG d d! d!ZDG d"d# d#e.e+ZEG d$d% d%ZFd&ejGd'e'de?fd(d)ZH	d0d*eeD d+ee2 dee?ejGf fd,d-ZIG d.d/ d/ZJdS )1    N)ABCMetaabstractmethod)ProcessPoolExecutor)asdict	dataclassis_dataclass)chainislice)isclose)Path)AnyDictIterableListOptionalSequenceTypeUnion)tqdm)	Recording)	AugmentFn)FeaturesWriter
get_readeris_in_memory)AlgorithmMixin)	LazyMixinSerializable	load_yamlsave_to_yaml)PathlikeSecondsasdict_nonullcompute_num_framescompute_num_frames_from_samplesexactly_one_not_nullfastcopyifnonesplit_manifest_lazysplit_sequenceuuid4c                   @   s  e Zd ZdZdZdZd3dee fddZe	de
jdede
jfd	d
Zee	defddZe	dedefddZedeeejf fddZede
jde
jdede
jfddZede
jdefddZ	d3dee
jejee
j eej f dedeee
jejf  dee
jejee
j eej f fddZ			d4de
jdedededeeeee f  d ee dd!fd"d#Z 				d5d$e!deded%ee d&eeee f d ee dd!fd'd(Z"e#d)e$dd fd*d+Z%de&eef fd,d-Z'e#d.e(dd fd/d0Z)d.e(fd1d2Z*dS )6FeatureExtractora  
    The base class for all feature extractors in Lhotse.
    It is initialized with a config object, specific to a particular feature extraction method.
    The config is expected to be a dataclass so that it can be easily serialized.

    All derived feature extractors must implement at least the following:

    * a ``name`` class attribute (how are these features called, e.g. 'mfcc')
    * a ``config_type`` class attribute that points to the configuration dataclass type
    * the ``extract`` method,
    * the ``frame_shift`` property.

    Feature extractors that support feature-domain mixing should additionally specify two static methods:

    * ``compute_energy``, and
    * ``mix``.

    By itself, the ``FeatureExtractor`` offers the following high-level methods
    that are not intended for overriding:

    * ``extract_from_samples_and_store``
    * ``extract_from_recording_and_store``

    These methods run a larger feature extraction pipeline that involves data augmentation and disk storage.
    Nconfigc                 C   s*   |d u r|   }t|sJ d|| _d S )Nz5The feature configuration object must be a dataclass.)config_typer   r+   )selfr+    r.   H/home/ubuntu/.local/lib/python3.10/site-packages/lhotse/features/base.py__init__C   s   
zFeatureExtractor.__init__samplessampling_ratereturnc                 C      dS )z
        Defines how to extract features using a numpy ndarray of audio samples and the sampling rate.

        :return: a numpy ndarray representing the feature matrix.
        Nr.   )r-   r1   r2   r.   r.   r/   extractK   s   zFeatureExtractor.extractc                 C      d S Nr.   r-   r.   r.   r/   frame_shiftT   s   zFeatureExtractor.frame_shiftc                 C   r6   r7   r.   )r-   r2   r.   r.   r/   feature_dimY      zFeatureExtractor.feature_dimc                 C   r4   )Ncpur.   r8   r.   r.   r/   device]   r;   zFeatureExtractor.device
features_a
features_benergy_scaling_factor_bc                 C      t d)a|  
        Perform feature-domain mix of two signals, ``a`` and ``b``, and return the mixed signal.

        :param features_a: Left-hand side (reference) signal.
        :param features_b: Right-hand side (mixed-in) signal.
        :param energy_scaling_factor_b: A scaling factor for ``features_b`` energy.
            It is used to achieve a specific SNR.
            E.g. to mix with an SNR of 10dB when both ``features_a`` and ``features_b`` energies are 100,
            the ``features_b`` signal energy needs to be scaled by 0.1.
            Since different features (e.g. spectrogram, fbank, MFCC) require different combination of
            transformations (e.g. exp, log, sqrt, pow) to allow mixing of two signals, the exact place
            where to apply ``energy_scaling_factor_b`` to the signal is determined by the implementer.
        :return: A mixed feature matrix.
        zThe feature extractor's "mix" operation is undefined. It does not support feature-domain mix, consider computing the features after, rather than before mixing the cuts.
ValueError)r>   r?   r@   r.   r.   r/   mixa   s   zFeatureExtractor.mixfeaturesc                 C   rA   )aO  
        Compute the total energy of a feature matrix. How the energy is computed depends on a
        particular type of features.
        It is expected that when implemented, ``compute_energy`` will never return zero.

        :param features: A feature matrix.
        :return: A positive float value of the signal energy.
        zThe feature extractor's "compute_energy" operation is undefined. It does not support feature-domain mix, consider computing the features after, rather than before mixing the cuts.rB   rE   r.   r.   r/   compute_energyy   s   
zFeatureExtractor.compute_energylengthsc           	         sB  d}d}|durfdd|D }t |tjsJ dn+t |tr%d}n|jdkr/t|}n|ddg}td	d
 |D rHdd |D }d}g  |D ]}j|d}|durc|d|t   } 	| qL|rrdd  D  t dkr|r| S  d S t
 fdd
 dd D r|rtj ddS tj ddS  S )a  
        Performs batch extraction. It is not guaranteed to be faster
        than :meth:`FeatureExtractor.extract` -- it depends on whether
        the implementation of a particular feature extractor supports
        accelerated batch computation. If `lengths` is provided, it is
        assumed that the input is a batch of padded sequences, so we will
        not perform any further collation.

        .. note::
            Unless overridden by child classes, it defaults to sequentially
            calling :meth:`FeatureExtractor.extract` on the inputs.

        .. note::
            This method *should* support variable length inputs.
        FNc                    s   g | ]	}t |j qS r.   )r#   r9   ).0l)r2   r-   r.   r/   
<listcomp>   s    z2FeatureExtractor.extract_batch.<locals>.<listcomp>zNIf `lengths` is provided, `samples` must be a batched and padded torch.Tensor.T   c                 s   s    | ]	}t |tjV  qd S r7   )
isinstancetorchTensorrI   xr.   r.   r/   	<genexpr>       z1FeatureExtractor.extract_batch.<locals>.<genexpr>c                 S   s   g | ]}|  qS r.   )numpyrQ   r.   r.   r/   rK      s    )r2   c                 S      g | ]}t |qS r.   )rO   
from_numpyrQ   r.   r.   r/   rK          r   c                 3   s     | ]}|j  d  j kV  qdS r   N)shape)rI   item)resultr.   r/   rS      s    )dimaxis)rN   rO   rP   listndimreshapeanyr5   lenappendallstacknp)	r-   r1   r2   rH   input_is_listinput_is_torch	feat_lensr[   resr.   )r\   r2   r-   r/   extract_batch   sJ   


zFeatureExtractor.extract_batchr   storageoffsetchannel
augment_fnFeaturesc                 C   s   ddl m} |dur|||}t|jd | dd}| j||d}	t|	|d}
t||| j|	jd |	jd | j|||jt	|j
|
d	}|||	d
 |S )a  
        Extract the features from an array of audio samples in a full pipeline:

        * optional audio augmentation;
        * extract the features;
        * save them to disk in a specified directory;
        * return a ``Features`` object with a description of the extracted features.

        Note, unlike in ``extract_from_recording_and_store``, the returned ``Features`` object
        might not be suitable to store in a ``FeatureSet``, as it does not reference any particular
        ``Recording``. Instead, this method is useful when extracting features from cuts - especially
        ``MixedCut`` instances, which may be created from multiple recordings and channels.

        :param samples: a numpy ndarray with the audio samples.
        :param sampling_rate: integer sampling rate of ``samples``.
        :param storage: a ``FeaturesWriter`` object that will handle storing the feature matrices.
        :param offset: an offset in seconds for where to start reading the recording - when used for
            ``Cut`` feature extraction, must be equal to ``Cut.start``.
        :param channel: an optional channel number(s) to insert into ``Features`` manifest.
        :param augment_fn: an optional ``WavAugmenter`` instance to modify the waveform before feature extraction.
        :return: a ``Features`` manifest item for the extracted feature matrix (it is not written to disk).
        r   validate_featuresNrL      ndigitsr1   r2   rn   )startdurationtype
num_framesnum_featuresr9   r2   channelsstorage_typestorage_pathstorage_key
feats_data)	lhotse.qart   roundrZ   r5   store_feature_arrayrr   namer9   strr   )r-   r1   rn   r2   ro   rp   rq   rt   r{   featsr   manifestr.   r.   r/   extract_from_samples_and_store   s*   
z/FeatureExtractor.extract_from_samples_and_store	recordingr{   r   c                 C   s   ddl m} |j|||d}|dur|||j}| j||jd}	t|	|d}
t|j|dur/|n|j||dur9|j	n|j	| j
|	jd |	jd | j|j|j
t|j|
d}|||	d	 |S )
a'  
        Extract the features from a ``Recording`` in a full pipeline:

        * load audio from disk;
        * optionally, perform audio augmentation;
        * extract the features;
        * save them to disk in a specified directory;
        * return a ``Features`` object with a description of the extracted features and the source data used.

        :param recording: a ``Recording`` that specifies what's the input audio.
        :param storage: a ``FeaturesWriter`` object that will handle storing the feature matrices.
        :param offset: an optional offset in seconds for where to start reading the recording.
        :param duration: an optional duration specifying how much audio to load from the recording.
        :param channels: an optional int or list of ints, specifying the channels;
            by default, all channels will be used.
        :param augment_fn: an optional ``WavAugmenter`` instance to modify the waveform before feature extraction.
        :return: a ``Features`` manifest item for the extracted feature matrix.
        r   rs   )ro   r{   r   Nrx   ry   rL   )recording_idr   rz   r{   r|   r}   r~   r9   r2   r   r   r   r   )r   rt   
load_audior2   r5   r   rr   idchannel_idsr{   r   rZ   r9   r   r   )r-   r   rn   ro   r{   r   rq   rt   r1   r   r   r   r.   r.   r/    extract_from_recording_and_store  s4   z1FeatureExtractor.extract_from_recording_and_storedatac                 C   s&   | d}t|}|j|}||S Nfeature_type)popget_extractor_typer,   	from_dict)clsr   r   extractor_typer+   r.   r.   r/   r   ?  s   
zFeatureExtractor.from_dictc                 C   s   | j  }| j|d< |S r   )r+   to_dictr   )r-   dr.   r.   r/   r   G  s   

zFeatureExtractor.to_dictpathc                 C   s   |  t|S r7   )r   r   )r   r   r.   r.   r/   	from_yamlL  s   zFeatureExtractor.from_yamlc                 C   s>   |   }d|v rt|d tjr|d j|d< t||d d S )Nr=   r   )r   rN   rO   r=   r|   r   )r-   r   r   r.   r.   r/   to_yamlP  s   zFeatureExtractor.to_yamlr7   )r   NN)r   NNN)+__name__
__module____qualname____doc__r   r,   r   r   r0   r   rh   ndarrayintr5   propertyr    r9   r:   r   r   rO   r=   staticmethodfloatrD   rG   rP   r   r   rm   r   r   r   r   r   classmethoddictr   r   r   r   r   r   r.   r.   r.   r/   r*   %   s    
M
:
8r*   )	metaclassr   r3   c                 C   s   t |  S )z
    Return the feature extractor type corresponding to the given name.

    :param name: specifies which feature extractor should be used.
    :return: A feature extractors type.
    )FEATURE_EXTRACTORSr   r.   r.   r/   r   `  s   r   zOptional[FeatureExtractor]c                 C   s
   t |  S )z
    Create a feature extractor object with a default configuration.

    :param name: specifies which feature extractor should be used.
    :return: A new feature extractor instance.
    )r   r   r.   r.   r/    create_default_feature_extractorj  s   
r   c                 C   s   | t | j< | S )a;  
    This decorator is used to register feature extractor classes in Lhotse so they can be easily created
    just by knowing their name.

    An example of usage:

    @register_extractor
    class MyFeatureExtractor: ...

    :param cls: A type (class) that is being registered.
    :return: Registered type.
    )r   r   )r   r.   r.   r/   register_extractort  s   
r   c                   @   sN   e Zd ZdZdeejejf de	dejfddZ
dd Zedefd	d
ZdS )TorchaudioFeatureExtractorzGCommon abstract base class for all torchaudio based feature extractors.r1   r2   r3   c                 C   s   t | j}||dd |d  d9  < |d  d9  < t|tjs(t|}t|jdkr4|	d}| j
|fi |tj}| S )NF)sample_frequency
snip_edgesr9   g     @@frame_lengthrL   r   )r   r+   updaterN   rO   rP   rW   rd   rZ   	unsqueeze_feature_fntofloat32rU   )r-   r1   r2   paramsrE   r.   r.   r/   r5     s   


z"TorchaudioFeatureExtractor.extractc                 O   s   t  r7   NotImplementedErrorr-   argskwargsr.   r.   r/   r     s   z&TorchaudioFeatureExtractor._feature_fnc                 C   s   | j jS r7   )r+   r9   r8   r.   r.   r/   r9     s   z&TorchaudioFeatureExtractor.frame_shiftN)r   r   r   r   r   rh   r   rO   rP   r   r5   r   r   r    r9   r.   r.   r.   r/   r     s    
r   T)orderc                
   @   sz  e Zd ZU dZeed< eed< eed< eed< eed< eed< eed< eed	< eed
< eee	f ed< dZ
ee ed< dZeeeee f  ed< edefddZedefddZedefddZ			d+dee dee deeee f dejfddZ			d,dedee dedd fddZdedd fdd Zdefd!d"Zd#edd fd$d%Zed&edd fd'd(Zd)d* ZdS )-rr   ak  
    Represents features extracted for some particular time range in a given recording and channel.
    It contains metadata about how it's stored: storage_type describes "how to read it", for now
    it supports numpy arrays serialized with np.save, as well as arrays compressed with lilcom;
    storage_path is the path to the file on the local filesystem.
    r|   r}   r~   r9   r2   rz   r{   r   r   r   Nr   r   r3   c                 C   s   | j | j S r7   rz   r{   r8   r.   r.   r/   end     zFeatures.endc                 C   
   t | jS r7   )r   r   r8   r.   r.   r/   r        
zFeatures.is_in_memoryc                 C   s
   | j dkS )Nshar)r   r8   r.   r.   r/   is_placeholder  r   zFeatures.is_placeholderr   
channel_idc              
   C   s   t | j| j}d\}}|d u r| j}|| jd k r.td| j d| d| j d| j d	t|| js@t|| j | j	| j
d}|d urO|t|| j	| j
d }|j| j||d	S )
NrY   gMbP?z#Cannot load features for recording z starting from zs. The available range is (z, z
) seconds.)r9   r2   )left_offset_framesright_offset_frames)r   r   r   rz   rC   r   r   r
   r"   r9   r2   readr   )r-   rz   r{   r   rn   r   r   r.   r.   r/   load  s6   
zFeatures.loadFlilcomc              	   C   s   ddl m} | jdv r| S | j||d}t|jjtjr$|r$|d }n|d }|	d|}t
| dt|| j|jd |j|dd	S )
Nr   )get_memory_writer)memory_lilcommemory_writerr   r   
memory_raw         )rz   r{   r}   r   r   r   )lhotse.features.ior   r   r   
issubclassdtyper|   rh   floatingwriter%   r&   r{   rZ   r   )r-   rz   r{   r   r   arrwriterr   r.   r.   r/   move_to_memory  s"   


zFeatures.move_to_memoryr   c                 C   s   t | tt|| j dS )N)r   )r%   r   r   r   r-   r   r.   r.   r/   with_path_prefix  s   zFeatures.with_path_prefixc                 C   s   t | S r7   )r!   r8   r.   r.   r/   r        zFeatures.to_dictr   c                 C   s.   |   }|| j|}t| |j|j|d}|S )z
        Read the referenced feature array and save it using ``writer``.
        Returns a copy of the manifest with updated fields related to the feature storage.
        )r   r   r   )r   r   r   r%   r   r   )r-   r   r   new_keyr[   r.   r.   r/   
copy_feats!  s   zFeatures.copy_featsr   c                 C   sd   d| vrd| v rt d t| d | d  dd| d< d| v r+d| v r+d	| vr+d | d	< td
i | S )Nr9   r   zThe "frame_shift" field was not found in a feature manifest; we'll try to infer it for now, but you should recreate the manifests.r{   r}      rv   r   r   r.   )warningswarnr   rr   r   r.   r.   r/   r   0  s   
zFeatures.from_dictc                 C   sv   d| j  d| j d| j d| j d| j d| j d| j d| j d	| j d
t	| j
tr.| j
nd d| j d| j dS )NzFeatures(type='z', num_frames=z, num_features=z, frame_shift=z, sampling_rate=z, start=z, duration=z, storage_type='z', storage_path='z', storage_key='z<binary-data>z', recording_id='z', channels=))r|   r}   r~   r9   r2   rz   r{   r   r   rN   r   r   r   r   r8   r.   r.   r/   __repr__F  s4   	
zFeatures.__repr__)NNr   )r   NF) r   r   r   r   r   __annotations__r   r    r   bytesr   r   r   r   r   r   boolr   r   rh   r   r   r   r   r   r   r   r   r   r   r   r   r.   r.   r.   r/   rr     sd   
 
(
 rr   c                   @   sF  e Zd ZU dZdFdeee  ddfddZdd defdd	Z	e
deeeef ee f fd
dZedeee ef dd fddZeZedee dd fddZdee fddZdedd fddZ	dGdedededed  fddZ	dHdedededed  fd d!Zd"d# Z	dId$ee d%ee dd fd&d'Z	(	)		*dJd+ed,eeee f d-ed.ee d/edefd0d1ZdZ eeeee f  e!d2< d3d4 Z"	(	)	dKd+ed,eeee f d-ed.ee de#j$f
d5d6Z%d7e&dd fd8d9Z'	dFd:ee deee#j$f fd;d<Z(defd=d>Z)dee fd?d@Z*dAedefdBdCZ+defdDdEZ,dS )L
FeatureSeta8  
    Represents a feature manifest, and allows to read features for given recordings
    within particular channels and time ranges.
    It also keeps information about the feature extractor parameters used to obtain this set.
    When a given recording/time-range/channel is unavailable, raises a KeyError.
    NrE   r3   c                 C   s   t |g | _d S r7   )r&   rE   )r-   rE   r.   r.   r/   r0   a     zFeatureSet.__init__otherc                 C   s   | j |j kS r7   rF   )r-   r   r.   r.   r/   __eq__d  s   zFeatureSet.__eq__c                 C      | j S )z$Alias property for ``self.features``rF   r8   r.   r.   r/   r   g  s   zFeatureSet.datac                 C   s(   t | trtdd | D S tt| S )Nc                 S   s   g | ]}|qS r.   r.   rI   fr.   r.   r/   rK   o  s    z,FeatureSet.from_features.<locals>.<listcomp>)rN   r   r   r`   rF   r.   r.   r/   from_featuresl  s
   
zFeatureSet.from_featuresr   c                 C   s   t dd | D dS )Nc                 S   rV   r.   )rr   r   )rI   feature_datar.   r.   r/   rK   y  rX   z)FeatureSet.from_dicts.<locals>.<listcomp>rF   )r   r   r.   r.   r/   
from_dictsv  s   zFeatureSet.from_dictsc                 C   s   dd | D S )Nc                 s   s    | ]}|  V  qd S r7   )r   r   r.   r.   r/   rS   }  s    z&FeatureSet.to_dicts.<locals>.<genexpr>r.   r8   r.   r.   r/   to_dicts|  s   zFeatureSet.to_dictsr   c                       t  fdd| D S )Nc                 3   s    | ]}|  V  qd S r7   )r   r   r   r.   r/   rS     s    z.FeatureSet.with_path_prefix.<locals>.<genexpr>r   r   r   r.   r   r/   r     s   zFeatureSet.with_path_prefixF
num_splitsshuffle	drop_lastc                 C   s   dd t | |||dD S )aV  
        Split the :class:`~lhotse.FeatureSet` into ``num_splits`` pieces of equal size.

        :param num_splits: Requested number of splits.
        :param shuffle: Optionally shuffle the recordings order first.
        :param drop_last: determines how to handle splitting when ``len(seq)`` is not divisible
            by ``num_splits``. When ``False`` (default), the splits might have unequal lengths.
            When ``True``, it may discard the last element in some splits to ensure they are
            equally long.
        :return: A list of :class:`~lhotse.FeatureSet` pieces.
        c                 S   rV   r.   r   )rI   subsetr.   r.   r/   rK     s    z$FeatureSet.split.<locals>.<listcomp>)r   r   r   )r(   )r-   r   r   r   r.   r.   r/   split  s
   zFeatureSet.splitr   
output_dir
chunk_sizeprefixc                 C   s   t | |||dS )a;  
        Splits a manifest (either lazily or eagerly opened) into chunks, each
        with ``chunk_size`` items (except for the last one, typically).

        In order to be memory efficient, this implementation saves each chunk
        to disk in a ``.jsonl.gz`` format as the input manifest is sampled.

        .. note:: For lowest memory usage, use ``load_manifest_lazy`` to open the
            input manifest for this method.

        :param it: any iterable of Lhotse manifests.
        :param output_dir: directory where the split manifests are saved.
            Each manifest is saved at: ``{output_dir}/{prefix}.{split_idx}.jsonl.gz``
        :param chunk_size: the number of items in each chunk.
        :param prefix: the prefix of each manifest.
        :return: a list of lazily opened chunk manifests.
        )r   r   r   )r'   )r-   r   r   r   r.   r.   r/   
split_lazy  s   zFeatureSet.split_lazyc                 O   rA   )Nz&FeatureSet does not support shuffling.r   r   r.   r.   r/   r     r   zFeatureSet.shufflefirstlastc                 C   s|   t ||s	J d|dur|dksJ tt| |}|S |dur<|dks'J t| }||kr1| S tt| || |S dS )ai  
        Return a new ``FeatureSet`` according to the selected subset criterion.
        Only a single argument to ``subset`` is supported at this time.

        :param first: int, the number of first supervisions to keep.
        :param last: int, the number of last supervisions to keep.
        :return: a new ``FeatureSet`` with the subset results.
        z*subset() can handle only one non-None arg.Nr   )r$   r   
from_itemsr	   rd   )r-   r   r  outNr.   r.   r/   r     s    zFeatureSet.subsetr   r   皙?r   r   rz   r{   leewayc              
      s   |dur| |   | } fdd|D }|dur'fdd|D }t|}|sDtd| d  d d|du r>d	n| d
	|durUt|fddd}|S t|fddd}|S )a  
        Find and return a Features object that best satisfies the search criteria.
        Raise a KeyError when no such object is available.

        :param recording_id: str, requested recording ID.
        :param channel_id: int, requested channel.
        :param start: float, requested start time in seconds for the feature chunk.
        :param duration: optional float, requested duration in seconds for the feature chunk.
            By default, return everything from the start.
        :param leeway: float, controls how strictly we have to match the requested start and duration criteria.
            It is necessary to keep a small positive value here (default 0.05s), as there might be differences between
            the duration of recording/supervision segment, and the duration of features. The latter one is constrained
            to be a multiple of frame_shift, while the former can be arbitrary.
        :return: a Features object satisfying the search criteria.
        Nc                 3   sB    | ]}|j  kr|j   kr|j k rn n|V  qd S r7   )r   rz   r   r   )r   r  rz   r.   r/   rS     s    .z"FeatureSet.find.<locals>.<genexpr>c                 3   s"    | ]}|j   kr|V  qd S r7   )r   r   )r   r  r.   r/   rS     s     z%No features available for recording 'z', channel z in time range [zs, r   zs]c                    s   | j  d  | j d  S N   )rz   r   r   )r   rz   r.   r/   <lambda>  s    z!FeatureSet.find.<locals>.<lambda>)keyc                    s    | j  d S r  rz   r	  r  r.   r/   r
     s    ) _index_by_recording_id_and_cacher`   KeyErrormin)r-   r   r   rz   r{   r  
candidatesfeature_infor.   )r   r   r  rz   r/   find  s,   zFeatureSet.find_features_by_recording_idc                 C   s,   | j d u rddlm} |dd | | _ | j S )Nr   )groupbyc                 S   r   r7   )r   )featr.   r.   r/   r
    s    z=FeatureSet._index_by_recording_id_and_cache.<locals>.<lambda>)r  cytoolzr  )r-   r  r.   r.   r/   r    s   
z+FeatureSet._index_by_recording_id_and_cachec                 C   s$   | j ||||d}|j||d}|S )z
        Find a Features object that best satisfies the search criteria and load the features as a numpy ndarray.
        Raise a KeyError when no such object is available.
        )r   r   rz   r{   r   )r  r   )r-   r   r   rz   r{   r  rE   r.   r.   r/   r     s   zFeatureSet.loadr   c                    r   )z
        For each manifest in this FeatureSet,
        read the referenced feature array and save it using ``writer``.
        Returns a copy of the manifest with updated fields related to the feature storage.
        c                 3   s    | ]	}|j  d V  qdS )r   N)r   r   r  r.   r/   rS   *  rT   z(FeatureSet.copy_feats.<locals>.<genexpr>r   )r-   r   r.   r  r/   r   $  s   zFeatureSet.copy_featsr   c                 C   s   t | |dS )a  
        Compute the global means and standard deviations for each feature bin in the manifest.
        It follows the implementation in scikit-learn:
        https://github.com/scikit-learn/scikit-learn/blob/0fb307bf39bbdacd6ed713c00724f8f871d60370/sklearn/utils/extmath.py#L715
        which follows the paper:
        "Algorithms for computing the sample variance: analysis and recommendations", by Chan, Golub, and LeVeque.

        :param storage_path: an optional path to a file where the stats will be stored with pickle.
        :return a dict of ``{'norm_means': np.ndarray, 'norm_stds': np.ndarray}`` with the
            shape of the arrays equal to the number of feature bins in this manifest.
        )feature_manifestsr   )compute_global_stats)r-   r   r.   r.   r/   r  ,  s   zFeatureSet.compute_global_statsc                 C   s   dt |  dS )NzFeatureSet(len=r   )rd   r8   r.   r.   r/   r   <  r   zFeatureSet.__repr__c                 C   r   r7   )iterrE   r8   r.   r.   r/   __iter__?     
zFeatureSet.__iter__ic                 C   s
   | j | S r7   rF   )r-   r  r.   r.   r/   __getitem__B  r  zFeatureSet.__getitem__c                 C   r   r7   )rd   rE   r8   r.   r.   r/   __len__E  r  zFeatureSet.__len__r7   )FF)r   )NN)r   r   Nr  )r   r   N)-r   r   r   r   r   r   rr   r0   r   r   r   r   r   r   r   r   r   r   r   r  r   r   r   r   r   r   r   r   r   r   r    r  r  r   r  rh   r   r   r   r   r  r   r  r  r  r.   r.   r.   r/   r   Y  s   
 $ 



8
	
r   c                	   @   sl   e Zd ZdZ	ddededee fddZ		dd	e	e
 d
ee dedefddZde
dee fddZdS )FeatureSetBuildera  
    An extended constructor for the FeatureSet. Think of it as a class wrapper for a feature extraction script.
    It consumes an iterable of Recordings, extracts the features specified by the FeatureExtractor config,
    and saves stores them on the disk.

    Eventually, we plan to extend it with the capability to extract only the features in
    specified regions of recordings and to perform some time-domain data augmentation.
    Nfeature_extractorrn   rq   c                 C   s   || _ || _|| _d S r7   )r!  rn   rq   )r-   r!  rn   rq   r.   r.   r/   r0   S  s   
zFeatureSetBuilder.__init__rL   
recordingsoutput_manifestnum_jobsr3   c              
   C   s   |dkrt ttt| j|t|dd}n-t|t	
dd}t tt|| j|t|dd}W d    n1 s@w   Y  |d urN|| |S )NrL   zExtracting and storing features)totaldescspawn)
mp_contextz+Extracting and storing features in parallel)r   r   r   r   from_iterablemap_process_and_store_recordingrd   r   multiprocessingget_contextto_file)r-   r"  r#  r$  feature_setexr.   r.   r/   process_and_store_recordings]  s6   



z.FeatureSetBuilder.process_and_store_recordingsr   c              	   C   s2   g }|j D ]}|| jj|| j|| jd q|S )N)r   rn   r   rq   )r   re   r!  r   rn   rq   )r-   r   resultsrp   r.   r.   r/   r+    s   
z.FeatureSetBuilder._process_and_store_recordingr7   )NrL   )r   r   r   r   r*   r   r   r   r0   r   r   r   r   r   r1  r   rr   r+  r.   r.   r.   r/   r   I  s4    

"r   r   rn   c                 C   s   t t }||| }|S )a  
    Store ``feats`` array on disk, using ``lilcom`` compression by default.

    :param feats: a numpy ndarray containing features.
    :param storage: a ``FeaturesWriter`` object to use for array storage.
    :return: a path to the file containing the stored array.
    )r   r)   r   )r   rn   feats_idr   r.   r.   r/   r     s   
r   r  r   c                 C   s   t | } t| }t|jd}t|g| D ]}| tj}|	| q|
 }|durHt|d}t|| W d   |S 1 sCw   Y  |S )a_  
    Compute the global means and standard deviations for each feature bin in the manifest.
    It performs only a single pass over the data and iteratively updates the estimate of the
    means and variances.

    We follow the implementation in scikit-learn:
    https://github.com/scikit-learn/scikit-learn/blob/0fb307bf39bbdacd6ed713c00724f8f871d60370/sklearn/utils/extmath.py#L715
    which follows the paper:
    "Algorithms for computing the sample variance: analysis and recommendations", by Chan, Golub, and LeVeque.

    :param feature_manifests: an iterable of ``Features`` objects.
    :param storage_path: an optional path to a file where the stats will be stored with pickle.
    :return a dict of ``{'norm_means': np.ndarray, 'norm_stds': np.ndarray}`` with the
        shape of the arrays equal to the number of feature bins in this manifest.
    )r:   Nwb)r  nextStatsAccumulatorr~   r   r   astyperh   float64r   getopenpickledump)r  r   r   statsrE   r   mvnr   r.   r.   r/   r    s   
r  c                   @   sn   e Zd ZdefddZdejddfddZedejfd	d
Z	edejfddZ
deeejf fddZdS )r6  r:   c                 C   s2   t j|ft jd| _t j|ft jd| _d| _d S )N)r   r   )rh   zerosr8  	total_sumtotal_unnorm_vartotal_frames)r-   r:   r.   r.   r/   r0     s   
zStatsAccumulator.__init__r   r3   Nc                 C   s   t jdddT |t j}|jdd}| j| }|jd }| j| }| j| }t j|dd| }| jdkrJ| j	| || | j| | d   | _	n|| _	|| _|| _W d    d S 1 s^w   Y  d S )Nignore)divideinvalidr   r^   r  )
rh   errstater7  r8  sumr@  rZ   rB  varrA  )r-   r   curr_sumupdated_total_sumcurr_framesupdated_total_framestotal_over_curr_framescurr_unnorm_varr.   r.   r/   r     s.   




"zStatsAccumulator.updatec                 C   s   | j | j S r7   )r@  rB  r8   r.   r.   r/   
norm_means  r   zStatsAccumulator.norm_meansc                 C   s   t | j| j S r7   )rh   sqrtrA  rB  r8   r.   r.   r/   	norm_stds  s   zStatsAccumulator.norm_stdsc                 C   s   | j | jdS )NrO  rQ  rR  r8   r.   r.   r/   r9    s   zStatsAccumulator.get)r   r   r   r   r0   rh   r   r   r   rO  rQ  r   r   r9  r.   r.   r.   r/   r6    s    r6  r7   )Kloggingr,  r;  r   abcr   r   concurrent.futures.processr   dataclassesr   r   r   	itertoolsr   r	   mathr
   pathlibr   typingr   r   r   r   r   r   r   r   rU   rh   rO   	tqdm.autor   lhotse.audior   lhotse.augmentationr   r   r   r   r   lhotse.lazyr   lhotse.serializationr   r   r   r   lhotse.utilsr   r    r!   r"   r#   r$   r%   r&   r'   r(   r)   r*   r   r   r   r   r   r   rr   r   r   r   r   r  r6  r.   r.   r.   r/   <module>   sd    (4  :

 : qG

!