o
    }oi<                    @   s  d dl Z d dlZd dlZd dlmZ d dlmZmZmZm	Z	m
Z
mZmZ d dlZd dlZd dlmZmZ d dlmZ d dlmZmZ G dd de jZG d	d
 d
eZG dd deZG dd deZG dd deZG dd deZG dd deZ G dd de!Z"G dd deZ#G dd de!Z$G dd de$Z%G dd deZ&G dd  d e&Z'G d!d" d"eZ(G d#d$ d$e(Z)G d%d& d&eZ*G d'd( d(e*Z+G d)d* d*eZ,G d+d, d,e,Z-G d-d. d.eZ.G d/d0 d0e.Z/G d1d2 d2eZ0G d3d4 d4e0Z1G d5d6 d6eZ2G d7d8 d8e2Z3dS )9    N)combinations)AnyCallableDictIterableListOptionalUnion)manifestparsers)get_full_path)logginglogging_modec                   @   s   e Zd ZdZdZdS )_Collectionz%List of parsed and preprocessed data.N)__name__
__module____qualname____doc__OUTPUT_TYPE r   r   k/home/ubuntu/.local/lib/python3.10/site-packages/nemo/collections/common/parts/preprocessing/collections.pyr      s    r   c                       s<   e Zd ZdZeddZdee de	j
f fddZ  ZS )TextzCSimple list of preprocessed text entries, result in list of tokens.
TextEntitytokenstextsparserc                    sR   g | j }}|D ]}||}|du rtd| q||| qt | dS )zInstantiates text manifest and do the preprocessing step.

        Args:
            texts: List of raw texts strings.
            parser: Instance of `CharParser` to convert string to tokens.
        NzFail to parse '%s' text line.)r   r   warningappendsuper__init__)selfr   r   dataoutput_typetextr   	__class__r   r   r   (   s   zText.__init__)r   r   r   r   collections
namedtupler   r   strr   
CharParserr   __classcell__r   r   r$   r   r   #   s    $r   c                       sF   e Zd ZdZdedejf fddZedede	e fddZ
  ZS )	FromFileTextz6Another form of texts manifest with reading from file.filer   c                    s   |  |}t || dS )zInstantiates text manifest and do the preprocessing step.

        Args:
            file: File path to read from.
            parser: Instance of `CharParser` to convert string to tokens.
        N)_FromFileText__parse_textsr   r   )r    r,   r   r   r$   r   r   r   @   s   
zFromFileText.__init__returnc                 C   s   t j| s
tdt j| \}}|dkr!t| d  }|S |dkr3tdd t	
| D }|S t| d}| }W d    |S 1 sHw   Y  |S )Nz$Provided texts file does not exists!z.csv
transcriptz.jsonc                 s   s    | ]}|d  V  qdS )r#   Nr   ).0itemr   r   r   	<genexpr>U   s    z-FromFileText.__parse_texts.<locals>.<genexpr>r)ospathexists
ValueErrorsplitextpdread_csvtolistlistr
   	item_iteropen	readlines)r,   _extr   fr   r   r   __parse_textsL   s   

zFromFileText.__parse_texts)r   r   r   r   r(   r   r)   r   staticmethodr   r-   r*   r   r   r$   r   r+   =   s
     r+   c                          e Zd ZdZejdddZ					ddee dee	 d	ee
 d
ee	 dee	 deee  deee  deee  deee	  dejdee
 dee
 dee dedef fddZ  ZS )	AudioText@List of audio-transcript text correspondence with preprocessing.AudioTextEntityzGid audio_file duration text_tokens offset text_raw speaker orig_sr langtypenamefield_namesNFidsaudio_files	durationsr   offsetsspeakersorig_sampling_ratestoken_labelslangsr   min_durationmax_duration
max_numberdo_sort_by_durationindex_by_file_idc           !         s"  | j }d}g dddf\}}}}|ri | _t|||||||||		D ]\	}}}}}}}}}|du r1d}|durF|durF||k rF||7 }|d7 }q |dur[|dur[||kr[||7 }|d7 }q |durb|}n3|dkrt|
dr|
jrt|tr|dur}|
||}ntd	|
|}ng }|du r||7 }|d7 }q ||dur|nd7 }|||||||||||	 |rt	j
t	j
|\}} || jvrg | j|< | j| t|d  t||kr nq |r|rtd
 n|jdd d tdt||d  td||d  |s	td t | dS )a  Instantiates audio-text manifest with filters and preprocessing.

        Args:
            ids: List of examples positions.
            audio_files: List of audio files.
            durations: List of float durations.
            texts: List of raw text transcripts.
            offsets: List of duration offsets or None.
            speakers: List of optional speakers ids.
            orig_sampling_rates: List of original sampling rates of audio files.
            langs: List of language ids, one for eadh sample, or None.
            parser: Instance of `CharParser` to convert string to tokens.
            min_duration: Minimum duration to keep entry with (default: None).
            max_duration: Maximum duration to keep entry with (default: None).
            max_number: Maximum number of samples to collect.
            do_sort_by_duration: True if sort samples list by duration. Not compatible with index_by_file_id.
            index_by_file_id: If True, saves a mapping from filename base (ID) to index in data.
        T        r   NF    is_aggregate9lang required in manifest when using aggregate tokenizersLTried to sort dataset by duration, but cannot since index_by_file_id is set.c                 S      | j S Ndurationentityr   r   r   <lambda>       z$AudioText.__init__.<locals>.<lambda>key1Dataset loaded with %d files totalling %.2f hours  +%d files were filtered totalling %.2f hourszRNot all audios have duration information, the total number of hours is inaccurate.r   mappingziphasattrr\   
isinstancer(   r7   r   r4   r5   r8   basenamelenr   r   sortinfor   r   )!r    rL   rM   rN   r   rO   rP   rQ   rR   rS   r   rT   rU   rV   rW   rX   r"   all_has_durationr!   duration_filterednum_filteredtotal_durationid_
audio_filerb   offsetr#   speakerorig_srlangtext_tokensfile_idr@   r$   r   r   r   e   sd   %



zAudioText.__init__NNNFFr   r   r   r   r&   r'   r   r   intr(   floatr   r   r)   boolr   r*   r   r   r$   r   rF   ]   T    


	

rF   c                       rE   )	VideoTextz@List of video-transcript text correspondence with preprocessing.rH   zGid video_file duration text_tokens offset text_raw speaker orig_sr langrI   NFrL   video_filesrN   r   rO   rP   rQ   rR   rS   r   rT   rU   rV   rW   rX   c                     s  | j }g dddf\}}}}|ri | _t|||||||||		D ]\	}}}}}}}}}|dur:||k r:||7 }|d7 }q|durK||krK||7 }|d7 }q|durR|}n3|dkrvt|
drq|
jrqt|trq|durm|
||}ntd|
|}ng }|du r||7 }|d7 }q||7 }|||||||||||	 |rt	j
t	j
|\}}|| jvrg | j|< | j| t|d  t||kr nq|r|rtd n|jd	d
 d tdt||d  td||d  t | dS )a  Instantiates video-text manifest with filters and preprocessing.

        Args:
            ids: List of examples positions.
            video_files: List of video files.
            durations: List of float durations.
            texts: List of raw text transcripts.
            offsets: List of duration offsets or None.
            speakers: List of optional speakers ids.
            orig_sampling_rates: List of original sampling rates of audio files.
            langs: List of language ids, one for eadh sample, or None.
            parser: Instance of `CharParser` to convert string to tokens.
            min_duration: Minimum duration to keep entry with (default: None).
            max_duration: Maximum duration to keep entry with (default: None).
            max_number: Maximum number of samples to collect.
            do_sort_by_duration: True if sort samples list by duration. Not compatible with index_by_file_id.
            index_by_file_id: If True, saves a mapping from filename base (ID) to index in data.
        rY   r   NrZ   r[   r\   r]   r^   c                 S   r_   r`   ra   rc   r   r   r   re   4  rf   z$VideoText.__init__.<locals>.<lambda>rg   ri   rj   rk   rl   ) r    rL   r   rN   r   rO   rP   rQ   rR   rS   r   rT   rU   rV   rW   rX   r"   r!   rv   rw   rx   ry   
video_filerb   r{   r#   r|   r}   r~   r   r   r@   r$   r   r   r      sZ   %


zVideoText.__init__r   r   r   r   r$   r   r      r   r   c                       s   e Zd ZdZejdddZ								ddeee	e f de
e d	e
e d
e
e de
e dedededef fddZdd Z  ZS )InstructionTuningAudioText5`AudioText` collector from asr structured json files.InstructionTuningTextzjid context context_type context_duration question question_type answer answer_type answer_duration speakerrI   NFmanifests_filesrT   rU   max_seq_lengthrV   rW   rX   decoder_only_modeluse_phoneme_tokenizerc
           !         sX  | j }
|	| _g dddf\}}}}|ri | _t|D ]}|d }|d }|d }|d }|d }|d }|d	 }|d
 }|d }|d }|d }|du rNdn|}|dkrV|n|}|duri||k ri||7 }|d7 }q|durz||krz||7 }|d7 }qt| |||d d}| ||d}| |||}|r|| | |ks|| |ks||kr||7 }|d7 }q||7 }||
||||||||||
 |rtj	
tj	|\}} d|v r|dd }|| jvrg | j|< | j| t|d  t||kr nq|r|rtd n|jdd d tdt||d  td||d  t | dS )aH  Parse lists of audio files, durations and transcripts texts.
        Args:
            manifests_files: Either single string file or list of such -
                manifests to yield items from.
            *args: Args to pass to `AudioText` constructor.
            **kwargs: Kwargs to pass to `AudioText` constructor.
        rY   r   idcontextcontext_durationcontext_typequestionquestion_typer|   answeranswer_durationanswer_typetaskNttsrZ   g333333?i  z.contextir^   c                 S   r_   r`   ra   rc   r   r   r   re     rf   z5InstructionTuningAudioText.__init__.<locals>.<lambda>rg   ri   rj   rk   )r   r   rm   r
   r=   min_get_lenr   r4   r5   r8   rq   rr   r   r   rs   rt   r   r   )!r    r   rT   rU   r   rV   rW   rX   r   r   r"   r!   rv   rw   rx   r1   r   r   r   r   r   r   r|   r   r   r   r   rb   approx_context_lenapprox_question_lenapprox_answer_lenr   r@   r$   r   r   r   G  s   

z#InstructionTuningAudioText.__init__c                 C   s\   |dkr|d S |dkr| j rt|S t|dd S |dkr&t|d S td| d)	NSPEECHL   TEXT    TOKENSzUnknown field type .)r   rr   splitr7   )r    
field_typer!   duration_datar   r   r   r     s   z#InstructionTuningAudioText._get_len)NNNNFFFF)r   r   r   r   r&   r'   r   r	   r(   r   r   r   r   r   r   r   r*   r   r   r$   r   r   <  sD    	
gr   c                       s<   e Zd ZdZddeeee f dee f fddZ	  Z
S )ASRAudioTextr   Nr   
parse_funcc              
      s   g g g g g f\}}}}}	g g g g f\}
}}}t j||dD ]A}||d  ||d  ||d  ||d  |	|d  |
|d  ||d  ||d	  ||d
  qt j|||||	|
|||g	|R i | dS )I  Parse lists of audio files, durations and transcripts texts.

        Args:
            manifests_files: Either single string file or list of such -
                manifests to yield items from.
            *args: Args to pass to `AudioText` constructor.
            **kwargs: Kwargs to pass to `AudioText` constructor.
        r   r   rz   rb   r#   r{   r|   r}   rR   r~   Nr
   r=   r   r   r   )r    r   r   argskwargsrL   rM   rN   r   rO   rP   orig_srsrR   rS   r1   r$   r   r   r     s<   	
zASRAudioText.__init__r`   )r   r   r   r   r	   r(   r   r   r   r   r*   r   r   r$   r   r     s    0r   c                   @   s   e Zd ZdZdddZdS )SpeechLLMAudioTextEntityz(Class for SpeechLLM dataloader instance.r.   Nc
           
      C   s:   || _ || _|| _|| _|| _|| _|| _|| _|	| _dS )zCInitialize the AudioTextEntity for a SpeechLLM dataloader instance.N)	r   rz   rb   r   r   r{   r|   r}   r~   )
r    sidrz   rb   r   r   r{   r|   r}   r~   r   r   r   r     s   
z!SpeechLLMAudioTextEntity.__init__)r.   N)r   r   r   r   r   r   r   r   r   r     s    r   c                       2   e Zd ZdZdeeee f f fddZ  ZS )ASRVideoTextz4`VideoText` collector from cv structured json files.r   c              
      s   g g g g g f\}}}}}g g g g f\}	}
}}t |D ]A}||d  ||d  ||d  ||d  ||d  |	|d  |
|d  ||d  ||d	  qt j||||||	|
||g	|R i | d
S )aI  Parse lists of video files, durations and transcripts texts.

        Args:
            manifests_files: Either single string file or list of such -
                manifests to yield items from.
            *args: Args to pass to `VideoText` constructor.
            **kwargs: Kwargs to pass to `VideoText` constructor.
        r   r   rb   r#   r{   r|   r}   rR   r~   Nr   )r    r   r   r   rL   r   rN   r   rO   rP   r   rR   rS   r1   r$   r   r   r     s<   
zASRVideoText.__init__	r   r   r   r   r	   r(   r   r   r*   r   r   r$   r   r         &r   c                    @   s   e Zd ZdZ						ddee dee dee dee dee d	ee d
eee  deee  deee  dee dee dee de	de	dee fddZ
dd Zdd ZdS )SpeechLLMAudioTextzList of audio-transcript text correspondence with preprocessing.

    All of the audio, duration, context, answer are optional.
    If answer is not present, text is treated as the answer.
    NFrL   rM   rN   context_listanswersrO   rP   rQ   rS   rT   rU   rV   rW   rX   max_num_samplesc           #         s  g dddf\ }}}|ri | _ t|||||||||		D ]\	}}}}}}}}}|durqt|tr3t|n|}t|tr>t|n|}t|trIt|n|}|
dur\||
k r\||7 }|d7 }q|durm||krm||7 }|d7 }q||7 }|du r~||7 }|d7 }q t|||||||||	 |r|durt	j
t	j
|\}} || j vrg | j |< | j | t d  t |kr nq|dur|s|t krtdt  d| d  d|  nAtdt  d| d  |t    |t  }! fd	d
tjjt |!ddD }" |" n|dur"|r"td |r6|r.td n jdd d tdt |d  td||d   | _dS )a  Instantiates audio-context-answer manifest with filters and preprocessing.

        Args:
            ids: List of examples positions.
            audio_files: List of audio files.
            durations: List of float durations.
            context_list: List of raw text transcripts.
            answers: List of raw text transcripts.
            offsets: List of duration offsets or None.
            speakers: List of optional speakers ids.
            orig_sampling_rates: List of original sampling rates of audio files.
            langs: List of language ids, one for eadh sample, or None.
            min_duration: Minimum duration to keep entry with (default: None).
            max_duration: Maximum duration to keep entry with (default: None).
            max_number: Maximum number of samples to collect.
            do_sort_by_duration: True if sort samples list by duration. Not compatible with index_by_file_id.
            index_by_file_id: If True, saves a mapping from filename base (ID) to index in data.
        rY   r   NrZ   zSubsampling dataset from z to z sampleszOversampling dataset from c                    s   g | ]} | qS r   r   )r0   idxr!   r   r   
<listcomp>      z/SpeechLLMAudioText.__init__.<locals>.<listcomp>F)replacezXTried to subsample dataset by max_num_samples, but cannot since index_by_file_id is set.r^   c                 S   r_   r`   ra   rc   r   r   r   re     rf   z-SpeechLLMAudioText.__init__.<locals>.<lambda>rg   ri   rj   rk   )rm   rn   rp   r<   r   maxsumr   r   r4   r5   r8   rq   rr   r   rt   nprandomchoiceextendr   rs   r!   )#r    rL   rM   rN   r   r   rO   rP   rQ   rS   rT   rU   rV   rW   rX   r   rv   rw   rx   ry   rz   rb   r{   r   r   r|   r}   r~   curr_min_durcurr_max_durcurr_sum_durr   r@   res_numres_datar   r   r   r   )  sh   %

$

zSpeechLLMAudioText.__init__c                 C   s<   |dk s|t | jkrtdt | j d| d| j| S )Nr   zindex out of range [0,z), got z instead)rr   r!   r7   )r    r   r   r   r   __getitem__  s   
zSpeechLLMAudioText.__getitem__c                 C   s
   t | jS r`   )rr   r!   )r    r   r   r   __len__  s   
zSpeechLLMAudioText.__len__)NNNFFN)r   r   r   r   r   r   r(   r   r   r   r   r   r   r   r   r   r   r   "  sR    

	


fr   c                	       st   e Zd ZdZ			ddeeee f deeee ef  dedef fd	d
Zdedede	ee
f fddZ  ZS )SpeechLLMAudioTextCollectionz`SpeechLLMAudioText` collector from SpeechLLM json files.

    This collector also keeps backward compatibility with SpeechLLMAudioText.
    Nr   r   r   context_filecontext_key
answer_keyc              
      s  || _ || _g g g g g g f\}}}	}
}}g g g }}}|durht|tr*|dn|}g | _|D ]*}t|d}| D ]}| }|rK| j	| q=W d   n1 sVw   Y  q1t
d| d|  nd| _tj|| jdD ]A}|	|d  |	|d  |		|d	  |
	|d
  |	|d  |	|d  |	|d  |	|d  |	|d  qst j|||	|
|||||g	|R i | dS )r   N,r3   zUse random text context from z for r   r   rz   rb   r   r   r{   r|   r}   r~   )r   r   rp   r(   r   r   r>   r?   stripr   r   rt   r
   r=   )_SpeechLLMAudioTextCollection__parse_itemr   r   )r    r   r   r   r   r   r   rL   rM   rN   r   r   rO   rP   r   rS   question_file_listfilepathrB   liner1   r$   r   r   r     sh   



z%SpeechLLMAudioTextCollection.__init__r   manifest_filer.   c                 C   s  t |}d|v r|d|d< nd|v r|d|d< nd|vr%d |d< |d d ur6tj|d |d|d< d|vr>d |d< | j|v rL|| j|d< n3d|v rX|d|d< n'd|v r{t|dd	}| |d< W d    n1 suw   Y  nd
|d< | j|v r|| j|d< nUd|v rt|dd	}| |d< W d    n1 sw   Y  n2| j	d urt
j| j	 }||d< nd|v rtjd| j d| tjd |d|d< nd|d< t|d |d t|d t|d |dd |dd |dd |dd d}|S )Naudio_filenamerz   audio_filepathrz   r   rb   r   r#   text_filepathr3   nar   context_filepathr   z	Neither `zC` is found nor`context_file` is set, but found `question` in item: )modezwhat does this audio meanr{   r|   orig_sample_rater~   )rz   rb   r   r   r{   r|   r}   r~   )jsonloadspopr
   r   r   r>   readr   r   r   r   r   r   r   r   r   ONCEdictr(   get)r    r   r   r1   rB   r   r   r   r   __parse_item  sf   












z)SpeechLLMAudioTextCollection.__parse_item)Nr   r   )r   r   r   r   r	   r(   r   r   r   r   r   r   r*   r   r   r$   r   r     s    &Cr   c                       s   e Zd ZdZejdddZ					ddee dee	 d	ee
eef  d
eee	  dee	 dee	 dee dedef fddZ  ZS )SpeechLabelz6List of audio-label correspondence with preprocessing.SpeechLabelEntityz audio_file duration label offsetrI   NFrM   rN   labelsrO   rT   rU   rV   rW   rX   c
                    s  |	ri | _ | j}
g d}}d}d}t||||D ][\}}}}|dur/|dur/||k r/||7 }q|dur@|dur@||kr@||7 }q||
|||| |durT||7 }d}|	rktjtj|\}}t|d | j |< t||krs nq|r|	r~t	
d n|jdd d	 |rt	d
t| d nt	d|d dd t	dt| d|d dd tttdd || _t	dt|t| j t | dS )a  Instantiates audio-label manifest with filters and preprocessing.

        Args:
            audio_files: List of audio files.
            durations: List of float durations.
            labels: List of labels.
            offsets: List of offsets or None.
            min_duration: Minimum duration to keep entry with (default: None).
            max_duration: Maximum duration to keep entry with (default: None).
            max_number: Maximum number of samples to collect.
            do_sort_by_duration: True if sort samples list by duration.
            index_by_file_id: If True, saves a mapping from filename base (ID) to index in data.
        rY   TNFrZ   r^   c                 S   r_   r`   ra   rc   r   r   r   re   j  rf   z&SpeechLabel.__init__.<locals>.<lambda>rg   Dataset loaded with z( items. The durations were not provided.,Filtered duration for loading collection is rj    .2f hours.z!Dataset successfully loaded with z4 items and total duration provided from manifest is c                 S   r_   r`   )label)xr   r   r   re   u  rf   z+# {} files loaded accounting to # {} labels)rm   r   rn   r   r4   r5   r8   rq   rr   r   r   rs   rt   sortedsetmapuniq_labelsformatr   r   )r    rM   rN   r   rO   rT   rU   rV   rW   rX   r"   r!   rv   rx   duration_undefinedrz   rb   commandr{   r   r@   r$   r   r   r   -  sL   
zSpeechLabel.__init__r   )r   r   r   r   r&   r'   r   r   r(   r   r	   r   r   r   r   r*   r   r   r$   r   r   %  s<    
	
r   c                       sX   e Zd ZdZ			ddeeee f f fddZdeded	eee	f fd
dZ
  ZS )ASRSpeechLabelz3`SpeechLabel` collector from structured json files.FNr   c                    s   g g g g f\}}}	}
g }t j|| jdD ]<}||d  ||d  |s6|d }|s0| n||}n	t|d }|g}|	| |
|d  || q|rYt|| _	t
 j|||	|
g|R i | dS )a  Parse lists of audio files, durations and transcripts texts.

        Args:
            manifests_files: Either single string file or list of such -
                manifests to yield items from.
            is_regression_task: It's a regression task.
            cal_labels_occurrence: whether to calculate occurence of labels.
            delimiter: separator for labels strings.
            *args: Args to pass to `SpeechLabel` constructor.
            **kwargs: Kwargs to pass to `SpeechLabel` constructor.
        r   rz   rb   r   r{   N)r
   r=   _ASRSpeechLabel__parse_itemr   r   r   r   r&   Counterlabels_occurrencer   r   )r    r   is_regression_taskcal_labels_occurrence	delimiterr   r   rM   rN   r   rO   
all_labelsr1   r   
label_listr$   r   r   r   ~  s    
$zASRSpeechLabel.__init__r   r   r.   c                 C   s   t |}d|v r|d|d< nd|v r|d|d< ntd| dtj|d |d|d< d|vr<td| dd	|v rH|d	|d
< nd|v rT|d|d
< nd
|v rYntd| dt|d |d |d
 |dd d}|S )Nr   rz   r   /Manifest file has invalid json line structure:  without proper audio file key.r   rb    without proper duration key.r   r   targetz without proper label key.r{   )rz   rb   r   r{   )r   r   r   r7   r
   r   r   r   r    r   r   r1   r   r   r   r     s.   

zASRSpeechLabel.__parse_itemFFN)r   r   r   r   r	   r(   r   r   r   r   r   r*   r   r   r$   r   r   {  s    &(r   c                	       sZ   e Zd ZdZejdddZ		ddee dee d	e	e
 d
ef fddZdd Z  ZS )FeatureSequenceLabelzDList of feature sequence of label correspondence with preprocessing.FeatureSequenceLabelEntityzfeature_file seq_labelrI   NFfeature_files
seq_labelsrV   rX   c                    s   | j }g d}}t | _|ri | _t||D ]B\}}	| |	\}
}||||
 |  j|O  _|
du r9|d7 }q|rPtj	tj
|\}}t|d | j|< t||krX nqtdt| dt| j d t | dS )ai  Instantiates feature-SequenceLabel manifest with filters and preprocessing.

        Args:
            feature_files: List of feature files.
            seq_labels: List of sequences of labels.
            max_number: Maximum number of samples to collect.
            index_by_file_id: If True, saves a mapping from filename base (ID) to index in data.
        rY   NrZ   z# z files loaded including # z unique labels)r   r   r   rm   rn   relative_speaker_parserr   r4   r5   r8   rq   rr   r   rt   r   r   )r    r  r  rV   rX   r"   r!   rw   feature_file	seq_labellabel_tokensuniq_labels_in_seqr   r@   r$   r   r   r     s,   "zFeatureSequenceLabel.__init__c                 C   s\   |  }t }g }|D ]}||v r|| }nt|}|||< || qt| }||fS )aD  Convert sequence of speaker labels to relative labels.
        Convert sequence of absolute speaker to sequence of relative speaker [E A C A E E C] -> [0 1 2 1 0 0 2]
        In this seq of label , if label do not appear before, assign new relative labels len(pos);
        else reuse previous assigned relative labels.

        Args:
            seq_label (str): A string of a sequence of labels.

        Return:
            relative_seq_label (List) : A list of relative sequence of labels
            unique_labels_in_seq (Set): A set of unique labels in the sequence
        )r   r   rr   r   r   keys)r    r  seqconversion_dictrelative_seq_labelseg	convertedunique_labels_in_seqr   r   r   r    s   
z,FeatureSequenceLabel.relative_speaker_parserNF)r   r   r   r   r&   r'   r   r   r(   r   r   r   r   r  r*   r   r   r$   r   r	    s$    	0r	  c                       sb   e Zd ZdZ		ddeeee f dee de	f fddZ
d	ed
edeeef fddZ  ZS )ASRFeatureSequenceLabelz@`FeatureSequenceLabel` collector from asr structured json files.NFr   rV   rX   c                    sR   g g }}t j|| jdD ]}||d  ||d  qt |||| dS )a  Parse lists of feature files and sequences of labels.

        Args:
            manifests_files:
                Either single string file or list of such manifests to yield items from.
            max_number:
                Maximum number of samples to collect; pass to `FeatureSequenceLabel` constructor.
            index_by_file_id:
                If True, saves a mapping from filename base (ID) to index in data;
                pass to `FeatureSequenceLabel` constructor.
        r   r  r  N)r
   r=   _parse_itemr   r   r   )r    r   rV   rX   r  r  r1   r$   r   r   r   #  s
   
z ASRFeatureSequenceLabel.__init__r   r   r.   c                 C   s   t |}d|v r|d|d< nd|v r|d|d< ntd| dtj|d |d< d|v r;|d|d< ntd| dt|d |d d}|S )	Nfeature_filenamer  feature_filepathr  z! without proper feature file key.r  z without proper seq_label key.)r  r  )r   r   r   r7   r4   r5   
expanduserr   r  r   r   r   r  <  s&   


z#ASRFeatureSequenceLabel._parse_itemr  )r   r   r   r   r	   r(   r   r   r   r   r   r   r   r  r*   r   r   r$   r   r     s    &r  c                       s   e Zd ZdZejdddZ			ddee dee	 d	ee d
ee	 dee
 dee dee
 dee
 dee dedef fddZ  ZS )DiarizationLabelzBList of diarization audio-label correspondence with preprocessing.DiarizationLabelEntityz^audio_file duration rttm_file offset target_spks sess_spk_dict clus_spk_digits rttm_spk_digitsrI   NFrM   rN   
rttm_filesrO   target_spks_listsess_spk_dictsclus_spk_listrttm_spk_listrV   rW   rX   c                    s  |ri | _ | j}g d}}t||||||||}|D ]=\}}}}}}}}|du r*d}|||||||||| |rOtjtj|\}}t|d | j |< t||	krW nq|
rj|rbt	
d n|jdd d t	d	| t	d
t| dt| d t | dS )aF  Instantiates audio-label manifest with filters and preprocessing.

        Args:
            audio_files:
                List of audio file paths.
            durations:
                List of float durations.
            rttm_files:
                List of RTTM files (Groundtruth diarization annotation file).
            offsets:
                List of offsets or None.
            target_spks (tuple):
                List of tuples containing the two indices of targeted speakers for evaluation.
                Example: [[(0, 1), (0, 2), (0, 3), (1, 2), (1, 3), (2, 3)], [(0, 1), (1, 2), (0, 2)], ...]
            sess_spk_dict (Dict):
                List of Mapping dictionaries between RTTM speakers and speaker labels in the clustering result.
            clus_spk_digits (tuple):
                List of Tuple containing all the speaker indices from the clustering result.
                Example: [(0, 1, 2, 3), (0, 1, 2), ...]
            rttm_spkr_digits (tuple):
                List of tuple containing all the speaker indices in the RTTM file.
                Example: (0, 1, 2), (0, 1), ...]
            max_number: Maximum number of samples to collect
            do_sort_by_duration: True if sort samples list by duration
            index_by_file_id: If True, saves a mapping from filename base (ID) to index in data.
        rY   Nr   rZ   r^   c                 S   r_   r`   ra   rc   r   r   r   re     rf   z+DiarizationLabel.__init__.<locals>.<lambda>rg   /Filtered duration for loading collection is %f.Total & session files loaded accounting to #  audio clips)rm   r   rn   r   r4   r5   r8   rq   rr   r   r   rs   rt   r   r   )r    rM   rN   r!  rO   r"  r#  r$  r%  rV   rW   rX   r"   r!   rv   zipped_itemsrz   rb   	rttm_filer{   target_spkssess_spk_dictclus_spk_digitsrttm_spk_digitsr   r@   r$   r   r   r   b  s^   )
 zDiarizationLabel.__init__NFF)r   r   r   r   r&   r'   r   r   r(   r   tupler   r   r   r   r   r*   r   r   r$   r   r  Z  s@    	
r  c                	       sx   e Zd ZdZ			ddeeee f dededef fdd	Z	ddedefddZ
dededeeef fddZ  ZS )DiarizationSpeechLabelzP`DiarizationLabel` diarization data sample collector from structured json files.   Fr   emb_dictclus_label_dictround_digitsc           #   
      sd  || _ || _|| _|| _|| _g g g g g g g g f\}	}
}}}}}}tj|| jdD ]}| jrrtt	t
dd ||d  D }|d rmt| j }| j| |d  d }dd | D }d	d | D }| jrl|}ned
}d
}n`g }t|d d#}| D ]}| j|dd\}}}|d||| qW d
   n1 sw   Y  t
 }|D ]}| d }|| qtt	|} dd t| D }t| }!|!}|!}t|dkrdg}"n
dd t|dD }"|"D ]2}!|	|d  |
|d  ||d  ||d  ||! || || || qq)t j|	|
||||||g|R i | d
S )a  
        Parse lists of audio files, durations, RTTM (Diarization annotation) files. Since the diarization
        model infers only two speakers, speaker pairs are generated from the total number of speakers in
        the session.

        Args:
            manifest_filepath (str):
                Path to input manifest JSON files.
            emb_dict (Dict):
                Dictionary containing cluster-average embeddings and speaker mapping information.
            clus_label_dict (Dict):
                Segment-level speaker labels from clustering results.
            round_digit (int):
                Number of digits to round.
            seq_eval_mode (bool):
                If True, F1 score will be calculated for each speaker pair during inference mode.
            pairwise_infer (bool):
                If True, this dataset class operates in inference mode. In inference mode, a set of
                speakers in the input audio is split into multiple pairs of speakers and speaker tuples
                (e.g., 3 speakers: [(0,1), (1,2), (0,2)]) and then fed into the diarization system to
                merge the individual results.
            *args: Args to pass to `SpeechLabel` constructor.
            **kwargs: Kwargs to pass to `SpeechLabel` constructor.
        r   c                 S   s   g | ]}|d  qS r3  r   r0   r   r   r   r   r     r   z3DiarizationSpeechLabel.__init__.<locals>.<listcomp>uniq_idr+  rm   c                 S   s$   i | ]\}}t |d d |qS )r@   r   r   r0   kvr   r   r   
<dictcomp>  s   $ z3DiarizationSpeechLabel.__init__.<locals>.<dictcomp>c                 S   s"   g | ]\}}t |d d qS )r@   rZ   r;  r<  r   r   r   r     s   " Nr3   r   )decimalsz{} {} {}r:  c                 S   s   i | ]\}}||qS r   r   )r0   rh   valr   r   r   r?    s    r3  )r   rZ   c                 S   s   g | ]}|qS r   r   r8  r   r   r   r     s    rz   rb   r{   )r6  r4  r5  seq_eval_modepairwise_inferr
   r=   (_DiarizationSpeechLabel__parse_item_rttmr   r<   r   r   r  itemsr>   r?   split_rttm_liner   r   r   add	enumerater1  rr   r   r   r   )#r    r   r4  r5  r6  rB  rC  r   r   rM   rN   r!  rO   r"  r#  r$  r%  r1   clus_speaker_digitsbase_scale_index_sess_spk_dictr-  rttm_speaker_digitsrttm_labelsrB   r   startendr|   speaker_set	rttm_linespk_strspeaker_listr,  spk_comb_listr$   r   r   r     s   #"



	

zDiarizationSpeechLabel.__init__r   rQ  r@  c                 C   sT   |   }tt|d |}tt|d |tt|d | }|d }|||fS )a  
        Convert a line in RTTM file to speaker label, start and end timestamps.

        An example line of `rttm_line`:
            SPEAKER abc_dev_0123 1 146.903 1.860 <NA> <NA> speaker543 <NA> <NA>

        The above example RTTM line contains the following information:
            session name: abc_dev_0123
            segment start time: 146.903
            segment duration: 1.860
            speaker label: speaker543

        Args:
            rttm_line (str):
                A line in RTTM formatted file containing offset and duration of each segment.
            decimals (int):
                Number of digits to be rounded.

        Returns:
            start (float):
                Start timestamp in floating point number.
            end (float):
                End timestamp in floating point number.
            speaker (str):
                speaker string in RTTM lines.
        r         )r   r   roundr   )r    rQ  r@  rttmrN  rO  r|   r   r   r   rF  7  s
   $
z&DiarizationSpeechLabel.split_rttm_liner   r   r.   c              	   C   s   t |}d|v r|d|d< nd|v r|d|d< ntd| dtj|d |d< tjtj|d d |d< d|vrKtd| d	t	|d |d |d |d
 |
ddd}|S )2Parse each rttm file and save it to in Dict formatr   rz   r   r  r  r   r9  rb   r  rttm_filepathr{   Nrz   r9  rb   r+  r{   )r   r   r   r7   r4   r5   r  r8   rq   r   r   r  r   r   r   __parse_item_rttmX  s(   

 
z(DiarizationSpeechLabel.__parse_item_rttm)r3  FF)r   )r   r   r   r   r	   r(   r   r   r   r   rF  r   rD  r*   r   r   r$   r   r2    s     o&!r2  c                       sp   e Zd ZdZejdddZ			ddee dee d	ee	 d
ee dee	 de
e dedef fddZ  ZS )EndtoEndDiarizationLabelzMList of end-to-end diarization audio-label correspondence with preprocessing.r   z,audio_file uniq_id duration rttm_file offsetrI   NFrM   uniq_idsrN   r!  rO   rV   rW   rX   c	              	      s"  |ri | _ | j}	g d}
}t|||||}|D ]I\}}}}}|du r$d}|
|	||||| |rXt|trCt|dkrCtd| tj	
tj	|\}}t|
d | j |< t|
|kr` nq|rs|rktd n|
jdd d	 td
| tdt|
 dt| d t |
 dS )a  
        Instantiates audio-label manifest with filters and preprocessing.

        This method initializes the EndtoEndDiarizationLabel object by processing the input data
        and applying optional filters and sorting.

        Args:
            audio_files (List[str]): List of audio file paths.
            uniq_ids (List[str]): List of unique identifiers for each audio file.
            durations (List[float]): List of float durations for each audio file.
            rttm_files (List[str]): List of RTTM path strings (Groundtruth diarization annotation file).
            offsets (List[float]): List of offsets or None for each audio file.
            max_number (Optional[int]): Maximum number of samples to collect. Defaults to None.
            do_sort_by_duration (bool): If True, sort samples list by duration. Defaults to False.
            index_by_file_id (bool): If True, saves a mapping from filename base (ID) to index in data.
                                     Defaults to False.

        rY   Nr   zEmpty audio file list: rZ   r^   c                 S   r_   r`   ra   rc   r   r   r   re     rf   z3EndtoEndDiarizationLabel.__init__.<locals>.<lambda>rg   r&  r'  r(  r)  )rm   r   rn   r   rp   r<   rr   r7   r4   r5   r8   rq   r   r   rs   rt   r   r   )r    rM   r^  rN   r!  rO   rV   rW   rX   r"   r!   rv   r*  rz   r9  rb   r+  r{   r   r@   r$   r   r   r   y  sT   


 z!EndtoEndDiarizationLabel.__init__r0  r   r   r   r   r&   r'   r   r   r(   r   r   r   r   r   r*   r   r   r$   r   r]  q  s4    	r]  c                       sX   e Zd ZdZ	ddeeee f def fddZdeded	e	ee
f fd
dZ  ZS )EndtoEndDiarizationSpeechLabelzPEnd-to-end speaker diarization data sample collector from structured json files.r3  r   r6  c                    s   || _ g g g g g f\}}}}}	tj|| jdD ]%}
||
d  ||
d  ||
d  ||
d  |	|
d  qt j|||||	g|R i | dS )a$  
        Parse lists of audio files, durations, RTTM (Diarization annotation) files.
        Since diarization model infers only two speakers, speaker pairs are generated
        from the total number of speakers in the session.

        Args:
            manifest_filepath (str):
                Path to input manifest json files.
            round_digit (int):
                Number of digits to be rounded.
            *args: Args to pass to `SpeechLabel` constructor.
            **kwargs: Kwargs to pass to `SpeechLabel` constructor.
        r   rz   r9  rb   r+  r{   N)r6  r
   r=   0_EndtoEndDiarizationSpeechLabel__parse_item_rttmr   r   r   )r    r   r6  r   r   rM   r^  rN   r!  rO   r1   r$   r   r   r     s0   
z'EndtoEndDiarizationSpeechLabel.__init__r   r   r.   c              	   C   s  t |}d|vs|d du rd|d< d|v r	 d|v r$|d|d< nd|v r0|d|d< ntd| dt|d trT|d D ]}tt||d	 qCt|d< n0t|d t	rwt|d |d	|d< t
j|d svtd
|d  ntd| d|d  dd|v rnd|v r|d|d< nd|v r|d|d< nd|d< |d durt|d |d	|d< t
j|d std|d  d|vrt
jt
j|d d |d< t|d t	std| dd|vrtd| dt|d |d |d |d |ddd}|S )rY  r{   Nr   rz   r   r   r  r  r   zAudio file not found: z" without proper audio file value: r   r+  rttm_filenamerZ  zRTTM file not found: r9  z without proper uniq_id key.rb   r  r[  )r   r   r   r7   rp   r<   audio_file_listr   r   r(   r4   r5   r6   FileNotFoundErrorr8   rq   r   r   )r    r   r   r1   single_audio_filer   r   r   r\    sj   


 
z0EndtoEndDiarizationSpeechLabel.__parse_item_rttmr7  )r   r   r   r   r	   r(   r   r   r   r   r   ra  r*   r   r   r$   r   r`    s    &/r`  c                       s~   e Zd ZdZejd ddZ				ddeee	e	f  dee
 dee
 d	ee	 d
ee
 dee
 dee def fddZ  ZS )Audioz8Prepare a list of all audio items, filtered by duration.z audio_files duration offset textrI   NFaudio_files_listduration_listoffset_list	text_listrT   rU   rV   rW   c	                    s   | j }	g d}
}d\}}t||||D ]>\}}}}|dur*||k r*||7 }|d7 }q|dur;||kr;||7 }|d7 }q||7 }|
|	|||| t|
|krQ nq|r\|
jdd d tdt|
|d	  td
||d	  t |
 dS )aL  Instantiantes an list of audio files.

        Args:
            audio_files_list: list of dictionaries with mapping from audio_key to audio_filepath
            duration_list: list of durations of input files
            offset_list: list of offsets
            text_list: list of texts
            min_duration: Minimum duration to keep entry with (default: None).
            max_duration: Maximum duration to keep entry with (default: None).
            max_number: Maximum number of samples to collect.
            do_sort_by_duration: True if sort samples list by duration.
        rY   )r   rY   NrZ   c                 S   r_   r`   ra   rc   r   r   r   re   x  rf   z Audio.__init__.<locals>.<lambda>rg   ri   rj   rk   )	r   rn   r   rr   rs   r   rt   r   r   )r    rg  rh  ri  rj  rT   rU   rV   rW   r"   r!   rx   rw   rv   rM   rb   r{   r#   r$   r   r   r   H  s,   
zAudio.__init__)NNNF)r   r   r   r   r&   r'   r   r   r   r(   r   r   r   r   r   r*   r   r   r$   r   rf  C  s0    	rf  c                       s\   e Zd ZdZdeeee f deeef f fddZdededeee	f fd	d
Z
  ZS )AudioCollectionz)List of audio files from a manifest file.manifest_filesaudio_to_manifest_keyc                    s   t |tkr|d}| D ]\}}t |tkr$d|v r$|d||< q|| _g g g g f\}}}	}
tj|| jdD ]}||d  ||d  |	|d  |
|d  q:t	 j
|||	|
g|R i | dS )a  Instantiates a list of audio files loaded from a manifest file.

        Args:
            manifest_files: path to a single manifest file or a list of paths
            audio_to_manifest_key: dictionary mapping audio signals to keys of the manifest
        r   r   rM   rb   r{   r#   N)typer(   r   rE  rm  r
   r=   _AudioCollection__parse_itemr   r   r   )r    rl  rm  r   r   	audio_keymanifest_keyrg  rh  ri  rj  r1   r$   r   r   r     s   
$zAudioCollection.__init__r   r   r.   c           	         s  dt dtttt f fdd}t|}i }| j D ]A\}}|||}t|tr2t	
| ||< qt|trC fdd|D ||< q|du rQ|drQd||< qtd	t| d
| ||d< d|vrotd| d  d|vrwd|d< d|vrd|d< t|d |d |d |d dS )a;  Parse a single line from a manifest file.

        Args:
            line: a string representing a line from a manifest file in JSON format
            manifest_file: path to the manifest file. Used to resolve relative paths.

        Returns:
            Dictionary with audio_files, duration, and offset.
        r1   rq  c                 S   s   |du rd}|S t |tr| | }|S t |trHg }|D ])}| | }t |tr-|| qt |tr7||7 }qtdt| d| d| |S tdt| d| )z{Get item[key] if key is string, or a list
            of strings by combining item[key[0]], item[key[1]], etc.
            NUnexpected type z of item for key z: z of manifest_key: )rp   r(   r   r   r<   r7   rn  )r1   rq  rz   rh   item_keyr   r   r   get_audio_file  s"   




z4AudioCollection.__parse_item.<locals>.get_audio_filec                    s   g | ]}t | qS r   )r
   r   )r0   rB   r   r   r   r     s    z0AudioCollection.__parse_item.<locals>.<listcomp>Nr  rr  z of audio_file: rM   rb   z Duration not available in line: z. Manifest file: r{   rY   r#   )rM   rb   r{   r#   )r   r	   r(   r   r   r   rm  rE  rp   r
   r   r   
startswithr7   rn  r   )	r    r   r   rt  r1   rM   rp  rq  rz   r   ru  r   r     s,   




zAudioCollection.__parse_item)r   r   r   r   r	   r(   r   r   r   r   ro  r*   r   r   r$   r   rk    s    
&%rk  c                       st   e Zd ZdZejdddZ					ddee dee d	ee	 d
e
e	 de
e	 de
e dedef fddZ  ZS )FeatureLabelzKList of feature sequence and their label correspondence with preprocessing.FeatureLabelEntityzfeature_file label durationrI   NFr  r   rN   rT   rU   rV   rW   rX   c	                    sd  | j }	g }
d}d}t | _|ri | _t|||D ]T\}}}|dur*||k r*||7 }q|dur7||kr7||7 }q|
|	||| |  jt|O  _||7 }|rdtjtj	|\}}t
|
d | j|< t
|
|krl nq|r|rwtd n|
jdd d td|d	 d
d tdt
|
 d|d dd tdt
|
t
| j t |
 dS )aX  Instantiates feature-SequenceLabel manifest with filters and preprocessing.

        Args:
            feature_files: List of feature files.
            labels: List of labels.
            max_number: Maximum number of samples to collect.
            index_by_file_id: If True, saves a mapping from filename base (ID) to index in data.
        rY   NrZ   r^   c                 S   r_   r`   ra   rc   r   r   r   re   7  rf   z'FeatureLabel.__init__.<locals>.<lambda>rg   r   i(
  z.2fr   r   z items, total duration of rj   r   z.# {} files loaded including # {} unique labels)r   r   r   rm   rn   r   r4   r5   r8   rq   rr   r   r   rs   rt   r   r   r   )r    r  r   rN   rT   rU   rV   rW   rX   r"   r!   rv   rx   r  r   rb   r   r@   r$   r   r   r      s>   "zFeatureLabel.__init__r   r_  r   r   r$   r   rw    s8    
	rw  c                
       sh   e Zd ZdZ			ddeeee f dededee f fdd	Z	d
edede
eef fddZ  ZS )ASRFeatureLabelz8`FeatureLabel` collector from asr structured json files.FNr   r   r   r   c                    s   g g g }}}	g }
t j|| jdD ]5}||d  |	|d  |s4|d }|s.| n||}n	t|d }|g}|| |
| q|rPt|
| _	t
 j|||	g|R i | dS )a  Parse lists of feature files and sequences of labels.

        Args:
            manifests_files: Either single string file or list of such -
                manifests to yield items from.
            max_number:  Maximum number of samples to collect; pass to `FeatureSequenceLabel` constructor.
            index_by_file_id: If True, saves a mapping from filename base (ID) to index in data;
                              pass to `FeatureSequenceLabel` constructor.
        r   r  rb   r   N)r
   r=   r  r   r   r   r   r&   r   r   r   r   )r    r   r   r   r   r   r   r  r   rN   r  r1   r   r  r$   r   r   r   B  s   
"zASRFeatureLabel.__init__r   r   r.   c                 C   s   t |}d|v r|d|d< nd|v r|d|d< nd|vr)td| dtj|d |d|d< d|v r@|d|d< ntd| dt|d |d |d	 d
}|S )Nr  r  r  r  z# without proper 'feature_file' key.r   r   z without proper 'label' key.rb   )r  r   rb   )r   r   r   r7   r
   r   r   r  r   r   r   r  i  s   

zASRFeatureLabel._parse_itemr  )r   r   r   r   r	   r(   r   r   r   r   r   r   r  r*   r   r   r$   r   ry  ?  s    &'ry  c                !       s   e Zd ZdZejdddZ					ddee dee	 d	ee	 d
ee
 dee	 dee	 deee  deee  deee  deee	  dejdee
 dee
 dee dedef  fddZ  ZS )FeatureTextrG   FeatureTextEntityzSid feature_file rttm_file duration text_tokens offset text_raw speaker orig_sr langrI   NFrL   r  r!  rN   r   rO   rP   rQ   rR   rS   r   rT   rU   rV   rW   rX   c           "         s  | j }g dddf\}}}}|ri | _t|||||||||	|

D ]\
}}}}}}}}}	}|dur<||k r<||7 }|d7 }q|durM||krM||7 }|d7 }q|	durT|	}n3|dkrxt|drs|jrst|trs|duro|||}ntd||}ng }|du r||7 }|d7 }q||7 }||||||||||||
 |rt	j
t	j
|\} }!| | jvrg | j| < | j|  t|d  t||kr nq|r|rtd n|jd	d
 d tdt||d  td||d  t | dS )aK  Instantiates feature-text manifest with filters and preprocessing.

        Args:
            ids: List of examples positions.
            feature_files: List of audio feature files.
            rttm_files: List of audio rttm files.
            durations: List of float durations.
            texts: List of raw text transcripts.
            offsets: List of duration offsets or None.
            speakers: List of optional speakers ids.
            orig_sampling_rates: List of original sampling rates of audio files.
            langs: List of language ids, one for eadh sample, or None.
            parser: Instance of `CharParser` to convert string to tokens.
            min_duration: Minimum duration to keep entry with (default: None).
            max_duration: Maximum duration to keep entry with (default: None).
            max_number: Maximum number of samples to collect.
            do_sort_by_duration: True if sort samples list by duration. Not compatible with index_by_file_id.
            index_by_file_id: If True, saves a mapping from filename base (ID) to index in data.
        rY   r   NrZ   r[   r\   r]   r^   c                 S   r_   r`   ra   rc   r   r   r   re     rf   z&FeatureText.__init__.<locals>.<lambda>rg   ri   rj   rk   rl   )"r    rL   r  r!  rN   r   rO   rP   rQ   rR   rS   r   rT   rU   rV   rW   rX   r"   r!   rv   rw   rx   ry   	feat_filer+  rb   r{   r#   r|   r}   r~   r   r   r@   r$   r   r   r     sp   '


zFeatureText.__init__r   r   r   r   r$   r   rz    sX    

	


rz  c                       r   )ASRFeatureTextz7`FeatureText` collector from asr structured json files.r   c                    s   g g g g g g f\}}}}}}	g g g g f\}
}}}t |D ]H}||d  ||d  ||d  ||d  ||d  |	|d  |
|d  ||d  ||d	  ||d
  qt j||||||	|
|||g
|R i | dS )r   r   r  r+  rb   r#   r{   r|   r}   rR   r~   Nr   )r    r   r   r   rL   r  r!  rN   r   rO   rP   r   rR   rS   r1   r$   r   r   r     sT   	
zASRFeatureText.__init__r   r   r   r$   r   r}    r   r}  )4r&   r   r4   	itertoolsr   typingr   r   r   r   r   r   r	   numpyr   pandasr9   +nemo.collections.common.parts.preprocessingr
   r   4nemo.collections.common.parts.preprocessing.manifestr   
nemo.utilsr   r   UserListr   r   r+   rF   r   r   r   objectr   r   r   r   r   r   r	  r  r  r2  r]  r`  rf  rk  rw  ry  rz  r}  r   r   r   r   <module>   sL   $ sl +*v VOV:k -Zx=xGCy