o
    }oiQ                     @   s  d dl Z d dlZd dlmZ d dlmZ d dlmZ d dlm	Z	m
Z
mZ d dlZd dlZd dlmZmZmZmZmZmZ d dlmZ d dlmZ dhdededefddZ				didededededededededededededefdd Zd!ed"ed#edefd$d%Zd&edefd'd(Z d)ed*ed+ed,ede	ee!f f
d-d.Z"d/ede	ee!f fd0d1Z#d2e	ee!f d3e	ee!f d4ed5ed,ef
d6d7Z$d8ed9e
e! d:efd;d<Z%d=ede
e fd>d?Z&d=e
e de	eef fd@dAZ'dBe
e dCe
e de	eef fdDdEZ(djdFedCe
e dGede	eef fdHdIZ)d/ed4ed*ed+ed5ed,efdJdKZ*					LdkdMedNedOedPedQedRedSe+fdTdUZ,dVeeef de
e! fdWdXZ-dldZeeef d[e
e! d\e+fd]d^Z.dZed_e	ee!f fd`daZ/dZed_e	ee!f fdbdcZ0ddeeB deedefdfdgZ1dS )m    N)Counter)OrderedDict)Path)DictListUnion)audio_rttm_mapget_subsegments_scriptableget_uniqname_from_filepathrttm_to_labels)segments_manifest_to_subsegments_manifestwrite_rttm2manifest)logging)DataStoreObject      numoutput_precisionreturnc                 C   s    t |t||}| d| dS )a  
    Get a string of a float number with rounded precision.

    Args:
        num (float): float number to round
        output_precision (int): precision of the output floating point number
        min_precision (int, optional): Minimum precision of the output floating point number. Defaults to 1.
        max_precision (int, optional): Maximum precision of the output floating point number. Defaults to 3.

    Returns:
        (str): Return a string of a float number with rounded precision.
    .f)minmax)r   r   min_precisionmax_precision r   c/home/ubuntu/.local/lib/python3.10/site-packages/nemo/collections/asr/parts/utils/manifest_utils.pyget_rounded_str_float%   s   r   NAunknown1   sourcechannel
start_timedurationtokenconftype_of_tokenspeakerNA_tokenUNKdefault_channelc                 C   sX  g d}t |tkr|ddd rt|}nt |tkr'tdt | t |tkr;|ddd r;t|}nt |tkrJtdt | t |tkr^|ddd r^t|}n|du re|}nt |tkrttdt | |durt |tkrt|}|durt |tkrd	|  krdksn td
| |durt |tkrtdt | d| |dur||vrtd| d| d| |durt |tkrtdt | |du r|
n|}|du r|n|}|du r|n|}|du r|	n|}t||}t||}||krt||n|}|  d| d| d| d| d| d| d| dS )u  
    Get a line in Conversation Time Mark (CTM) format. Following CTM format appeared in
    `Rich Transcription Meeting Eval Plan: RT09` document.

    CTM Format:
        <SOURCE><SP><CHANNEL><SP><BEG-TIME><SP><DURATION><SP><TOKEN><SP><CONF><SP><TYPE><SP><SPEAKER><NEWLINE>

    Reference:
        https://web.archive.org/web/20170119114252/
        http://www.itl.nist.gov/iad/mig/tests/rt/2009/docs/rt09-meeting-eval-plan-v2.pdf

    Args:
        source (str): <SOURCE> is name of the source file, session name or utterance ID
        channel (int): <CHANNEL> is channel number defaults to 1
        start_time (float): <BEG_TIME> is the begin time of the word, which we refer to as `start_time` in NeMo.
        duration (float): <DURATION> is duration of the word
        token (str): <TOKEN> Token or word for the current entry
        conf (float): <CONF> is a floating point number between 0 (no confidence) and 1 (certainty).
                      A value of “NA” is used (in CTM format data)
                      when no confidence is computed and in the reference data.
        type_of_token (str): <TYPE> is the token type. The legal values of <TYPE> are
                      “lex”, “frag”, “fp”, “un-lex”, “for-lex”, “non-lex”, “misc”, or “noscore”
        speaker (str): <SPEAKER> is a string identifier for the speaker who uttered the token.
                      This should be “null” for non-speech tokens and “unknown” when
                      the speaker has not been determined.
        NA_token (str, optional): A token for  . Defaults to '<NA>'.
        output_precision (int, optional): The precision of the output floating point number. Defaults to 3.

    Returns:
        str: Return a line in CTM format filled with the given information.
    )lexfragfpzun-lexzfor-lexznon-lexmiscnoscorer    r   z>`start_time` must be a float or str containing float, but got z<`duration` must be a float or str containing float, but got Nz8`conf` must be a float or str containing float, but got r   z(`conf` must be between 0 and 1, but got z!`type` must be a string, but got z type z`type` must be one of z
, but got z$`speaker` must be a string, but got  
)typestrreplaceisdigitfloat
ValueErrorintr   )r"   r#   r$   r%   r&   r'   r(   r)   r*   r+   r,   r   VALID_TOKEN_TYPESr   r   r   get_ctm_line6   sB   -


*

4r=   soldnewc                 C   s   |  |d}||S )z
    Replace end of string.

    Args:
        s (str): string to operate on
        old (str): ending of string to replace
        new (str): replacement for ending of string
    Returns:
        new.join(li) (string): new string with end replaced
    r   )rsplitjoin)r>   r?   r@   lir   r   r   rreplace   s   
rD   pathc                 C   s@   t j| ddd }t|dkrd|}|S |d }|S )z
    Get uniq_id from path string with period in it.

    Args:
        path (str): path to audio file
    Returns:
        uniq_id (str): unique speaker ID
    r   Nr   r   )osrE   basenamesplitlenrB   )rE   
split_pathuniq_idr   r   r   get_uniq_id_with_period   s
   	rM   subsegments_manifest_filewindowshiftdecic                 C   s  i }t | dp}| }|D ]`}| }t|}|d |d |d }	}
}t|
|||d}|d dur:|d }nt|	}||vrIg g d||< |D ]}|\}}qK|| d	 t||t|| |g || d
 | qW d   |S 1 szw   Y  |S )ar  
    Get subsegment dictionary from manifest file.

    Args:
        subsegments_manifest_file (str): Path to subsegment manifest file
        window (float): Window length for segmentation
        shift (float): Shift length for segmentation
        deci (int): Rounding number of decimal places
    Returns:
        _subsegment_dict (dict): Subsegment dictionary
    raudio_filepathoffsetr%   )rT   rO   rP   r%   rL   N)tsjson_dicrU   rV   )	open	readlinesstripjsonloadsr	   rM   appendround)rN   rO   rP   rQ   _subsegment_dictsubsegments_manifestsegmentssegmentdicaudiorT   r%   subsegmentsrL   
subsegmentstartdurr   r   r   get_subsegment_dict   s,   


&
rh   input_manifest_pathc                 C   sl   i }t | d%}| }|D ]}t|}d|d< t|d }|||< qW d   |S 1 s/w   Y  |S )z
    Get dictionary from manifest file.

    Args:
        input_manifest_path (str): Path to manifest file
    Returns:
        input_manifest_dict (dict): Dictionary from manifest file
    rR   -textrS   N)rW   rX   rZ   r[   r
   )ri   input_manifest_dictinput_manifest_fp
json_lines	json_linerb   rL   r   r   r   get_input_manifest_dict   s   	


rp   rl   r^   output_manifest_path
step_countc                 C   s  t |dt}| D ]f\}}t|d }tj|dd}	|	jd | }
t|
d D ]F}|	dddf ||  }|	dddf |d |  }||df }||df }t|| |}| | }||d< ||d< t	|| |
d	 q)q
W d   dS 1 s|w   Y  dS )
a  
    Write subsegments to manifest filepath.

    Args:
        input_manifest_dict (dict): Input manifest dictionary
        _subsegment_dict (dict): Input subsegment dictionary
        output_manifest_path (str): Path to output manifest file
        step_count (int): Number of the unit segments you want to create per utterance
        deci (int): Rounding number of decimal places
    wrU   r   )axisr   NrT   r%   r4   )rW   itemsnparrayargsortshaperanger]   rZ   dumpwrite)rl   r^   rq   rr   rQ   output_manifest_fprL   subseg_dictsubseg_arraysubseg_array_idxchunked_set_countidxchunk_index_sttchunk_index_end
offset_secend_secrg   metar   r   r   write_truncated_subsegments   s&   "r   namelinesr   c                 C   sX   t | d}|D ]}|| }t|| |d qW d   dS 1 s%w   Y  dS )z
    Write json lines to file.

    Args:
        name (str): Output file path
        lines (list): List of json lines
        idx (int): Indices to dump to the file
    rs   r4   NrW   rZ   r{   r|   )r   r   r   foutirb   r   r   r   
write_file  s   	"r   pathlistc                 C   s@   t | d}| } W d   t| S 1 sw   Y  t| S )z
    Read list of lines from target file.

    Args:
        pathlist (str): Input file path
    Returns:
        sorted(pathlist) (list): List of lines
    rR   N)rW   rX   sorted)r   r   r   r   r   	read_file  s   	

r   c                 C   s:   t  }t| } | D ]}tj|dd }|||< q	|S )z
    Read dictionaries from list of lines

    Args:
        pathlist (list): List of file paths
    Returns:
        path_dict (dict): Dictionary containing dictionaries read from files
    r   r   )odr   rG   rE   rH   rI   )r   	path_dict	line_pathrL   r   r   r   get_dict_from_wavlist  s   	
r   data_pathlistuniqidsc                 C   sH   i }| D ]}t j|dd }||v r|||< qtd| d|S )z
    Create dictionaries from list of lines

    Args:
        data_pathlist (list): List of file paths
        uniqids (list): List of file IDs
    Returns:
        path_dict (dict): Dictionary containing file paths
    r   r   zuniq id z is not in wav filelist)rG   rE   rH   rI   r:   )r   r   r   r   rL   r   r   r   get_dict_from_list0  s   

r   	data_pathlen_wavsc                 C   sP   | durt | }|durt||ksJ t||}|S |dur&dd |D }|S )aA  
    Create dictionary from list of lines (using the get_dict_from_list function)

    Args:
        data_path (str): Path to file containing list of files
        uniqids (list): List of file IDs
        len_wavs (int): Length of file list
    Returns:
        data_pathdict (dict): Dictionary containing file paths
    Nc                 S   s   i | ]}|d qS Nr   ).0rL   r   r   r   
<dictcomp>U  s    z!get_path_dict.<locals>.<dictcomp>)r   rJ   r   )r   r   r   r   data_pathdictr   r   r   get_path_dictD  s   
r   c                 C   s   d| vrt d|rd|vrt d|st| dd| d}t| }t| dd}t| dd}d}	t|}t| }
t|
||}|}t|||||	 t||||}t||||| t	
| t	
| d	S )
a  
    Create segmented manifest file from base manifest file

    Args:
        input_manifest_path (str): Path to input manifest file
        output_manifest_path (str): Path to output manifest file
        window (float): Window length for segmentation
        shift (float): Shift length for segmentation
        step_count (int): Number of the unit segments you want to create per utterance
        deci (int): Rounding number of decimal places
    z.jsonz4input_manifest_path file should be .json file formatz5output_manifest_path file should be .json file format_zseg.jsonz	_seg.jsonz_subseg.jsong?N)r:   rD   rp   r;   r   r   r   rh   r   rG   remove)ri   rq   rO   rP   rr   rQ   rl   segment_manifest_pathsubsegment_manifest_pathmin_subsegment_durationAUDIO_RTTM_MAPsegments_manifest_filerN   subsegments_dictr   r   r   create_segment_manifestY  s2   
r   Fwav_pathmanifest_filepath	text_path	rttm_pathuem_pathctm_pathadd_durationc                 C   s  t j|rt | t| }t|}t|}	t| }
t	||
|	}t	||
|	}t	||
|	}t	||
|	}g }|
D ]}|| || || || || f\}}}}}|
 }|duro|
 }t|}tdd |D   }nd}|dury|
 }|durt|
 }| d 
 }W d   n1 sw   Y  nd}|dur|
 }d}|rtj|dd\}}tj||d}|d|d|||||d		g}|| q9t||tt| dS )
a  
    Create base manifest file

    Args:
        wav_path (str): Path to list of wav files
        manifest_filepath (str): Path to output manifest file
        text_path (str): Path to list of text files
        rttm_path (str): Path to list of rttm files
        uem_path (str): Path to list of uem files
        ctm_path (str): Path to list of ctm files
        add_duration (bool): Whether to add durations to the manifest file
    Nc                 S   s   g | ]}|  d  qS )rF   )rI   )r   labelr   r   r   
<listcomp>  s    z#create_manifest.<locals>.<listcomp>r   rj   )sr)yr   infer)	rS   rT   r%   r   rk   num_speakersrttm_filepathuem_filepathctm_filepath)rG   rE   existsr   r   r   rJ   r   keysr   rY   r   r   __len__rW   rX   librosaloadget_durationextendr   rz   )r   r   r   r   r   r   r   wav_pathlistwav_pathdictr   r   text_pathdictrttm_pathdictuem_pathdictctm_pathdictr   uidwavrk   rttmuemctm
audio_linelabelsr   r   r%   r   r   r   r   r   r   create_manifest  sf   
r   manifestc              	   C   s   t t| } g }zt|  ddd}W n
   td|  g }| D ]%}| }|s-q$zt|}W n tj	yC   |
| Y q$w |
| q$|  |rqtt| d|   |D ]}td| d q^td|  |S )	z
    Read manifest file

    Args:
        manifest (str or Path): Path to manifest file
    Returns:
        data (list): List of JSON items
    rR   utf-8encodingz#Manifest file could not be opened: z1 Errors encountered while reading manifest file: z-- Failed to parse line: ``z0Errors encountered while reading manifest file: )r   r6   rW   get	ExceptionrX   rY   rZ   r[   JSONDecodeErrorr\   closer   errorrJ   RuntimeError)r   datar   errorslineitemr   r   r   r   read_manifest  s2   	
r   Toutput_pathtarget_manifestensure_asciic                 C   sX   t | ddd}|D ]}tj|||d |d q
W d   dS 1 s%w   Y  dS )a  
    Write to manifest file

    Args:
        output_path (str or Path): Path to output manifest file
        target_manifest (list): List of manifest file entries
        ensure_ascii (bool): default is True, meaning the output is guaranteed to have all incoming
                             non-ASCII characters escaped. If ensure_ascii is false, these characters
                             will be output as-is.
    rs   r   r   )r   r4   Nr   )r   r   r   outfiletgtr   r   r   write_manifest  s   "r   
target_ctmc                 C   s\   |j dd d t| d}|D ]}|d }|| qW d   dS 1 s'w   Y  dS )z
    Write ctm entries from diarization session to a .ctm file.

    Args:
        output_path (str): target file path
        target_ctm (dict): list of ctm entries
    c                 S      | d S Nr   r   r   r   r   r   <lambda>      zwrite_ctm.<locals>.<lambda>keyrs   r   N)sortrW   r|   )r   r   r   pairr   r   r   r   	write_ctm  s   "r   c                 C   sx   |j dd d t| d%}|D ]}|d }|dd }||d  q|d W d	   d	S 1 s5w   Y  d	S )
z
    Write text from diarization session to a .txt file

    Args:
        output_path (str): target file path
        target_ctm (dict): list of ctm entries
    c                 S   r   r   r   r   r   r   r   r   &  r   zwrite_text.<locals>.<lambda>r   rs   r   r3      r4   N)r   rW   rI   r|   )r   r   r   r   r   wordr   r   r   
write_text  s   "r   filepath	base_pathc                 C   s,   t |  } |  s|  s||   } | S )a  
    Return absolute path to an audio file.

    Check if a file exists at `filepath`.
    If not, assume that the path is relative to `base_path`.

    Args:
        filepath (str or Path): path to file
        base_path (Path): base path to resolve relative path
    )r   
expanduseris_fileis_absoluteabsolute)r   r   r   r   r   filepath_to_absolute/  s   r   )r   r   )r   r   r    r!   r   )NNNNF)T)2rZ   rG   collectionsr   r   r   pathlibr   typingr   r   r   r   numpyrv   .nemo.collections.asr.parts.utils.speaker_utilsr   r	   r
   r   r   r   
nemo.utilsr   nemo.utils.data_utilsr   r9   r;   r6   r   r=   rD   rM   dictrh   rp   r   r   r   r   r   r   r   boolr   r   r   r   r   r   r   r   r   r   <module>   s    	

U&!


$&(
.
U$%