o
    wiar                     @   s>  d dl mZ d dl mZ d dlZd dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
mZ d dlmZ d dlmZ d dlZd dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ ejd  dkZ e rd dl!m"Z" e#Z$e%fZ&dGddZ'dGddZ(nd dl)m"Z" e%Z$e*fZ&dGddZ'dGddZ(dHddZ+dIddZ,dJd d!Z-G d"d# d#e"Z.dKd$d%Z/d&d' Z0d(d) Z1dLd*d+Z2dMd,d-Z3d.d/ Z4dNd1d2Z5dOd4d5Z6dOd6d7Z7dPd8d9Z8d:d; Z9d<d= Z:		3	3				dQd>d?Z;dKd@dAZ<dKdBdCZ=dRdEdFZ>dS )S    )division)unicode_literalsN)partial)BytesIO)StringIO)GlobalHeader)PerColHeader)
LazyLoader)LimitedSizeDict)MultiFileDescriptor)default_encoding)open_like_kaldi)
open_or_fd)seekable)read_wav)	write_wav   )Mappinglittlec                 C   s   |  ||S N)to_bytes)nlength	endianess r   J/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/kaldiio/matio.pyr   %      r   c                 C   s   t | |S r   )int
from_bytessr   r   r   r   r   (   r   r   c                 C   sX   |dv sJ |d|  }t dt|d  | |d d}|dkr%|S |d d d S )N)bigr   s   %x   0   hexr!   )codecsdecodelenzfill)r   r   r   hr    r   r   r   r   1   s   &c                 C   s(   |dkr| d d d } t t| ddS )Nr   r%   r$      )r   r&   encoder   r   r   r   r   7   s   <c                 C   s   |dv sJ ||dkr|durt dt|}nd}|du rdtt||d}t|}t| d+}|D ]}	|	|d}
t|
dkrGt d	|	|
\}}|	 ||< q2W d   |S 1 s]w   Y  |S t
| ||d
S )Lazy loader for kaldi scp file.

    Args:
        fname (str or file(text mode)):
        endian (str):
        separator (str):
        segments (str): The path of segments
    r-   >r   Nz/max_cache_fd is not supported for segments mode)endianfd_dictr   r#   Invalid line is found:
>   {}	separatorsegments)
ValueErrorr
   r   load_matr	   r   splitr(   formatrstripSegmentsExtractor)fnamer1   r7   r8   max_cache_fdd	load_funcloaderfdlinesepstokenarknamer   r   r   load_scp=   s,   	

rI   c              	   c   sB   |dv s	J ||du rt | dt}d}d}zV|D ]Q}||d}t|dkr/td||\}	}
|
 }
t|
\}}}||krM|}t||||d}n|durU|  t |d}t||||d}|}|}|	|fV  qW n t	y}   |dur||   w W d   dS 1 sw   Y  dS t
| ||d	 D ]}|V  qdS )
r.   r/   Nr3   r4   r#   r5   r1   rbr6   )r   r;   r(   r9   r<   r=   _parse_arkpath	_load_matclose	Exceptionr>   	generator)r?   r1   r7   r8   rD   prev_ark
prev_arkfdrE   rF   rG   rH   arkoffsetslicesarkfdmatdatar   r   r   load_scp_sequential^   sN   	
" rY   c                 C   s   t dt t| ||dS )Nz$Use load_scp instead of load_wav_scpr6   )warningswarnDeprecationWarningrI   )r?   r8   r7   r   r   r   load_wav_scp   s   r]   c                   @   sJ   e Zd ZdZdddZdd Zdd Zd	d
 Zdd Zdd Z	dd Z
dS )r>   a*  Emulate the following,

    https://github.com/kaldi-asr/kaldi/blob/master/src/featbin/extract-segments.cc

    Args:
        segments (str): The file format is
            "<segment-id> <recording-id> <start-time> <end-time>
"
            "e.g. call-861225-A-0050-0065 call-861225-A 5.0 6.5
"
    Nc                 C   s   || _ t| j |d| _|| _i | _t| jdB}|D ]6}| |}t|dkr0t	d
||\}}}	}
|t|	t|
f| j|< || jvrPt	d
|| j qW d    d S 1 s\w   Y  d S )N)r7   r3      zFormat is invalid: {}zNot found "{}" in {})wav_scprI   
wav_loaderr8   _segments_dictr   r=   r;   r(   RuntimeErrorr<   float)selfr?   r8   r7   frE   spsuttidrecodeidstetr   r   r   __init__   s$   
"zSegmentsExtractor.__init__c                 c   s    i }| j  D ]\}\}}}||dd ||< qi }| j  D ]3\}\}}}||vr3| j| ||< || }||  d8  < || dkrJ|| || |||fV  q!d S Nr   r4   )ra   itemsgetr`   pop_return)rd   recodeid_counteruttrh   ri   rj   cachedarrayr   r   r   rP      s   
zSegmentsExtractor.generatorc                 C   
   t | jS r   )iterra   rd   r   r   r   __iter__      
zSegmentsExtractor.__iter__c                 C   s
   || j v S r   )ra   )rd   itemr   r   r   __contains__   ry   zSegmentsExtractor.__contains__c                 C   ru   r   )r(   ra   rw   r   r   r   __len__   ry   zSegmentsExtractor.__len__c                 C   s(   | j | \}}}| j| }| |||S r   )ra   r`   rp   )rd   keyrh   ri   rj   rt   r   r   r   __getitem__   s   
zSegmentsExtractor.__getitem__c                 C   sh   t |ttfr|\}}ntd| j|dkr(||t|| t||  fS ||t|| d  fS )Nz{} is not wav.scp?r%   )
isinstancetuplelistrb   r<   r_   r   )rd   rt   ri   rj   rater   r   r   rp      s   
 zSegmentsExtractor._returnNN)__name__
__module____qualname____doc__rk   rP   rx   r{   r|   r~   rp   r   r   r   r   r>      s    

r>   c                 C   s   |dv sJ ||d urt |tstdt|t| \}}}|d urL| d dksL| d dksL||vr@t|d||< || }t||||dS t|d}t||||dW  d    S 1 sdw   Y  d S )Nr/   z(fd_dict must be dict or None, bot got {}r%   |r   rK   rJ   )	r   r   rb   r<   typerL   stripr   rM   )ark_namer1   r2   rS   rT   rU   rD   r   r   r   r:      s   ($r:   c                 C   s   |   d dks|   d dkr| ddfS d}d| v r@d| v r@| d\}}|dd  }zt|}W n	 ty=   Y nw |} d| v r`| dd	\}}zt|}W n ty_   | }d}Y nw | }d}|||fS )
a  Parse arkpath

    Args:
        ark_name (str):
    Returns:
        Tuple[str, int, Optional[Tuple[slice, ...]]]
    Examples:
        >>> _parse_arkpath('a.ark')
        'a.ark', None, None
        >>> _parse_arkpath('a.ark:12')
        'a.ark', 12, None
        >>> _parse_arkpath('a.ark:12[3:4]')
        'a.ark', 12, (slice(3, 4, None),)
        >>> _parse_arkpath('cat "fo:o.ark" |')
        'cat "fo:o.ark" |', None, None

    r%   r   r   N[] :r4   )r   r;   replace_convert_to_slicerO   rsplitr   r9   )r   rU   	_ark_nameRanger?   rT   r   r   r   rL      s.    

rL   c              
   C   s  g }|  dD ]x}|dks|dkr|td qg }| dD ]}z	|t| W q  ty9   td| w t|dkrLt|d |d d }n.t|dkr^t|d |d d }nt|d	krst|d |d d |d }ntd
| || qt|S )a  Convert slice-str to slice

    Examples:
        >>> _convert_to_slice('0:51')
        (slice(0, 52),)
        >>> _convert_to_slice('0:51,6:10')
        (slice(0, 52), slice(6, 11))
        >>> _convert_to_slice(',6:10')
        (slice(None), slice(6, 11))

    ,r   r   NzFormat error: {}r4   r   r#   r   zToo many : {})	r;   appendslicer   r9   r<   r(   rb   r   )stringrU   elerf   spslr   r   r   r   "  s(   r   c                 C   sV   |d ur	|  | t| |}|d ur)t|ttfr%|d |d | f}|S || }|S rl   )seek
read_kaldir   r   r   )rD   rT   rU   r1   rt   r   r   r   rM   G  s   

rM   c                 c   sl    |dv s	J |t | d}	 t|}|d u rnt||}||fV  qW d    d S 1 s/w   Y  d S )Nr/   rK   )r   
read_tokenr   )r?   r1   rD   rG   rt   r   r   r   load_arkT  s   

"r   c                 C   sT   g }	 |  d}|dks|dkrn|| qt|dkrdS d|jtd}|S )z-Read token

    Args:
        fd (file):
    Tr4           r   Nencoding)readr   r(   joinr'   r   )rD   rG   cdecodedr   r   r   r   _  s   

r   	soundfilec                 C   sZ  |dv sJ ||du ri }t d}| |}t|ts"J t|t| r.| | d ntt|| } |dd dkrCt	| }|S |dd dkrdddl
}|  }t|}	||	\}
}||
f}|S |dd	 d
kr| d	 t| }| |}t|}	tj|	fi |}|S |dd	 dkr| d	 tj| fi |}|S |dd dkr| d t| }| |}t|}	|dkrddl
}|j}ntd|||	fi |\}}t|trt|tjr||f}|S t|tjrt|tr||f}|S tdt|t||dd dkr'|dd	 dkr t| |}|S t| |}|S t| }|S )zLoad kaldi

    Args:
        fd (file): Binary mode file object. Cannot input string
        endian (str):
        audio_loader: (Union[str, callable]):
    r/   N   AUDIOr4   r^   s   RIFFs   fLaCr   r      NPY   PKL   r   zNot supported: audio_loader={}z/Got unexpected type from audio_loader: ({}, {})r#       B   )r(   r   r   binary_typer   r   r   r   r   r   r   _read_length_headernploadpickler9   r<   r   ndarrayrb   read_int32vectorread_matrix_or_vectorread_ascii_mat)rD   r1   audio_loaderload_kwargsmax_flag_lengthbinary_flagrt   r   buf_fdaudior   length_x1x2r   r   r   r   r  sv   
81

+
'



r   Fc                 C   s   |  ddks	J |  ddksJ t|d |  dd }tj|tjd}t|D ]}|  ddks6J t|d |  dd ||< q+|rR||d d	 d fS |S )
Nr#   r   r4   r   ir^   r   dtyper   )r   structunpackr   emptyint32range)rD   r1   return_sizer   rt   r   r   r   r   r     s    r   c                 C   s  d}|  ddksJ |d7 }tt| }|t|d 7 }d|krgt | ||}||j7 }t | |}||j7 }|  |j|j }||j|j 7 }t	j
|t	|d d}||j|jf}||}|j}nd|krt | ||}||j7 }|  d|j |j }t	j
|t	|d	 d}||j|jf}||}nd
|krt | ||}||j7 }|  |j|j }t	j
|t	|d d}||j|jf}||}n|dks|dkr|d }	d}
n|dks|dkr|d }	d}
ntd||  ddksJ |d7 }t|d |  dd }|d7 }|}d|v r>|  ddks%J |d7 }t|d |  dd }|d7 }|| }|  ||
 }|||
 7 }t	j
|t	|	d}d|v rbt	|||f}|ri||fS |S )znCall from load_kaldi_file

    Args:
        fd (file):
        endian (str):
        return_size (bool):
    r   r#   r   r4   CMu1r   CM2u2CM3FMFVre   r^   DMDVrA      zHUnexpected format: "{}". Now FM, FV, DM, DV, CM, CM2, CM3 are supported.r   r   M)r   strr   r(   r   sizer   rowscolsr   
frombufferr   reshapechar_to_floatTuint_to_floatr9   r<   r   r   )rD   r1   r   r   Typeglobal_headerper_col_headerr   rt   r   bytes_per_sampler   dimr   r   r   r   r     sv   






r   c                 C   s  g }d}	 |  d}z|jtd}W n ty   tdw |d7 }|dks*|dkr+q|dkr2d}n|| d	}	 d}	 |  djtd}|d7 }|rx|d
krh|  djtd}|d7 }|dksg|dksgJ n|dkrod}n|dkrwtdn	|dks|dkrn|| q=d|}t|dksJ t	d|}|du rt
j}	n$|d}
zt|
 W n ty   t|
d w d|
v rt
j}	nt
j}	t
jt||	|d}|r||fS |S )zdCall from load_kaldi_file

    Args:
        fd (file): binary mode
        return_size (bool):
    r   Tr4   r   zFile format is wrong? 
r   Fr   r   r#   z/There are no corresponding bracket ']' with '['z *([^ \n]+) *Nz$is not a digit
File format is wrong?.)r   ndmin)r   r'   r   UnicodeDecodeErrorr9   r   r   r(   rematchr   float32grouprc   rb   r   loadtxtr   )rD   r   r   r   bchar	hasparentr   r   r   mart   r   r   r   r   4  sn   




r   c                 C   s&   t d| d\}t| |}|S )N<Br4   )r   r   r   r   )rD   bytes_lengthr   r   r   r   r   |  s   r   c                 C   sD   |  }tt|d }| td| | t|| d| S )Nr   r   r4   )
bit_lengthr   mathceilwriter   packr   )rD   r   r   r   r   r   r   _write_length_header  s
   r   c	                    sj  du ri t | trd}	nt| dsd}	nz|   d}	W n ty)   d}	Y nw |dur9t | ts9|	s9td|r=dnd}
g }t| |
}|	rN| }nd}d}|D ]}|d	 jtd
}|	| |t
|7 }|| || }|dur| }|dkrddl  fdd}n|dkrfdd}n|dkrfdd}ntd|||||7 }qTt |ttfr|\}}|t|||7 }qT|r|t|||7 }qT|t||||7 }qTW d   n1 sw   Y  |rdnd}
|dur3t | tr| n| j}t||
'}t||D ]\}}|	|d	 | d t||  d  qW d   dS 1 s,w   Y  dS dS )az  Write ark

    Args:
        ark (str or fd):
        array_dict (dict):
        scp (str or fd):
        append (bool): If True is specified, open the file
            with appendable mode
        text (bool): If True, saving in text ark format.
        endian (str):
        compression_method (int):
        write_function: (str):
        write_kwargs: (Optional[dict]):
    NTtellFz\scp file can be created only if the output ark file is a file or a seekable file descriptor.abwbr   r   r   r   c                    s  t |ttfstdt|t|dkrtdt|t }t |d t	j
r6t |d tr6|\}}n$t |d t	j
rJt |d trJ|\}}ntdt|d t|d dvrbdd<  j|||fi  | d	 | }t| t|}| | t|td	 | S )
Nz'Expected list or tuple type, but got {}r#   zExpected length=2, bot got {}r   r4   zPExpected Tuple[int, np.ndarray] or Tuple[np.ndarray, int]: but got Tuple[{}, {}]r<   wavr   )r   r   r   	TypeErrorr<   r   r(   r9   r   r   r   r   r   getvaluer   )rD   rX   r   _array_rater   r   r   write_kwargsr   r   _write_function  s>   

	

z!save_ark.<locals>._write_functionr   c                    sF   |  d t }tj||fi   | }|  | t|td S )Nr   PKL)r   r   r   dump	getbufferr(   )rD   rX   r   r   r  r   r   r    s   

numpyc                    sX   t  }tj||fi   | d | }t| t|}| | t|td | S )Nr   )r   r   saver   r   r   r(   )rD   rX   r   r   r   r  r   r   r    s   

z Not supported: write_function={}awr   r   )r   string_typeshasattrr   rO   r   r   r,   r   r   r(   r   lowerr   rb   r<   r   r   r   write_array_asciiwrite_arraynamezipr   )rS   
array_dictscpr   textr1   compression_methodwrite_functionr  r   modepos_listrD   rT   r   r}   
encode_keyrX   r  r   rt   r  positionr   r  r   save_ark  sx   




)
k
*$r  c                 C   s<   t | d}t||||W  d    S 1 sw   Y  d S )Nr   )r   r  )r?   rt   r1   r  rD   r   r   r   save_mat0  s   $r  c           	      C   s|  d}t |tjsJ t|| d |d7 }|dur|jdkr(td|jt	|||}||| 7 }|jdkr`t
	||}||| |7 }||j}| }| | |t|7 }|S |jdkr{||}| }| | |t|7 }|S |jdkr||}| }| | |t|7 }|S |jtjkr|jd	ksJ |j| d
 | t|d t| |D ]}| d
 | t|d | q|t|d	 d 7 }|S |jtjks|jtjkrdt|j  k rdk sJ  J t|jd	kr;|jtjkr| d |d7 }n|jtjkr | d |d7 }| d
 |d	7 }| t|d t| |d7 }n^t|jdkr|jtjkrT| d |d7 }n|jtjkrd| d |d7 }| d
 |d	7 }| t|d t| |d7 }| d
 |d	7 }| t|d |jd	  |d7 }||jjvr||j }| |  ||j7 }|S td|j)zWrite array

    Args:
        fd (file): binary mode
        array (np.ndarray):
        endian (str):
    Returns:
        size (int):
    r   r   r#   Nz:array must be matrix if compression_method is not None: {}r   r   r   r4   r   r   r   r   s   FV s   DV r^   s   FM s   DM zUnsupported array type: {})r   r   r   r   r   ndimr9   r<   r   computer   float_to_charr   tobytesr(   float_to_uintr   r   r   r   r   float64shaper   astypenewbyteordernbytes)	rD   rt   r1   r  r   r   r   byte_stringxr   r   r   r  5  s   




=


6


/

&"










r  .12gc                 C   s0  t |tjsJ t||jdv sJ |jd}| d |d7 }|jdkr_|D ]+}| d |d7 }|D ]}t||}| |jtd | d |t	|d	 7 }q5q(| d
 |d7 }|S |jd	kr| d |d	7 }|D ]}t||}| |jtd | d |t	|d	 7 }qo| d
 |d7 }|S )zwrite_array_ascii

    Args:
        fd (file): binary mode
        array (np.ndarray):
        digit (str):
    Returns:
        size (int):
    )r4   r#   r   s    [r#   s   
  r   r   r   r4   s   ]
)
r   r   r   r   r  r   r<   r,   r   r(   )rD   rt   digitr   rowr   r   r   r   r   r    s:   











r  )r   )r-   NNr   )r-   NNr   )r-   N)Nr-   )r-   )r-   r   N)r-   F)F)NFFr-   NNN)r+  )?
__future__r   r   r&   r   r   r   r   sysrZ   	functoolsr   ior   r   r	  r   kaldiio.compression_headerr   r   kaldiio.utilsr	   r
   r   r   r   r   r   kaldiio.wavior   r   version_infoPY3collections.abcr   bytesr   r   r  r   r   collections
basestringrI   rY   r]   r>   r:   rL   r   rM   r   r   r   r   r   r   r   r   r  r  r  r  r   r   r   r   <module>   s    




!
1
L/
%


S

aH
 
'
`