o
    qoi5                     @   s"  d Z ddlZddlZdZdd ZdEddZd	d
 Zdd Zdd Z	dd Z
dd Zdd Zdd Zdd Zdd Zdd Zdd Zdd  Zd!d" Zd#d$ ZdFd%d&Zd'd( Zd)d* Zd+d, Zd-d. Zd/d0 Zd1d2 Zd3d4 Zd5d6 ZdFd7d8Zd9d: Zd;d< Z d=d> Z!d?d@ Z"dAdB Z#dCdD Z$dS )GzO
    Kaldi IO function implement(for binary format), test pass in Python 3.6.0
    NFc                 C   s   t rt|  d S d S N)debugprint)info r   N/home/ubuntu/.local/lib/python3.10/site-packages/kaldi_python_io/_io_kernel.py
print_info   s   r    c                 C   s   | st |d S r   )RuntimeError)okr   r   r   r   throw_on_error   s   r   c                 C   (   t | d}t|dkd|  dS )z\ 
        Generally, there is a space following the string token, we need to consume it
        zExpect space, but gets Nbytesdecodereadr   )fdspacer   r   r   expect_space      r   c                 C   r   )zd 
        Read the binary flags in kaldi, the scripts only support reading egs in binary format
        BzExpect binary flag, but gets Nr   )r   flagsr   r   r   expect_binary    r   r   c                 C   sF   d}	 t | d}|dks|dkrn||7 }q|dkrdS | S )zU 
        Read {token + ' '} from the file(this function also consume the space)
    r	   Tr   r   N)r   r   r   strip)r   keycr   r   r   
read_token(   s   r   c                 C   s   |  t|d  dS )z<
        Write a string token, following a space symbol
    r   Nwritestrencode)r   tokenr   r   r   write_token5   s   r%   c                 C   s&   t | }t||kd| d|  dS )zC 
        Check weather the token read equals to the reference
    zExpect token 'z', but gets N)r   r   )r   refr$   r   r   r   expect_token<   s   r'   c                 C   s   t | }|r
t|  |S )zI 
        Read the binary flags following the key(key might be None)
    )r   r   )r   r   r   r   r   read_keyD   s   r(   c                 C   s   |  td dS )z$ 
        Write a binary symbol
    r   Nr    r   r   r   r   write_binary_symbolN   s   r*   c                 C   sB   t | d}t|dkd|  | d}td|}|d S )z: 
        Read a value in type 'int32' in kaldi setup
    r   Expect '\04', but gets    ir   r   r   r   r   structunpack)r   int_sizeint_strint_valr   r   r   
read_int32U   s
   
r5   c                 C   s*   |  td td|}|  | dS )z"
        Write a int32 number
    r+   r.   N)r!   r"   r#   r0   pack)r   int32int_packr   r   r   write_int32`   s   r9   c                 C   s>   t | d}t|dkd|  | d}td|}|S )z> 
        Read a value in type 'BaseFloat' in kaldi setup
    r   r+   r,   r-   fr/   )r   
float_size	float_str	float_valr   r   r   read_float32i   s   
r>   c                 C   s   t d|  |dvrtd| |dkrdnd}|dkr!tjntj}t| }t| }t d| d|  | || | }tj||d	}|||S )
z 
        Read common matrix(for class Matrix in kaldi setup)
        see matrix/kaldi-matrix.cc::
            void Matrix<Real>::Read(std::istream & is, bool binary, bool add)
        Return a numpy ndarray object
    z	Type of the common matrix: )FMDMUnknown matrix type in kaldi: r?   r-      z	Size of the common matrix: z x dtype)	r   r
   npfloat32float64r5   r   
frombufferreshape)r   mat_typer;   
float_typenum_rowsnum_colsmat_datamatr   r   r   read_common_matu   s   rP   c                 C   sp   |j tjtjfvrtd|j  |j tjkrdnd}t| | |j\}}t| | t| | | |	  dS )z#
        Write a common matrix
    Unsupported numpy dtype: r?   r@   N)
rD   rE   rF   rG   r
   r%   shaper9   r!   tobytes)r   rO   rJ   rL   rM   r   r   r   write_common_mat   s   



rT   c                 C   sz   t d|  |dvrtd| |dkrdnd}|dkr!tjntj}t| }t d|  | || }tj||dS )	zc
        Read float vector(for class Vector in kaldi setup)
        see matrix/kaldi-vector.cc
    z	Type of the common vector: )FVDVrA   rU   r-   rB   z	Dim of the common vector: rC   )r   r
   rE   rF   rG   r5   r   rH   )r   vec_typer;   rK   dimvec_datar   r   r   read_float_vec   s   rZ   c                 C   st   |j tjtjfvrtd|j  |j tjkrdnd}t| | |jdkr)td|j}t| | | 	|
  dS )z"
        Write a float vector
    rQ   rU   rV   r   z%write_float_vec accept 1D-vector onlyN)rD   rE   rF   rG   r
   r%   ndimsizer9   r!   rS   )r   vecrW   rX   r   r   r   write_float_vec   s   


r^   c                    s:   |rt   t }tj fddt|D tjd}|S )z,
        Read int32 vector (alignments)
    c                    s   g | ]}t  qS r   r5   ).0_r)   r   r   
<listcomp>   s    z"read_int32_vec.<locals>.<listcomp>rC   )r   r5   rE   arrayranger7   )r   direct_accessvec_sizer]   r   r)   r   read_int32_vec   s
   "rg   c                 C   sd   t | d t| }t| }td| d| d g }t|D ]}t| }t| }|||f q|S )z 
        Reference to function Read in SparseVector
        Return a list of key-value pair:
            [(I1, V1), ..., (In, Vn)]
    SVz	Read sparse vector(dim = z, row = ))r'   r5   r   rd   r>   append)r   rX   	num_elems
sparse_vecra   indexvaluer   r   r   read_sparse_vec   s   
ro   c                 C   s:   t d|  t| }g }t|D ]	}|t|  q|S )zs 
        Reference to function Read in SparseMatrix
        A sparse matrix contains couples of sparse vector
    	Following matrix type: )r   r5   rd   rj   ro   )r   rJ   rL   
sparse_matra   r   r   r   read_sparse_mat   s   rr   c                 C   s  |\}}}}t d| d|  |dkrt| |d|  ks J | dd|  | d| d }}tj|tjdtj}	t|	|d}	|	| d | }	tj|tj	dtj}
t|
||}
|
d	k}|
d
k}t
||
|	d |	d   d |	d  t
||
d |	d |	d   d |	d  |
d	 |	d |	d   d |	d  S |dkrt|d }tj| tjdtj}nt|d }tj| tj	dtj}|||||  }|S )a   
        In format CM(kOneByteWithColHeaders):
        PerColHeader, ...(x C), ... uint8 sequence ...
            first: get each PerColHeader pch for a single column
            then : using pch to uncompress each float in the column
        We load it seperately at a time 
        In format CM2(kTwoByte):
        ...uint16 sequence...
        In format CM3(kOneByte):
        ...uint8 sequence...
    z	Uncompress to matrix z X CMrB   NrC   r-   g    @@      r   r   g      P@      r   g     O@g      `@CM2g     o@)r   lenrE   rH   uint16astyperF   	transposerI   uint8wherefloat)cdatacps_typeheadmin_valprangerL   rM   cheadcmainpchr}   
le64_index
gt92_indexincuint_seqrO   r   r   r   
uncompress   s6   """r   c                 C   s"   t | }t | }t | }|||fS )zh 
        Read the member in struct Index in nnet3/nnet-common.h  
        Return a tuple (n, t, x)
    r_   )r   ntxr   r   r   read_index_tuple  s   
r   c                 C   s   t d| dd }|dkr+t|dk rd|dfS |dkr'tdd| d t| S ||d  }t|dk rD|d |d | |d	 fS |dkrQtdd| d t| S )
z 
        Wapper to handle struct Index reading task(see: nnet3/nnet-common.cc)
            static void ReadIndexVectorElementBinary(std::istream &is,                 int32 i, std::vector<Index> *vec)
        Return a tuple(n, t, x)
    br   r   }      FzUnexpected character z( encountered while reading Index vector.r   )r0   r1   r   absr   r   )r   rm   cur_setr   
prev_indexr   r   r   
read_index  s&   


r   c                 C   sL   t | d t| }td|  g }t|D ]}t| ||}|| q|S )zp 
        Read several Index and return as a list of index:
        [(n_1, t_1, x_1), ..., (n_m, t_m, x_m)]
    z<I1V>z	Size of index vector: )r'   r5   r   rd   r   rj   )r   r\   rm   r.   	cur_indexr   r   r   read_index_vec<  s   
r   c                 C   s   t d|  td| d}t d|  |d |d }}|dkr+|d|  }n|d	kr6d| | }n|d
kr?|| }ntdd|  | |}t|||}|S )za 
        Reference to function Read in CompressMatrix
        Return a numpy ndarray object
    rp   ffii   z	Compress matrix header: r   rw   rs   rB   rx   CM3Fz!Unknown matrix compressing type: )r   r0   r1   r   r   r   )r   rJ   r   rL   rM   remain_sizecompress_datarO   r   r   r   read_compress_matK  s   

r   c                 C   s6   |d dkrt | |S |d dkrt| |S t| |S )zr 
        Reference to function Read in class GeneralMatrix
        Return compress_mat/sparse_mat/common_mat
    r   CS)r   rr   rP   )r   rJ   r   r   r   read_float_matd  s
   


r   c                 C   s4   |rt |  t| }|d dkrt| |S t| |S )z%
    Read float matrix or vector
    V)r   r   rZ   r   )r   re   rW   r   r   r   read_float_mat_vecp  s   

r   c                 C   sD   t |tjr|jdkrt| | dS t| | dS tdt| )z&
    Write float matrix or vector
    r   zUnsupport type: N)
isinstancerE   ndarrayr[   rT   r^   	TypeErrortype)r   
mat_or_vecr   r   r   write_float_mat_vec}  s
   
r   c                 C   st   t | d i }t| }||d< td|  t| }||d< t| t| }t| |}||d< t| t | d |S )z 
        Reference to function Read in class NnetIo
        each NnetIo contains three member: string, Index, GeneralMatrix
        I store them in the dict:{'name': ..., 'index': ..., 'matrix': ...}
    z<NnetIo>namez	Name of NnetIo: rm   matrixz	</NnetIo>)r'   r   r   r   r   )r   nnet_ior   rm   rJ   rO   r   r   r   read_nnet_io  s   


r   c                 C   sJ   t | d t | d t| }g }t|D ]	}|t|  qt | d |S )z 
        Reference to function Read in class NnetExample
        Return a list of dict, each dict represent a NnetIo object
        a NnetExample contains several NnetIo
    z	<Nnet3Eg>z<NumIo>z
</Nnet3Eg>)r'   r5   rd   rj   r   )r   num_ioegsra   r   r   r   read_nnet3_egs  s   


r   c                 c   (    	 t | }|s
dS t| }||fV  q)zh 
        Usage:
        for key, eg in read_nnet3_egs(ark):
            print(key)
            ...
    TN)r(   r   )r   r   r   r   r   r   read_nnet3_egs_ark  s   
r   c                 c   r   )z 
    Read float matrix/vector
        Usage:
        for key, mat in read_ark(ark):
            print(key)
            ...
    TN)r(   r   )r   r   objr   r   r   read_float_ark  s   
r   c                 c   r   )z(
    Read int23 vector (alignments)
    TN)r(   rg   )r   r   alir   r   r   read_int32_ali  s   
r   )r	   )F)%__doc__r0   numpyrE   r   r   r   r   r   r   r%   r'   r(   r*   r5   r9   r>   rP   rT   rZ   r^   rg   ro   rr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   <module>   sH   

	
0
