o
    qoiu&                     @   s>  d Z ddlZddlZddlZddlZddlZddlZddlZddlZddl	m
Z
 ddlZddlmZ	 g dZd'dd	Zd
d Zdd ZG dd deZdd ddfddZG dd deZG dd deZG dd deZG dd deZG dd deZG dd  d eZG d!d" d"eZG d#d$ d$eZG d%d& d&eZdS )(z
    Simple wrapper for _io_kernel.py
    - ArchiveReader
    - ScriptReader
    - ArchiveWriter
    - AlignArchiveReader
    - AlignScriptReader
    - Nnet3EgsReader
    N)TextIOWrapper   )
_io_kernel)ArchiveReaderScriptReaderAlignArchiveReaderAlignScriptReaderArchiveWriterNnet3EgsReaderReaderTc                 C   sd   |dvrt dtj| dtjd}dd }|r*tj|| |fd}d|_|  |jS || | |jS )N)rbrz Now only support input from pipeT)shellstdoutc                 S   s6   |   |jdkrtd| |j t  d S d S )Nr   z$Command "{0}" exited with status {1})wait
returncodewarningswarnformat_threadinterrupt_main)commandp r   H/home/ubuntu/.local/lib/python3.10/site-packages/kaldi_python_io/inst.pybackground_command_waiter'   s   
z-pipe_fopen.<locals>.background_command_waiter)targetargs)	RuntimeError
subprocessPopenPIPE	threadingThreaddaemonstartr   )r   mode
backgroundr   r   threadr   r   r   
pipe_fopen!   s   
r)   c                 C   s   |dvrt dj|d| sdS |  } | dkr2|dv r'|dkr$tjjS tjS |dkr/tjjS tjS | d	 d
krOt| dd	 ||dkd}|dkrK|S t|S |dv r`t	j
| s`td| t| |S )z
    Extend file open function, to support 
        1) "-", which means stdin/stdout
        2) "$cmd |" which means pipe.stdout
    )wr   wbr   zUnknown open mode: {mode})r&   N-)r*   r+   r+   r   |)r'   )r   r   zCould not find common file: {})
ValueErrorr   rstripsysr   bufferstdinr)   r   ospathexistsFileNotFoundErroropen)fnamer&   pinr   r   r   _fopen9   s"   
r;   c                 C   s0   | dkr|r| d dkr|   dS dS dS dS )z
    Extend file close function, to support
        1) "-", which means stdin/stdout
        2) "$cmd |" which means pipe.stdout
        3) None type
    r,   r-   r.   N)close)r9   fdr   r   r   _fcloseS   s   r>   c                   @   s(   e Zd ZdZdd Zdd Zdd ZdS )	ext_openzg
    To make _fopen/_fclose easy to use like:
    with open("egs.scp", "r") as f:
        ...
    
    c                 C   s   || _ || _d S N)r9   r&   )selfr9   r&   r   r   r   __init__e   s   
zext_open.__init__c                 C   s   t | j| j| _| jS r@   )r;   r9   r&   r=   rA   r   r   r   	__enter__i   s   zext_open.__enter__c                 G   s   t | j| j d S r@   )r>   r9   r=   )rA   r   r   r   r   __exit__m   s   zext_open.__exit__N)__name__
__module____qualname____doc__rB   rD   rE   r   r   r   r   r?   ^   s
    r?   c                 C      | S r@   r   )xr   r   r   <lambda>r   s    rL      c                 C   s   t  }d}t| d|}| }|D ]l}|  }	|d7 }|	d dkr2|	d d|	dd }
}n7t|	}|dkr>||ksD|rU|dk rUtd	|  d
d|dd|  |dkr^|	\}
}n|	d |	dd }
}|
|v rwtd|
 d|  ||||
< qW d   |S 1 sw   Y  |S )z
    Parse kaldi's script(.scp) file with supported for stdin
    WARN: last line of scripts could not be None and with "
" end
    r   r   r   r-   r.    NrM   zFor z, format error zin line[dz]: zDuplicate key 'z' exists in )	dictr?   	readlinesstripsplitjoinlenr   r/   )scp_pathvalue_processor
num_tokensrestrictscp_dictlinef	raw_linesraw_line
scp_tokenskeyvalue	token_lenr   r   r   
parse_scpsq   s6   

rc   c                   @   s@   e Zd ZdZdd Zdd Zdd Zdd	 Zd
d Zdd Z	dS )r   zK
        Base class for sequential/random accessing, to be implemented
    c                 K   s&   t |fi || _t| j | _d S r@   )rc   
index_dictlistkeys
index_keys)rA   rV   kwargsr   r   r   rB      s   zReader.__init__c                 C   s
   | j | S r@   rd   rA   r`   r   r   r   _load      
zReader._loadc                 C   s
   t | jS r@   )rU   rd   rC   r   r   r   __len__   rl   zReader.__len__c                 C   s
   || j v S r@   ri   rj   r   r   r   __contains__   rl   zReader.__contains__c                 c   s"    | j D ]
}|| |fV  qd S r@   )rg   rk   rj   r   r   r   __iter__   s   
zReader.__iter__c                 C   s   t |ttfvrtdt |t |tkr1t| j}||ks$|dk r,td||| j| }|| jvr=td|| 	|S )NzUnsupported index type: {}r   z)Interger index out of range, {:d} vs {:d}zMissing utterance {}!)
typeintstr
IndexErrorr   rU   rg   KeyErrorrd   rk   )rA   indexnum_uttsr   r   r   __getitem__   s   



zReader.__getitem__N)
rF   rG   rH   rI   rB   rk   rm   rn   ro   rw   r   r   r   r   r      s    r   c                   @   s2   e Zd ZdZdddZdd Zdd Zd	d
 ZdS )Writerz+
        Base class, to be implemented
    Nc                 C   sH   || _ || _|dkr|rtd d | _ t| jd| _t| j d| _d S )Nr,   z;Ignore .scp output discriptor cause dump archives to stdoutr+   r*   )rV   ark_pathr   r   r;   ark_filescp_filerA   ry   rV   r   r   r   rB      s   zWriter.__init__c                 C   rJ   r@   r   rC   r   r   r   rD         zWriter.__enter__c                 C   s    t | j| j t | j| j d S r@   )r>   ry   rz   rV   r{   )rA   rp   ra   tracer   r   r   rE      s   zWriter.__exit__c                 C      t r@   NotImplementedError)rA   r`   ra   r   r   r   write   r}   zWriter.writer@   )rF   rG   rH   rI   rB   rD   rE   r   r   r   r   r   rx      s    
rx   c                   @   s    e Zd ZdZdd Zdd ZdS )SequentialReaderzB
        Base class for sequential reader(only for .ark/.egs)
    c                 C   s
   || _ d S r@   )ark_or_piperA   r   r   r   r   rB      rl   zSequentialReader.__init__c                 C   r   r@   r   rC   r   r   r   ro      r}   zSequentialReader.__iter__N)rF   rG   rH   rI   rB   ro   r   r   r   r   r      s    r   c                       s8   e Zd ZdZ fddZdd Zdd Zdd	 Z  ZS )
r   z>
        Reader for kaldi's scripts(for BaseFloat matrix)
    c                    s(   t  | _dd }tt| j||d d S )Nc                 S   sF   |  d}t|dkrtdd|dd t|d }}||fS )N:r   z"Unsupported scripts address formatr   r-   )rS   rU   r/   rT   rq   )addr
addr_tokenr5   offsetr   r   r   addr_processor   s
   
 z-ScriptReader.__init__.<locals>.addr_processor)rW   )rP   fmgrsuperr   rB   )rA   ark_scpr   	__class__r   r   rB      s
   
zScriptReader.__init__c                 C   s   | j D ]	}| j |   qd S r@   )r   r<   )rA   namer   r   r   __del__   s   
zScriptReader.__del__c                 C   s2   || j vrt|d| j |< | j | }|| |S Nr   )r   r8   seek)rA   objr   arkfr   r   r   _open   s
   


zScriptReader._openc                 C   ,   | j | \}}| ||}tj|dd}|S NT)direct_access)rd   r   ioread_float_mat_vecrA   r`   r5   r   r=   r   r   r   r   rk         zScriptReader._load)	rF   rG   rH   rI   rB   r   r   rk   __classcell__r   r   r   r   r      s    r   c                       (   e Zd ZdZ fddZdd Z  ZS )r   z@
        Sequential Reader for Kalid's archive(.ark) object
    c                       t t| | d S r@   )r   r   rB   r   r   r   r   rB   	     zArchiveReader.__init__c                 c   R    t | jd}t|D ]	\}}||fV  qW d    d S 1 s"w   Y  d S r   )r?   r   r   read_float_ark)rA   r=   r`   matr   r   r   ro        "zArchiveReader.__iter__rF   rG   rH   rI   rB   ro   r   r   r   r   r   r         r   c                       r   )r
   z=
        Sequential Reader for Kalid's nnet3 .egs object
    c                    r   r@   )r   r
   rB   r   r   r   r   rB     r   zNnet3EgsReader.__init__c                 c   r   r   )r?   r   r   read_nnet3_egs_ark)rA   r=   r`   egsr   r   r   ro     r   zNnet3EgsReader.__iter__r   r   r   r   r   r
     r   r
   c                       r   )r   z3
        Reader for kaldi's alignment archives
    c                    r   r@   )r   r   rB   r   r   r   r   rB   #  r   zAlignArchiveReader.__init__c                 c   r   r   )r?   r   r   read_int32_ali)rA   r=   r`   alir   r   r   ro   &  r   zAlignArchiveReader.__iter__r   r   r   r   r   r     r   r   c                       r   )r   zN
        Reader for kaldi's scripts(for int32 vector, such as alignments)
    c                    r   r@   )r   r   rB   )rA   r   r   r   r   rB   0  r   zAlignScriptReader.__init__c                 C   r   r   )rd   r   r   read_int32_vecr   r   r   r   rk   3  r   zAlignScriptReader._load)rF   rG   rH   rI   rB   rk   r   r   r   r   r   r   ,  r   r   c                       s*   e Zd ZdZd fdd	Zdd Z  ZS )r	   zJ
        Writer for kaldi's archive && scripts (for BaseFloat matrix)
    Nc                    s   t t| || d S r@   )r   r	   rB   r|   r   r   r   rB   >  s   zArchiveWriter.__init__c                 C   sn   t | j| | jdkr| j }t | j t | j| | jr5d|t	j
| j|}| j| d S d S )Nr,   z{0}	{1}:{2}
)r   write_tokenrz   ry   tellwrite_binary_symbolwrite_float_mat_vecr{   r   r4   r5   abspathr   )rA   r`   r   r   recordr   r   r   r   A  s   

zArchiveWriter.writer@   )rF   rG   rH   rI   rB   r   r   r   r   r   r   r	   :  s    r	   )T)rI   r4   r1   globrandomr   r   r"   r   r   r   numpynp r   __all__r)   r;   r>   objectr?   rc   r   rx   r   r   r   r
   r   r   r	   r   r   r   r   <module>   s<   


!*#