o
    Si=                     @   s   d dl Z d dlZd dlmZ d dlmZmZmZmZ d dl	Z	d dl
Zd dlmZ d dlmZmZ d dlmZ d dlmZ G dd	 d	ZdS )
    N)BytesIO)ListLiteralOptionalUnion)Features)ArrayTemporalArray)to_shar_placeholder)	TarWriterc                   @   s   e Zd ZdZ				ddedee ded	 d
edef
ddZdd Z	dd Z
dd Zedee fddZdeddfddZdedejdeeeef ddfddZdS ) ArrayTarWriteraj  
    ArrayTarWriter writes numpy arrays or PyTorch tensors into a tar archive
    that is automatically sharded.

    For floating point tensors, we support the option to use `lilcom` compression.
    Note that `lilcom` is only suitable for log-space features such as log-Mel filter banks.

    Example::

        >>> with ArrayTarWriter("some_dir/fbank.%06d.tar", shard_size=100, compression="lilcom") as w:
        ...     w.write("fbank1", fbank1_array)
        ...     w.write("fbank2", fbank2_array)  # etc.

    It would create files such as ``some_dir/fbank.000000.tar``, ``some_dir/fbank.000001.tar``, etc.
    The starting shard offset can be set using ``shard_offset`` parameter. The writer starts from 0 by default.

    It's also possible to use ``ArrayTarWriter`` with automatic sharding disabled::

        >>> with ArrayTarWriter("some_dir/fbank.tar", shard_size=None, compression="numpy") as w:
        ...     w.write("fbank1", fbank1_array)
        ...     w.write("fbank2", fbank2_array)  # etc.

    See also: :class:`~lhotse.shar.writers.tar.TarWriter`, :class:`~lhotse.shar.writers.audio.AudioTarWriter`
      numpyr   pattern
shard_sizecompression)r   lilcomlilcom_tick_powershard_offsetc                 C   s    || _ t|||d| _|| _d S )N)r   )r   r   
tar_writerr   )selfr   r   r   r   r    r   M/home/ubuntu/.local/lib/python3.10/site-packages/lhotse/shar/writers/array.py__init__)   s   
zArrayTarWriter.__init__c                 C   s   | j   | S N)r   	__enter__r   r   r   r   r   5   s   
zArrayTarWriter.__enter__c                 C   s   |    d S r   )close)r   exc_typeexc_valexc_tbr   r   r   __exit__9   s   zArrayTarWriter.__exit__c                 C   s   | j   d S r   )r   r   r   r   r   r   r   <   s   zArrayTarWriter.closereturnc                 C   s   | j jS r   )r   output_pathsr   r   r   r   r$   ?   s   zArrayTarWriter.output_pathskeyNc                 C   s4   | j | dt  | j j| dt dd d S )Nz.nodataz.nometaFcount)r   writer   )r   r%   r   r   r   write_placeholderC   s   z ArrayTarWriter.write_placeholdervaluemanifestc                 C   s   | j dkr t|jtjsJ dtj|| jd}t|}d}nt }tj	||dd d}| j
|| | t|}t }tt| td|d	 |d
 | j
j| d|dd d S )Nr   z7Lilcom compression supports only floating-point arrays.)
tick_powerz.llcF)allow_picklez.npyzutf-8)filer   z.jsonr&   )r   np
issubdtypedtypefloatingr   compressr   r   saver   r(   r
   printjsondumpsto_dictcodecs	getwriterseek)r   r%   r*   r+   datastreamextjson_streamr   r   r   r(   G   s*   

zArrayTarWriter.write)r   r   r   r   )__name__
__module____qualname____doc__strr   intr   r   r   r"   r   propertyr   r$   r)   r/   ndarrayr   r   r   r	   r(   r   r   r   r   r      s@    
r   )r9   r6   ior   typingr   r   r   r   r   r   r/   lhotser   lhotse.arrayr   r	   lhotse.shar.utilsr
   lhotse.shar.writers.tarr   r   r   r   r   r   <module>   s    