o
    2wi                     @   sF   d dl Z d dlmZmZmZ d dlmZ d dlmZ G dd dZ	dS )    N)ListOptionalUnion)Cut)SequentialJsonlWriterc                   @   s   e Zd ZdZ	d!dedee defddZed	e	fd
dZ
dd Zdd Zdd Zdd Zdd Zed	ee fddZd"deeef de	d	dfddZd"dede	d	dfdd ZdS )#JsonlShardWriterad  
    JsonlShardWriter writes Cuts or dicts into multiple JSONL file shards.
    The JSONL can be compressed with gzip if the file extension ends with ``.gz``.

    Example::

        >>> with JsonlShardWriter("some_dir/cuts.%06d.jsonl.gz", shard_size=100) as w:
        ...     for cut in ...:
        ...         w.write(cut)

    It would create files such as ``some_dir/cuts.000000.jsonl.gz``, ``some_dir/cuts.000001.jsonl.gz``, etc.
    The starting shard offset can be set using ``shard_offset`` parameter. The writer starts from 0 by default.

    See also: :class:`~lhotse.shar.writers.tar.TarWriter`
      r   pattern
shard_sizeshard_offsetc                 C   s6   || _ | js|d urtd || _|| _|   d S )NzSharding is disabled because `pattern` doesn't contain a formatting marker (e.g., '%06d'), but shard_size is not None - ignoring shard_size.)r	   sharding_enabledloggingwarningr
   initial_shard_offsetreset)selfr	   r
   r    r   T/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/lhotse/shar/writers/cut.py__init__   s   zJsonlShardWriter.__init__returnc                 C   s
   d| j v S )N%r	   r   r   r   r   r   &   s   
z!JsonlShardWriter.sharding_enabledc                 C   s$   d | _ d | _| j| _d| _d| _d S )Nr   )fnamestreamr   
num_shards	num_itemsnum_items_totalr   r   r   r   r   *   s
   
zJsonlShardWriter.resetc                 C   s   |    | S N)r   r   r   r   r   	__enter__1   s   zJsonlShardWriter.__enter__c                 O   s   |    d S r   )close)r   argskwargsr   r   r   __exit__5   s   zJsonlShardWriter.__exit__c                 C   s   | j d ur| j   d S d S r   )r   r    r   r   r   r   r    8   s   
zJsonlShardWriter.closec                 C   sJ   |    | jr| j| j | _|  jd7  _n| j| _t| j| _d| _d S )N   r   )r    r   r	   r   r   r   r   r   r   r   r   r   _next_stream<   s   
zJsonlShardWriter._next_streamc                    s*    j r fddt j jD S  jgS )Nc                    s   g | ]} j | qS r   r   ).0ir   r   r   
<listcomp>L   s    z1JsonlShardWriter.output_paths.<locals>.<listcomp>)r   ranger   r   r	   r   r   r   r   output_pathsI   s
   
zJsonlShardWriter.output_pathsFdataflushNc                 C   sb   | j dks| jr| jdkr| j| j dkr|   | jj||d |  jd7  _|  j d7  _ d S )Nr   r,   r$   )r   r   r   r
   r%   r   write)r   r+   r,   r   r   r   r.   R   s   

zJsonlShardWriter.writecut_idc                 C   s   | j d|i|d d S )Nr/   r-   )r.   )r   r/   r,   r   r   r   write_placeholderc   s   z"JsonlShardWriter.write_placeholder)r   r   )F)__name__
__module____qualname____doc__strr   intr   propertyboolr   r   r   r#   r    r%   r   r*   r   r   dictr.   r0   r   r   r   r   r      s*    
 r   )
r   typingr   r   r   
lhotse.cutr   lhotse.serializationr   r   r   r   r   r   <module>   s
    