o
    2wi                     @   s  U d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
Z
d dlZd dlZd dlmZmZ d dlmZmZ d dlmZmZmZ d dlmZ d dlmZmZ d dlmZ d dlmZmZm Z m!Z!m"Z"m#Z#m$Z$m%Z%m&Z&m'Z'm(Z(m)Z)m*Z* d d	l+m,Z,m-Z- d dl.Z.d dl/Z0d dl1Z1d d
l2m3Z3 e*ee4f Z5e)dZ6e7Z8e7Z9dZ:dZ;e<e;Z=d Z>da?e&e e@d< G dd dZAde4deBfddZCdeDfddZEdd ZFde e4ef fddZGG dd dZHeG dd dZId ed!edeBfd"d#ZJdd%ed&ed'e7deBfd(d)ZKd*e8d+e8d,e8deDfd-d.ZL	/dd0e5d1e4d2e&eB de$e fd3d4ZMed5eDfd6d7ZNd8e6de6fd9d:ZO	;	<	 dd=e"e d>e5d?eDd@e4dAeDdBeDde$fdCdDZP	EddFe'e dGeDdHeBdIeBde$e$e  f
dJdKZQdLe8d,e8dMeDdeDfdNdOZRdPeDd,e8dMeDdeDfdQdRZSdSe8dTe8dUe8deDfdVdWZTdeBfdXdYZU	E		EddZe4d[e5d\eBd]e&eD d^eBddfd_d`ZVdaedbefdcddZW	e	ddEdfdged0e5dhe&e$e4  dieBddf
djdkZX	e	ddled0e5dhe&e$e4  ddfdmdnZYG dodp dpeZZdPeDdqe7deDfdrdsZ[efdLe8dMe*eDe7f deDfdtduZ\dve8dMeDde8fdwdxZ]	yddze8dLe8d{e8d|e%d} de(e8e8f f
d~dZ^			Edde"e4 d@e4de4deBde&e4 f
ddZ_deBfddZ`	dd,e8dMeDde&eD de(eDeDf fddZa	ddMeDde&eD de(eDeDf fddZbdeBfddZcde*e6e'e6 f de*e6e'e6 f deBfddZdde4deBfddZed ed!ede7fddZfde&e dedefddZgde*ee'e f de$e fddZhdedefddZidde4de&eD deDfddZjde1jkde1jlfddZmdd ZnG dd deoZpG dd deqZrdd ZsG dd dZt		dde#e6 deDde&eju de!e6ddf fddZvdd ZwG dd dZxG dd de.jyZzdeBfddZ{de*eDe%d f dejufddÄZ|dEZ}deBfddńZ~ddǄ ZdS )    N)AbstractContextManagercontextmanager)asdict	dataclass)ROUND_HALF_DOWNROUND_HALF_UPDecimal)chain)ceilisclose)Path)AnyCallableDict	GeneratorIterableIteratorListLiteralOptionalSequenceTupleTypeVarUnion)urlparse
urlunparse)tqdmTi   g|=_lhotse_uuidc                   @   s\   e Zd ZU dZdZee ed< dZdZ	ee
 ed< eddee fddZedd	d
ZdS )	SmartOpena  Wrapper class around smart_open.open method

    The smart_open.open attributes are cached as classed attributes - they play the role of singleton pattern.

    The SmartOpen.setup method is intended for initial setup.
    It imports the `open` method from the optional `smart_open` Python package,
    and sets the parameters which are shared between all calls of the `smart_open.open` method.

    If you do not call the setup method it is called automatically in SmartOpen.open with the provided parameters.

    The example demonstrates that instantiating S3 `session.client` once,
    instead using the defaults and leaving the smart_open creating it every time
    has dramatic performance benefits.

    Example::

        >>> import boto3
        >>> session = boto3.Session()
        >>> client = session.client('s3')
        >>> from lhotse.utils import SmartOpen
        >>>
        >>> if not slow:
        >>>     # Reusing a single client speeds up the smart_open.open calls
        >>>     SmartOpen.setup(transport_params=dict(client=client))
        >>>
        >>> # Simulating SmartOpen usage as in Lhotse data structures: AudioSource, Features, etc.
        >>> for i in range(1000):
        >>>     SmartOpen.open(s3_url, 'rb') as f:
        >>>         source = f.read()
    Ntransport_paramszPlease do 'pip install smart_open' - if you are using S3/GCP/Azure/other cloud-specific URIs, do 'pip install smart_open[s3]' (or smart_open[gcp], etc.) instead.
smart_openc                 C   sf   zddl m} W n ty   t| jw | jd ur+| j|kr+td| j d|  || _|| _ d S )Nr   )openzSSmartOpen.setup second call overwrites existing transport_params with new version	
z	
vs	
)r!   r"   ImportErrorimport_err_msgr    loggingwarning)clsr    sm_open r)   I/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/lhotse/utils.pysetupd   s    



zSmartOpen.setuprbc                 K   s<   | j d u r| j|d |r|n| j}| j |f||d|S )N)r    )moder    )r!   r+   r    )r'   urir-   r    kwargsr)   r)   r*   r"   u   s   
zSmartOpen.openN)r,   N)__name__
__module____qualname____doc__r    r   r   __annotations__r$   r!   r   classmethoddictr+   r"   r)   r)   r)   r*   r   <   s   
 r   valuereturnc                 C   s4   zt | }t|jot|jW S  ty   Y dS w )NF)r   boolschemenetlocAttributeError)r8   resultr)   r)   r*   is_valid_url   s   r?   random_seedc                    sD   t |  tj |  tj |  t    |   fddadS )z
    Set the same random seed for the libraries and modules that Lhotse interacts with.
    Includes the ``random`` module, numpy, torch, and ``uuid4()`` function defined in this file.
    c                      s   t j ddS )N   )int)uuidUUIDgetrandbitsr)   rdr)   r*   <lambda>   s    z!fix_random_seed.<locals>.<lambda>N)randomseednptorchmanual_seedRandomr   )r@   r)   rF   r*   fix_random_seed   s   

rO   c                   C   s   t durt  S t S )z
    Generates uuid4's exactly like Python's uuid.uuid4() function.
    When ``fix_random_seed()`` is called, it will instead generate deterministic IDs.
    N)r   rC   uuid4r)   r)   r)   r*   rP      s   rP   c                 C   s   dd }t | |dS )z
    Recursively convert a dataclass into a dict, removing all the fields with `None` value.
    Intended to use in place of dataclasses.asdict(), when the null values are not desired in the serialized document.
    c                 S   sD   t | }g }| D ]\}}|d u r|| q
|D ]}||= q|S r0   )r7   itemsappend)
collectiondremove_keyskeyvalkr)   r)   r*   non_null_dict_factory   s   
z,asdict_nonull.<locals>.non_null_dict_factory)dict_factory)r   )dclassrY   r)   r)   r*   asdict_nonull   s   
r\   c                   @   s   e Zd Zdd Zdd ZdS )SetContainingAnythingc                 C      dS NTr)   )selfitemr)   r)   r*   __contains__      z"SetContainingAnything.__contains__c                 C   r^   r_   r)   )r`   iterabler)   r)   r*   intersection   rc   z"SetContainingAnything.intersectionN)r1   r2   r3   rb   re   r)   r)   r)   r*   r]      s    r]   c                   @   s4   e Zd ZU dZeed< eed< edefddZdS )TimeSpanz(Helper class for specifying a time span.startendr9   c                 C   s   | j | j S r0   )rh   rg   r`   r)   r)   r*   duration      zTimeSpan.durationN)r1   r2   r3   r4   Secondsr5   propertyrj   r)   r)   r)   r*   rf      s   
 rf   lhsrhsc                 C   s8   | j |jk o|j | jk ot| j |j ot|j | j S )zAIndicates whether two time-spans/segments are overlapping or not.)rg   rh   r   )rn   ro   r)   r)   r*   overlaps   s   
rp   MbP?spanningspanned	tolerancec                 C   s2   | j | |j   ko|j  ko| j| kS   S )zjIndicates whether the left-hand-side time-span/segment covers the whole right-hand-side time-span/segment.)rg   rh   )rr   rs   rt   r)   r)   r*   	overspans   s   ru   	time_diffframe_lengthframe_shiftc                 C   s"   t | drdS tt| | | S )zUConvert duration to an equivalent number of frames, so as to not exceed the duration.        r   )r   rB   r
   )rv   rw   rx   r)   r)   r*   time_diff_to_num_frames   s   
rz   Tpathpatternstrictc                 C   sT   t | } |  sJ d|  t| |}t|dks(|r(J d| d|  |S )z
    Asserts that ``path`` exists, is a directory and contains at least one file satisfying the ``pattern``.
    If `strict` is False, then zero matches are allowed.

    :returns: a list of paths to files matching the ``pattern``.
    zNo such directory: r   zNo files matching pattern "z" in directory: )r   is_dirsortedrgloblen)r{   r|   r}   matchesr)   r)   r*   check_and_rglob   s   	r   
stack_sizec              	   c   s@    ddl }| }||  zdV  W || dS || w )z
    Code executed in this context will be able to recurse up to the specified recursion limit
    (or will hit a StackOverflow error if that number is too high).

    Usage:
        >>> with recursion_limit(1000):
        >>>     pass
    r   N)sysgetrecursionlimitsetrecursionlimit)r   r   old_sizer)   r)   r*   recursion_limit   s   

r   dataclass_objc                 K   s   t | di i | j|S )a<  
    Returns a new object with the same member values.
    Selected members can be overwritten with kwargs.
    It's supposed to work only with dataclasses.
    It's 10X faster than the other methods I've tried...

    Example:
        >>> ts1 = TimeSpan(start=5, end=10)
        >>> ts2 = fastcopy(ts1, end=12)
    Nr)   )type__dict__)r   r/   r)   r)   r*   fastcopy  s   r       it
output_dir
chunk_sizeprefix
num_digits	start_idxc              	   C   sV  ddl m} t|}|jddd |dkrd}|}g }t| }	zt|	}
W n ty0   | Y S w 	 zhz@d}| |}||| d| d}||k r_|	|
 |d	7 }t|	}
||k sNW d
   n1 siw   Y  |d	7 }W n ty   Y W |
 }|d
ur|| |S w W |
 }|d
ur|| n|
 }|d
ur|| w w q2)a  
    Splits a manifest (either lazily or eagerly opened) into chunks, each
    with ``chunk_size`` items (except for the last one, typically).

    In order to be memory efficient, this implementation saves each chunk
    to disk in a ``.jsonl.gz`` format as the input manifest is sampled.

    .. note:: For lowest memory usage, use ``load_manifest_lazy`` to open the
        input manifest for this method.

    :param it: any iterable of Lhotse manifests.
    :param output_dir: directory where the split manifests are saved.
        Each manifest is saved at: ``{output_dir}/{prefix}.{split_idx}.jsonl.gz``
    :param chunk_size: the number of items in each chunk.
    :param prefix: the prefix of each manifest.
    :param num_digits: the width of ``split_idx``, which will be left padded with zeros to achieve it.
    :param start_idx: The split index to start counting from (default is ``0``).
    :return: a list of lazily opened chunk manifests.
    r   )SequentialJsonlWriterT)parentsexist_okr   split.z	.jsonl.gz   N)lhotse.serializationr   r   mkdiriternextStopIterationzfillwith_suffixwriteopen_manifestrR   )r   r   r   r   r   r   r   	split_idxsplitsrQ   ra   writtenidxwriter	subcutsetr)   r)   r*   split_manifest_lazy  s\   


r   Fseq
num_splitsshuffle	drop_lastc           
         s   t t}||krtd| d| |rt ||  || }|r2dg| }dg| }nt td|d |g||   }dg|dd  } fddtt|||D }fd	d|D }	|	S )
a  
    Split a sequence into ``num_splits`` equal parts. The element order can be randomized.
    Raises a ``ValueError`` if ``num_splits`` is larger than ``len(seq)``.

    :param seq: an input iterable (can be a Lhotse manifest).
    :param num_splits: how many output splits should be created.
    :param shuffle: optionally shuffle the sequence before splitting.
    :param drop_last: determines how to handle splitting when ``len(seq)`` is not divisible
        by ``num_splits``. When ``False`` (default), the splits might have unequal lengths.
        When ``True``, it may discard the last element in some splits to ensure they are
        equally long.
    :return: a list of length ``num_splits`` containing smaller lists (the splits).
    z(Cannot split iterable into more chunks (z) than its number of items r   r   Nc                    s.   g | ]\}}}|  | |d    | gqS )r   r)   ).0ibegin_shift	end_shift)r   r)   r*   
<listcomp>  s    z"split_sequence.<locals>.<listcomp>c                    s   g | ]
\}} || qS r)   r)   )r   beginrh   )r   r)   r*   r     s    )listr   
ValueErrorrI   r   rangezip)
r   r   r   r   	num_items
num_shifts
end_shiftsbegin_shiftssplit_indicesr   r)   )r   r   r*   split_sequence`  s0   



r   rj   sampling_ratec                 C   s0   t | | }t || }t||d  | }|S )zS
    Compute the number of frames from duration and frame_shift in a safe way.
       roundrB   )rj   rx   r   num_samples
window_hop
num_framesr)   r)   r*   compute_num_frames  s   r   r   c                 C   s$   t || }t| |d  | }|S )z\
    Compute the number of frames from number of samples and frame_shift in a safe way.
    r   r   )r   rx   r   r   r   r)   r)   r*   compute_num_frames_from_samples  s   r   sig_lenwin_lenhopc                 C   s:   t t| | d| }| ||  dk}| dk|t|  S )a,  
    Return a number of windows obtained from signal of length equal to ``sig_len``
    with windows of ``win_len`` and ``hop`` denoting shift between windows.
    Examples:
    ```
      (sig_len,win_len,hop) -> num_windows # list of windows times
      (1, 6.1, 3) -> 1  # 0-1
      (3, 1, 6.1) -> 1  # 0-1
      (3, 6.1, 1) -> 1  # 0-3
      (5.9, 1, 3) -> 2  # 0-1, 3-4
      (5.9, 3, 1) -> 4  # 0-3, 1-4, 2-5, 3-5.9
      (6.1, 1, 3) -> 3  # 0-1, 3-4, 6-6.1
      (6.1, 3, 1) -> 5  # 0-3, 1-4, 2-5, 3-6, 4-6.1
      (5.9, 3, 3) -> 2  # 0-3, 3-5.9
      (6.1, 3, 3) -> 3  # 0-3, 3-6, 6-6.1
      (0.0, 3, 3) -> 0
    ```
    :param sig_len: Signal length in seconds.
    :param win_len: Window length in seconds
    :param hop: Shift between windows in seconds.
    :return: Number of windows in signal.
    r   )r
   maxrB   )r   r   r   nbr)   r)   r*   compute_num_windows  s   r   c                  C   s   dd l } t| jdS )Nr   READTHEDOCS)osr:   environget)r   r)   r)   r*   during_docs_build  s   r   urlfilenameforce_downloadcompleted_file_size
missing_okc                    s  t j}|r&|rtd  t  t j}|r%||kr%d S nd}ddi}dd|i|}tj	j
| |d}	|rAdnd	}
t|
  fd
d}z||	| W n tjjy } zv|jdkr||r|t|  d t r{t  nU|jdkr|jdd }|d u rtj	j
| dd}tj	|}|jdddkr|jd}d| }|d| krtd  ntd |tj	j
| |dd n|W Y d }~nd }~ww W d    d S W d    d S 1 sw   Y  d S )NzQRemoving existing file and downloading from scratch because force_download=True: r   z
User-AgentzuMozilla/5.0 (Macintosh; Intel Mac OS X 10_12_4) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.1 Safari/603.1.30Rangez	bytes={}-)headerszr+bwbc              	      s     |d    tj| O}d}t|jdd| }t||ddt	d}	 |
|}|s3n | |t| q+W d    n1 sJw   Y  W d    d S W d    d S 1 sbw   Y  d S )Nr   i   zcontent-lengthBT)totalinitialunit
unit_scaledesc)seektruncateurllibrequesturlopenrB   r   r   r   strreadr   updater   )rqsizeresponser   
total_sizepbarchunkfr   r)   r*   	_download  s2   

"z%resumable_download.<locals>._downloadi  z0 does not exist (error 404). Skipping this file.i  zContent-RangeHEAD)methodzAccept-RangesnonezContent-Lengthzbytes */zFile already downloaded: zLServer does not support range requests - attempting downloading from scratch)r   r{   existsr%   infounlinkgetsizeformatr   r   Requestr"   error	HTTPErrorcoder&   r   is_fileremover   r   r   )r   r   r   r   r   file_exists	file_size
ua_headersr   reqr-   r   econtent_rangehead_reqhead_rescontent_lengthr)   r   r*   resumable_download  sj   




" r  	directorytargetc                 C   s   |   }|  }||jv S r0   )resolver   )r  r  abs_directory
abs_targetr)   r)   r*   _is_within_directory6  s   
r  r   numeric_ownertarmembersr  c                C   sF   t |}|  D ]}||j }t||stdq| j|||d dS )z
    Extracts a tar file in a safe way, avoiding path traversal attacks.
    See: https://github.com/lhotse-speech/lhotse/pull/872
    z$Attempted Path Traversal in Tar Filer  N)r   
getmembersnamer  	Exception
extractall)r  r{   r  r  membermember_pathr)   r)   r*   safe_extract>  s   

r  rarc                 C   sB   t |}|  D ]}||j }t||stdq| || dS )zM
    Extracts a rar file in a safe way, avoiding path traversal attacks.
    z$Attempted Path Traversal in Rar FileN)r   infolistr   r  r  r  )r  r{   r  r  r  r)   r)   r*   safe_extract_rarT  s   	

r   c                   @   s*   e Zd ZdZd	ddZdd Zdd ZdS )
nullcontexta  Context manager that does no additional processing.

    Used as a stand-in for a normal context manager, when a particular
    block of code is only sometimes used with a normal context manager:

    cm = optional_cm if condition else nullcontext()
    with cm:
        # Perform operation, using optional_cm if condition is True

    Note(pzelasko): This is copied from Python 3.7 stdlib so that we can use it in 3.6.
    Nc                 O   s
   || _ d S r0   enter_result)r`   r#  argsr/   r)   r)   r*   __init__t  s   
znullcontext.__init__c                 C      | j S r0   r"  ri   r)   r)   r*   	__enter__w  s   znullcontext.__enter__c                 G      d S r0   r)   )r`   excinfor)   r)   r*   __exit__z  rc   znullcontext.__exit__r0   )r1   r2   r3   r4   r%  r'  r*  r)   r)   r)   r*   r!  g  s
    
r!  factorc                 C   s2   |dkrt nt}ttt| | ddjd|dS )zHMimicks the behavior of the speed perturbation on the number of samples.      ?r   ndigitsr   rounding)r   r   rB   r   r   quantize)r   r+  r0  r)   r)   r*   perturb_num_samples~  s   r2  c                 C   s"   t tt| | ddjd|dS )a  
    Convert a time quantity to the number of samples given a specific sampling rate.
    Performs consistent rounding up or down for ``duration`` that is not a multiply of
    the sampling interval (unlike Python's built-in ``round()`` that implements banker's rounding).
    r   r-  r   r/  )rB   r   r   r1  )rj   r   r0  r)   r)   r*   compute_num_samples  s
   r3  dursc                    s   t  fdd|D }|  S )z
    Adds two durations in a way that avoids floating point precision issues.
    The durations in seconds are first converted to audio sample counts,
    then added, and finally converted back to floating point seconds.
    c                 3   s    | ]	}t | d V  qdS )r   N)r3  )r   rT   r5  r)   r*   	<genexpr>  s    
z add_durations.<locals>.<genexpr>sum)r   r4  tot_num_samplesr)   r5  r*   add_durations  s   r:  centerrg   new_duration	direction)r;  leftrightrI   c                 C   s   ||kr| |fS |dkr| || d  }n(|dkr | ||  }n|dkr'| }n|dkr6t | ||  | }ntd| |dk rKt|| dd	}d}t|dd	|fS )
a  
    Compute the new value of "start" for a time interval characterized by ``start`` and ``duration``
    that is being extended to ``new_duration`` towards ``direction``.
    :return: a new value of ``start`` and ``new_duration`` -- adjusted for possible negative start.
    r;  r   r>  r?  rI   zUnexpected direction: r      r-  )rI   uniformr   r   )rg   rj   r<  r=  	new_startr)   r)   r*   'compute_start_duration_for_extended_cut  s   rC  cat#values	delimiterreturn_firstc                 C   sB   t | } t| dkrd S t| dks|r| d S |t|g| S )Nr   r   )r   r   joinr	   )rF  r   rG  rH  r)   r)   r*   merge_items_with_delimiter  s   
rJ  c                  G   s   dd | D }t |dkS )Nc                 S   s   g | ]}|d uqS r0   r)   )r   argr)   r)   r*   r     s    z(exactly_one_not_null.<locals>.<listcomp>r   r7  )r$  not_nullr)   r)   r*   exactly_one_not_null  s   rM  
max_framesc                 C   sH   t | j||d}t | j||d}|r || | }|dkr ||8 }||fS )z
    Utility to convert a supervision's time span into a tuple of ``(start_frame, num_frames)``.
    When ``max_frames`` is specified, it will truncate the ``num_frames`` (if necessary).
    )rx   r   r   )r   rg   rj   )supervisionrx   r   rN  start_framer   diffr)   r)   r*   supervision_to_frames  s   
rR  max_samplesc                 C   sD   t | j|d}t | j|d}|r|| | }|dkr||8 }||fS )z
    Utility to convert a supervision's time span into a tuple of ``(start_sample num_samples)``.
    When ``max_samples`` is specified, it will truncate the ``num_samples`` (if necessary).
    r5  r   )r3  rg   rj   )rO  r   rS  start_sampler   rQ  r)   r)   r*   supervision_to_samples  s   rU  c                 C   s   | d u p| |kS r0   r)   )r8   	thresholdr)   r)   r*   is_none_or_gt  s   rW  otherc                 C   s"   t | } t |}t|t| S r0   )to_listsetissubset)r8   rX  r)   r)   r*   is_equal_or_contains  s   r\  modulesc                     s   ddl  t fdd| D S )a  Returns if a top-level module with :attr:`name` exists *without**
    importing it. This is generally safer than try-catch block around a
    `import X`. It avoids third party libraries breaking assumptions of some of
    our tests, e.g., setting multiprocessing start method when imported
    (see librosa/#747, torchvision/#544).

    Note: "borrowed" from torchaudio:
    https://github.com/pytorch/audio/blob/6bad3a66a7a1c7cc05755e9ee5931b7391d2b94c/torchaudio/_internal/module_utils.py#L9
    r   Nc                 3   s     | ]} j |d uV  qd S r0   )util	find_spec)r   m	importlibr)   r*   r6    s    z&is_module_available.<locals>.<genexpr>)rb  all)r]  r)   ra  r*   is_module_available  s   
rd  c                 C   sR   t | |gdd d\} }| j|j }|dkrdS t| j| j |j|j }|| S )z
    Given two objects with "start" and "end" attributes, return the % of their overlapped time
    with regard to the shorter of the two spans.
    .c                 S   r&  r0   )rg   ra   r)   r)   r*   rH   #  s    z!measure_overlap.<locals>.<lambda>)rV   r   ry   )r   rh   rg   min)rn   ro   overlapped_areadurr)   r)   r*   measure_overlap  s   ri  ra   alt_itemc                 C   s   | du r|S | S )z<Return ``alt_item`` if ``item is None``, otherwise ``item``.Nr)   )ra   rj  r)   r)   r*   ifnone+  s   rk  c                 C   s   t | tr| S | gS )z7Convert ``item`` to a list if it is not already a list.)
isinstancer   re  r)   r)   r*   rY  0  s   rY  c                 C   s   t | tr	t| S | S )zBConvert ``item`` to a hashable type if it is not already hashable.)rl  r   tuplere  r)   r)   r*   to_hashable5  s   rn  s	max_valuec                 C   s,   |du rt j}tt| d d| S )z8Hash a string to an integer in the range [0, max_value).Nzutf-8   )r   maxsizerB   hashlibsha1encode	hexdigest)ro  rp  r)   r)   r*   hash_str_to_int:  s   rw  lensc                 C   sB   | j | jd t| tjd}t| D ]\}}d||d|f< q|S )z
    Create a 2-D mask tensor of shape (batch_size, max_length) and dtype float32
    from a 1-D tensor of integers describing the length of batch samples in another tensor.
    r   )dtyper,  N)	new_zerosshaper   rL   float32	enumerate)rx  maskr   numr)   r)   r*   lens_to_maskA  s   r  c                    s   t   fdd}|S )Nc                     sP   z | i |W S  t y' } zt|| d j d|  d| dd }~ww )Nz
[extra info] When calling: z(args=z kwargs=))r  r   r3   )r$  r/   r  fnr)   r*   wrapperM  s   z$rich_exception_info.<locals>.wrapper	functoolswraps)r  r  r)   r  r*   rich_exception_infoL  s   r  c                   @      e Zd ZdS )NonPositiveEnergyErrorNr1   r2   r3   r)   r)   r)   r*   r  Y      r  c                   @   r  )DeprecatedWarningNr  r)   r)   r)   r*   r  _  r  r  c                    s    fdd}|S )zFlags a method as deprecated.
    Args:
        message: A human-friendly string of instructions, such
            as: 'Please migrate to add_proxy() ASAP.'
    c                    s   t   fdd}|S )zThis is a decorator which can be used to mark functions
        as deprecated. It will result in a warning being emitted
        when the function is used.c                     s4   t  j}tjtt |j|jd  | i |S )N)categoryr   lineno)	inspectcurrentframef_backwarningswarn_explicitr  getfilef_codef_lineno)r$  r/   frame)funcmessager)   r*   r  o  s   

z.deprecated.<locals>.decorator.<locals>.wrapperr  )r  r  r  )r  r*   	decoratorj  s   zdeprecated.<locals>.decoratorr)   )r  r  r)   r  r*   
deprecatedc  s   r  c                   @   s4   e Zd ZdZdddefddZdd Zd	d
 ZdS )suppress_and_warnaq  Context manager to suppress specified exceptions that logs the error message.

    After the exception is suppressed, execution proceeds with the next
    statement following the with statement.

         >>> with suppress_and_warn(FileNotFoundError):
         ...     os.remove(somefile)
         >>> # Execution still resumes here if the file was already removed
    T)enabledr  c                G   s   || _ || _d S r0   )_enabled_exceptions)r`   r  
exceptionsr)   r)   r*   r%    s   
zsuppress_and_warn.__init__c                 C   r(  r0   r)   ri   r)   r)   r*   r'    rc   zsuppress_and_warn.__enter__c                 C   s>   | j sd S |d uot|| j}|rtd|j d|  |S )Nz[Suppressed z] Error message: )r  
issubclassr  r%   r&   r3   )r`   exctypeexcinstexctbshould_suppressr)   r)   r*   r*    s   zsuppress_and_warn.__exit__N)r1   r2   r3   r4   r:   r%  r'  r*  r)   r)   r)   r*   r    s
    
r  '  databufsizerngc              	   c   s    |du rt }g }d}| D ]G}t||k r(z	|t|  W n	 ty'   Y nw t|dkrA|dt|d }|| |}||< |rOt||k rO|| qd}|V  q|D ]}|V  qWdS )a*  
    Shuffle the data in the stream.

    This uses a buffer of size ``bufsize``. Shuffling at
    startup is less random; this is traded off against
    yielding samples quickly.

    This code is mostly borrowed from WebDataset; note that we use much larger default
    buffer size because Cuts are very lightweight and fast to read.
    https://github.com/webdataset/webdataset/blob/master/webdataset/iterators.py#L145

    .. warning: The order of the elements is expected to be much less random than
        if the whole sequence was shuffled before-hand with standard methods like
        ``random.shuffle``.

    :param data: iterator
    :param bufsize: buffer size for shuffling
    :param rng: either random module or random.Random instance
    :return: a generator of cuts, shuffled on-the-fly.
    NTr   r   F)rI   r   rR   r   r   randint)r  r  r  bufstartupsamplerX   r)   r)   r*   streaming_shuffle  s.   
r  c                 C   s,   ddl m} || \}}t|d t||S )z$s -> (s0,s1), (s1,s2), (s2, s3), ...r   )teeN)	itertoolsr  r   r   )rd   r  ar   r)   r)   r*   pairwise  s   

r  c                
   @   s   e Zd ZdZdddddededed	ee fd
dZ	dd Z
dd ZdefddZdd Zdd Zdd Zdd Zdd Zdd Zdd  Zd!d" ZdS )#Pipea  Wrapper class for subprocess.Pipe.

    This class looks like a stream from the outside, but it checks
    subprocess status and handles timeouts with exceptions.
    This way, clients of the class do not need to know that they are
    dealing with subprocesses.

    Note: This class is based on WebDataset and modified here.
    Original source is in https://github.com/webdataset/webdataset

    :param *args: passed to `subprocess.Pipe`
    :param **kw: passed to `subprocess.Pipe`
    :param timeout: timeout for closing/waiting
    :param ignore_errors: don't raise exceptions on subprocess errors
    :param ignore_status: list of status codes to ignore
    g      @FN)timeoutignore_errorsignore_statusr-   r  r  r  c          	      O   s   ddl m}m} || _ddgt|g  | _|| _||f| _|d dkrB|||d|vd|| _| jj	| _
| j
du rAt| dn$|d d	krf|||d|vd
|| _| jj| _
| j
du rft| dd| _dS )zCreate an IO Pipe.r   )PIPEPopen   rr   )stdouttextNz: couldn't openw)stdinr  )
subprocessr  r  r  rk  r  r  r$  procr  streamr   r  status)	r`   r-   r  r  r  r$  kwr  r  r)   r)   r*   r%    s"   






zPipe.__init__c                 C   s   d| j  dS )Nz<Pipe >)r$  ri   r)   r)   r*   __str__	     zPipe.__str__c                 C   s"   | j  }|dur|   dS dS )z'Poll the process and handle any errors.N)r  pollwait_for_child)r`   r  r)   r)   r*   check_status  s   
zPipe.check_statusr9   c                 C   s   | j  d u S r0   )r  r  ri   r)   r)   r*   
is_running  r  zPipe.is_runningc                 C   sL   | j durdS | j | _ | j | jvr"| js$t| j d| j  ddS dS )z>Check the status variable and raise an exception if necessary.Nz: exit z (read))r  r  waitr  r  r  r$  ri   r)   r)   r*   r    s   
zPipe.wait_for_childc                 O      | j j|i |}|   |S )z#Wrap stream.read and checks status.)r  r   r  r`   r$  r  r>   r)   r)   r*   r        z	Pipe.readc                 O   r  )z$Wrap stream.write and checks status.)r  r   r  r  r)   r)   r*   r   #  r  z
Pipe.writec                 O   s*   | j j|i |}| j | _|   |S )z'Wrap stream.readLine and checks status.)r  readliner  r  r  r  r  r)   r)   r*   r  )  s   zPipe.readlinec                 C   s&   | j   | j| j| _|   dS )z>Wrap stream.close, wait for the subprocess, and handle errors.N)r  closer  r  r  r  r  ri   r)   r)   r*   r  0  s   
z
Pipe.closec                 c   s(    |   }|r|V  |   }|sd S d S r0   )r  )r`   retvalr)   r)   r*   __iter__6  s   zPipe.__iter__c                 C   s   | S )Context handler.r)   ri   r)   r)   r*   r'  <  s   zPipe.__enter__c                 C   s   |    dS )r  N)r  )r`   etyper8   	tracebackr)   r)   r*   r*  @  rk   zPipe.__exit__)r1   r2   r3   r4   r   floatr:   r   r   r%  r  r  r  r  r   r   r  r  r  r'  r*  r)   r)   r)   r*   r    s2    
r  c                   @   s   e Zd Zdd ZdS )PythonLiteralOptionc                 C   sN   z t |}t|tst|trt|dkr|d W S |W S |W S    Y d S )Nr   r   )astliteral_evalrl  r   rm  r   )r`   ctxr8   rW   r)   r)   r*   type_cast_valueG  s   
z#PythonLiteralOption.type_cast_valueN)r1   r2   r3   r  r)   r)   r)   r*   r  F  s    r  c                   C   s   t dS )N
torchaudio)rd  r)   r)   r)   r*   is_torchaudio_availableR  s   r  rJ   trngc                 C   s   | dkrt  S t| S )Nr  )secretsSystemRandomrI   rN   )rJ   r)   r)   r*   	build_rngV  s   
r  c                   C   s   t ptjd S )NLHOTSE_DILL_ENABLED)_LHOTSE_DILL_ENABLEDr   r   r)   r)   r)   r*   is_dill_enabled`  r  r  c                 C   s   t | }|j|d}t|S )N)r<   )r   _replacer   )
identifierprofile_nameparsed_identifierupdated_identifierr)   r)   r*    replace_bucket_with_profile_named  s   r  )rq   )T)r   r   r   )FF)FNF)r   N)r;  )rD  rE  Fr0   )r  N)r  r  rs  r  r%   mathr   rI   r  r   r   rC   r  
contextlibr   r   dataclassesr   r   decimalr   r   r   r  r	   r
   r   pathlibr   typingr   r   r   r   r   r   r   r   r   r   r   r   r   urllib.parser   r   clicknumpyrK   rL   	tqdm.autor   r   Pathliker   r  rl   DecibelsINT16MAXEPSILONlogLOG_EPSILONDEFAULT_PADDING_VALUEr   r5   r   r:   r?   rB   rO   rP   r\   r]   rf   rp   ru   rz   r   r   r   r   r   r   r   r   r   r  r  r  r   r!  r2  r3  r:  rC  rJ  rM  rR  rU  rW  r\  rd  ri  rk  rY  rn  rw  	IntTensorTensorr  r  r   r  UserWarningr  r  r  rN   r  r  r  Optionr  r  r  r  r  r  r)   r)   r)   r*   <module>   s  
 <
H





B

4

	
e




	



&
	




"!
/	m 