o
    Si$                     @   s  d Z ddlZddlZddlZddlZddlZddlZddlZddl	Z	ddl
Z
ddlmZmZmZmZmZ ddlZddlZeeejddZdd Zdd	 Zd
d Zdd ZG dd dZdedefddZd8dedefddZ dede!fddZ"e	j#e	j#fdededefd d!Z$d"ee%e!f fd#d$Z&ddde&fd%ededed&ed'ed(ef f
d)d*Z'd9d+d,Z(d9d-d.Z)d9d/d0Z*G d1d2 d2e+Z,d9dd3d4d5Z-d6d7 Z.dS ):z Miscellaneous utility functions.    N)AnyCallableIterableIteratorUnion
WDS_SECURE0c                 C   s   t  | }dd |D S )zApply glob to patterns with braces by pre-expanding the braces.

    Args:
        pattern (str): The glob pattern with braces.

    Returns:
        list: A list of file paths matching the expanded pattern.
    c                 S   s    g | ]}t  |D ]}|q	qS  )glob).0patfr	   r	   D/home/ubuntu/.local/lib/python3.10/site-packages/webdataset/utils.py
<listcomp>$   s     z$glob_with_braces.<locals>.<listcomp>)braceexpand)patternexpandedr	   r	   r   glob_with_braces   s   
	r   c                    s<   t  |}|D ]}t |r dS qt fdd|D S )a*  Apply fnmatch to patterns with braces by pre-expanding the braces.

    Args:
        filename (str): The filename to match against.
        pattern (str): The pattern with braces to match.

    Returns:
        bool: True if the filename matches any of the expanded patterns, False otherwise.
    Tc                 3   s    | ]	}t   |V  qd S N)fnmatch)r   r   filenamer	   r   	<genexpr>5   s    z&fnmatch_with_braces.<locals>.<genexpr>)r   r   any)r   r   r   r   r	   r   r   fnmatch_with_braces'   s   

r   c                  G   s&   d}| D ]}|d t | d@ }q|S )zGenerate a seed value from the given arguments.

    Args:
        *args: Variable length argument list to generate the seed from.

    Returns:
        int: A 31-bit positive integer seed value.
    r      i)hash)argsseedargr	   r	   r   	make_seed8   s   	r    c                 C   sJ   t | trdS t | trdS t | trdS t | trdS t | tr#dS dS )zCheck if an object is iterable (excluding strings and bytes).

    Args:
        obj: The object to check for iterability.

    Returns:
        bool: True if the object is iterable (excluding strings and bytes), False otherwise.
    FT)
isinstancestrbyteslistr   r   )objr	   r	   r   is_iterableG   s   
	



r&   c                   @   s   e Zd ZdZdd ZdS )PipelineStagezBase class for pipeline stages.c                 O   s   t )zInvoke the pipeline stage.

        Args:
            *args: Variable length argument list.
            **kw: Arbitrary keyword arguments.

        Raises:
            NotImplementedError: This method should be implemented by subclasses.
        )NotImplementedError)selfr   kwr	   r	   r   invoke`   s   
zPipelineStage.invokeN)__name__
__module____qualname____doc__r+   r	   r	   r	   r   r'   ]   s    r'   xreturnc                 C   s   | S )zReturn the argument as is.

    Args:
        x (Any): The input value.

    Returns:
        Any: The input value unchanged.
    r	   )r0   r	   r	   r   identitym   s   	r2   {}sexprc                 C   s0   t dd| | krtd|  dt|| S )a5  Evaluate the given expression more safely.

    Args:
        s (str): The string to evaluate.
        expr (str, optional): The expression format. Defaults to "{}".

    Returns:
        Any: The result of the evaluation.

    Raises:
        ValueError: If the input string contains illegal characters.
    z[^A-Za-z0-9_] z#safe_eval: illegal characters in: '')resub
ValueErrorevalformat)r4   r5   r	   r	   r   	safe_evaly   s   r=   symmodulesc                 C   s8   |D ]}t j|dd}t|| d}|dur|  S qdS )zLook up a symbol in a list of modules.

    Args:
        sym (str): The symbol to look up.
        modules (list): A list of module names to search in.

    Returns:
        Any: The found symbol, or None if not found.
    
webdataset)packageN)	importlibimport_modulegetattr)r>   r?   mnamemoduleresultr	   r	   r   
lookup_sym   s   
rH   loadernepochsnbatchesc                 c   s&    t |D ]}t| |E dH  qdS )ae  Repeatedly returns batches from a DataLoader.

    Args:
        loader (Iterator): The data loader to yield batches from.
        nepochs (int, optional): Number of epochs to repeat. Defaults to sys.maxsize.
        nbatches (int, optional): Number of batches per epoch. Defaults to sys.maxsize.

    Yields:
        Any: Batches from the data loader.
    N)rangeittislice)rI   rJ   rK   _r	   r	   r   repeatedly0   s   rP   batchc                 C   s   t | d S )zGuess the batch size by looking at the length of the first element in a tuple.

    Args:
        batch (Union[tuple, list]): The batch to guess the size of.

    Returns:
        int: The guessed batch size.
    r   )len)rQ   r	   r	   r   guess_batchsize   s   	rS   sourcensamples	batchsize.c           	      c   s~    d}d}d}	 | D ]%}|V  |d7 }|dur||kr dS |dur/|t |7 }||kr/ dS q
|d7 }|dur>||kr>dS q)a  Repeatedly yield samples from an iterator.

    Args:
        source (Iterator): The source iterator to yield samples from.
        nepochs (int, optional): Number of epochs to repeat. Defaults to None.
        nbatches (int, optional): Number of batches to yield. Defaults to None.
        nsamples (int, optional): Number of samples to yield. Defaults to None.
        batchsize (Callable[..., int], optional): Function to guess batch size. Defaults to guess_batchsize.

    Yields:
        Any: Samples from the source iterator.
    r   T   N)rS   )	rT   rJ   rK   rU   rV   epochrQ   totalsampler	   r	   r   
repeatedly   s&   r[   c                 C   s  d}d}d}d}dt jv r!dt jv r!tt jd }tt jd }n/z%ddl}|j rE|j rE| p6|jjj} |jj	| d}|jj
| d}W n	 tyO   Y nw dt jv ridt jv ritt jd }tt jd }n zddl}|jj }|dur~|j}|j}W n	 ty   Y nw ||||fS )	a  Return node and worker info for PyTorch and some distributed environments.

    Args:
        group (optional): The process group for distributed environments. Defaults to None.

    Returns:
        tuple: A tuple containing (rank, world_size, worker, num_workers).
    r   rW   RANK
WORLD_SIZENgroupWORKERNUM_WORKERS)osenvironinttorch.distributeddistributedis_availableis_initializedr_   WORLDget_rankget_world_sizeModuleNotFoundErrortorch.utils.datautilsdataget_worker_infoidnum_workers)r_   rank
world_sizeworkerrr   torchworker_infor	   r	   r   pytorch_worker_info   s>   	rx   c                 C   s   t | d\}}}}|d | S )zCompute a distinct, deterministic RNG seed for each worker and node.

    Args:
        group (optional): The process group for distributed environments. Defaults to None.

    Returns:
        int: A deterministic RNG seed.
    r^   i  )rx   )r_   rs   rt   ru   rr   r	   r	   r   pytorch_worker_seed  s   	ry   c                    s:   t | r	| }d  nd }|   fdd}|d u r|S ||S )Nc                    s   t   fdd}|S )Nc                     s@   d j  d}d ur|d 7 }tj|tdd  | i |S )NzCall to deprecated function .	 Reason:    )category
stacklevel)r,   warningswarnDeprecationWarningr   kwargsmsgfuncreasonr	   r   new_func   s   z/deprecated.<locals>.decorator.<locals>.new_func)	functoolswraps)r   r   r   )r   r   	decorator  s   zdeprecated.<locals>.decorator)callable)r   r   r   r	   r   r   
deprecated  s   r   c                   @   s   e Zd ZdS )ObsoleteExceptionN)r,   r-   r.   r	   r	   r	   r   r   6  s    r   r   c                   s2    d u rt jtdS t   fdd}|S )Nr   c                     sJ   t tjddsd j d}d ur|d 7 }t| | i |S )NALLOW_OBSOLETEr   zCall to obsolete function z%. Set env ALLOW_OBSOLETE=1 to permit.r{   )rd   rb   rc   getr,   r   r   r   r	   r   r   >  s   zobsolete.<locals>.new_func)r   partialobsoleter   )r   r   r   r	   r   r   r   :  s
   r   c                 C   sB   t dd | D }t dd | D }|| }|t | }|S )Nc                 S      g | ]}|d  qS )r   r	   r   pr	   r	   r   r   K      z*compute_sample_weights.<locals>.<listcomp>c                 S   r   )rW   r	   r   r	   r	   r   r   L  r   )nparrayamax)	n_w_pairsnswsweightedpsr	   r	   r   compute_sample_weightsJ  s
   r   )r3   r   )/r/   r   r   r
   rB   	itertoolsrM   rb   r8   sysr   typingr   r   r   r   r   r   numpyr   boolrd   rc   r   enforce_securityr   r   r    r&   r'   r2   r"   r=   r$   rH   maxsizerP   tuplerS   r[   rx   ry   r   	Exceptionr   r   r   r	   r	   r	   r   <module>   sZ    


%
+
!