o
    Si[                     @   s"  d dl Z d dlZd dlZd dlZd dlmZ d dlmZmZm	Z	m
Z
 d dlZd dlmZ d dlmZ dZ				dd	e	e d
e	e dede	e de	eegdf  f
ddZ				dded	e	e d
e	e dede	e ddfddZde
eed df defddZdefddZdefddZdS )    N)partial)CallableLiteralOptionalUnion)distributed)fix_random_seedLHOTSE_PROCESS_SEEDT*   rank
world_size#set_different_node_and_worker_seedsseedreturnc                 C   s   t t| |||dS )a  
    Calling this function creates a worker_init_fn suitable to pass to PyTorch's DataLoader.

    It helps with two issues:

    * sets the random seeds differently for each worker and node, which helps with
        avoiding duplication in randomized data augmentation techniques.
    * sets environment variables that help WebDataset detect it's inside multi-GPU (DDP)
        training, so that it correctly de-duplicates the data across nodes.
    r   r   r   r   )r   worker_init_fnr    r   N/home/ubuntu/.local/lib/python3.10/site-packages/lhotse/dataset/dataloading.pymake_worker_init_fn   s   r   	worker_idc                 C   s   |r|d|   }|dur|d| 7 }t | t|tjt< |du r'|du r'dS |dur/|dus9J d| d| t|tjd< t|tjd< dS )z~
    Function created by :func:`~lhotse.dataset.dataloading.make_worker_init_fn`, refer to its documentation for details.
    d   Ni z!Both args must be not None: rank=z, world_size=RANK
WORLD_SIZE)r   strosenvironr	   )r   r   r   r   r   process_seedr   r   r   r   )   s   
r   )trng
randomizedc                 C   s   t | tr| S | du rt d d S | dkr9tjj }|du r)t d d S tt	j
v s2J dtt	j
t S | dkrBtdS tdt| d	| d
)a  
    Resolves the special values of random seed supported in Lhotse.

    If it's an integer, we'll just return it.

    If it's "trng", we'll use the ``secrets`` module to generate a random seed
    using a true RNG (to the extend supported by the OS).

    If it's "randomized", we'll check whether we're in a dataloading worker of ``torch.utils.data.DataLoader``.
    If we are, we expect that it was passed the result of :func:`~lhotse.dataset.dataloading.make_worker_init_fn`
    into its ``worker_init_fn`` argument, in which case we'll return a special seed exclusive to that worker.
    If we are not in a dataloading worker (or ``num_workers`` was set to ``0``), we'll return Python's ``random``
    module global seed.
    N   r   r   zRequested seed='randomized' for shuffling shards differently on each DataLoader node and worker, but lhotse.dataset.dataloading.worker_init_fn was not called.r   l        z-Unexpected type or value of seed: type(seed)=z seed=z<. Supported values are: None, int, 'trng', and 'randomized'.)
isinstanceintrandomgetstatetorchutilsdataget_worker_infor	   r   r   secrets	randbelow
ValueErrortype)r   worker_infor   r   r   resolve_seedH   s"   

r-   c                   C   4   dt jv rtt jd S t rt rt S dS )mSource: https://github.com/danpovey/icefall/blob/74bf02bba6016c1eb37858a4e0e8a40f7d302bdb/icefall/dist.py#L56r   r   )r   r   r!   distis_availableis_initializedget_world_sizer   r   r   r   r3   ~   
   
r3   c                   C   r.   )r/   r   r   )r   r   r!   r0   r1   r2   get_rankr   r   r   r   r5      r4   r5   )NNTr
   )r   r"   r(   sys	functoolsr   typingr   r   r   r   r$   r   r0   lhotse.utilsr   r	   r!   boolr   r   r-   r3   r5   r   r   r   r   <module>   sZ    

 6
