o
    8wi$4                     @   sz   d Z ddlZddlZddlZddlmZmZmZmZ ddl	m
Z
 ddlmZmZmZ ddlmZ eeZG dd dZdS )	zConvenience functions for the simplest parameter transfer cases.

Use `speechbrain.utils.checkpoints.Checkpointer` to find a checkpoint
and the path to the parameter file.

Authors
 * Aku Rouhe 2020
 * Andreas Nautsch 2023
 * Adel Moumen 2023
    N)DEFAULT_LOAD_HOOKSDEFAULT_TRANSFER_HOOKSPARAMFILE_EXTget_default_hook)run_on_main)FetchSourceLocalStrategyfetch)
get_loggerc                   @   s   e Zd ZdZ					dddZdd Zdd Zd	d
 Zdd Zdd Z	e
dd ZddejfdefddZdd Zdd Zdd ZdS )
Pretrainera  Orchestrates pretraining

    First optionally collects files from some source (local directory,
    HuggingFace repository, base URL), into the `collect_in` directory, if
    specified.

    Then, calls load hooks for each of those files.

    Arguments
    ---------
    collect_in : str or Path, optional
        Path to directory where the files are to be collected.
        If `None`, then files will be referred to from cache or directly, if
        possible (URLs will fail). There will not be a centralized target
        directory with all the files.

    loadables : mapping
        Mapping from loadable key to object. This connects the keys to
        the actual object instances.
    paths : mapping
        Mapping from loadable key to filepath. The last part
        of the path is treated as file name, the rest of it
        is treated as a "source" which can be either a directory
        path or a magic source like Huggingface hub ID.
        e.g. sb/asr-crdnn-libri/lm.ckpt
        -> source=sb/asr-crdnn-libri, file=lm.ckpt
        Note that when collecting, you can specify a default source,
        which is used for all loadables that don't have a path specified.
    custom_hooks : mapping
        Mapping from loadable key to parameter transfer hook function. If you
        want to use a custom loading function, specify it here.
    conditions: mapping
        An optional mapping from loadable keys to condition values,
        useful for loading certain elements only if a flag is turned on
    Nc                 C   st   i | _ | | |d ur| | i | _|d ur| | i | _|d ur)| | i | _|d ur5| | g | _	d S )N)
	loadablesset_collect_inadd_loadablespaths	add_pathscustom_hooksadd_custom_hooks
conditionsadd_conditionsis_local)self
collect_inr   r   r   r    r   a/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/speechbrain/utils/parameter_transfer.py__init__B   s   





zPretrainer.__init__c                 C   s"   |durt || _dS d| _dS )zChange the collecting pathN)pathlibPathr   )r   pathr   r   r   r   [   s   "zPretrainer.set_collect_inc                 C      | j | dS )zUpdate the loadables dict from the given mapping.

        Arguments
        ---------
        loadables : mapping
            Mapping from loadable key to object
        N)r   update)r   r   r   r   r   r   _   s   zPretrainer.add_loadablesc                 C   r   )a  Update the paths for different loadables.

        When collecting parameters, paths here are preferred. Note that when
        collecting, you can specify a default source, which is used for all
        loadables that don't have a path specified.

        Arguments
        ---------
        paths : mapping
            Mapping from loadable key to filepath. The last part
            of the path is treated as file name, the rest of it
            is treated as a "source" which can be either a directory
            path or a magic source like Huggingface hub ID.
            e.g. sb/asr-crdnn-libri/lm.ckpt
            -> source=sb/asr-crdnn-libri, file=lm.ckpt
        N)r   r   )r   r   r   r   r   r   i   s   zPretrainer.add_pathsc                 C   r   )aL  Update the custom hooks.

        When loading parameters, hooks here are preferred over class defaults.

        Arguments
        ---------
        custom_hooks : mapping
            Mapping from loadable key to parameter transfer hook function. If
            you want to use a custom loading function, specify it here.

        N)r   r   )r   r   r   r   r   r   |   s   zPretrainer.add_custom_hooksc                 C   r   )zUpdate the conditions.

        Arguments
        ---------
        conditions: mapping
            Mapping from loadable keys to condition values,
            useful for loading certain elements only if a flag is turned on

        N)r   r   )r   r   r   r   r   r      s   
zPretrainer.add_conditionsc                 C   s<   dd }t | tr| \}}||\}}t|||fS || S )a'  Splits a path to source and filename

        This also handles URLs and Huggingface hub paths, in addition to
        regular paths.

        Arguments
        ---------
        path : str

        Returns
        -------
        str
            Source
        str
            Filename
        c                 S   s   d| v r| j dddS d| fS )zCore function to split path./   )maxsplitz./)rsplit)srcr   r   r   split   s   z$Pretrainer.split_path.<locals>.split)
isinstancer   )r   r%   
fetch_from
fetch_pathsourcefilenamer   r   r   
split_path   s   
zPretrainer.split_pathFlocal_strategyc              
      sD  | j dur'td| j  d | j jdd t dkr&|tjkr&t	d ntd i }| j
D ]n}| |s9q1|t }|| jv rM| | j| \}}n|durV|}|}ntd	| d
||| j d||d|d}	d  fdd}
t|
|	|
|	d  ||< t|tr|\}}td| d   t | j|< | j| q1|S )aR  Fetches parameters from known paths with fallback default_source

        The actual parameter files may reside elsewhere, but this ensures a
        symlink in the self.collect_in directory. The symlink always uses the
        loadable key in the filename. This standardization makes it easier to
        orchestrate pretraining on e.g. distributed setups.

        Use the default_source if you have everything organized neatly into one
        location, like a Huggingface hub repo.

        Arguments
        ---------
        default_source : str or Path or FetchSource
            This is used for each loadable which doesn't have a path already
            specified.
            e.g. if the loadable has key `"asr"`, then the file to look for is
            `<default_source>/asr.ckpt`
        use_auth_token : bool (default: False)
            If true Huggingface's auth_token will be used to load private models from the HuggingFace Hub,
            default is False because the majority of models are public.
        local_strategy : speechbrain.utils.fetching.LocalStrategy
            The fetching strategy to use, which controls the behavior of remote file
            fetching with regards to symlinking and copying.
            Ignored if a `collect_in` directory was not specified.
            See :func:`speechbrain.utils.fetching.fetch` for further details.

        Returns
        -------
        dict
            Mapping from loadable key to a local path from which loadable's
            parameters can be loaded. This is not used in this class, but
            can possibly be helpful.
        Nz2Collecting files (or symlinks) for pretraining in .T)exist_okWindowszRequested Pretrainer collection using symlinks on Windows. This might not work; see `LocalStrategy` documentation. Consider unsetting `collect_in` in Pretrainer to avoid symlinking altogether.z<Fetching files for pretraining (no collection directory set)zPath not specified for 'z', and no default_source given!F)r*   r)   savedir	overwritesave_filenameuse_auth_tokenrevisionr,   c                     s   t di |  dS )zVery basic local wrapper to fetch to store the path in a
                local of collect_files

                Arguments
                ---------
                **kwargs : dict
                    Arguments to forward to fetchNr   )r	   )kwargsr   r   r   	run_fetch  s   	z+Pretrainer.collect_files.<locals>.run_fetch)r5   	post_funcpost_kwargszSet local path in self.paths["z"] = )r   loggerdebugmkdirplatformsystemr   SYMLINKwarningswarnr   is_loadabler   r   r+   
ValueErrorr   r&   r   strr   append)r   default_sourcer3   r,   loadable_pathsnamer2   r)   r*   fetch_kwargsr7   _fetch_fromr   r6   r   collect_files   sf   
(





zPretrainer.collect_filesc                 C   s.   || j vrdS | j | }t|r| S t|S )a7  Returns True if no condition is defined or for the specified
        loadable or if the condition is true

        Arguments
        ---------
        name: str
            the name of the loadable

        Returns
        -------
        is_loadable: bool
            whether the item should be loaded
        T)r   callablebool)r   rH   	conditionr   r   r   rB   1  s   

zPretrainer.is_loadablec                 C   s   t dd| j  i }| jD ];}| |sq|t }|| jv r8t d| d| j|   | j| ||< q| j	durE| j	| ||< qt
d| d| | dS )z)Loads the files that have been collected.zLoading pretrained files for: z, z'Redirecting (loading from local path): z -> Nz Pretrainer has never collected `z`, did you forget a call to `collect_files`? Could not fall back to `collect_in`, as it was not specified (default is no longer "model_checkpoints").)r:   infojoinr   rB   r   r   r;   r   r   rC   _call_load_hooks)r   
paramfilesrH   r*   r   r   r   load_collectedG  s&   




zPretrainer.load_collectedc                 C   s   | j  D ]H\}}| |sq|| }|| jv r!| j| || qt|t}|d ur0||| qt|t}|d urBd}|||| qdt| d}t|d S )NFzDon't know how to load zO. Register default hook                     or add custom hook for this object.)	r   itemsrB   r   r   r   r   typeRuntimeError)r   rR   rH   objloadpathdefault_hookend_of_epochMSGr   r   r   rQ   _  s&   




zPretrainer._call_load_hooks)NNNNN)__name__
__module____qualname____doc__r   r   r   r   r   r   staticmethodr+   r   r?   rK   rB   rS   rQ   r   r   r   r   r      s0    &


#
yr   )r_   r   r=   r@   speechbrain.utils.checkpointsr   r   r   r   speechbrain.utils.distributedr   speechbrain.utils.fetchingr   r   r	   speechbrain.utils.loggerr
   r\   r:   r   r   r   r   r   <module>   s    