o
    fi{                     @   s   d dl Z d dlZd dlZd dlZd dlZdededefddZdee dedee fd	d
Zdededee	eef  ddfddZ
				ddededededdf
ddZdS )    Nfile_urlfolderreturnc                 C   s   t j| }t j||}t j|rtd| d|   tj| dd}t	|d}|
|j W d   |S 1 s<w   Y  |S )a*  Download a file from a URL into the given folder.

    If a file with the same name already exists, it will be overwritten.
    Returns the basename of the downloaded file. Network-related exceptions from
    ``requests.get`` (e.g., timeouts or connection errors) may propagate to the caller.

    zgiven file "z." already exists and will be overwritten with 
   )timeoutwbN)ospathbasenamejoinisfileloggingwarningrequestsgetopenwritecontent)r   r   fname	file_pathrqoutfile r   `/home/ubuntu/SoloSpeech/.venv/lib/python3.10/site-packages/lightning_utilities/docs/retriever.py_download_file   s   
r   
list_filespatternc              	   C   sZ   g }| D ]&}t |dd}| }W d   n1 sw   Y  t||}||7 }q|S )aA  Search for all occurrences of a regular-expression pattern across files.

    Args:
        list_files: The list of file paths to scan.
        pattern: A regular-expression pattern to search for in each file.

    Returns:
        A list with all matches found across the provided files (order preserved per file).

    UTF-8encodingN)r   readrefindall)r   r   	collectedr   fopembodyfoundr   r   r   _search_all_occurrences   s   

r'   r   docs_folderpairs_url_pathc                 C   s   t j| |d}tdd |t jjD }t| dd}| }W d   n1 s-w   Y  |D ]\}}|rJdg| }	t jj	g |	|R  }|||}q4t| ddd}
|

| W d   dS 1 siw   Y  dS )	aC  Replace all matching remote URLs with local file paths in a given file.

    Args:
        file_path: The file in which replacements should be performed.
        docs_folder: The documentation root folder (used to compute relative paths).
        pairs_url_path: Pairs of (remote_url, local_relative_path) to replace.

     c                 S   s   g | ]}|r|qS r   r   ).0pr   r   r   
<listcomp>?   s    z._replace_remote_with_local.<locals>.<listcomp>r   r   Nz..w)r   r	   dirnamereplacelensplitsepr   r    r   r   )r   r(   r)   	relt_pathdepthfopenr%   urlfpathpath_upfwr   r   r   _replace_remote_with_local3   s   


"r;   docs/sourcefetched-s3-assets*.rst5https?://[-a-zA-Z0-9_]+\.s3\.[-a-zA-Z0-9()_\\+.\\/=]+assets_folderfile_patternretrieve_patternc              	   C   s   t j tj| d|dd}|std|  d| d dS t||d}|s2td	|  d
|  dS tj| |}tj|dd g }t	t
|D ]%\}}	td| dt| d|	  t|	|}
||	tj||
f qH|D ]}t|| | qpdS )a  Find S3 (or HTTP) asset URLs in docs, download them locally, and rewrite references to local paths.

    Args:
        docs_folder: The documentation root relative to the project.
        assets_folder: Subfolder inside ``docs_folder`` used to store downloaded assets (created if missing).
        file_pattern: Glob pattern of files to scan.
        retrieve_pattern: Regular-expression pattern used to find remote asset URLs.

    z**T)	recursivez no files were listed in folder "z" and pattern ""N)r   z"no resources/assets were match in z for )exist_okz >> downloading (/z): )globr   r	   r   r   r   r'   infomakedirs	enumeratesetr1   r   appendr;   )r(   r@   rA   rB   r   urlstarget_folderpairs_url_fileir7   r   r8   r   r   r   fetch_external_assetsK   s$    
rQ   )r<   r=   r>   r?   )rG   r   r   r!   r   strr   listr'   tupler;   rQ   r   r   r   r   <module>   s.   &