o
    i                  
   @   s  d dl Z d dlZd dlZd dlZd dlZd dlZd dlmZ d dl	m
Z
mZmZmZ d dlmZ ddlmZ ddlmZmZmZmZ ddlmZmZmZ e
rTd d	lmZ G d
d dZdedefddZdededee dee def
ddZdd Z deeef defddZ!dS )    N)Path)TYPE_CHECKINGDictListOptional)msg   )Errors)check_spacy_env_varsdownload_fileensure_pathyget_checksum)get_hashmake_tempdirupload_file)	CloudPathc                
   @   s   e Zd ZdZdddedefddZded	ed
eddfddZdddded	ee d
ee ded fddZ	dddded	ee d
ee ded fddZ
ded	ed
eddfddZdedefddZdS )RemoteStoragezPush and pull outputs to and from a remote file storage.

    Remotes can be anything that `smart_open` can support: AWS, GCS, file system,
    ssh, etc.
    gz)compressionproject_rooturlc                C   s   || _ t|| _|| _d S N)rootr   r   r   )selfr   r   r    r   M/home/ubuntu/.local/lib/python3.10/site-packages/weasel/cli/remote_storage.py__init__   s   

zRemoteStorage.__init__pathcommand_hashcontent_hashreturnr   c           
   	   C   s   | j | }| std| d| |||}| r|S t D}|| t| }| jr4d| j nd}tj	||d}	|	j
t|t|d W d   n1 sSw   Y  t|| W d   |S 1 shw   Y  |S )a  Compress a file or directory within a project and upload it to a remote
        storage. If an object exists at the full URL, nothing is done.

        Within the remote storage, files are addressed by their project path
        (url encoded) and two user-supplied hashes, representing their creation
        context and their file contents. If the URL already exists, the data is
        not uploaded. Paths are archived and compressed prior to upload.
        zCannot push z: does not exist.zw:wmode)arcnameN)r   existsIOErrormake_urlr   encode_namestrr   tarfileopenaddr   )
r   r   r   r   locr   tmptar_locmode_stringtar_filer   r   r   push    s"   
	
zRemoteStorage.pushNr   r   c             	      s  | j | }| rdS | j|||d}|du r|S |j s%|jjdd t O}||jd  }t|| | jr>d| j nd}t	j
||d}	d	d
   fdd}
|
|	| j  W d   n1 sbw   Y  W d   |S W d   |S 1 szw   Y  |S )a  Retrieve a file from the remote cache. If the file already exists,
        nothing is done.

        If the command_hash and/or content_hash are specified, only matching
        results are returned. If no results are available, an error is raised.
        Nr3   T)parentszr:rr"   c                 S   s0   t j| }t j|}t j||g}||kS r   )osr   abspathcommonprefix)	directorytargetabs_directory
abs_targetprefixr   r   r   is_within_directoryZ   s   z/RemoteStorage.pull.<locals>.is_within_directoryc                    s\   |   D ]}tj||j} ||sttjqtj	dkr'| j
|dd d S | 
| d S )N)      data)filter)
getmembersr7   r   joinname
ValueErrorr	   E201sysversion_info
extractall)tarr   membermember_pathr?   r   r   safe_extract`   s   


z(RemoteStorage.pull.<locals>.safe_extract)r   r%   findparentmkdirr   partsr   r   r*   r+   )r   r   r   r   destr   r.   r/   r0   r1   rP   r   rO   r   pull8   s0   





zRemoteStorage.pullc                   s  |  t|}g }|dur$ dur$| j| |   }| r!|gng }n>|dur=| j| |  r<t| j| |  }n%| j|  rb| j|  D ]	}||  qK durb fdd|D }t|dkrz
|jdd d W n t	y   t
d Y nw |r|d	 S dS )
a  Find the best matching version of a file within the storage,
        or `None` if no match can be found. If both the creation and content hash
        are specified, only exact matches will be returned. Otherwise, the most
        recent matching file is preferred.
        Nc                    s   g | ]}|j d   kr|qS )r5   )rT   ).0r   r   r   r   
<listcomp>   s    z&RemoteStorage.find.<locals>.<listcomp>r   c                 S   s
   |   jS r   )statst_mtime)xr   r   r   <lambda>   s   
 z$RemoteStorage.find.<locals>.<lambda>)keyzkUnable to sort remote files by last modified. The file(s) pulled from the cache may not be the most recent.r5   )r(   r)   r   r%   listiterdirextendlensort	Exceptionr   warn)r   r   r   r   rF   urlsr   sub_dirr   rX   r   rQ   m   s.   zRemoteStorage.findc                 C   s   | j | t| | | S )zCConstruct a URL from a subpath, a creation hash and a content hash.)r   r(   r)   )r   r   r   r   r   r   r   r'      s   zRemoteStorage.make_urlrF   c                 C   s   t j|S )z&Encode a subpath into a URL-safe name.)urllibparse
quote_plus)r   rF   r   r   r   r(      s   zRemoteStorage.encode_name)__name__
__module____qualname____doc__r   r)   r   r2   r   rV   rQ   r'   r(   r   r   r   r   r      s8    
9
$r   r-   r    c                 C   s   t | S r   r   )r-   r   r   r   get_content_hash   s   rp   	site_hashenv_hashdepscmdc                 C   sL   t   dd t|D }| |g| }|| d|d}t| S )zCreate a hash representing the execution of a command. This includes the
    currently installed packages, whatever environment variables have been marked
    as relevant, and the command.
    c                 S   s   g | ]}t |qS r   ro   )rW   depr   r   r   rY      s    z$get_command_hash.<locals>.<listcomp> utf8)r
   sortedra   rE   encodehashlibmd5	hexdigest)rq   rr   rs   rt   dep_checksumshashescreation_bytesr   r   r   get_command_hash   s   
r   c                  C   s   t  } t jr| t   t }| D ]!}t|}| D ]}|jd 	dr3|
|jd dd qqdt|d}t| S )zHash the current Python environment's site-packages contents, including
    the name and version of the libraries. The list we're hashing is what
    `pip freeze` would output.
    r5   z	dist-infoz
.dist-inforv   rw   )sitegetsitepackagesENABLE_USER_SITEra   getusersitepackagessetr   r`   rT   endswithr,   replacerE   rx   ry   rz   md5sumr|   )	site_dirspackagessite_dirsubpathpackage_bytesr   r   r   get_site_hash   s   r   envc                 C   sL   i }|   D ]\}}|drtj|dd d||< q|||< qt|S )zConstruct a hash of the environment variables that will be passed into
    the commands.

    Values in the env dict may be references to the current os.environ, using
    the syntax $ENV_VAR to mean os.environ[ENV_VAR]
    $   Nrv   )items
startswithr7   environgetr   )r   env_varsr^   valuer   r   r   get_env_hash   s   

r   )"rz   r7   r   rI   r*   urllib.parserh   pathlibr   typingr   r   r   r   wasabir   errorsr	   utilr
   r   r   r   r   r   r   cloudpathlibr   r   r)   rp   r   r   r   r   r   r   r   <module>   s<     
