o
    5ti                     @   s   d dl Z d dlZd dlZd dlZd dlmZ d dlmZ d dlm	Z	 d dl
Ze eZdedeeef fddZd	e	deeB eB fd
dZdeeB dedB fddZdd Zdeee	f fddZdeee	f fddZdS )    N)version)Path)Anyinput_stringreturnc                 C   s(   t d}t |d| }|| k}||fS )a  Remove the ',none' substring from the input_string if it exists at the end.

    Args:
        input_string (str): The input string from which to remove the ',none' substring.

    Returns:
        Tuple[str, bool]: A tuple containing the modified input_string with the ',none' substring removed
                          and a boolean indicating whether the modification was made (True) or not (False).
    z,none$ )recompilesub)r   patternresultremoved r   I/home/ubuntu/.local/lib/python3.10/site-packages/lm_eval/loggers/utils.pyremove_none_pattern   s   
r   oc                 C   s4   t | tjtjfrt| S t | trt| S t| S )a  Handle non-serializable objects by converting them to serializable types.

    Args:
        o (Any): The object to be handled.

    Returns:
        Union[int, str, list]: The converted object. If the object is of type np.int64 or np.int32,
            it will be converted to int. If the object is of type set, it will be converted
            to a list. Otherwise, it will be converted to str.
    )
isinstancenpint64int32intsetliststr)r   r   r   r   _handle_non_serializable%   s
   
r   	repo_pathc              
   C   s   zNt | d}| rt |j|jdddd dd }t |d rJt |djdddd dd }t ||}|jdddd	}W |S d }W |S  tyn } zt	d
t
|  d|  W Y d }~d S d }~ww )Nz.gitzutf-8)encoding
r    HEADr   z0Failed to retrieve a Git commit hash from path: z	. Error: )r   is_fileparent	read_textsplitexistsreplace	Exceptionloggerdebugr   )r   
git_folder	head_namehead_refgit_hasherrr   r   r   get_commit_from_path8   s8   

r/   c               	   C   sH   zt g d } |  } W | S  t jtfy#   tt } Y | S w )z
    Gets the git commit hash of your current repo (if it exists).
    Source: https://github.com/EleutherAI/gpt-neox/blob/b608043be541602170bfcfb8ec9bf85e8a0799e0/megatron/neox_arguments/neox_args.py#L42
    )gitdescribez--always)	
subprocesscheck_outputstripdecodeCalledProcessErrorFileNotFoundErrorr/   osgetcwd)r-   r   r   r   get_git_commit_hashS   s   
r:   storagec           
   
   C   s   ddl m}m} | r-zddlm} | }W n ty, } z
t|}W Y d }~nd }~ww d}ztd}W n tyK } z
t|}W Y d }~nd }~ww | rVddlm	} nd}t
tt d}||||d	}	| |	 d S )
Nr   )is_torch_availableis_transformers_available)get_pretty_env_infozN/A (torch not installed)lm_eval)__version__zN/Az..)pretty_env_infotransformers_versionlm_eval_versionupper_git_hash)lm_eval.utilsr<   r=   torch.utils.collect_envr>   r'   r   r   transformersr@   r/   r   r8   r9   update)
r;   r<   r=   r>   rA   r.   rC   rB   upper_dir_commit
added_infor   r   r   add_env_infoa   s8   
rK   c              
   C   s   t |ddrSz0|jjt|jjg|jjt|jjg|jjt|jjgt |dd t |dd d}| 	| W d S  t
yR } ztd| d W Y d }~d S d }~ww td d S )	N	tokenizerFeot_token_id
max_length)tokenizer_pad_tokentokenizer_eos_tokentokenizer_bos_tokenrM   rN   z,Logging detailed tokenizer info failed with z, skipping...zTLM does not have a 'tokenizer' attribute, not logging tokenizer metadata to results.)getattrrL   	pad_tokenr   pad_token_id	eos_tokeneos_token_id	bos_tokenbos_token_idrH   r'   r(   r)   )r;   lmtokenizer_infor.   r   r   r   add_tokenizer_info   s0   





r[   )loggingr8   r   r2   importlib.metadatar   pathlibr   typingr   numpyr   	getLogger__name__r(   r   tupleboolr   r   r   r   r/   r:   dictrK   r[   r   r   r   r   <module>   s    
"