o
    [۷i'                     @  s   d dl mZ d dlZd dlZd dlZd dlZd dlZd dlmZ d dl	m	Z	 d dl
mZmZmZ dejvrOeejjjjd  rOejd eeejjj d dlZedZG d	d
 d
eZd$ddZd%ddZd&dd Zd'd!d"Zed#kr}e  dS dS )(    )annotationsN)Path)tqdm)AnySequence
NamedTupleNO_LOCAL_GGUFzgguf-pyzgguf-new-metadatac                   @  s6   e Zd ZU ded< ded< dZded< dZd	ed
< dS )MetadataDetailszgguf.GGUFValueTypetyper   value strdescriptionNzgguf.GGUFValueType | Nonesub_type)__name__
__module____qualname____annotations__r   r    r   r   T/home/ubuntu/vllm_env/lib/python3.10/site-packages/gguf/scripts/gguf_new_metadata.pyr	      s
   
 r	   readergguf.GGUFReaderkeyr   returnr   c                 C  s   |  |}|r| S d S N)	get_fieldcontents)r   r   fieldr   r   r   get_field_data   s   
r   
token_listSequence[int]tokenc                   s6    fddt | D }t|dkrtd  d|S )Nc                   s   g | ]
\}}| kr|qS r   r   ).0indexr   r!   r   r   
<listcomp>%   s    zfind_token.<locals>.<listcomp>r   zUnable to find "z" in token list!)	enumeratelenLookupError)r   r!   	token_idsr   r$   r   
find_token$   s   r*   writergguf.GGUFWriternew_metadatadict[str, MetadataDetails]remove_metadataSequence[str]Nonec                 C  s|  | j  D ]}|jtjjjks|jdrt	d|j  q|jtjj
jr9tjj
j|v r9t	d|j  q|j|v rHt	d|j  q|jd }|tjjkrX|jd nd }t|| |d}||j|}|j|v rt	d|j d	|j d
|j d|j  ||j= n|jd urt	d|j  |jd ur|j|j|j|j|jd u r|n|jd qtjj
j|v rt	d ||tjj
j j |tjj
j= | D ]\}	}t	d|	 d	|j d|j  ||	|j|j qd}
| jD ]}|
|j7 }
||j|jj|jj|jj|j  qt!d|
ddd}|"  |#  |$  | jD ]}|j%|j| j&d |'|j q%|(  d S )NzGGUF.zSuppressing z	Skipping z	Removing r   )r   z
Modifying z: "z" -> "z" zCopying zAdding chat template(s)zAdding WritingbyteT)desctotalunit
unit_scale)tensor_endianess))fieldsvaluesnameggufKeysGeneralARCHITECTURE
startswithloggerdebug	TokenizerCHAT_TEMPLATEtypesGGUFValueTypeARRAYr	   r   getr   r   add_key_valuer
   r   add_chat_templateitemstensorsn_bytesadd_tensor_infodatashapedtypenbytestensor_typer   write_header_to_filewrite_kv_data_to_filewrite_ti_data_to_filewrite_tensor_data	endianessupdateclose)r   r+   r-   r/   r   val_typer   old_valvalr   total_bytestensorbarr   r   r   copy_with_new_metadata-   sR    


*


(
 

$
rb   c               
   C  s  dd t jjj D } tdd | D }tjdd}|jdt	dd |jd	t	d
d |jdt
ddd |jdt
ddd |jdt
ddd |jdt	ddd |jdt	ddd |jdt
ddd |jddt
d d!d" |jd#dt
d$d%d&| d'fd( |jd)dt
d*d%d&| d+fd( |jd,d-d.d/ |jd0d-d1d/ |ttjd%krd nd2g}tj|jrtjntjd3 i }|jpg }|jrtt jj|j|t jjj< |jrtt jj|j|t jjj< |jrtt jj|jd4rt !|jn|j|t jjj"< |j#r4t$|j#d5d6d7!}t %|}|&d8}|r$tt jj||t jjj"< W d    n	1 s/w   Y  |j'rat$|j'd5d6d7}|( }tt jj||t jjj"< W d    n	1 s\w   Y  |j)rrtt jj|j)|t jjj*< |rt+,d9 t+,d: t+,d; |j-st+,d< t.d=}	|	d>krt+/d? t0d@ t+/dA|j.  t 1|j.d5}
t2|
t jjj3}t2|
t jjj4pg }|j5pg D ]M\}}||vrt+,dB| dC qt6||}tt jj7|d@ dD| ||| < t|dEkrt+,dF| dG|d@  dH t+,dIdJd |D  q|j8pg D ]K\}}||vr1t+,dB| dC q|9 s>t:dK| dLt;|}|d@kra|t|k ratt jj7|dD||  ||| < qt:dM| dNt<j=>|j?r|j-st+,d9 t+,dO|j? dP t+,d< t.d=}	|	d>krt+/d? t0d@ t+/dQ|j?  t j@|j?||
jAdR}t2|
t jjjB}|d urt+CdS|  ||_DtE|
||| d S )TNc                 s  s(    | ]}| d sttjj|V  qdS )_N)rA   getattrr=   r>   rD   r"   nr   r   r   	<genexpr>h   s   & zmain.<locals>.<genexpr>c                 s  s:    | ]}| d r|dd dtd   |fV  qdS )	_token_id.r2   N)endswithsplitr'   re   r   r   r   rg   i   s   8 z,Make a copy of a GGUF file with new metadata)r   inputz GGUF format model input filename)r
   helpoutputz!GGUF format model output filenamez--general-namezThe models general.namez"name")r
   rm   metavarz--general-descriptionzThe models general.descriptionz"Description ..."z--chat-templatez:Chat template string (or JSON string containing templates)z"{% ... %} ..."z--chat-template-configz'Config file containing chat template(s)ztokenizer_config.jsonz--chat-template-filez#Jinja file containing chat templatezchat_template.jinjaz--pre-tokenizerzThe models tokenizer.ggml.prez"pre tokenizer"z--remove-metadataappendz/Remove metadata (by key name) from output modelzgeneral.url)actionr
   rm   ro   z--special-tokenzSpecial token by value   z | z	"<token>")rq   r
   rm   nargsro   z--special-token-by-idzSpecial token by id0z--force
store_truez$Bypass warnings without confirmation)rq   rm   z	--verbosezIncrease output verbosityz--help)level[rzutf-8)encodingchat_templatez&*** Warning *** Warning *** Warning **z=* Most metadata is required for a fully functional GGUF file,z@* removing crucial metadata may result in a corrupt output file!z<* Enter exactly YES if you are positive you want to proceed:zYES, I am sure> YESz(You didn't enter YES. Okay then, see ya!r   z* Loading: zUnknown special token "z", ignoring...z=    z
Multiple "z" tokens found, choosing ID z0, use --special-token-by-id if you want another:z, c                 s  s    | ]}t |V  qd S r   )r   )r"   ir   r   r   rg      s    z
Token ID "z" is not a valid ID!z	Token ID z is not within token list!z* The "z3" GGUF file already exists, it will be overwritten!z* Writing: )archrY   zSetting custom alignment: )Fr=   r>   rD   __dict__keysdictargparseArgumentParseradd_argumentr   r   join
parse_argsr'   sysargvloggingbasicConfigverboseDEBUGINFOr/   general_namer	   rG   STRINGr?   NAMEgeneral_descriptionDESCRIPTIONrz   rA   jsonloadsrE   chat_template_configopenloadrI   chat_template_filereadpre_tokenizerPRErB   warningforcerl   infoexit
GGUFReaderr   r@   LISTspecial_tokenr*   UINT32special_token_by_id	isdecimalr(   intospathisfilern   
GGUFWriterrY   	ALIGNMENTrC   data_alignmentrb   )tokenizer_metadatatoken_namesparserargsr-   r/   fpconfigtemplateresponser   r~   r   r<   r!   ids	id_stringid_intr+   	alignmentr   r   r   maing   s   $$
2










"

&





r   __main__)r   r   r   r   r   r   )r   r    r!   r   r   r    )
r   r   r+   r,   r-   r.   r/   r0   r   r1   )r   r1   )
__future__r   r   r   r   r   r   pathlibr   r   typingr   r   r   environ__file__parentexistsr   insertr   r=   	getLoggerrB   r	   r   r*   rb   r   r   r   r   r   r   <module>   s*   "



	
:p
