o
    i'&                     @  s   d dl mZ d dlZd dlZd dlZd dlZd dlZd dlmZ d dl	m	Z	 d dl
mZmZmZ dejvrOeejjjjd  rOejd eeejjj d dlZedZG d	d
 d
eZd$ddZd%ddZd&dd Zd'd!d"Zed#kr}e  dS dS )(    )annotationsN)Path)tqdm)AnySequence
NamedTupleNO_LOCAL_GGUFzgguf-pyzgguf-new-metadatac                   @  s6   e Zd ZU ded< ded< dZded< dZd	ed
< dS )MetadataDetailszgguf.GGUFValueTypetyper   value strdescriptionNzgguf.GGUFValueType | Nonesub_type)__name__
__module____qualname____annotations__r   r    r   r   [/home/ubuntu/veenaModal/venv/lib/python3.10/site-packages/gguf/scripts/gguf_new_metadata.pyr	      s
   
 r	   readergguf.GGUFReaderkeyr   returnr   c                 C  s   |  |}|r| S d S N)	get_fieldcontents)r   r   fieldr   r   r   get_field_data   s   
r   
token_listSequence[int]tokenc                   s6    fddt | D }t|dkrtd  d|S )Nc                   s   g | ]
\}}| kr|qS r   r   ).0indexr   r!   r   r   
<listcomp>%   s    zfind_token.<locals>.<listcomp>r   zUnable to find "z" in token list!)	enumeratelenLookupError)r   r!   	token_idsr   r$   r   
find_token$   s   r*   writergguf.GGUFWriternew_metadatadict[str, MetadataDetails]remove_metadataSequence[str]Nonec                 C  sv  | j  D ]}|jtjjjks|jdrt	d|j  q|jtjj
jr9tjj
j|v r9t	d|j  q|j|v rHt	d|j  q|jd }|tjjkrX|jd nd }t|| |d}||j|}|j|v rt	d|j d	|j d
|j d|j  ||j= n|jd urt	d|j  |jd ur|j|j|j|j|jd u r|n|jd qtjj
j|v rt	d ||tjj
j j |tjj
j= | D ]\}	}t	d|	 d	|j d|j  ||	|j|j qd}
| jD ]}|
|j7 }
||j|jj|jj|jj|j  qt!d|
ddd}|"  |#  |$  | jD ]}|%|j |&|j q%|'  d S )NzGGUF.zSuppressing z	Skipping z	Removing r   )r   z
Modifying z: "z" -> "z" zCopying zAdding chat template(s)zAdding WritingbyteT)desctotalunit
unit_scale)(fieldsvaluesnameggufKeysGeneralARCHITECTURE
startswithloggerdebug	TokenizerCHAT_TEMPLATEtypesGGUFValueTypeARRAYr	   r   getr   r   add_key_valuer
   r   add_chat_templateitemstensorsn_bytesadd_tensor_infodatashapedtypenbytestensor_typer   write_header_to_filewrite_kv_data_to_filewrite_ti_data_to_filewrite_tensor_dataupdateclose)r   r+   r-   r/   r   val_typer   old_valvalr   total_bytestensorbarr   r   r   copy_with_new_metadata-   sR    


*


(
 

$
r`   c               
   C  s0  dd t jjj D } tdd | D }tjdd}|jdt	dd |jd	t	d
d |jdt
ddd |jdt
ddd |jdt
ddd |jdt	ddd |jdt
ddd |jddt
ddd |jd dt
d!d"d#| d$fd% |jd&dt
d'd"d#| d(fd% |jd)d*d+d, |jd-d*d.d, |ttjd"krd nd/g}tj|jrtjntjd0 i }|jpg }|jrtt jj|j|t jjj< |jrtt jj|j|t jjj< |jrtt jj|jd1rt !|jn|j|t jjj"< |j#r)t$|j#d2!}t %|}|&d3}|rtt jj||t jjj"< W d    n	1 s$w   Y  |j'r:tt jj|j'|t jjj(< |rht)*d4 t)*d5 t)*d6 |j+sht)*d7 t,d8}	|	d9krht)-d: t.d; t)-d<|j,  t /|j,d2}
t0|
t jjj1}t0|
t jjj2pg }|j3pg D ]M\}}||vrt)*d=| d> qt4||}tt jj5|d; d?| ||| < t|d@krt)*dA| dB|d;  dC t)*dDdEd |D  q|j6pg D ]K\}}||vrt)*d=| d> q|7 st8dF| dGt9|}|d;kr)|t|k r)tt jj5|d?||  ||| < qt8dH| dIt:j;<|j=rd|j+sdt)*d4 t)*dJ|j= dK t)*d7 t,d8}	|	d9krdt)-d: t.d; t)-dL|j=  t j>|j=||
j?dM}t0|
t jjj@}|d urt)AdN|  ||_BtC|
||| d S )ONc                 s  s(    | ]}| d sttjj|V  qdS )_N)r@   getattrr<   r=   rC   r"   nr   r   r   	<genexpr>h   s   & zmain.<locals>.<genexpr>c                 s  s:    | ]}| d r|dd dtd   |fV  qdS )	_token_id.r2   N)endswithsplitr'   rc   r   r   r   re   i   s   8 z,Make a copy of a GGUF file with new metadata)r   inputz GGUF format model input filename)r
   helpoutputz!GGUF format model output filenamez--general-namezThe models general.namez"name")r
   rk   metavarz--general-descriptionzThe models general.descriptionz"Description ..."z--chat-templatez:Chat template string (or JSON string containing templates)z"{% ... %} ..."z--chat-template-configz'Config file containing chat template(s)ztokenizer_config.jsonz--pre-tokenizerzThe models tokenizer.ggml.prez"pre tokenizer"z--remove-metadataappendz/Remove metadata (by key name) from output modelzgeneral.url)actionr
   rk   rm   z--special-tokenzSpecial token by value   z | z	"<token>")ro   r
   rk   nargsrm   z--special-token-by-idzSpecial token by id0z--force
store_truez$Bypass warnings without confirmation)ro   rk   z	--verbosezIncrease output verbosityz--help)level[rchat_templatez&*** Warning *** Warning *** Warning **z=* Most metadata is required for a fully functional GGUF file,z@* removing crucial metadata may result in a corrupt output file!z<* Enter exactly YES if you are positive you want to proceed:zYES, I am sure> YESz(You didn't enter YES. Okay then, see ya!r   z* Loading: zUnknown special token "z", ignoring...z=    z
Multiple "z" tokens found, choosing ID z0, use --special-token-by-id if you want another:z, c                 s  s    | ]}t |V  qd S r   )r   )r"   ir   r   r   re      s    z
Token ID "z" is not a valid ID!z	Token ID z is not within token list!z* The "z3" GGUF file already exists, it will be overwritten!z* Writing: )arch	endianesszSetting custom alignment: )Dr<   r=   rC   __dict__keysdictargparseArgumentParseradd_argumentr   r   join
parse_argsr'   sysargvloggingbasicConfigverboseDEBUGINFOr/   general_namer	   rF   STRINGr>   NAMEgeneral_descriptionDESCRIPTIONrw   r@   jsonloadsrD   chat_template_configopenloadrH   pre_tokenizerPRErA   warningforcerj   infoexit
GGUFReaderr   r?   LISTspecial_tokenr*   UINT32special_token_by_id	isdecimalr(   intospathisfilerl   
GGUFWriterr|   	ALIGNMENTrB   data_alignmentr`   )tokenizer_metadatatoken_namesparserargsr-   r/   fpconfigtemplateresponser   r{   r   r;   r!   ids	id_stringid_intr+   	alignmentr   r   r   maing   s   $$
2










"

&





r   __main__)r   r   r   r   r   r   )r   r    r!   r   r   r    )
r   r   r+   r,   r-   r.   r/   r0   r   r1   )r   r1   )
__future__r   r   r   r   r   r   pathlibr   r   typingr   r   r   environ__file__parentexistsr   insertr   r<   	getLoggerrA   r	   r   r*   r`   r   r   r   r   r   r   <module>   s*   "



	
:j
