o
    [۷i                     @  s@  d dl mZ d dlZd dlZd dlZd dlZd dlZd dlZd dlm	Z	 d dl
mZmZ d dlmZ d dlmZ d dlmZ d dlmZmZmZmZ d d	lmZmZ d dlZd
dlmZmZm Z m!Z!m"Z"m#Z#m$Z$m%Z%m&Z&m'Z'm(Z( d
dl)m*Z* e+e,Z-dZ.e	G dd dZ/e	G dd dZ0G dd deZ1G dd dZ2dS )    )annotationsN)	dataclass)Enumauto)prod)Path)BufferedWriter)IOAnySequenceMappingascii_lettersdigits   )GGUF_DEFAULT_ALIGNMENT
GGUF_MAGICGGUF_VERSIONGGMLQuantizationType
GGUFEndianGGUFValueTypeKeysRopeScalingTypePoolingType	TokenTypeExpertGatingFuncType)quant_shape_from_byte_shapez{:s}-{:05d}-of-{:05d}.ggufc                   @  s2   e Zd ZU ded< ded< ded< dZded	< dS )

TensorInfoSequence[int]shaper   dtypeintnbytesNznp.ndarray[Any, Any] | Nonetensor)__name__
__module____qualname____annotations__r#    r(   r(   F/home/ubuntu/vllm_env/lib/python3.10/site-packages/gguf/gguf_writer.pyr   )   s
   
 r   c                   @  s*   e Zd ZU ded< ded< dZded< dS )	GGUFValuer
   valuer   typeNGGUFValueType | Nonesub_type)r$   r%   r&   r'   r.   r(   r(   r(   r)   r*   1   s   
 r*   c                   @  s0   e Zd Ze Ze Ze Ze Ze Ze Z	dS )WriterStateN)
r$   r%   r&   r   NO_FILEEMPTYHEADERKV_DATATI_DATAWEIGHTSr(   r(   r(   r)   r/   8   s    
r/   c                   @  s"  e Zd ZU ded< ded< ded< ded< d	ed
< ded< ejdejdejdejdej	dej
dejdejdejdejdejdiZdejddddfdd&d'Zdd*d+Zdd.d/Zddd2d3Zdd4d5Zdd6d7Zddd8d9Zdd:d;Zdd<d=ZdddEdFZddGdHZddIdJZddKdLZddMdNZ ddOdPZ!ddQdRZ"ddTdUZ#ddVdWZ$ddXdYZ%ddZd[Z&dd\d]Z'dd^d_Z(ddadbZ)e*ddedfZ+	0dddodpZ,	0	0dddwdxZ-ddd}d~Z.ddddZ/dddddZ0dddZ1dddZ2dddZ3dddZ4dddZ5dddZ6dddZ7dddZ8dddZ9dddZ:dddZ;dddZ<dddZ=dddZ>dddZ?dddZ@dddZAdddZBdddZCdddZDdddZEdddZFdddńZGdddȄZHddd˄ZIddd΄ZJdddфZKdddԄZLdddׄZMdddلZNdddۄZOdddބZPdddZQdddZRdddZSdddZTdddZUdddZVdddZWdddZXdddZYdddZZdddZ[dddZ\dddZ]dddZ^dאd dZ_dؐddZ`dِddZadАddZbdѐdd	ZcdҐd
dZddӐddZedԐddZfdՐddZgd֐ddZhdאddZidؐddZjdِddZkdڐddZldېdd Zmdܐd"d#Zndݐd%d&Zodސd(d)Zpdސd*d+Zqdސd,d-Zrdސd.d/Zsdސd0d1Ztdސd2d3Zudސd4d5Zvdސd6d7Zwdސd8d9Zxdސd:d;Zydސd<d=Zzdߐd?d@Z{ddBdCZ|dސdDdEZ}dސdFdGZ~dސdHdIZddKdLZddNdOZddQdRZddSdTZddUdVZddWdXZdd[d\Zdd^d_Zdd`daZdސdbdcZdސdddeZdސdfdgZdސdhdiZddjdkZdސdldmZddndoZddqdrZddsdtZddudvZddxdyZdd}d~ZdddZdddZdddZdddZdddZdddZdddZdddZdddZdddZdddZdddZdddZdddZdddZdddZdddZdddZdddZdddZdddZdddZdddZdddZdݐddZdddZdddZdddZdddZdddZdddZdddZdސddZdސdÐdĄZdސdŐdƄZdސdǐdȄZdސdɐdʄZddːd̄Zdސd͐d΄ZddϐdЄZddѐd҄ZddӐdԄZddՐdքZddאd؄ZddِdڄZddܐd݄Zddސd߄ZÐdddZĐdddZŐdddZƐdddZǐdddZȐdddZɐdddZʐdddZːdddZ̐dddZ͐dddZΐdddZϐdddZАdddZѐdddZҐdd dZӐdddZԐdddZՐdddZ֐ddd	ZאdddZؐdddZِdddZڐdddZېdddZܐdddZݐdddZސdd d!Zߐdd"d#Zdd$d%Zdd&d'Zdd(d)Zdd*d+Zdd,d-Zdd.d/Zdd0d1Zdd2d3Zdd4d5Zdd8d9Zdd;d<Zdd=d>Zdd?d@ZddBdCZddDdEZddFdGZddHdIZddJdKZddLdMZddNdOZddPdQZddRdSZddTdUZddVdWZddXdYZddZd[Zdd\d]Zdd^d_Zdd`daZddbdcZddddeZddfdgZ ddhdiZddjdkZddldmZddndoZddqdrZddtduZddvdwZddxdyZddzd{Z	dd|d}Z
dd~dZdddZdddZdddZdddZdddZdddZdddZdddZdddZdddZdd ddZddddZe*dddZd0S (  
GGUFWriterzlist[BufferedWriter] | NonefoutPath | Nonepathz+tempfile.SpooledTemporaryFile[bytes] | None	temp_filezlist[dict[str, TensorInfo]]tensorszlist[dict[str, GGUFValue]]kv_datar/   stateBbHhIifQqd?Fr   os.PathLike[str] | str | Nonearchstruse_temp_filebool	endianessr   split_max_tensorsr!   split_max_sizedry_runsmall_first_shardc	           	      C  s   d | _ |r	t|nd | _|| _|| _t| _|| _d | _i g| _	i g| _
|| _|| _|| _|| _td| jtjkr;dnd tj| _| jrL| j	i  |   d S )Nz+gguf: This GGUF file is for {0} Endian onlyBigLittle)r7   r   r9   rJ   rN   r   data_alignmentrL   r:   r;   r<   rO   rP   rQ   rR   loggerinfoformatr   BIGr/   r0   r=   appendadd_architecture)	selfr9   rJ   rL   rN   rO   rP   rQ   rR   r(   r(   r)   __init__V   s(   zGGUFWriter.__init__returntuple[int, int, int, int]c                 C  s,  d}d}d}d}d}d }| j D ]o}| D ]h\}}	|	j}
|dr&||	f}q|drT|d u s;|d |d d d krDtd   dS g |
d d |d jd R }
t|
}d	|v ru|
d
|v rcdnd }||| 7 }||7 }|d7 }n||7 }||7 }qq|dkr|| nd}|d ur| }||||fS )Nr   z.lora_az.lora_baz:can't measure LoRA size correctly, tensor order is unusual)r   r   r   r   r   z_exps.z.bias)r;   itemsr   endswithrV   warningr   )r\   total_paramsshared_paramsexpert_params
expert_sumn_expert_tensorslast_lora_ar;   namerW   r   sizeexpert_countr(   r(   r)   get_total_parameter_countq   s<   


 
 

z$GGUFWriter.get_total_parameter_countr   
list[Path]c                   s2   t jdkr
 gS  fddtt jD S )Nr   c              
     s,   g | ]}  t j|d  tjqS )r   )	with_nameSHARD_NAME_FORMATrX   stemlenr;   .0rC   r9   r\   r(   r)   
<listcomp>   s   , z1GGUFWriter.format_shard_names.<locals>.<listcomp>)ru   r;   range)r\   r9   r(   rx   r)   format_shard_names   s   zGGUFWriter.format_shard_namesNNonec                 C  s   | j tju r| jd ur|d u s|| jkrd S | j tjur$td| j  |d ur+|| _| jd urB|  }dd |D | _tj| _ d S d S )N/Expected output file to be not yet opened, got c                 S  s   g | ]}t |d qS )wb)open)rw   filenamer(   r(   r)   ry      s    z/GGUFWriter.open_output_file.<locals>.<listcomp>)r=   r/   r1   r7   r9   r0   
ValueError
print_plan)r\   r9   	filenamesr(   r(   r)   open_output_file   s   (
zGGUFWriter.open_output_filec                 C  s   t d | jd usJ | | j}t|t| jksJ t|| jD ]\}}t | dt| dtt	dd |
 D   q#| jrWt d |D ]}t| qMt  |S )NzWriting the following files:z: n_tensors = z, total_size = c                 s      | ]}|j V  qd S Nr"   rw   tir(   r(   r)   	<genexpr>       z(GGUFWriter.print_plan.<locals>.<genexpr>zDry run, not writing files)rV   rW   r9   r{   ru   r;   zipr6   format_n_bytes_to_strsumvaluesrQ   printexit)r\   r   rm   r;   r(   r(   r)   r      s   
8

zGGUFWriter.print_planc                 C  s   t | jdkr	d S tdd | jD }| jd usJ t | j}| jdd tt | j|D  t| jD ]"\}}t|t	j
|tjj< t|t	j
|tjj< t|t	j|tjj< q5d S )Nr   c                 s  s    | ]}t |V  qd S r   )ru   )rw   tr(   r(   r)   r          z/GGUFWriter.add_shard_kv_data.<locals>.<genexpr>c                 s  s    | ]}i V  qd S r   r(   )rw   _r(   r(   r)   r      s    )ru   r;   r   r7   r<   extendrz   	enumerater*   r   UINT16r   SplitLLM_KV_SPLIT_NOLLM_KV_SPLIT_COUNTINT32LLM_KV_SPLIT_TENSORS_COUNT)r\   total_tensorstotal_splitsrC   r<   r(   r(   r)   add_shard_kv_data   s   
"zGGUFWriter.add_shard_kv_datac                 C  s  t | jdkr| jdks| jdkrtd | | | jtj	ur)t
d| j | jd us0J t | jt | jks<J t | jdksEJ |   t| j| j| jD ]3\}}}|| jdtdd || dt || d	t | || d	t | |  qRtj| _d S )
Nr   r   z-Model fails split requirements, not splittingz&Expected output file to be empty, got z<ITskip_pack_prefixrB   rE   )ru   r;   rO   rP   rV   rf   r   r=   r/   r1   r   r7   r<   r   r   write_packr   r   flushr2   )r\   r9   r7   r;   r<   r(   r(   r)   write_header_to_file   s    "


zGGUFWriter.write_header_to_filec              	   C  s   | j tjurtd| j  | jd usJ t| j| jD ].\}}t }| D ]\}}|| j	|t
jdd7 }|| j	|j|jd|jd7 }q'|| q|   tj| _ d S )Nz0Expected output file to contain the header, got F	add_vtypeT)r   r.   )r=   r/   r2   r   r7   r   r<   	bytearrayrd   	_pack_valr   STRINGr+   r,   r.   r   r   r3   )r\   r7   r<   kv_byteskeyvalr(   r(   r)   write_kv_data_to_file   s   z GGUFWriter.write_kv_data_to_filec           	   
   C  s  | j tjurtd| j  | jd usJ t| j| jD ]d\}}t }d}| D ]M\}}|| j	|t
jdd7 }t|j}|| d|7 }t|D ]}|| d|j|d |  7 }qI|| d|j7 }|| d|7 }|t|j| j7 }q)|| |  qtj| _ d S )Nz-Expected output file to contain KV data, got r   Fr   rB   rE   r   )r=   r/   r3   r   r7   r   r;   r   rd   r   r   r   ru   r   r   rz   r    r6   ggml_padr"   rU   r   r   r4   )	r\   r7   r;   ti_dataoffset_tensorrm   r   n_dimsjr(   r(   r)   write_ti_data_to_file   s$   
 

z GGUFWriter.write_ti_data_to_filer   r   r
   vtyper   r.   r-   c                   sR   t  fdd| jD rtd d|d|j  t|||d| jd  < d S )Nc                 3      | ]} |v V  qd S r   r(   )rw   r<   r   r(   r)   r     r   z+GGUFWriter.add_key_value.<locals>.<genexpr>zDuplicated key name z , overwriting it with new value z	 of type )r+   r,   r.   r   )anyr<   rV   rf   rm   r*   )r\   r   r   r   r.   r(   r   r)   add_key_value  s   zGGUFWriter.add_key_valuec                 C     |  ||tj d S r   )r   r   UINT8r\   r   r   r(   r(   r)   	add_uint8     zGGUFWriter.add_uint8c                 C  r   r   )r   r   INT8r   r(   r(   r)   add_int8  r   zGGUFWriter.add_int8c                 C  r   r   )r   r   r   r   r(   r(   r)   
add_uint16  r   zGGUFWriter.add_uint16c                 C  r   r   )r   r   INT16r   r(   r(   r)   	add_int16!  r   zGGUFWriter.add_int16c                 C  r   r   )r   r   UINT32r   r(   r(   r)   
add_uint32$  r   zGGUFWriter.add_uint32c                 C  r   r   )r   r   r   r   r(   r(   r)   	add_int32'  r   zGGUFWriter.add_int32floatc                 C  r   r   )r   r   FLOAT32r   r(   r(   r)   add_float32*  r   zGGUFWriter.add_float32c                 C  r   r   )r   r   UINT64r   r(   r(   r)   
add_uint64-  r   zGGUFWriter.add_uint64c                 C  r   r   )r   r   INT64r   r(   r(   r)   	add_int640  r   zGGUFWriter.add_int64c                 C  r   r   )r   r   FLOAT64r   r(   r(   r)   add_float643  r   zGGUFWriter.add_float64c                 C  r   r   )r   r   BOOLr   r(   r(   r)   add_bool6  r   zGGUFWriter.add_boolc                 C  s   |sd S |  ||tj d S r   )r   r   r   r   r(   r(   r)   
add_string9  s   zGGUFWriter.add_stringSequence[Any]c                 C  s$   t |dkrd S | ||tj d S Nr   )ru   r   r   ARRAYr   r(   r(   r)   	add_array>  s   zGGUFWriter.add_arrayxnc                 C  s   | | d | | S )Nr   r(   )r   r   r(   r(   r)   r   C  s   zGGUFWriter.ggml_padrm   tensor_shaper   tensor_dtypenp.dtypetensor_nbytes	raw_dtypeGGMLQuantizationType | Nonec                   sp  | j tjurtd| j  t fdd| jD r!td |d u rh|tjkr.tj	}nF|tj
kr7tj}n=|tjkr@tj}n4|tjkrItj}n+|tjkrRtj}n"|tjkr[tj}n|tjkrdtj}ntd|}|tjkrtt||}t| jd dkr| jdkrt| jd | jks| jdkrtdd | jd  D | | jkr| ji  t|||d	| jd  < d S )
Nr}   c                 3  r   r   r(   )rw   r;   rm   r(   r)   r   N  r   z-GGUFWriter.add_tensor_info.<locals>.<genexpr>zDuplicated tensor name zCOnly F16, F32, F64, I8, I16, I32, I64 tensors are supported for nowr`   r   c                 s  r   r   r   r   r(   r(   r)   r   n  r   )r   r    r"   )r=   r/   r0   r   r   r;   npfloat16r   F16float32F32float64F64int8I8int16I16int32I32int64I64uint8r   ru   rO   rP   r   r   rZ   r   )r\   rm   r   r   r   r   r    r(   r   r)   add_tensor_infoG  s<   










&zGGUFWriter.add_tensor_infor#   np.ndarray[Any, Any]	raw_shapeSequence[int] | Nonetensor_endianessGGUFEndian | Nonec                 C  s   |d u rt jdkrtjntj}|| jkr|jdd}| jr1| jd u r1t	j
ddd}|d || _|d ur7|n|j}| j|||j|j|d | jd u rU|| jd	 | _d S || j | | j|j d S )
NbigFinplacezw+bi   )modemax_sizer   )r   r`   )sys	byteorderr   rY   LITTLErN   byteswaprL   r:   tempfileSpooledTemporaryFileseekr   r   r    r"   r;   r#   tofilewrite_padding)r\   rm   r#   r   r   r   fpr   r(   r(   r)   
add_tensort  s   


zGGUFWriter.add_tensorr  	IO[bytes]align
int | Nonec                 C  sB   t ||d ur	|n| j| }|dkr|tdg|  d S d S r   )r6   r   rU   r   bytes)r\   r  r   r  padr(   r(   r)   r    s   zGGUFWriter.write_paddingc           	      C  s  | j tjur| j tjurtd| j  | jd usJ |d u r*tjdkr'tj	ntj
}|| jkr5|jdd}d}t| jD ]\}}t|dkrJ|} nq<| j| }dd t| j|  td	D d }| j| |}|j|jkssJ | ||  || | ||j tj| _ d S )
Nz<Expected output file to contain tensor info or weights, got r   Fr   r`   r   c                 S  s   g | ]\}}|qS r(   r(   )rw   rm   r   r(   r(   r)   ry     s    z0GGUFWriter.write_tensor_data.<locals>.<listcomp>r   )r=   r/   r4   r5   r   r7   r   r   r   rY   r   rN   r   r   r;   ru   r   keysrz   popr"   r  tellr  )	r\   r#   r   file_idrC   r;   r7   first_tensor_namer   r(   r(   r)   write_tensor_data  s*   

&
zGGUFWriter.write_tensor_data)progressr  c                C  s  |    | jd usJ | jD ]
}| ||  q| jd u rd }d }|rRddlm} tdd | jD }t| jdkrJ|dt| j dd dd	d
}|d|dd	d
}t	t
| j| jD ]m\}\}}|d ur|d|d  dt| j d tdd | D }	|j|	dkr|	nd d | D ]6}
|
jd usJ |
jj|
jksJ |
j| |d ur||
j |d ur||
j | ||
j d |
_qq[n| jd t| j| j| jsdnd  |   | j  tj| _d S )Nr   )tqdmc                 s  s$    | ]}|  D ]}|jV  qqd S r   )r   r"   )rw   r   r   r(   r(   r)   r        " z3GGUFWriter.write_tensors_to_file.<locals>.<genexpr>r   z	Shard (0/)byteT)desctotalunit
unit_scaleWritingzShard (/c                 s  r   r   r   r   r(   r(   r)   r     r   )r  )r   r7   r  r  r:   r  r   r;   ru   r   r   set_descriptionr   resetr#   r"   r  updater  shutilcopyfileobjrR   r   closer/   r5   r=   )r\   r  r7   	shard_barbarr  total_bytesrC   r;   r  r   r(   r(   r)   write_tensors_to_file  sF   

"
z GGUFWriter.write_tensors_to_filec                 C  s&   | j d usJ | j D ]}|  q
d S r   )r7   r   r\   r7   r(   r(   r)   r     s   

zGGUFWriter.flushc                 C  s,   | j d ur| j D ]}|  qd | _ d S d S r   )r7   r!  r&  r(   r(   r)   r!    s
   



zGGUFWriter.close	type_namec                 C     |  tjj| d S r   )r   r   GeneralTYPE)r\   r'  r(   r(   r)   add_type  r   zGGUFWriter.add_typec                 C  s   |  tjj| j d S r   )r   r   r)  ARCHITECTURErJ   )r\   r(   r(   r)   r[        zGGUFWriter.add_architecturequantization_versionc                 C  r(  r   )r   r   r)  QUANTIZATION_VERSION)r\   r.  r(   r(   r)   add_quantization_version  r   z#GGUFWriter.add_quantization_version	alignmentc                 C  s:   |dks||d @ dkrt d|| _| tjj| d S )Nr   r   z2Invalid alignment: must be a non-zero power of two)r   rU   r   r   r)  	ALIGNMENT)r\   r1  r(   r(   r)   add_custom_alignment  s   zGGUFWriter.add_custom_alignmentftypec                 C  r(  r   )r   r   r)  	FILE_TYPE)r\   r4  r(   r(   r)   add_file_type  r   zGGUFWriter.add_file_typesequencec                 C  r(  r   )r   r   r)  SAMPLING_SEQUENCE)r\   r7  r(   r(   r)   add_sampling_sequence   r   z GGUFWriter.add_sampling_sequencetop_kc                 C  r(  r   )r   r   r)  SAMPLING_TOP_Kr\   r:  r(   r(   r)   add_sampling_top_k  r   zGGUFWriter.add_sampling_top_ktop_pc                 C  r(  r   )r   r   r)  SAMPLING_TOP_P)r\   r>  r(   r(   r)   add_sampling_top_p  r   zGGUFWriter.add_sampling_top_pmin_pc                 C  r(  r   )r   r   r)  SAMPLING_MIN_P)r\   rA  r(   r(   r)   add_sampling_min_p	  r   zGGUFWriter.add_sampling_min_pxtc_probabilityc                 C  r(  r   )r   r   r)  SAMPLING_XTC_PROBABILITY)r\   rD  r(   r(   r)   add_sampling_xtc_probability  r   z'GGUFWriter.add_sampling_xtc_probabilityxtc_thresholdc                 C  r(  r   )r   r   r)  SAMPLING_XTC_THRESHOLD)r\   rG  r(   r(   r)   add_sampling_xtc_threshold  r   z%GGUFWriter.add_sampling_xtc_thresholdtempc                 C  r(  r   )r   r   r)  SAMPLING_TEMP)r\   rJ  r(   r(   r)   add_sampling_temp  r   zGGUFWriter.add_sampling_temppenalty_last_nc                 C  r(  r   )r   r   r)  SAMPLING_PENALTY_LAST_N)r\   rM  r(   r(   r)   add_sampling_penalty_last_n  r   z&GGUFWriter.add_sampling_penalty_last_npenalty_repeatc                 C  r(  r   )r   r   r)  SAMPLING_PENALTY_REPEAT)r\   rP  r(   r(   r)   add_sampling_penalty_repeat  r   z&GGUFWriter.add_sampling_penalty_repeatmirostatc                 C  r(  r   )r   r   r)  SAMPLING_MIROSTAT)r\   rS  r(   r(   r)   add_sampling_mirostat  r   z GGUFWriter.add_sampling_mirostatmirostat_tauc                 C  r(  r   )r   r   r)  SAMPLING_MIROSTAT_TAU)r\   rV  r(   r(   r)   add_sampling_mirostat_tau  r   z$GGUFWriter.add_sampling_mirostat_taumirostat_etac                 C  r(  r   )r   r   r)  SAMPLING_MIROSTAT_ETA)r\   rY  r(   r(   r)   add_sampling_mirostat_eta!  r   z$GGUFWriter.add_sampling_mirostat_etac                 C  r(  r   )r   r   r)  NAME)r\   rm   r(   r(   r)   add_name$  r   zGGUFWriter.add_nameauthorc                 C  r(  r   )r   r   r)  AUTHOR)r\   r^  r(   r(   r)   
add_author'  r   zGGUFWriter.add_authorversionc                 C  r(  r   )r   r   r)  VERSION)r\   ra  r(   r(   r)   add_version*  r   zGGUFWriter.add_versionorganizationc                 C  r(  r   )r   r   r)  ORGANIZATION)r\   rd  r(   r(   r)   add_organization-  r   zGGUFWriter.add_organizationfinetunec                 C  r(  r   )r   r   r)  FINETUNE)r\   rg  r(   r(   r)   add_finetune0  r   zGGUFWriter.add_finetunebasenamec                 C  r(  r   )r   r   r)  BASENAME)r\   rj  r(   r(   r)   add_basename3  r   zGGUFWriter.add_basenamedescriptionc                 C  r(  r   )r   r   r)  DESCRIPTION)r\   rm  r(   r(   r)   add_description6  r   zGGUFWriter.add_description	quantizedc                 C  r(  r   )r   r   r)  QUANTIZED_BY)r\   rp  r(   r(   r)   add_quantized_by9  r   zGGUFWriter.add_quantized_by
size_labelc                 C  r(  r   )r   r   r)  
SIZE_LABEL)r\   rs  r(   r(   r)   add_size_label<  r   zGGUFWriter.add_size_labellicensec                 C  r(  r   )r   r   r)  LICENSEr\   rv  r(   r(   r)   add_license?  r   zGGUFWriter.add_licensec                 C  r(  r   )r   r   r)  LICENSE_NAMErx  r(   r(   r)   add_license_nameB  r   zGGUFWriter.add_license_namec                 C  r(  r   )r   r   r)  LICENSE_LINKrx  r(   r(   r)   add_license_linkE  r   zGGUFWriter.add_license_linkurlc                 C  r(  r   )r   r   r)  URLr\   r~  r(   r(   r)   add_urlH  r   zGGUFWriter.add_urldoic                 C  r(  r   )r   r   r)  DOIr\   r  r(   r(   r)   add_doiK  r   zGGUFWriter.add_doiuuidc                 C  r(  r   )r   r   r)  UUIDr\   r  r(   r(   r)   add_uuidN  r   zGGUFWriter.add_uuidrepo_urlc                 C  r(  r   )r   r   r)  REPO_URLr\   r  r(   r(   r)   add_repo_urlQ  r   zGGUFWriter.add_repo_urlc                 C  r(  r   )r   r   r)  
SOURCE_URLr  r(   r(   r)   add_source_urlT  r   zGGUFWriter.add_source_urlc                 C  r(  r   )r   r   r)  
SOURCE_DOIr  r(   r(   r)   add_source_doiW  r   zGGUFWriter.add_source_doic                 C  r(  r   )r   r   r)  SOURCE_UUIDr  r(   r(   r)   add_source_uuidZ  r   zGGUFWriter.add_source_uuidc                 C  r(  r   )r   r   r)  SOURCE_REPO_URLr  r(   r(   r)   add_source_repo_url]  r   zGGUFWriter.add_source_repo_urlsource_countc                 C  r(  r   )r   r   r)  BASE_MODEL_COUNTr\   r  r(   r(   r)   add_base_model_count`  r   zGGUFWriter.add_base_model_count	source_idc                 C     |  tjjj|d| d S N)id)r   r   r)  BASE_MODEL_NAMErX   r\   r  rm   r(   r(   r)   add_base_model_namec     zGGUFWriter.add_base_model_namec                 C  r  r  )r   r   r)  BASE_MODEL_AUTHORrX   r\   r  r^  r(   r(   r)   add_base_model_authorf  r  z GGUFWriter.add_base_model_authorc                 C  r  r  )r   r   r)  BASE_MODEL_VERSIONrX   r\   r  ra  r(   r(   r)   add_base_model_versioni  r  z!GGUFWriter.add_base_model_versionc                 C  r  r  )r   r   r)  BASE_MODEL_ORGANIZATIONrX   r\   r  rd  r(   r(   r)   add_base_model_organizationl  r  z&GGUFWriter.add_base_model_organizationc                 C  r  r  )r   r   r)  BASE_MODEL_DESCRIPTIONrX   r\   r  rm  r(   r(   r)   add_base_model_descriptiono  r  z%GGUFWriter.add_base_model_descriptionc                 C  r  r  )r   r   r)  BASE_MODEL_URLrX   r\   r  r~  r(   r(   r)   add_base_model_urlr  r  zGGUFWriter.add_base_model_urlc                 C  r  r  )r   r   r)  BASE_MODEL_DOIrX   r\   r  r  r(   r(   r)   add_base_model_doiu  r  zGGUFWriter.add_base_model_doic                 C  r  r  )r   r   r)  BASE_MODEL_UUIDrX   r\   r  r  r(   r(   r)   add_base_model_uuidx  r  zGGUFWriter.add_base_model_uuidc                 C  r  r  )r   r   r)  BASE_MODEL_REPO_URLrX   r\   r  r  r(   r(   r)   add_base_model_repo_url{  r  z"GGUFWriter.add_base_model_repo_urlc                 C  r(  r   )r   r   r)  DATASET_COUNTr  r(   r(   r)   add_dataset_count~  r   zGGUFWriter.add_dataset_countc                 C  r  r  )r   r   r)  DATASET_NAMErX   r  r(   r(   r)   add_dataset_name  r  zGGUFWriter.add_dataset_namec                 C  r  r  )r   r   r)  DATASET_AUTHORrX   r  r(   r(   r)   add_dataset_author  r  zGGUFWriter.add_dataset_authorc                 C  r  r  )r   r   r)  DATASET_VERSIONrX   r  r(   r(   r)   add_dataset_version  r  zGGUFWriter.add_dataset_versionc                 C  r  r  )r   r   r)  DATASET_ORGANIZATIONrX   r  r(   r(   r)   add_dataset_organization  r  z#GGUFWriter.add_dataset_organizationc                 C  r  r  )r   r   r)  DATASET_DESCRIPTIONrX   r  r(   r(   r)   add_dataset_description  r  z"GGUFWriter.add_dataset_descriptionc                 C  r  r  )r   r   r)  DATASET_URLrX   r  r(   r(   r)   add_dataset_url  r  zGGUFWriter.add_dataset_urlc                 C  r  r  )r   r   r)  DATASET_DOIrX   r  r(   r(   r)   add_dataset_doi  r  zGGUFWriter.add_dataset_doic                 C  r  r  )r   r   r)  DATASET_UUIDrX   r  r(   r(   r)   add_dataset_uuid  r  zGGUFWriter.add_dataset_uuidc                 C  r  r  )r   r   r)  DATASET_REPO_URLrX   r  r(   r(   r)   add_dataset_repo_url  r  zGGUFWriter.add_dataset_repo_urltagsSequence[str]c                 C  r(  r   )r   r   r)  TAGS)r\   r  r(   r(   r)   add_tags  r   zGGUFWriter.add_tags	languagesc                 C  r(  r   )r   r   r)  	LANGUAGES)r\   r  r(   r(   r)   add_languages  r   zGGUFWriter.add_languageslayoutc                 C     |  tjjj| jd| d S N)rJ   )r   r   LLMTENSOR_DATA_LAYOUTrX   rJ   )r\   r  r(   r(   r)   add_tensor_data_layout     z!GGUFWriter.add_tensor_data_layoutrn   c                 C  r  r  )r   r   r  
VOCAB_SIZErX   rJ   r\   rn   r(   r(   r)   add_vocab_size  r  zGGUFWriter.add_vocab_sizelengthc                 C  r  r  )r   r   r  CONTEXT_LENGTHrX   rJ   r\   r  r(   r(   r)   add_context_length  r  zGGUFWriter.add_context_lengthc                 C  r  r  )r   r   r  EMBEDDING_LENGTHrX   rJ   r  r(   r(   r)   add_embedding_length  r  zGGUFWriter.add_embedding_lengthc                 C  r  r  )r   r   r  EMBEDDING_LENGTH_OUTrX   rJ   r  r(   r(   r)   add_embedding_length_out  r  z#GGUFWriter.add_embedding_length_outc                 C  r  r  )r   r   r  FEATURES_LENGTHrX   rJ   r  r(   r(   r)   add_features_length  r  zGGUFWriter.add_features_lengthc                 C  r  r  )r   r   PosNetr  rX   rJ   r  r(   r(   r)   add_posnet_embedding_length  r  z&GGUFWriter.add_posnet_embedding_lengthc                 C  r  r  )r   r   r  BLOCK_COUNTrX   rJ   r  r(   r(   r)   add_posnet_block_count  r  z!GGUFWriter.add_posnet_block_countc                 C  r  r  )r   r   ConvNextr  rX   rJ   r  r(   r(   r)   add_convnext_embedding_length  r  z(GGUFWriter.add_convnext_embedding_lengthc                 C  r  r  )r   r   r  r  rX   rJ   r  r(   r(   r)   add_convnext_block_count  r  z#GGUFWriter.add_convnext_block_countc                 C  r  r  )r   r   	ShortConvL_CACHErX   rJ   r  r(   r(   r)   add_shortconv_l_cache  r  z GGUFWriter.add_shortconv_l_cachec                 C  r  r  )r   r   r  r  rX   rJ   r  r(   r(   r)   add_block_count  r  zGGUFWriter.add_block_countc                 C  r  r  )r   r   r  LEADING_DENSE_BLOCK_COUNTrX   rJ   r  r(   r(   r)   add_leading_dense_block_count  r  z(GGUFWriter.add_leading_dense_block_countintervalc                 C  r  r  )r   r   r  FULL_ATTENTION_INTERVALrX   rJ   )r\   r  r(   r(   r)   add_full_attention_interval  r  z&GGUFWriter.add_full_attention_intervalint | Sequence[int]c                 C  F   t |tr| tjjj| jd| d S | tjjj| jd| d S r  )	
isinstancer!   r   r   r  FEED_FORWARD_LENGTHrX   rJ   r   r  r(   r(   r)   add_feed_forward_length     
z"GGUFWriter.add_feed_forward_lengthc                 C  r  r  )r   r   r  EXPERT_FEED_FORWARD_LENGTHrX   rJ   r  r(   r(   r)   add_expert_feed_forward_length  r  z)GGUFWriter.add_expert_feed_forward_lengthc                 C  r  r  )r   r   r  !EXPERT_SHARED_FEED_FORWARD_LENGTHrX   rJ   r  r(   r(   r)   %add_expert_shared_feed_forward_length  r  z0GGUFWriter.add_expert_shared_feed_forward_lengthc                 C  r  r  )r   r   r   EXPERT_CHUNK_FEED_FORWARD_LENGTHrX   rJ   r  r(   r(   r)   $add_expert_chunk_feed_forward_length  r  z/GGUFWriter.add_expert_chunk_feed_forward_lengthusec                 C  r  r  )r   r   r  USE_PARALLEL_RESIDUALrX   rJ   )r\   r  r(   r(   r)   add_parallel_residual  r  z GGUFWriter.add_parallel_residualr  c                 C  r  r  )r   r   r  DECODER_START_TOKEN_IDrX   rJ   r\   r  r(   r(   r)   add_decoder_start_token_id  r  z%GGUFWriter.add_decoder_start_token_idr+   c                 C  r  r  )r   r   r  DECODER_BLOCK_COUNTrX   rJ   r\   r+   r(   r(   r)   add_decoder_block_count  r  z"GGUFWriter.add_decoder_block_countc                 C  r  r  )r   r   r  EMBD_LENGTH_PER_LAYER_INPrX   rJ   r  r(   r(   r)   $add_embedding_length_per_layer_input  r  z/GGUFWriter.add_embedding_length_per_layer_inputc                 C  r  r  )r   r   r  ALTUP_ACTIVE_IDXrX   rJ   r\   r   r(   r(   r)   add_altup_active_idx  r  zGGUFWriter.add_altup_active_idxc                 C  r  r  )r   r   r  ALTUP_NUM_INPUTSrX   rJ   r  r(   r(   r)   add_altup_num_inputs  r  zGGUFWriter.add_altup_num_inputsr   Sequence[float]c                 C  r  r  )r   r   r  ACTIVATION_SPARSITY_SCALErX   rJ   r\   r   r(   r(   r)   add_activation_sparsity_scale  r  z(GGUFWriter.add_activation_sparsity_scalecountc                 C  r  r  )	r  r!   r   r   	Attention
HEAD_COUNTrX   rJ   r   r\   r  r(   r(   r)   add_head_count  r  zGGUFWriter.add_head_countc                 C  r  r  )	r  r!   r   r   r  HEAD_COUNT_KVrX   rJ   r   r  r(   r(   r)   add_head_count_kv  r  zGGUFWriter.add_head_count_kvc                 C  r  r  )r   r   r  
KEY_LENGTHrX   rJ   r  r(   r(   r)   add_key_length  r  zGGUFWriter.add_key_lengthc                 C  r  r  )r   r   r  VALUE_LENGTHrX   rJ   r  r(   r(   r)   add_value_length  r  zGGUFWriter.add_value_lengthc                 C  r  r  )r   r   r  KEY_LENGTH_MLArX   rJ   r  r(   r(   r)   add_key_length_mla  r  zGGUFWriter.add_key_length_mlac                 C  r  r  )r   r   r  VALUE_LENGTH_MLArX   rJ   r  r(   r(   r)   add_value_length_mla  r  zGGUFWriter.add_value_length_mlac                 C      |  tjjjj| jd| d S r  )r   r   r  Indexerr  rX   rJ   r  r(   r(   r)   add_indexer_head_count      z!GGUFWriter.add_indexer_head_countc                 C  r(  r  )r   r   r  r)  r   rX   rJ   r  r(   r(   r)   add_indexer_key_length  r+  z!GGUFWriter.add_indexer_key_lengthc                 C  r(  r  )r   r   r  r)  TOP_KrX   rJ   r<  r(   r(   r)   add_indexer_top_k  r+  zGGUFWriter.add_indexer_top_kbiasc                 C  r  r  )r   r   r  MAX_ALIBI_BIASrX   rJ   )r\   r/  r(   r(   r)   add_max_alibi_bias  r  zGGUFWriter.add_max_alibi_biasc                 C  r  r  )r   r   r  	CLAMP_KQVrX   rJ   r  r(   r(   r)   add_clamp_kqv  r  zGGUFWriter.add_clamp_kqvc                 C  r  r  )r   r   r  SHARED_KV_LAYERSrX   rJ   r  r(   r(   r)   add_shared_kv_layers  r  zGGUFWriter.add_shared_kv_layersint | Sequence[bool]c                 C  s<   t jjj| jd}t|tr| || d S | || d S r  )	r   r  SLIDING_WINDOW_PATTERNrX   rJ   r  r!   r   r   )r\   r+   r   r(   r(   r)   add_sliding_window_pattern  s   
z%GGUFWriter.add_sliding_window_patterndensein_fout_fc                 C  s<   |  tjjj| j|d| |  tjjj| j|d| d S )N)rJ   r9  )r   r   r  DENSE_FEAT_IN_SIZErX   rJ   DENSE_FEAT_OUT_SIZE)r\   r9  r:  r;  r(   r(   r)   add_dense_features_dims!  s    z"GGUFWriter.add_dense_features_dimsc                 C  r  r  )r   r   r  LOGIT_SCALErX   rJ   r  r(   r(   r)   add_logit_scale%  r  zGGUFWriter.add_logit_scalec                 C  r  r  )r   r   r  ATTN_LOGIT_SOFTCAPPINGrX   rJ   r  r(   r(   r)   add_attn_logit_softcapping(  r  z%GGUFWriter.add_attn_logit_softcappingc                 C  r  r  )r   r   r  ROUTER_LOGIT_SOFTCAPPINGrX   rJ   r  r(   r(   r)   add_router_logit_softcapping+  r  z'GGUFWriter.add_router_logit_softcappingc                 C  r  r  )r   r   r  FINAL_LOGIT_SOFTCAPPINGrX   rJ   r  r(   r(   r)   add_final_logit_softcapping.  r  z&GGUFWriter.add_final_logit_softcappingc                 C  r  r  )r   r   r  EXPERT_COUNTrX   rJ   r  r(   r(   r)   add_expert_count1  r  zGGUFWriter.add_expert_countc                 C  r  r  )r   r   r  EXPERT_USED_COUNTrX   rJ   r  r(   r(   r)   add_expert_used_count4  r  z GGUFWriter.add_expert_used_countc                 C  r  r  )r   r   r  EXPERT_SHARED_COUNTrX   rJ   r  r(   r(   r)   add_expert_shared_count7  r  z"GGUFWriter.add_expert_shared_countc                 C  r  r  )r   r   r  EXPERT_GROUP_COUNTrX   rJ   r  r(   r(   r)   add_expert_group_count:  r  z!GGUFWriter.add_expert_group_countc                 C  r  r  )r   r   r  EXPERT_GROUP_USED_COUNTrX   rJ   r  r(   r(   r)   add_expert_group_used_count=  r  z&GGUFWriter.add_expert_group_used_countc                 C  r  r  )r   r   r  EXPERT_WEIGHTS_SCALErX   rJ   r  r(   r(   r)   add_expert_weights_scale@  r  z#GGUFWriter.add_expert_weights_scalec                 C  r  r  )r   r   r  EXPERT_WEIGHTS_NORMrX   rJ   r  r(   r(   r)   add_expert_weights_normC  r  z"GGUFWriter.add_expert_weights_normr   c                 C      |  tjjj| jd|j d S r  )r   r   r  EXPERT_GATING_FUNCrX   rJ   r+   r  r(   r(   r)   add_expert_gating_funcF  r+  z!GGUFWriter.add_expert_gating_funcc                 C  r  r  )r   r   r  SWIGLU_CLAMP_EXPrX   rJ   r  r(   r(   r)   add_swiglu_clamp_expI  r  zGGUFWriter.add_swiglu_clamp_expc                 C  r  r  )r   r   r  SWIGLU_CLAMP_SHEXPrX   rJ   r  r(   r(   r)   add_swiglu_clamp_shexpL  r  z!GGUFWriter.add_swiglu_clamp_shexpc                 C  r  r  )r   r   r  EXPERT_GROUP_SCALErX   rJ   r  r(   r(   r)   add_expert_group_scaleO  r  z!GGUFWriter.add_expert_group_scalec                 C  r  r  )r   r   r  EXPERTS_PER_GROUPrX   rJ   r  r(   r(   r)   add_experts_per_groupR  r  z GGUFWriter.add_experts_per_groupc                 C  r  r  )r   r   r  MOE_EVERY_N_LAYERSrX   rJ   r  r(   r(   r)   add_moe_every_n_layersU  r  z!GGUFWriter.add_moe_every_n_layersc                 C  r  r  )r   r   r  NEXTN_PREDICT_LAYERSrX   rJ   r  r(   r(   r)   add_nextn_predict_layersX  r  z#GGUFWriter.add_nextn_predict_layersc                 C  r  r  )r   r   r  	SWIN_NORMrX   rJ   r  r(   r(   r)   add_swin_norm[  r  zGGUFWriter.add_swin_normc                 C  r  r  )r   r   r  RESCALE_EVERY_N_LAYERSrX   rJ   r  r(   r(   r)   add_rescale_every_n_layers^  r  z%GGUFWriter.add_rescale_every_n_layersdimc                 C  r  r  )r   r   r  TIME_MIX_EXTRA_DIMrX   rJ   r\   rh  r(   r(   r)   add_time_mix_extra_dima  r  z!GGUFWriter.add_time_mix_extra_dimc                 C  r  r  )r   r   r  TIME_DECAY_EXTRA_DIMrX   rJ   rj  r(   r(   r)   add_time_decay_extra_dimd  r  z#GGUFWriter.add_time_decay_extra_dimc                 C  r  r  )r   r   r  RESIDUAL_SCALErX   rJ   r  r(   r(   r)   add_residual_scaleg  r  zGGUFWriter.add_residual_scalec                 C  r  r  )r   r   r  EMBEDDING_SCALErX   rJ   r  r(   r(   r)   add_embedding_scalej  r  zGGUFWriter.add_embedding_scalec                 C  r  r  )r   r   WKV	HEAD_SIZErX   rJ   r  r(   r(   r)   add_wkv_head_sizem  r  zGGUFWriter.add_wkv_head_sizec                 C  r  r  )r   r   r  TOKEN_SHIFT_COUNTrX   rJ   r  r(   r(   r)   add_token_shift_countp  r  z GGUFWriter.add_token_shift_countc                 C  r  r  )r   r   r  INTERLEAVE_MOE_LAYER_STEPrX   rJ   r  r(   r(   r)   add_interleave_moe_layer_steps  r  z(GGUFWriter.add_interleave_moe_layer_stepc                 C  r  r  )r   r   r  LAYERNORM_EPSrX   rJ   r  r(   r(   r)   add_layer_norm_epsv  r  zGGUFWriter.add_layer_norm_epsc                 C  r  r  )r   r   r  LAYERNORM_RMS_EPSrX   rJ   r  r(   r(   r)   add_layer_norm_rms_epsy  r  z!GGUFWriter.add_layer_norm_rms_epsc                 C  r  r  )r   r   r  GROUPNORM_EPSrX   rJ   r  r(   r(   r)   add_group_norm_eps|  r  zGGUFWriter.add_group_norm_epsc                 C  r  r  )r   r   r  GROUPNORM_GROUPSrX   rJ   r  r(   r(   r)   add_group_norm_groups  r  z GGUFWriter.add_group_norm_groupsc                 C  r  r  )r   r   r  CAUSALrX   rJ   r  r(   r(   r)   add_causal_attention  r  zGGUFWriter.add_causal_attentionc                 C  r  r  )r   r   r  Q_LORA_RANKrX   rJ   r  r(   r(   r)   add_q_lora_rank  r  zGGUFWriter.add_q_lora_rankc                 C  r  r  )r   r   r  KV_LORA_RANKrX   rJ   r  r(   r(   r)   add_kv_lora_rank  r  zGGUFWriter.add_kv_lora_rankc                 C  r  r  )r   r   r  DECAY_LORA_RANKrX   rJ   r  r(   r(   r)   add_decay_lora_rank  r  zGGUFWriter.add_decay_lora_rankc                 C  r  r  )r   r   r  ICLR_LORA_RANKrX   rJ   r  r(   r(   r)   add_iclr_lora_rank  r  zGGUFWriter.add_iclr_lora_rankc                 C  r  r  )r   r   r  VALUE_RESIDUAL_MIX_LORA_RANKrX   rJ   r  r(   r(   r)    add_value_residual_mix_lora_rank  r  z+GGUFWriter.add_value_residual_mix_lora_rankc                 C  r  r  )r   r   RopeFREQ_BASE_SWArX   rJ   r  r(   r(   r)   add_rope_freq_base_swa  r  z!GGUFWriter.add_rope_freq_base_swac                 C  r  r  )r   r   r  GATE_LORA_RANKrX   rJ   r  r(   r(   r)   add_gate_lora_rank  r  zGGUFWriter.add_gate_lora_rankc                 C  r  r  )r   r   r  REL_BUCKETS_COUNTrX   rJ   r  r(   r(   r)   add_relative_attn_buckets_count  r  z*GGUFWriter.add_relative_attn_buckets_countc                 C  r  r  )r   r   r  SLIDING_WINDOWrX   rJ   r  r(   r(   r)   add_sliding_window  r  zGGUFWriter.add_sliding_windowc                 C  r  r  )r   r   r  SCALErX   rJ   r  r(   r(   r)   add_attention_scale  r  zGGUFWriter.add_attention_scalec                 C  r  r  )r   r   r  OUTPUT_SCALErX   rJ   r  r(   r(   r)   add_attn_output_scale  r  z GGUFWriter.add_attn_output_scalec                 C  r  r  )r   r   r  TEMPERATURE_LENGTHrX   rJ   r  r(   r(   r)   add_attn_temperature_length  r  z&GGUFWriter.add_attn_temperature_lengthc                 C  r  r  )r   r   r  TEMPERATURE_SCALErX   rJ   r  r(   r(   r)   add_attn_temperature_scale  r  z%GGUFWriter.add_attn_temperature_scaler   c                 C  rU  r  )r   r   r  POOLING_TYPErX   rJ   r+   r  r(   r(   r)   add_pooling_type  r+  zGGUFWriter.add_pooling_typec                 C  r  r  )r   r   r  NUM_DEEPSTACK_LAYERSrX   rJ   r  r(   r(   r)   add_num_deepstack_layers  r  z#GGUFWriter.add_num_deepstack_layersc                 C  r  r  )r   r   r  DIMENSION_COUNTrX   rJ   r  r(   r(   r)   add_rope_dimension_count  r  z#GGUFWriter.add_rope_dimension_countdimsc                 C  r  r  )r   r   r  DIMENSION_SECTIONSrX   rJ   )r\   r  r(   r(   r)   add_rope_dimension_sections  r  z&GGUFWriter.add_rope_dimension_sectionsc                 C  r  r  )r   r   r  	FREQ_BASErX   rJ   r  r(   r(   r)   add_rope_freq_base  r  zGGUFWriter.add_rope_freq_baser   c                 C  rU  r  )r   r   r  SCALING_TYPErX   rJ   r+   r  r(   r(   r)   add_rope_scaling_type  r+  z GGUFWriter.add_rope_scaling_typec                 C  r  r  )r   r   r  SCALING_FACTORrX   rJ   r  r(   r(   r)   add_rope_scaling_factor  r  z"GGUFWriter.add_rope_scaling_factorc                 C  r  r  )r   r   r  SCALING_ATTN_FACTORrX   rJ   r  r(   r(   r)   add_rope_scaling_attn_factors  r  z(GGUFWriter.add_rope_scaling_attn_factorsc                 C  r  r  )r   r   r  SCALING_ORIG_CTX_LENrX   rJ   r  r(   r(   r)   add_rope_scaling_orig_ctx_len  r  z(GGUFWriter.add_rope_scaling_orig_ctx_lenc                 C  r  r  )r   r   r  SCALING_FINETUNEDrX   rJ   r  r(   r(   r)   add_rope_scaling_finetuned  r  z%GGUFWriter.add_rope_scaling_finetunedc                 C  r  r  )r   r   r  SCALING_YARN_LOG_MULrX   rJ   r  r(   r(   r)   add_rope_scaling_yarn_log_mul  r  z(GGUFWriter.add_rope_scaling_yarn_log_mulc                 C  r  r  )r   r   r  SCALING_YARN_EXT_FACTORrX   rJ   r  r(   r(   r)    add_rope_scaling_yarn_ext_factor  r  z+GGUFWriter.add_rope_scaling_yarn_ext_factorc                 C  r  r  )r   r   r  SCALING_YARN_ATTN_FACTORrX   rJ   r  r(   r(   r)   !add_rope_scaling_yarn_attn_factor  r  z,GGUFWriter.add_rope_scaling_yarn_attn_factorc                 C  r  r  )r   r   r  SCALING_YARN_BETA_FASTrX   rJ   r  r(   r(   r)   add_rope_scaling_yarn_beta_fast  r  z*GGUFWriter.add_rope_scaling_yarn_beta_fastc                 C  r  r  )r   r   r  SCALING_YARN_BETA_SLOWrX   rJ   r  r(   r(   r)   add_rope_scaling_yarn_beta_slow  r  z*GGUFWriter.add_rope_scaling_yarn_beta_slowc                 C  r  r  )r   r   SSMCONV_KERNELrX   rJ   r  r(   r(   r)   add_ssm_conv_kernel  r  zGGUFWriter.add_ssm_conv_kernelc                 C  r  r  )r   r   r  
INNER_SIZErX   rJ   r  r(   r(   r)   add_ssm_inner_size  r  zGGUFWriter.add_ssm_inner_sizec                 C  r  r  )r   r   r  
STATE_SIZErX   rJ   r  r(   r(   r)   add_ssm_state_size  r  zGGUFWriter.add_ssm_state_sizec                 C  r  r  )r   r   r  TIME_STEP_RANKrX   rJ   r  r(   r(   r)   add_ssm_time_step_rank  r  z!GGUFWriter.add_ssm_time_step_rankc                 C  r  r  )r   r   r  GROUP_COUNTrX   rJ   r  r(   r(   r)   add_ssm_group_count  r  zGGUFWriter.add_ssm_group_countc                 C  r  r  )r   r   r  
DT_B_C_RMSrX   rJ   r  r(   r(   r)   add_ssm_dt_b_c_rms  r  zGGUFWriter.add_ssm_dt_b_c_rmsc                 C  r  r  )r   r   KDAHEAD_DIMrX   rJ   r  r(   r(   r)   add_kda_head_dim  r  zGGUFWriter.add_kda_head_dimmodelc                 C  r(  r   )r   r   	TokenizerMODEL)r\   r  r(   r(   r)   add_tokenizer_model  r   zGGUFWriter.add_tokenizer_modelprec                 C  r(  r   )r   r   r  PRE)r\   r  r(   r(   r)   add_tokenizer_pre  r   zGGUFWriter.add_tokenizer_pretokens5Sequence[str] | Sequence[bytes] | Sequence[bytearray]c                 C  r(  r   )r   r   r  LIST)r\   r  r(   r(   r)   add_token_list  r   zGGUFWriter.add_token_listmergesc                 C  r(  r   )r   r   r  MERGES)r\   r  r(   r(   r)   add_token_merges  r   zGGUFWriter.add_token_mergestypes#Sequence[TokenType] | Sequence[int]c                 C  r(  r   )r   r   r  
TOKEN_TYPE)r\   r  r(   r(   r)   add_token_types  r   zGGUFWriter.add_token_typesc                 C  r(  r   )r   r   r  TOKEN_TYPE_COUNTr  r(   r(   r)   add_token_type_count  r   zGGUFWriter.add_token_type_countscoresc                 C  r(  r   )r   r   r  SCORES)r\   r  r(   r(   r)   add_token_scores   r   zGGUFWriter.add_token_scoresc                 C  r(  r   )r   r   r  BOS_IDr	  r(   r(   r)   add_bos_token_id  r   zGGUFWriter.add_bos_token_idc                 C  r(  r   )r   r   r  EOS_IDr	  r(   r(   r)   add_eos_token_id  r   zGGUFWriter.add_eos_token_idc                 C  r(  r   )r   r   r  UNK_IDr	  r(   r(   r)   add_unk_token_id	  r   zGGUFWriter.add_unk_token_idc                 C  r(  r   )r   r   r  SEP_IDr	  r(   r(   r)   add_sep_token_id  r   zGGUFWriter.add_sep_token_idc                 C  r(  r   )r   r   r  PAD_IDr	  r(   r(   r)   add_pad_token_id  r   zGGUFWriter.add_pad_token_idc                 C  r(  r   )r   r   r  MASK_IDr	  r(   r(   r)   add_mask_token_id  r   zGGUFWriter.add_mask_token_idc                 C  r(  r   )r   r   r  ADD_BOSr  r(   r(   r)   add_add_bos_token  r   zGGUFWriter.add_add_bos_tokenc                 C  r(  r   )r   r   r  ADD_EOSr  r(   r(   r)   add_add_eos_token  r   zGGUFWriter.add_add_eos_tokenc                 C  r(  r   )r   r   r  ADD_SEPr  r(   r(   r)   add_add_sep_token  r   zGGUFWriter.add_add_sep_tokenc                 C  r(  r   )r   r   r  
ADD_PREFIXr  r(   r(   r)   add_add_space_prefix  r   zGGUFWriter.add_add_space_prefixc                 C  r(  r   )r   r   r  REMOVE_EXTRA_WSr  r(   r(   r)   add_remove_extra_whitespaces!  r   z'GGUFWriter.add_remove_extra_whitespacescharsmapr	  c                 C  r(  r   )r   r   r  PRECOMPILED_CHARSMAP)r\   r  r(   r(   r)   add_precompiled_charsmap$  r   z#GGUFWriter.add_precompiled_charsmap!str | Sequence[Mapping[str, str]]c                 C  s   t |tsVd }t }|D ]5}|dd}|d}ddd |D }|rA|d urA|dkr0|}q|| | tjj	j
|d| q|rN| tjjt| |d u rTd S |}| tjj| d S )Nrm    templatec                 s  s$    | ]}|t t v r|nd V  qdS )r   Nr   )rw   cr(   r(   r)   r   1  r  z/GGUFWriter.add_chat_template.<locals>.<genexpr>defaultr   )r  rK   setgetjoinaddr   r   r  CHAT_TEMPLATE_NrX   r   CHAT_TEMPLATESlistCHAT_TEMPLATE)r\   r+   template_defaulttemplate_nameschoicerm   r  r(   r(   r)   add_chat_template'  s&   


zGGUFWriter.add_chat_templatec                 C  r(  r   )r   r   r  EOT_IDr	  r(   r(   r)   add_eot_token_idD  r   zGGUFWriter.add_eot_token_idc                 C  r(  r   )r   r   r  EOM_IDr	  r(   r(   r)   add_eom_token_idG  r   zGGUFWriter.add_eom_token_idlabelsc                 C  r  r  )r   r   
ClassifierOUTPUT_LABELSrX   rJ   )r\   r  r(   r(   r)   add_classifier_output_labelsJ  r  z'GGUFWriter.add_classifier_output_labelsc                 C  r(  r   )r   r   ClipHAS_VISION_ENCODERr  r(   r(   r)   add_clip_has_vision_encoderO  r   z&GGUFWriter.add_clip_has_vision_encoderc                 C  r(  r   )r   r   r  HAS_AUDIO_ENCODERr  r(   r(   r)   add_clip_has_audio_encoderR  r   z%GGUFWriter.add_clip_has_audio_encoderc                 C  r(  r   )r   r   r  PROJECTOR_TYPEr  r(   r(   r)   add_clip_projector_typeU  r   z"GGUFWriter.add_clip_projector_typec                 C  r(  r   )r   r   
ClipVisionr  r  r(   r(   r)   add_clip_vision_projector_typeX  r   z)GGUFWriter.add_clip_vision_projector_typec                 C  r(  r   )r   r   r  PROJECTION_DIMr  r(   r(   r)   add_vision_projection_dim[  r   z$GGUFWriter.add_vision_projection_dimc                 C  r(  r   )r   r   r  
PATCH_SIZEr  r(   r(   r)   add_vision_patch_size^  r   z GGUFWriter.add_vision_patch_sizec                 C  r(  r   )r   r   r  r  r  r(   r(   r)   add_vision_embedding_lengtha  r   z&GGUFWriter.add_vision_embedding_lengthc                 C  r(  r   )r   r   r  r  r  r(   r(   r)   add_vision_feed_forward_lengthd  r   z)GGUFWriter.add_vision_feed_forward_lengthc                 C  r(  r   )r   r   r  r  r  r(   r(   r)   add_vision_block_countg  r   z!GGUFWriter.add_vision_block_countc                 C     |  tjjj| d S r   )r   r   r  r  r  r  r(   r(   r)   add_vision_head_countj  r-  z GGUFWriter.add_vision_head_countc                 C  r&  r   )r   r   r  r  ry  r  r(   r(   r)   "add_vision_attention_layernorm_epsm  r-  z-GGUFWriter.add_vision_attention_layernorm_epsc                 C  r(  r   )r   r   r  
IMAGE_SIZEr  r(   r(   r)   add_vision_image_sizep  r   z GGUFWriter.add_vision_image_sizec                 C  r(  r   )r   r   r  IMAGE_MAX_PIXELSr  r(   r(   r)   add_vision_max_pixelss  r   z GGUFWriter.add_vision_max_pixelsc                 C  r(  r   )r   r   r  IMAGE_MIN_PIXELSr  r(   r(   r)   add_vision_min_pixelsv  r   z GGUFWriter.add_vision_min_pixelsc                 C  r(  r   )r   r   r  PREPROC_IMAGE_SIZEr  r(   r(   r)   add_vision_preproc_image_sizey  r   z(GGUFWriter.add_vision_preproc_image_sizec                 C  r(  r   )r   r   r  
IMAGE_MEANr  r(   r(   r)   add_vision_image_mean|  r   z GGUFWriter.add_vision_image_meanc                 C  r(  r   )r   r   r  	IMAGE_STDr  r(   r(   r)   add_vision_image_std  r   zGGUFWriter.add_vision_image_stdc                 C  r(  r   )r   r   r  SPATIAL_MERGE_SIZEr  r(   r(   r)   add_vision_spatial_merge_size  r   z(GGUFWriter.add_vision_spatial_merge_sizec                 C  r(  r   )r   r   r  USE_GELUr  r(   r(   r)   add_vision_use_gelu  r   zGGUFWriter.add_vision_use_geluc                 C  r(  r   )r   r   r  USE_SILUr  r(   r(   r)   add_vision_use_silu  r   zGGUFWriter.add_vision_use_siluc                 C  r&  r   )r   r   r  	ProjectorSCALE_FACTORr  r(   r(   r)   !add_vision_projector_scale_factor  r-  z,GGUFWriter.add_vision_projector_scale_factorc                 C     |  tjj| dS )a  Add window attention pattern interval for vision models.

        This defines the pattern interval for window attention vs full attention layers.
        For example, if n_wa_pattern=4, then layers 3, 7, 11, ... use full attention,
        while other layers use window attention.

        Used by models like Qwen2.5-VL where full attention layers follow a regular pattern.
        N)r   r   r  N_WA_PATTERNr  r(   r(   r)   add_vision_n_wa_pattern  s   	z"GGUFWriter.add_vision_n_wa_patternlayersc                 C  r>  )a  Add explicit layer indexes that use full attention in vision models.

        This specifies the exact layer indices (0-based) that should use full attention
        instead of window attention. All other layers will use window attention.

        Args:
            layers: List of layer indices that use full attention (e.g., [3, 7, 11, 15])

        Used by models like YoutuVL where full attention layers are explicitly specified
        rather than following a regular pattern.

        Difference from add_vision_n_wa_pattern:
        - n_wa_pattern: Defines a regular interval pattern (every Nth layer uses full attention)
        - wa_layer_indexes: Explicitly lists which layers use full attention (irregular pattern)
        N)r   r   r  WA_LAYER_INDEXESr\   rA  r(   r(   r)   add_vision_wa_layer_indexes  s   z&GGUFWriter.add_vision_wa_layer_indexesSequence[bool]c                 C  r(  r   )r   r   r  IS_DEEPSTACK_LAYERSrC  r(   r(   r)   add_vision_is_deepstack_layers  r   z)GGUFWriter.add_vision_is_deepstack_layersc                 C  r(  r   )r   r   r  WINDOW_SIZEr  r(   r(   r)   add_vision_window_size  r   z!GGUFWriter.add_vision_window_sizec                 C  r(  r   )r   r   	ClipAudior  r  r(   r(   r)   add_clip_audio_projector_type  r   z(GGUFWriter.add_clip_audio_projector_typec                 C  r(  r   )r   r   rJ  r  r  r(   r(   r)   add_audio_projection_dim  r   z#GGUFWriter.add_audio_projection_dimc                 C  r(  r   )r   r   rJ  r  r  r(   r(   r)   add_audio_embedding_length  r   z%GGUFWriter.add_audio_embedding_lengthc                 C  r(  r   )r   r   rJ  r  r  r(   r(   r)   add_audio_feed_forward_length  r   z(GGUFWriter.add_audio_feed_forward_lengthc                 C  r(  r   )r   r   rJ  r  r  r(   r(   r)   add_audio_block_count  r   z GGUFWriter.add_audio_block_countc                 C  r&  r   )r   r   rJ  r  r  r  r(   r(   r)   add_audio_head_count  r-  zGGUFWriter.add_audio_head_countc                 C  r&  r   )r   r   rJ  r  ry  r  r(   r(   r)   !add_audio_attention_layernorm_eps  r-  z,GGUFWriter.add_audio_attention_layernorm_epsc                 C  r(  r   )r   r   rJ  NUM_MEL_BINSr  r(   r(   r)   add_audio_num_mel_bins  r   z!GGUFWriter.add_audio_num_mel_binsc                 C  r&  r   )r   r   rJ  r;  STACK_FACTORr  r(   r(   r)   add_audio_stack_factor  r-  z!GGUFWriter.add_audio_stack_factorc                 C  r(  r   )r   r   xIELUALPHA_Pr  r(   r(   r)   add_xielu_alpha_p  r   zGGUFWriter.add_xielu_alpha_pc                 C  r(  r   )r   r   rV  ALPHA_Nr  r(   r(   r)   add_xielu_alpha_n  r   zGGUFWriter.add_xielu_alpha_nc                 C  r(  r   )r   r   rV  BETAr  r(   r(   r)   add_xielu_beta  r   zGGUFWriter.add_xielu_betac                 C  r(  r   )r   r   rV  EPSr  r(   r(   r)   add_xielu_eps  r   zGGUFWriter.add_xielu_epsc                 C  r(  r   )r   r   	DiffusionSHIFT_LOGITSr  r(   r(   r)   add_diffusion_shift_logits  r   z%GGUFWriter.add_diffusion_shift_logitsfmtr   c                 C  s0   d}|s| j tjkrdnd}t| | |S )Nr  <>)rN   r   r   structpack)r\   rb  r+   r   pack_prefixr(   r(   r)   r     s   zGGUFWriter._packr   c           	        s`  t  }|r|| d|7 }| j|}|d ur&|| j|||tjkd7 }|S |tjkrGt|tr5|	dn|}|| dt
|7 }||7 }|S |tjkrt|tsUtdt
|dkr_td|d urf| n#t|trotj nt|d  t fdd	|d
d  D std|| d 7 }|| dt
|7 }|D ]}|| j| dd7 }q|S td)NrB   r   zutf-8rE   z/Invalid GGUF metadata array, expecting sequencer   z(Invalid GGUF metadata array. Empty arrayc                 3  s    | ]
}t | u V  qd S r   )r   get_typerv   ltyper(   r)   r      s    z'GGUFWriter._pack_val.<locals>.<genexpr>r   z4All items in a GGUF array should be of the same typeFr   z)Invalid GGUF metadata value type or value)r   r   _simple_value_packingr  r   r   r   r  rK   encoderu   r   r   r   r	  r   rh  allr   )	r\   r   r   r   r.   r<   pack_fmtencoded_valitemr(   ri  r)   r     s<   



zGGUFWriter._pack_valnumc                 C  sP   | dkrdS t | }dD ]}t|dk r|d|   S |d }q|ddS )Nr   znegligible - metadata only)r  KMGg     @@z3.1fz.1fzT - over 1TB, split recommended)r   abs)rq  fnumr  r(   r(   r)   r     s   
z GGUFWriter.format_n_bytes_to_str)r9   rI   rJ   rK   rL   rM   rN   r   rO   r!   rP   r!   rQ   rM   rR   rM   )r^   r_   )r9   r   r^   rq   r   )r9   r8   r^   r|   )r^   rq   )r^   r|   )
r   rK   r   r
   r   r   r.   r-   r^   r|   )r   rK   r   r!   r^   r|   )r   rK   r   r   r^   r|   )r   rK   r   rM   r^   r|   )r   rK   r   rK   r^   r|   )r   rK   r   r   r^   r|   )r   r!   r   r!   r^   r!   )rm   rK   r   r   r   r   r   r!   r   r   r^   r|   )NNN)rm   rK   r#   r   r   r   r   r   r   r   r^   r|   )r  r  r   r!   r  r  r^   r|   )r#   r   r   r   r^   r|   )r  rM   r^   r|   )r'  rK   r^   r|   )r.  r!   r^   r|   )r1  r!   r^   r|   )r4  r!   r^   r|   )r7  rK   r^   r|   )r:  r!   r^   r|   )r>  r   r^   r|   )rA  r   r^   r|   )rD  r   r^   r|   )rG  r   r^   r|   )rJ  r   r^   r|   )rM  r!   r^   r|   )rP  r   r^   r|   )rS  r!   r^   r|   )rV  r   r^   r|   )rY  r   r^   r|   )rm   rK   r^   r|   )r^  rK   r^   r|   )ra  rK   r^   r|   )rd  rK   r^   r|   )rg  rK   r^   r|   )rj  rK   r^   r|   )rm  rK   r^   r|   )rp  rK   r^   r|   )rs  rK   r^   r|   )rv  rK   r^   r|   )r~  rK   r^   r|   )r  rK   r^   r|   )r  rK   r^   r|   )r  rK   r^   r|   )r  r!   r^   r|   )r  r!   rm   rK   r^   r|   )r  r!   r^  rK   r^   r|   )r  r!   ra  rK   r^   r|   )r  r!   rd  rK   r^   r|   )r  r!   rm  rK   r^   r|   )r  r!   r~  rK   r^   r|   )r  r!   r  rK   r^   r|   )r  r!   r  rK   r^   r|   )r  r!   r  rK   r^   r|   )r  r  r^   r|   )r  r  r^   r|   )r  rK   r^   r|   )rn   r!   r^   r|   )r  r!   r^   r|   )r  r!   r^   r|   )r  r  r^   r|   )r  rM   r^   r|   )r  r!   r^   r|   )r+   r!   r^   r|   )r   r!   r^   r|   )r   r  r^   r|   )r  r  r^   r|   )r  r!   r^   r|   )r/  r   r^   r|   )r+   r   r^   r|   )r+   r6  r^   r|   )r9  rK   r:  r!   r;  r!   r^   r|   )r+   rM   r^   r|   )r+   r   r^   r|   )rh  r!   r^   r|   )r+   r   r^   r|   )r  r   r^   r|   )r+   r   r^   r|   )r  rK   r^   r|   )r  rK   r^   r|   )r  r  r^   r|   )r  r  r^   r|   )r  r  r^   r|   )r  r  r^   r|   )r  r	  r^   r|   )r+   r  r^   r|   )r  r  r^   r|   )r+   rK   r^   r|   )rA  r   r^   r|   )rA  rE  r^   r|   )r   r  )F)rb  rK   r+   r
   r   rM   r^   r	  )
r   r
   r   r   r   rM   r.   r-   r^   r	  )rq  r!   r^   rK   (  r$   r%   r&   r'   r   r   r   r   r   r   r   r   r   r   r   r   rk  r   r   r]   rp   r{   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   staticmethodr   r   r  r  r  r%  r   r!  r+  r[   r0  r3  r6  r9  r=  r@  rC  rF  rI  rL  rO  rR  rU  rX  r[  r]  r`  rc  rf  ri  rl  ro  rr  ru  ry  r{  r}  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r   r  r  r  r
  r  r  r  r  r  r  r  r!  r#  r%  r'  r*  r,  r.  r1  r3  r5  r8  r>  r@  rB  rD  rF  rH  rJ  rL  rN  rP  rR  rT  rW  rY  r[  r]  r_  ra  rc  re  rg  rk  rm  ro  rq  rt  rv  rx  rz  r|  r~  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r   r"  r#  r$  r%  r'  r(  r*  r,  r.  r0  r2  r4  r6  r8  r:  r=  r@  rD  rG  rI  rK  rL  rM  rN  rO  rP  rQ  rS  rU  rX  rZ  r\  r^  ra  r   r   r   r(   r(   r(   r)   r6   A   s>  
 0.!/&r6   )3
__future__r   loggingosr  re  r   r   dataclassesr   enumr   r   mathr   pathlibr   ior   typingr	   r
   r   r   stringr   r   numpyr   	constantsr   r   r   r   r   r   r   r   r   r   r   quantsr   	getLoggerr$   rV   rs   r   r*   r/   r6   r(   r(   r(   r)   <module>   s2    4
	