o
    հi                     @  s   d dl mZ d dlZd dlZd dlZd dlZd dlmZ d dlm	Z	m
Z
mZ d dlmZ ddlmZ d dlZedZeG d	d
 d
ZdS )    )annotationsN)Path)AnyLiteralOptional)	dataclass   )Keysmetadatac                   @  s  e Zd ZU dZded< dZded< dZded< dZded< dZded< dZ	ded< dZ
ded	< dZded
< dZded< dZded< dZded< dZded< dZded< dZded< dZded< dZded< dZded< dZded< dZded< dZded< dZded< dZded< dZded< dZded< ed>d?d%d&Zed@dAd(d)Zed@dBd*d+Zed@dBd,d-Z ed.d/ Z!edCdDd2d3Z"ed>dEd8d9Z#dFd<d=Z$dS )GMetadataNOptional[str]nameauthorversionorganizationfinetunebasenamedescriptionquantized_by
size_labelurldoiuuidrepo_url
source_url
source_doisource_uuidsource_repo_urllicenselicense_namelicense_linkzOptional[list[dict]]base_modelszOptional[list[str]]tags	languagesdatasetsr   metadata_override_pathOptional[Path]
model_path
model_nametotal_paramsintreturnc                 C  s  t  }t |}t |}t |||||}t | }|tjj|j	|_	|tjj
|j|_|tjj|j|_|tjj|j|_|tjj|j|_|tjj|j|_|tjj|j|_|tjj|j|_|tjj|j|_|tjj|j|_|tjj|j|_|tjj|j|_|tjj |j!|_!|tjj"|j#|_#|tjj$|j%|_%|tjj&|j'|_'|tjj(|j)|_)|tjj*|j+|_+|tjj,|j-|_-|d|j.|_.|d|j/|_/|tjj0|j1|_1|tjj2|j3|_3|d ur||_	|S )Nzgeneral.base_modelszgeneral.datasets)4r   load_model_cardload_hf_parametersapply_metadata_heuristicload_metadata_overridegetr	   GeneralNAMEr   AUTHORr   VERSIONr   ORGANIZATIONr   FINETUNEr   BASENAMEr   DESCRIPTIONr   QUANTIZED_BYr   
SIZE_LABELr   LICENSE_NAMEr   LICENSE_LINKr    URLr   DOIr   UUIDr   REPO_URLr   
SOURCE_URLr   
SOURCE_DOIr   SOURCE_UUIDr   SOURCE_REPO_URLr   r!   r$   TAGSr"   	LANGUAGESr#   )r%   r'   r(   r)   r
   
model_card	hf_paramsmetadata_override rJ   A/home/ubuntu/.local/lib/python3.10/site-packages/gguf/metadata.pyload.   s>   



zMetadata.loaddict[str, Any]c                 C  sP   | d u s|   s
i S t| ddd}t|W  d    S 1 s!w   Y  d S )Nrutf-8encoding)is_fileopenjsonrL   )r%   frJ   rJ   rK   r/   j   s
   $zMetadata.load_metadata_overridec           	      C  s2  | d u s|   s
i S | d }| si S d}t|dddQ}| }| }g }t|dkr7i W  d    S t|dkrL|d dkrLi W  d    S |dd  D ]}|dkrZ n|| qRd	|d	 }W d    n1 sqw   Y  |d
d}|rt	
|}t|tr|S tdt| d i S i S )Nz	README.md rN   rO   rP   r   z---r   
z- no
z- "no"
z3while reading YAML model card frontmatter, data is z instead of dict)is_dirrR   rS   read
splitlineslenappendjoinreplaceyaml	safe_load
isinstancedictloggererrortype)	r'   model_card_pathyaml_contentrU   contentlines
lines_yamllinedatarJ   rJ   rK   r,   r   s<   


zMetadata.load_model_cardc                 C  sd   | d u s|   s
i S | d }| si S t|ddd}t|W  d    S 1 s+w   Y  d S )Nzconfig.jsonrN   rO   rP   )rX   rR   rS   rT   rL   )r'   config_pathrU   rJ   rJ   rK   r-      s   $zMetadata.load_hf_parametersc                 C  s$   d dd |  dd D S )N c                 S  s,   g | ]}|  rtd |s| n|qS )z^(v\d+(?:\.\d+)*|\d.*)$)islowerrematchtitle).0wrJ   rJ   rK   
<listcomp>   s   , z(Metadata.id_to_title.<locals>.<listcomp>-)r]   stripr^   split)stringrJ   rJ   rK   id_to_title   s   $zMetadata.id_to_titlemodel_idMtuple[str | None, str | None, str | None, str | None, str | None, str | None]c              	   C  s  | d u rdS d| v r| d d d d d fS d| v r|  dd\}}nd | }}|d ur6t|dkr6|d dkr6d }| d}ttt|D ]}t|| dkrP||= qCdd	 |D }t|D ]\}}td
|tjrq|| d q\td|tjr|| d |	 ||< q\|dkr:td|tjr:|
dd}|d  r|d d d |d  |d  }t|dkr|d  r|d dv r|d d |d 	  }|dkr%zFt|d d tdd|d  }|dk r|t|d k s|dkrt|| d| d kr|| d |d d |d   }W n
 ty$   Y nw t|| dkr5|| d |||< q\|dkrctd|tjrc|dk r\| dkr\|| d q\|| d q\tdd t||D rt||D ]\}	}
d|
v rtdd |	D r|
d qvd}t||D ]3\}}
|rt|
dkr|d  sd|
v r|
d q|rd }t|
dkr|
d qtt|t|D ]\}}
d|
v rt|
dkr|
d q dd!d t||D pd }dtd"d t||D  pd }dd#d t||D p d }dd$d t||D p0d }|d u rB|d u rB|d u rBd }||||||fS )%N)NNNNNNrn   /r   r   .rv   c                 S  s   g | ]}t  qS rJ   )set)rs   _rJ   rJ   rK   ru      s    z4Metadata.get_model_id_components.<locals>.<listcomp>z(v|iter)?\d+([.]\d+)*r   zi?q\d(_\w)*|b?fp?(16|32)re   zD(([A]|\d+[x])?\d+([._]\d+)?[KMBT][\d]?|small|mini|medium|large|x?xl)r   kmbti  z KMBT      r   r   zchat|instruct|vision|loralorac                 s  s.    | ]\}}d |v r|D ]}|  V  qqdS r   N)	isdecimal)rs   ntcrJ   rJ   rK   	<genexpr>  s   , z3Metadata.get_model_id_components.<locals>.<genexpr>c                 s  s    | ]}|  V  qd S N)isalpha)rs   r   rJ   rJ   rK   r   
  s    Tr   Fc                 s       | ]\}}d |v r|V  qdS )r   NrJ   )rs   r   r   rJ   rJ   rK   r         c                 s  r   r   rJ   )rs   sr   rJ   rJ   rK   r   !  r   c                 s  r   )r   NrJ   )rs   rU   r   rJ   rJ   rK   r   "  r   c                 s  s(    | ]\}}d |v rd|vr|V  qdS )r   r   NrJ   )rs   vr   rJ   rJ   rK   r   %  s   & )rx   r[   reversedrange	enumeraterp   	fullmatch
IGNORECASEaddupperr^   r   floatpowfindabslower
ValueErroranyzipallremover   r]   rb   fromkeyskeys)r{   r)   org_componentmodel_full_name_component
name_partsi
name_typespartlabel_paramsr   r   at_startr   r   r   r   rJ   rJ   rK   get_model_id_components   s   
 
 
$$


,
 *  z Metadata.get_model_id_componentsr
   rG   Optional[dict]rH   c              
     sF  d urd@ fdd}d@ fdd}|dd |d	d	 |d
d
 |dd |dd |dd |dd |dd |dd |dd |dd |dd |dd |d	d |d
d |dd |dd |dd |dd |dd |dd  |dd! |dd" |dd# |dd |d	d$ |dd% d&v sd'v sd(v rg } d& d' d(d }|d urt|tr|| n
t|tr||  jd u rg  _|D ]}	i }
t|	tr}|	d)s|	d*s|	d+r;|	|
d< d,|	v r:t	d-|	}|r:|
d.}t||\}}}}}}|d ur%t||
d< |d ur1t||
d< |d ur:||
d
< nVt|	|\}}}}}}|d urSt||
d< |d ur_t||
d< |d urh||
d
< |d ur||d ur|d/| d0| |
d< nt|	tr|	}
ntd1t|	 d2  j|
 qd3v sd4v sd5v rg } d3 d4 d5d }|d urt|tr|| nt|tr||  jd u rg  _|D ]}i }t|tru|d6r3||d< d,|v r2t	d-|}|r2|
d.}t||\}}}}}}|d urt||d< |d ur)t||d< |d ur2||d
< nVt||\}}}}}}|d urKt||d< |d urWt||d< |d ur`||d
< |d urt|d urtd/| d0| |d< nt|tr~|}ntd7t| d2  j| q|d8d8 |d9d9 |d:d: |d;d; |d;d< |d=d= |d=d> |d ur3| d?}|d ur3|d0d.kr3|}	t|	|\}}}}}} jd u r|d urt| _ jd u r|d urt| _ jd u r	|d ur	| _ jd u r|d ur| _ jd u r%|d ur%| _ jd u r3|d ur3| _|d ur|j}	t|	|\}}}}}} jd u rX|d urXt| _ jd u ri|d urit| _ jd u rw|d urw| _ jd u r|d ur| _ jd u r|d ur| _ jd u r|d ur| _ S )ANmetadata_keystrmodel_card_keyc                   s6   |v rt  | d d u rt | | d S d S d S r   )getattrsetattrr0   )r   r   r
   rG   rJ   rK   use_model_card_metadata5  s   zBMetadata.apply_metadata_heuristic.<locals>.use_model_card_metadatac                   sj    |d }|d u rd S t | d }|d u rg }t|tr#|| n
t|tr-|| t | | d S r   )r0   r   ra   r   r\   listextendr   )r   r   
tags_valuecurrent_valuer   rJ   rK   use_array_model_card_metadata9  s   


zHMetadata.apply_metadata_heuristic.<locals>.use_array_model_card_metadatar   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r(   model_authormodel_versionmodel_organizationmodel_descriptionmodel_finetunemodel_basenamemodel_size_label	model_url	model_doi
model_uuidmodel_repo_urlmodel_creator
model_type
base_modelr!   base_model_sourceshttp://https://ssh://zhuggingface.coz&https?://huggingface.co/([^/]+/[^/]+)$r   zhttps://huggingface.co/r}   zbase model entry 'z' not in a known formatr$   datasetdataset_sources)r   r   r   zdataset entry 'r   r   r    r"   pipeline_tagr#   language_name_or_path)r   r   r   r   )r0   ra   r   r\   r   r   r!   
startswithrp   rq   groupr   r   rz   rb   rc   rd   r$   countr   r   r   r   r   r   )r
   rG   rH   r'   r)   r   r   metadata_base_modelsbase_model_valuer{   r   rq   model_id_componentr   r   r   r   r   r   metadata_datasetsdataset_value
dataset_idr   dataset_id_componentdataset_name_componenthf_name_or_pathrJ   r   rK   r.   -  s0  































 



























z!Metadata.apply_metadata_heuristicgguf_writergguf.GGUFWriterc                 C  sL  | j d usJ || j  | jd ur|| j | jd ur#|| j | jd ur.|| j | jd ur9|	| j | j
d urD|| j
 | jd urO|| j | jd urZ|| j | jd ure|| j | jd urt| jtrz|d| j n|| j | jd ur|| j | jd ur|| j | jd ur|| j | jd ur|| j | jd ur| | j | j!d ur|"| j! | j#d ur|$| j# | j%d ur|&| j% | j'd ur|(| j' | j)d ur|*| j) | j+d ur||,t-| j+ t.| j+D ]z\}}d|v r|/||d  d|v r|0||d  d|v r,|1||d  d|v r9|2||d  d|v rF|3||d  d|v rS|4||d  d|v r`|5||d  d	|v rm|6||d	  d
|v rz|7||d
  q| j8d ur
|9t-| j8 t.| j8D ]z\}}d|v r|:||d  d|v r|;||d  d|v r|<||d  d|v r|=||d  d|v r|>||d  d|v r|?||d  d|v r|@||d  d	|v r|A||d	  d
|v r|B||d
  q| jCd ur|D| jC | jEd ur$|F| jE d S d S )N,r   r   r   r   r   r   r   r   r   )Gr   add_namer   
add_authorr   add_versionr   add_organizationr   add_finetuner   add_basenamer   add_descriptionr   add_quantized_byr   add_size_labelr   ra   r   add_licenser]   r   add_license_namer    add_license_linkr   add_urlr   add_doir   add_uuidr   add_repo_urlr   add_source_urlr   add_source_doir   add_source_uuidr   add_source_repo_urlr!   add_base_model_countr[   r   add_base_model_nameadd_base_model_authoradd_base_model_versionadd_base_model_organizationadd_base_model_descriptionadd_base_model_urladd_base_model_doiadd_base_model_uuidadd_base_model_repo_urlr$   add_dataset_countadd_dataset_nameadd_dataset_authoradd_dataset_versionadd_dataset_organizationadd_dataset_descriptionadd_dataset_urladd_dataset_doiadd_dataset_uuidadd_dataset_repo_urlr"   add_tagsr#   add_languages)selfr   keybase_model_entrydataset_entryrJ   rJ   rK   set_gguf_meta_model  s   




































zMetadata.set_gguf_meta_model)NNNr   )
r%   r&   r'   r&   r(   r   r)   r*   r+   r   r   )r%   r&   r+   rM   )r'   r&   r+   rM   )Nr   )r{   r   r)   r*   r+   r|   )r
   r   rG   r   rH   r   r'   r&   r)   r*   r+   r   )r   r   )%__name__
__module____qualname__r   __annotations__r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r    r!   r"   r#   r$   staticmethodrL   r/   r,   r-   rz   r   r.   r  rJ   rJ   rJ   rK   r      sR   
 ;+
| rr   )
__future__r   rp   rT   r_   loggingpathlibr   typingr   r   r   dataclassesr   	constantsr	   gguf	getLoggerrc   r   rJ   rJ   rJ   rK   <module>   s    
