o
    i.W                     @   s  d dl Z d dlmZmZ d dlZd dlmZ ddlm	Z	m
Z
mZ ddlmZ ddlmZ ddlmZ e r9d dlZeeZd	d
dddddde	de
d ide
d idZeed  ZG dd deZG dd dZG dd deZG dd deZG dd deZG dd  d eZG d!d" d"eZG d#d$ d$eZ G d%d& d&eZ!G d'd( d(eZ"G d)d* d*eZ#eeeeeeee e!e"e"e#d+Z$d,d- Z%			.d7d/ee& d0ee' d1e&fd2d3Z(d8d5d6Z)dS )9    N)
NamedTupleOptional)tqdm   )GGUF_CONFIG_MAPPINGGGUF_TOKENIZER_MAPPING_gguf_parse_value)is_torch_available)is_gguf_available)
get_loggerversiontensor_countkv_count)r   r   r   	file_typequantization_version)r   r   )GGUFgeneral	tokenizertokenizer_config)ignoreconfigr   r   r   c                   @   s(   e Zd ZU ejed< eed< eed< dS )
GGUFTensorweightsnamemetadataN)__name__
__module____qualname__npndarray__annotations__strdict r#   r#   \/home/ubuntu/.local/lib/python3.10/site-packages/transformers/modeling_gguf_pytorch_utils.pyr   7   s   
 
r   c                   @   s   e Zd ZdddZdd ZdS )TensorProcessorNc                 C   s   |pi | _ d S Nr   selfr   r#   r#   r$   __init__>   s   zTensorProcessor.__init__c                 K   s   t ||i S r&   r   r)   r   r   kwargsr#   r#   r$   processA   s   zTensorProcessor.processr&   )r   r   r   r*   r.   r#   r#   r#   r$   r%   =   s    
r%   c                	       sL   e Zd Zd fdd	Zdd Z	ddejdedee d	ejfd
dZ	  Z
S )LlamaTensorProcessorNc                       t  j|d d S Nr'   superr*   r(   	__class__r#   r$   r*   F      zLlamaTensorProcessor.__init__c                 K   sz   d|v sd|v r7| j d}| j d}d ||fv r t||i S d|v r,| |||}nd|v r7| |||}t||i S )Nz.attn_k.z.attn_q.num_attention_headsnum_key_value_heads)r   getr   _reverse_permute_weights)r)   r   r   r-   	num_headsnum_kv_headsr#   r#   r$   r.   I   s   zLlamaTensorProcessor.processr   n_headr<   returnc                 C   sZ   |d ur
||kr
|}|j d | d }|j||dg|j dd  R  }|dd|j S )Nr      r   )shapereshapeswapaxes)r)   r   r=   r<   dimwr#   r#   r$   r:   V   s
    z-LlamaTensorProcessor._reverse_permute_weightsr&   )r   r   r   r*   r.   r   r   intr   r:   __classcell__r#   r#   r4   r$   r/   E   s    r/   c                       sJ   e Zd Zd fdd	Zdd Zdejdeeef ded	efd
dZ	  Z
S )Qwen2MoeTensorProcessorNc                    r0   r1   r2   r(   r4   r#   r$   r*   d   r6   z Qwen2MoeTensorProcessor.__init__c                 K   s^   d|v r| d}| d}|r| |||| t|d i S d|v r)tj|dd}t||i S )N_exptensor_key_mappingparsed_parametersffn_gate_inp_shexpr   axis)r9   _split_moe_expert_tensorr   r   expand_dims)r)   r   r   r-   rI   rJ   r#   r#   r$   r.   g   s   

zQwen2MoeTensorProcessor.processr   rJ   r   rI   c           	      C   s^   || }| j dd}td|D ]}|dd| d}|| }tt||d |< qd S )Nnum_experts<   r   mlp.experts..tensors)r   r9   rangereplacetorch
from_numpyr   copy)	r)   r   rJ   r   rI   	w_counteri	temp_name
exp_weightr#   r#   r$   rN   t   s   z0Qwen2MoeTensorProcessor._split_moe_expert_tensorr&   )r   r   r   r*   r.   r   r   r"   r!   rN   rF   r#   r#   r4   r$   rG   c   s    
rG   c                       sV   e Zd Zd fdd	Zdd Zdejdedefd	d
ZdejdedefddZ	  Z
S )BloomTensorProcessorNc                    r0   r1   r2   r(   r4   r#   r$   r*      r6   zBloomTensorProcessor.__init__c                 K   sN   d|v r!| j d }| j d }d|v r| |||}n| |||}t||i S )Nattn_qkvr=   hidden_sizeweight)r   _reverse_reshape_weights_reverse_reshape_biasr   )r)   r   r   r-   r;   n_embedr#   r#   r$   r.      s   

zBloomTensorProcessor.processr   r=   rd   c                 C   sx   t j|ddd\}}}|||| |}|||| |}|||| |}t j|||gdd}||d ||  |S )N   r   rL   r   )r   array_splitrA   stack)r)   r   r=   rd   qkvqkv_weightsr#   r#   r$   rb      s   z-BloomTensorProcessor._reverse_reshape_weightsc                 C   s^   t |d\}}}|||| }|||| }|||| }t j|||gdd }|S )Nre   r   rL   )r   rf   rA   rg   flatten)r)   r   r=   rd   q_biask_biasv_biasqkv_biasr#   r#   r$   rc      s   z*BloomTensorProcessor._reverse_reshape_biasr&   )r   r   r   r*   r.   r   r   rE   rb   rc   rF   r#   r#   r4   r$   r^      s
    
 r^   c                       &   e Zd Zd fdd	Zdd Z  ZS )T5TensorProcessorNc                    r0   r1   r2   r(   r4   r#   r$   r*      r6   zT5TensorProcessor.__init__c                 K   s8   d }| dD ]}| rt|} nqt||d|iS )NrS   bid)splitisdigitrE   r   )r)   r   r   r-   rs   chunkr#   r#   r$   r.      s   zT5TensorProcessor.processr&   r   r   r   r*   r.   rF   r#   r#   r4   r$   rr          rr   c                       rq   )GPT2TensorProcessorNc                    r0   r1   r2   r(   r4   r#   r$   r*      r6   zGPT2TensorProcessor.__init__c                 K   sf   d|v sd|v sd|v sd|v r|j }|dkr-d}|di }tt||d |< d }t||i S )	Nzattn_qkv.weightzffn_down.weightzffn_up.weightzattn_output.weightoutput.weightzlm_head.weightrJ   rT   )Tr9   rW   rX   r   rY   r   )r)   r   r   r-   rJ   r#   r#   r$   r.      s   zGPT2TensorProcessor.processr&   rw   r#   r#   r4   r$   ry      rx   ry   c                       rq   )MambaTensorProcessorNc                    r0   r1   r2   r(   r4   r#   r$   r*      r6   zMambaTensorProcessor.__init__c                 K   s6   d|v rt j|dd}d|v rt | }t||i S )Nzssm_conv1d.weightr   rL   ssm_a)r   rO   logr   r,   r#   r#   r$   r.      s
   zMambaTensorProcessor.processr&   rw   r#   r#   r4   r$   r|      rx   r|   c                       rq   )NemotronTensorProcessorNc                    r0   r1   r2   r(   r4   r#   r$   r*      r6   z NemotronTensorProcessor.__init__c                 K      d|v r|d }t ||i S Nznorm.weightr   r+   r,   r#   r#   r$   r.         zNemotronTensorProcessor.processr&   rw   r#   r#   r4   r$   r      s    r   c                       rq   )Gemma2TensorProcessorNc                    r0   r1   r2   r(   r4   r#   r$   r*      r6   zGemma2TensorProcessor.__init__c                 K   r   r   r+   r,   r#   r#   r$   r.      r   zGemma2TensorProcessor.processr&   rw   r#   r#   r4   r$   r      s    r   c                       rq   )Lfm2TensorProcessorNc                    r0   r1   r2   r(   r4   r#   r$   r*      r6   zLfm2TensorProcessor.__init__c                 K   s"   d|v rt j|dd}t||i S )Nzshortconv.conv.weightr   rL   )r   rO   r   r,   r#   r#   r$   r.      s   zLfm2TensorProcessor.processr&   rw   r#   r#   r4   r$   r      rx   r   )llamaqwen2moeqwen3moebloomt5	t5encodergpt2mambanemotrongemma2gemma3lfm2c                    s,   || j vrg S | j |   fdd jD S )Nc                       g | ]}t  j|  jqS r#   r   partstypes.0_data_indexvaluer#   r$   
<listcomp>      zread_field.<locals>.<listcomp>)fieldsdata)readerfieldr#   r   r$   
read_field  s   

r    
model_type
num_layers	qual_namec                    s  t  rt rddlm}m} n	td td|du r | jj	n|}|du r*| jj
n|}|dkr3d}n|dkr:d	}n|d
krAd}n|dkrHd}n|dkrNd}d}| D ]\}}||kr`|} nqT|du rmtd| d|||}	i  |  }
|
D ]>}|dv rd|v rtdd|}|d}}|ds|dr|dd\}}d| }|	|}|du rqz||  || < qz|   }r|D ]!\}}t|||| | dd} fdd| D } | q S )aY  
    GGUF uses this naming convention for their tensors from HF checkpoint:
    `blk.N.BB.weight` and `blk.N.BB.bias`
    where N signifies the block number of a layer, and BB signifies the
    attention/mlp layer components.
    See "Standardized tensor names" in
    https://github.com/ggerganov/ggml/blob/master/docs/gguf.md for details.
    r   )MODEL_ARCH_NAMESget_tensor_name_mapLoading a GGUF checkpoint in PyTorch, requires both PyTorch and GGUF>=0.10.0 to be installed. Please see https://pytorch.org/ and https://github.com/ggerganov/llama.cpp/tree/master/gguf-py for installation instructions.KPlease install torch and gguf>=0.10.0 to load a GGUF checkpoint in PyTorch.Ncoherez	command-r	qwen2_moer   	qwen3_moer   gemma3_textr   umt5r   zUnknown gguf model_type: z in gguf-py. This might because you're using an outdated version of gguf-py package, you can install `gguf` package from source refer to https://github.com/ggerganov/llama.cpp/tree/master/gguf-py#development)r   r   rR   zmlp.experts.\d+.r   z.weightz.biasrS   r   )r   c                    s   i | ]\}}| vr||qS r#   r#   )r   ri   rj   gguf_to_hf_name_mapr#   r$   
<dictcomp>e  r   z+get_gguf_hf_weights_map.<locals>.<dictcomp>)r
   r	   ggufr   r   loggererrorImportErrorr   r   num_hidden_layersitemsNotImplementedError
state_dictresubendswithrsplitget_namenamed_childrenget_gguf_hf_weights_mapupdate)hf_modelr   r   r   r   r   archkeyr   name_mapr   hf_namer   suffix	gguf_namer   childsub_mapr#   r   r$   r     s`   



r   Fc           !         s\  t  rt rddlm}m} n	td td|| }|j}t	|
 }dd tD }t|dd }	t|d}
d	}d
|	v rEd|
v rEd}n6d|	v sMd|	v ryd|d d< |
rkd|
d  v rkd}d|	v rjdg|d d< nd|	v rvdg|d d< d}n|	}d|	v rd}nd|	v rd}d|	v rh d dt fdd|jD }tfdd|jD }||d d< | |d d < |	tvr|tvrtd!|	 d"d#d$g}td%d |jD p|	|v |d d&< |j D ]\}||	|}|d'}|d }d'|d(d	 }fd)d*jD }t|d(kr|d }t|tr$|	|v r$||	|}t D ]3\}}||v rZ||| v rZ|| | }|d+krEq(|d	urP||| |< ||v rZ|| q(||v rltd,| d-|  q|d d. d/kr|d0|d d.< |d d. d1kr|d d2 }t||d d2< d3|d d4< d5d* t|D |d d6< d7|d vr|d8 }d9|v rt|d9 |d d7< ntd: |ri |d;< t |}|!di }t"!|	t#}||d<}t$|jd=d>D ]3}|j%}||j|j&}|j'||||d?} | j(}| j%}||vrq|| }t)*t+,||d; |< qt|dkr,td@|  |S )Aa  
    Load a GGUF file and return a dictionary of parsed parameters containing tensors, the parsed
    tokenizer and config attributes.

    Args:
        gguf_checkpoint_path (`str`):
            The path the to GGUF file to load
        return_tensors (`bool`, defaults to `False`):
            Whether to read the tensors from the file and return them. Not doing so is faster
            and only loads the metadata in memory.
    r   )
GGUFReader
dequantizer   r   c                 S   s   i | ]}|i qS r#   r#   )r   ri   r#   r#   r$   r     s    z(load_gguf_checkpoint.<locals>.<dictcomp>zgeneral.architecturezgeneral.nameNr   mistralr   r   Tr   is_gated_actr   UMT5EncoderModelarchitecturesT5EncoderModelr   r   r   r   stablelm>   attn_k.biasattn_q.biasattn_v.biasffn_normc                 3   s$    | ]} D ]}||j v V  qqd S r&   r   )r   tensor	bias_name)attn_bias_namer#   r$   	<genexpr>  s   " z'load_gguf_checkpoint.<locals>.<genexpr>c                 3   s    | ]} |j v V  qd S r&   r   r   r   )ffn_norm_namer#   r$   r         use_qkv_biasuse_parallel_residualzGGUF model with architecture z is not supported yet.falconr   c                 s   s    | ]}d |j kV  qdS )rz   Nr   r   r#   r#   r$   r     r   tie_word_embeddingsrS   r   c                    r   r#   r   r   )r   r#   r$   r     r   z(load_gguf_checkpoint.<locals>.<listcomp>z1Some keys were not parsed and added into account z | r   r   r   r   r8   Fblock_auto_adjust_ff_dimc                 S   s   g | ]
\}}|d kr|qS )r   r#   )r   r[   r<   r#   r#   r$   r     s    full_attn_idxs
vocab_sizer   tokenszCan't find a way to retrieve missing config vocab_size from tokenizer parameters. This will use default value from model config class and cause unexpected behavior.rT   r'   z,Converting and de-quantizing GGUF tensors...)desc)r   r   rI   rJ   z0Some keys of the GGUF file were not considered: )-r
   r	   r   r   r   r   r   r   r   listkeysGGUF_TO_TRANSFORMERS_MAPPINGr   loweranyrT   GGUF_SUPPORTED_ARCHITECTURES
ValueErrorallr   rV   rt   joinr   len
isinstancer!   removeinfomax	enumeratewarningr   r9   TENSOR_PROCESSORSr%   r   r   tensor_typer.   r   rW   rX   r   rY   )!gguf_checkpoint_pathreturn_tensorsmodel_to_loadr   r   r   r   reader_keysrJ   architecture
model_nameupdated_architecturerp   r   
exceptionsgguf_keyrt   prefix
config_keyr   	parameterparameter_renamesrenamed_config_keygguf_num_key_value_headstokenizer_parametersrI   r   ProcessorClass	processorr   r   r   resultr#   )r   r   r   r$   load_gguf_checkpointk  s   










r  )NNr   )FN)*r   typingr   r   numpyr   	tqdm.autor   integrationsr   r   r   utilsr	   utils.import_utilsr
   utils.loggingr   rW   r   r   r   r   r   r   r   r%   r/   rG   r^   rr   ry   r|   r   r   r   r   r   r!   rE   r   r  r#   r#   r#   r$   <module>   st   

'

R