o
    ٷis                     @   s   d Z ddlZddlZddlmZ ddlmZmZm	Z	m
Z
mZmZmZ ddlmZ dejdejd	ed
ejfddZG dd de	ZG dd deZG dd deZdS )z4GGUF quantization config for diffusion transformers.    N)_custom_ops)UNQUANTIZED_TYPES
GGUFConfigGGUFLinearMethod
LinearBaseQuantizeMethodBaseUnquantizedLinearMethodis_layer_skipped_gguf   )DiffusionQuantizationConfigxqweightqweight_typereturnc                 C   s`   |t v r	| |j S tj| \}}|jd |jd | | f}tj||g|| jR  }| |j S )Nr   r
   )r   TggufGGML_QUANT_SIZESshapeopsggml_dequantizedtype)r   r   r   
block_size	type_sizer   weight r   Y/home/ubuntu/.local/lib/python3.10/site-packages/vllm_omni/diffusion/quantization/gguf.pydequant_gemm_gguf   s   

r   c                	   @   s8   e Zd Z	ddejjdejdejdB dejfddZdS )	DiffusionGGUFLinearMethodNlayerr   biasr   c              	   C   s   t |jdd }|rGd|v rg dn|}|j}g }|D ]$}|jj| \}}	}
|jj| }|t||||	d |
f  | qtj	|dd}n|j}|jj
}t|||}|d ur]|| |S )Nshard_idq)r!   kv)axis)getattrr   shard_offset_mapr   shard_weight_typeappendr   
contiguoustorchcatweight_typeadd_)selfr   r   r   r    r   resultidxstartendoffsetr   outr   r   r   apply   s    	(
zDiffusionGGUFLinearMethod.applyN)__name__
__module____qualname__r+   nnModuleTensorr6   r   r   r   r   r      s    r   c                   @   s&   e Zd ZdejjdeddfddZdS )_GGUFConfigr   prefixr   r   c                 C   s,   t |trt|| j| jrt S t| S d S r7   )
isinstancer   r	   unquantized_modulespacked_modules_mappingr   r   )r/   r   r?   r   r   r   get_quant_method>   s
   
z_GGUFConfig.get_quant_methodN)r8   r9   r:   r+   r;   r<   strrC   r   r   r   r   r>   =   s    r>   c                   @   s<   e Zd ZdZeZ		ddedB dee dB ddfddZdS )	DiffusionGgufConfiga  GGUF quantization config for diffusion transformers.

    This is a thin wrapper around vLLM's GGUFConfig and also carries
    the GGUF model reference for loader use.

    Args:
        gguf_model: GGUF model path or HF reference (repo/file or repo:quant_type)
        unquantized_modules: Optional list of module name patterns to skip GGUF
            quantization. Note: diffusion linear layers often use short prefixes
            (e.g., "to_qkv"), so these patterns are matched as substrings.
    N
gguf_modelrA   r   c                 C   s"   || _ |pg | _t| jd| _d S )N)rA   )rF   rA   r>   _vllm_config)r/   rF   rA   r   r   r   __init__U   s   
zDiffusionGgufConfig.__init__)NN)	r8   r9   r:   __doc__r   quant_config_clsrD   listrH   r   r   r   r   rE   F   s    
rE   )rI   r   r+   vllmr   r   ,vllm.model_executor.layers.quantization.ggufr   r   r   r   r   r   r	   baser   r=   intr   r   r>   rE   r   r   r   r   <module>   s   $
 		