o
    ٷi                     @   s   U d Z ddlmZ ddlmZ ddlmZ ddlmZ ddl	m
Z
 er)ddlmZ eeZee
d	Zeeee f ed
< ee ZdedB dedB fddZdedB ddfddZg dZdS )a  Quantization support for diffusion models.

This module provides a unified interface for quantizing diffusion transformers
using various methods (FP8, etc.). It wraps vLLM's quantization infrastructure
while allowing diffusion-model-specific defaults and optimizations.

Example usage:
    from vllm_omni.diffusion.quantization import (
        get_diffusion_quant_config,
        get_vllm_quant_config_for_layers,
    )

    # Create FP8 config for diffusion model
    diff_config = get_diffusion_quant_config("fp8")

    # Get vLLM config to pass to linear layers
    vllm_config = get_vllm_quant_config_for_layers(diff_config)

    # Use in model initialization
    linear_layer = QKVParallelLinear(..., quant_config=vllm_config)
    )TYPE_CHECKING)init_logger   )DiffusionQuantizationConfig)DiffusionFp8Config)DiffusionGgufConfig)QuantizationConfig)fp8gguf_QUANT_CONFIG_REGISTRYquantizationNreturnc                 K   s^   | du s
|   dkrdS |   } | tvrtd| dt t|  }td|  |di |S )a  Factory function to create quantization config for diffusion models.

    Args:
        quantization: Quantization method name ("fp8", etc.) or None to disable
        **kwargs: Method-specific parameters passed to the config constructor

    Returns:
        DiffusionQuantizationConfig instance or None if quantization is disabled

    Raises:
        ValueError: If the quantization method is not supported

    Example:
        # Default FP8 with dynamic activation scaling
        config = get_diffusion_quant_config("fp8")

        # FP8 with custom parameters
        config = get_diffusion_quant_config(
            "fp8",
            activation_scheme="static",
            ignored_layers=["proj_out"],
        )
    NnonezUnknown quantization method: z. Supported methods: z*Creating diffusion quantization config: %s )lowerr   
ValueErrorSUPPORTED_QUANTIZATION_METHODSloggerinfo)r   kwargs
config_clsr   r   ]/home/ubuntu/.local/lib/python3.10/site-packages/vllm_omni/diffusion/quantization/__init__.pyget_diffusion_quant_config2   s   r   diffusion_quant_configzQuantizationConfig | Nonec                 C   s   | du rdS |   S )a  Get the vLLM QuantizationConfig to pass to linear layers.

    This extracts the underlying vLLM config from a DiffusionQuantizationConfig,
    which can then be passed to vLLM linear layers (QKVParallelLinear, etc.).

    Args:
        diffusion_quant_config: The diffusion quantization config, or None

    Returns:
        vLLM QuantizationConfig instance, or None if input is None
    N)get_vllm_quant_config)r   r   r   r    get_vllm_quant_config_for_layers[   s   r   )r   r   r   r   r   r   )__doc__typingr   vllm.loggerr   baser   r	   r   r
   r   3vllm.model_executor.layers.quantization.base_configr   __name__r   r   dictstrtype__annotations__listkeysr   r   r   __all__r   r   r   r   <module>   s0   
)
