o
    ۷i                     @   s   d dl mZmZ d dlmZ ddlmZmZmZm	Z	m
Z
mZ ddlmZ er,ddlmZ e
 r3d dlZe r>d d	lmZmZ e	 rGd
dlmZ eeZG dd deZdS )    )TYPE_CHECKINGAny)is_optimum_quanto_version   )get_module_from_nameis_accelerate_availableis_accelerate_versionis_optimum_quanto_availableis_torch_availablelogging   )DiffusersQuantizer)
ModelMixinN)CustomDtypeset_module_tensor_to_device   )_replace_with_quanto_layersc                
       s  e Zd ZdZdZdZddgZ fddZdd	 Zd
dddde	de
e	ef fddZd
dddde	ddfddZde
e	ee	B f de
e	ee	B f fddZd1ddZd2d3d d!Zd"ee	 d#e	dee	 fd$d%Zg fd
dd&ee	 fd'd(Zd)d* Zed+d, Zed-d. Zedefd/d0Z  ZS )4QuantoQuantizerz0
    Diffusers Quantizer for Optimum Quanto
    TFquanto
acceleratec                    s   t  j|fi | d S N)super__init__)selfquantization_configkwargs	__class__ b/home/ubuntu/vllm_env/lib/python3.10/site-packages/diffusers/quantizers/quanto/quanto_quantizer.pyr   )   s   zQuantoQuantizer.__init__c                 O   sd   t  stdtddstdt std|dd }t|tr.t| dkr0t	dd S d S )	NzhLoading an optimum-quanto quantized model requires optimum-quanto library (`pip install optimum-quanto`)>=z0.2.6zLoading an optimum-quanto quantized model requires `optimum-quanto>=0.2.6`. Please upgrade your installation with `pip install --upgrade optimum-quantoz`Loading an optimum-quanto quantized model requires accelerate library (`pip install accelerate`)
device_mapr   zy`device_map` for multi-GPU inference or CPU/disk offload is currently not supported with Diffusers and the Quanto backend)
r	   ImportErrorr   r   get
isinstancedictlenkeys
ValueError)r   argsr   r!   r   r   r   validate_environment,   s$   
z$QuantoQuantizer.validate_environmentmodelr   param_valueztorch.Tensor
param_name
state_dictc           
         sl   ddl m}m} ddlm} t||\ }	| jr't fdd||fD r'dS t |r4d|	v r4 j	 S dS )	Nr   )QModuleMixinQTensor)PackedTensorc                 3   s    | ]}t  |V  qd S r   )r$   ).0tmoduler   r   	<genexpr>O   s    z;QuantoQuantizer.check_if_quantized_param.<locals>.<genexpr>TweightF)
optimum.quantor/   r0   optimum.quanto.tensor.packedr1   r   pre_quantizedanyr$   frozen)
r   r+   r,   r-   r.   r   r/   r0   r1   tensor_namer   r4   r   check_if_quantized_paramB   s   	 z(QuantoQuantizer.check_if_quantized_paramtarget_deviceztorch.devicec           
      O   sV   | dtj}t||\}}	| jrt||	| dS t||||| |  d|j_	dS )ze
        Create the quantized parameter by calling .freeze() after setting it to the module.
        dtypeFN)
r#   torchfloat32r   r:   setattrr   freezer7   requires_grad)
r   r+   r,   r-   r?   r)   r   r@   r5   r=   r   r   r   create_quantized_paramV   s   z&QuantoQuantizer.create_quantized_param
max_memoryreturnc                 C   s   dd |  D }|S )Nc                 S   s   i | ]	\}}||d  qS )g?r   )r2   keyvalr   r   r   
<dictcomp>m   s    z5QuantoQuantizer.adjust_max_memory.<locals>.<dictcomp>)items)r   rG   r   r   r   adjust_max_memoryl   s   z!QuantoQuantizer.adjust_max_memorytarget_dtypetorch.dtypec                 C   s0   t ddrtjtjtjtjd}|| jj }|S )Nr    z0.27.0)int8float8int4int2)	r   rA   rP   r   FP8INT4INT2r   weights_dtype)r   rN   mappingr   r   r   adjust_target_dtypep   s   
z#QuantoQuantizer.adjust_target_dtypeNtorch_dtypec                 C   s   |d u rt d tj}|S )NzVYou did not specify `torch_dtype` in `from_pretrained`. Setting it to `torch.float32`.)loggerinforA   rB   )r   rZ   r   r   r   update_torch_dtype|   s   
z"QuantoQuantizer.update_torch_dtypemissing_keysprefixc                    s   ddl m} g  | D ]*\}}t||r6|D ]}||v s&|| d| v r5|ds5|ds5 | qq fdd|D S )Nr   )r/   .z.weightz.biasc                    s   g | ]}| vr|qS r   r   )r2   knot_missing_keysr   r   
<listcomp>   s    z7QuantoQuantizer.update_missing_keys.<locals>.<listcomp>)r8   r/   named_modulesr$   endswithappend)r   r+   r^   r_   r/   namer5   missingr   rb   r   update_missing_keys   s   

z#QuantoQuantizer.update_missing_keyskeep_in_fp32_modulesc                 K   sP   | j j| _t| jts| jg| _| j| t|| j| j | jd}| j |j_ d S )N)modules_to_not_convertr   r:   )r   rl   r$   listextendr   r:   config)r   r+   r!   rk   r   r   r   r   $_process_model_before_weight_loading   s   

z4QuantoQuantizer._process_model_before_weight_loadingc                 K   s   |S r   r   )r   r+   r   r   r   r   #_process_model_after_weight_loading   s   z3QuantoQuantizer._process_model_after_weight_loadingc                 C      dS NTr   r   r   r   r   is_trainable      zQuantoQuantizer.is_trainablec                 C   rr   rs   r   rt   r   r   r   is_serializable   rv   zQuantoQuantizer.is_serializablec                 C   rr   rs   r   rt   r   r   r   is_compileable   rv   zQuantoQuantizer.is_compileable)rN   rO   rH   rO   r   )rZ   rO   rH   rO   )__name__
__module____qualname____doc__use_keep_in_fp32_modulesrequires_calibrationrequired_packagesr   r*   strr%   r   r>   rF   intrM   rY   r]   rm   rj   rp   rq   propertyru   rw   boolrx   __classcell__r   r   r   r   r       sT    


*



r   )typingr   r   diffusers.utils.import_utilsr   utilsr   r   r   r	   r
   r   baser   models.modeling_utilsr   rA   accelerate.utilsr   r   r   
get_loggerry   r[   r   r   r   r   r   <module>   s     
