o
    Gi^                     @   s   d dl mZmZ ddlmZmZmZmZmZ ddl	m
Z
 er$ddlmZ e r1d dlZd dlmZ e r:d dlmZ eeZG d	d
 d
e
ZdS )    )TYPE_CHECKINGAny   )get_module_from_nameis_accelerate_availableis_nvidia_modelopt_availableis_torch_availablelogging   )DiffusersQuantizer)
ModelMixinN)set_module_tensor_to_devicec                
       s  e Zd ZdZdZdZdgZ fddZdd Zd	d
ddde	de
e	ef fddZd	d
ddde	ddfddZde
e	ee	B f de
e	ee	B f fddZd,ddZd-d.dd Zd	d
dee	 fd!d"Zg fd	d
d#ee	 fd$d%Zd&d' Zed(d) Zed*d+ Z  ZS )/NVIDIAModelOptQuantizerz8
    Diffusers Quantizer for Nvidia-Model Optimizer
    TFnvidia_modeloptc                    s   t  j|fi | d S N)super__init__)selfquantization_configkwargs	__class__ d/home/ubuntu/.local/lib/python3.10/site-packages/diffusers/quantizers/modelopt/modelopt_quantizer.pyr   %   s   z NVIDIAModelOptQuantizer.__init__c                 O   sb   t  stdd| _|dd }t|tr-d| v s!d| v r/| jr(tdd| _d S d S d S )NzkLoading an nvidia-modelopt quantized model requires nvidia-modelopt library (`pip install nvidia-modelopt`)F
device_mapcpudiskzYou are attempting to perform cpu/disk offload with a pre-quantized modelopt model This is not supported yet. Please remove the CPU or disk device from the `device_map` argument.T)	r   ImportErroroffloadget
isinstancedictvaluespre_quantized
ValueError)r   argsr   r   r   r   r   validate_environment(   s   

z,NVIDIAModelOptQuantizer.validate_environmentmodelr   param_valueztorch.Tensor
param_name
state_dictc           	      K   s<   ddl m} t||\}}| jrdS ||rd|v rdS dS )Nr   )is_quantizedTweightF)!modelopt.torch.quantization.utilsr+   r   r#   )	r   r'   r(   r)   r*   r   r+   moduletensor_namer   r   r   check_if_quantized_param;   s   	z0NVIDIAModelOptQuantizer.check_if_quantized_paramtarget_deviceztorch.devicec                 O   s   ddl m  m} |dtj}t||\}	}
| jr)tj|j	|d|	j
|
< dS t||||| ||	| jjd | jj ||	 d|	j_dS )zh
        Create the quantized parameter by calling .calibrate() after setting it to the module.
        r   Ndtype)device	algorithmF)modelopt.torch.quantizationtorchquantizationr   float32r   r#   nn	Parameterto_parametersr   	calibrater   modelopt_configforward_loopcompressr,   requires_grad)r   r'   r(   r)   r1   r%   r   mtqr2   r.   r/   r   r   r   create_quantized_paramM   s   
z.NVIDIAModelOptQuantizer.create_quantized_param
max_memoryreturnc                 C   s   dd |  D }|S )Nc                 S   s   i | ]	\}}||d  qS )g?r   ).0keyvalr   r   r   
<dictcomp>i   s    z=NVIDIAModelOptQuantizer.adjust_max_memory.<locals>.<dictcomp>)items)r   rD   r   r   r   adjust_max_memoryh   s   z)NVIDIAModelOptQuantizer.adjust_max_memorytarget_dtypetorch.dtypec                 C   s   | j jdkr	tj}|S )NFP8)r   
quant_typer6   float8_e4m3fn)r   rL   r   r   r   adjust_target_dtypel   s   z+NVIDIAModelOptQuantizer.adjust_target_dtypeNtorch_dtypec                 C   s   |d u rt d tj}|S )NzVYou did not specify `torch_dtype` in `from_pretrained`. Setting it to `torch.float32`.)loggerinfor6   r8   )r   rR   r   r   r   update_torch_dtypeq   s   
z*NVIDIAModelOptQuantizer.update_torch_dtypec                 C   sj   t jt jt jt jt jt jf}g }| D ]\}}t||r2|j	ddD ]\}}|
| d|  q#q|S )z
        Get parameter names for all convolutional layers in a HuggingFace ModelMixin. Includes Conv1d/2d/3d and
        ConvTranspose1d/2d/3d.
        F)recurse.)r9   Conv1dConv2dConv3dConvTranspose1dConvTranspose2dConvTranspose3dnamed_modulesr    named_parametersappend)r   r'   
conv_typesconv_param_namesnamer.   r)   _r   r   r   get_conv_param_namesw   s   	
z,NVIDIAModelOptQuantizer.get_conv_param_nameskeep_in_fp32_modulesc                 K   s   dd l m  m} | jrd S | jj}|d u rg }t|tr |g}|| | jj	r1|| 
| |D ]}ddi| jjd d| d < q3|| j_|j|d| jjfgd | j|j_d S )Nr   enableF	quant_cfg*quantize)mode)modelopt.torch.optr6   optr#   r   modules_to_not_convertr    strextenddisable_conv_quantizationre   r>   
apply_modeconfig)r   r'   r   rf   r   mtorn   r.   r   r   r   $_process_model_before_weight_loading   s    

z<NVIDIAModelOptQuantizer._process_model_before_weight_loadingc                 K   sJ   ddl m} | jr|S | D ]\}}t||jr"||ur"|| q|S )Nr   )ModeloptStateManager)rl   rv   r#   r^   hasattr
_state_keyremove_state)r   r'   r   rv   rd   mr   r   r   #_process_model_after_weight_loading   s   
z;NVIDIAModelOptQuantizer._process_model_after_weight_loadingc                 C   s   dS )NTr   r   r   r   r   is_trainable   s   z$NVIDIAModelOptQuantizer.is_trainablec                 C   s   | j jdd dS )Nsaving)	operationT)r   check_model_patchingr|   r   r   r   is_serializable   s   z'NVIDIAModelOptQuantizer.is_serializable)rL   rM   rE   rM   r   )rR   rM   rE   rM   )__name__
__module____qualname____doc__use_keep_in_fp32_modulesrequires_calibrationrequired_packagesr   r&   ro   r!   r   r0   rC   intrK   rQ   rU   listre   ru   r{   propertyr}   r   __classcell__r   r   r   r   r      sP    


*


r   )typingr   r   utilsr   r   r   r   r	   baser   models.modeling_utilsr   r6   torch.nnr9   accelerate.utilsr   
get_loggerr   rS   r   r   r   r   r   <module>   s    
