o
    Giv                     @  s   d dl mZ d dlmZmZ ddlmZ erddlmZ ddl	m
Z
mZmZmZmZmZmZ e rFe rFd dlZd	d
l	mZmZmZmZmZ eeZG dd deZdS )    )annotations)TYPE_CHECKINGAny   )DiffusersQuantizer   )
ModelMixin)get_module_from_nameis_accelerate_availableis_accelerate_versionis_gguf_availableis_gguf_versionis_torch_availableloggingN   )GGML_QUANT_SIZESGGUFParameter#_dequantize_gguf_and_restore_linear_quant_shape_from_byte_shape_replace_with_gguf_linearc                      s   e Zd ZdZ fddZdd Zd5d	d
Zd6ddZd7ddZdd Z	d8ddZ
		d9d:d%d&Zg fd;d)d*Zd<d+d,Zed-d. Zed=d/d0Zed=d1d2Zd3d4 Z  ZS )>GGUFQuantizerTc                   sN   t  j|fi | |j| _|j| _|jpg | _t| jts%| jg| _d S d S N)super__init__compute_dtypepre_quantizedmodules_to_not_convert
isinstancelist)selfquantization_configkwargs	__class__ \/home/ubuntu/.local/lib/python3.10/site-packages/diffusers/quantizers/gguf/gguf_quantizer.pyr   )   s   zGGUFQuantizer.__init__c                 O  s4   t  rtddrtdt rtddrtdd S )N<z0.26.0zoLoading GGUF Parameters requires `accelerate` installed in your environment: `pip install 'accelerate>=0.26.0'`z0.10.0zhTo load GGUF format files you must have `gguf` installed in your environment: `pip install gguf>=0.10.0`)r
   r   ImportErrorr   r   )r   argsr!   r$   r$   r%   validate_environment3   s   z"GGUFQuantizer.validate_environment
max_memorydict[str, int | str]returnc                 C  s   dd |  D }|S )Nc                 S  s   i | ]	\}}||d  qS )g?r$   ).0keyvalr$   r$   r%   
<dictcomp>@   s    z3GGUFQuantizer.adjust_max_memory.<locals>.<dictcomp>)items)r   r*   r$   r$   r%   adjust_max_memory>   s   zGGUFQuantizer.adjust_max_memorytarget_dtype'torch.dtype'c                 C  s"   |t jkrtd| d t jS )Nztarget_dtype z3 is replaced by `torch.uint8` for GGUF quantization)torchuint8loggerinfo)r   r3   r$   r$   r%   adjust_target_dtypeC   s   
z!GGUFQuantizer.adjust_target_dtypetorch_dtypec                 C  s   |d u r| j }|S r   )r   )r   r:   r$   r$   r%   update_torch_dtypeH   s   z GGUFQuantizer.update_torch_dtypec           
      C  sN   |j }|j }|j}t| \}}t|||}	|	|kr%t| d|	 d| dS )Nz% has an expected quantized shape of: z, but received shape: T)shape
quant_typer   r   
ValueError)
r   
param_namecurrent_paramloaded_paramloaded_param_shapecurrent_param_shaper=   
block_size	type_sizeinferred_shaper$   r$   r%   check_quantized_param_shapeM   s   z)GGUFQuantizer.check_quantized_param_shapemodel'ModelMixin'param_value 'GGUFParameter' | 'torch.Tensor'r?   str
state_dictdict[str, Any]boolc                 K  s   t |trdS dS )NTF)r   r   )r   rH   rJ   r?   rM   r!   r$   r$   r%   check_if_quantized_param\   s   
z&GGUFQuantizer.check_if_quantized_paramNtarget_device'torch.device'dict[str, Any] | Noneunexpected_keyslist[str] | Nonec           
      K  sr   t ||\}}	|	|jvr|	|jvrt| d|	 d|	|jv r(|||j|	< |	|jv r7|||j|	< d S d S )Nz- does not have a parameter or a buffer named .)r	   _parameters_buffersr>   to)
r   rH   rJ   r?   rQ   rM   rT   r!   moduletensor_namer$   r$   r%   create_quantized_parami   s   


z$GGUFQuantizer.create_quantized_paramkeep_in_fp32_modules	list[str]c                 K  sB   | dd }| j| dd | jD | _t|| j|| jd d S )NrM   c                 S  s   g | ]}|d ur|qS r   r$   )r-   rZ   r$   r$   r%   
<listcomp>   s    zFGGUFQuantizer._process_model_before_weight_loading.<locals>.<listcomp>)r   )getr   extendr   r   )r   rH   
device_mapr]   r!   rM   r$   r$   r%   $_process_model_before_weight_loading|   s   
z2GGUFQuantizer._process_model_before_weight_loadingc                 K  s   |S r   r$   )r   rH   r!   r$   r$   r%   #_process_model_after_weight_loading   s   z1GGUFQuantizer._process_model_after_weight_loadingc                 C     dS NFr$   r   r$   r$   r%   is_serializable      zGGUFQuantizer.is_serializablec                 C  re   rf   r$   rg   r$   r$   r%   is_trainable   ri   zGGUFQuantizer.is_trainablec                 C  re   )NTr$   rg   r$   r$   r%   is_compileable   ri   zGGUFQuantizer.is_compileablec                 C  s`   |j jdk}|r!td ttdrtj ntj	 }|
| t|| j}|r.|
d |S )NcpuzModel was found to be on CPU (could happen as a result of `enable_model_cpu_offload()`). So, moving it to accelerator. After dequantization, will move the model back to CPU again to preserve the previous device.accelerator)devicetyper7   r8   hasattrr5   rm   current_acceleratorcudacurrent_devicerY   r   r   )r   rH   is_model_on_cpurn   r$   r$   r%   _dequantize   s   

zGGUFQuantizer._dequantize)r*   r+   r,   r+   )r3   r4   r,   r4   )r:   r4   r,   r4   )
rH   rI   rJ   rK   r?   rL   rM   rN   r,   rO   )NN)rH   rI   rJ   rK   r?   rL   rQ   rR   rM   rS   rT   rU   )rH   rI   r]   r^   )rH   rI   )r,   rO   )__name__
__module____qualname__use_keep_in_fp32_modulesr   r)   r2   r9   r;   rG   rP   r\   rc   rd   propertyrh   rj   rk   ru   __classcell__r$   r$   r"   r%   r   &   s,    






r   )
__future__r   typingr   r   baser   models.modeling_utilsr   utilsr	   r
   r   r   r   r   r   r5   r   r   r   r   r   
get_loggerrv   r7   r   r$   r$   r$   r%   <module>   s    $
	