o
    ei,                     @   s  d dl Z d dlZd dlmZ d dlmZ ddlmZ ddlm	Z	m
Z
 er*ddlmZ d d	lmZ dd
lmZmZmZ e rCddlmZ e rJd dlZe raee jdedkrad dlmZ eeZdededB fddZdd Zdd Z e ree jdZ!G dd deZ"dS )    N)TYPE_CHECKING)version   )HfQuantizer)get_module_from_nameshould_convert_module   )PreTrainedModel)	safe_open)is_torch_availableis_torchao_availablelogging)WeightConvertertorchao0.15.0)flatten_tensor_state_dictconfig_namereturnc                 C   s&   |   } td| }|r|dS dS )z
    Extract the size digit from strings like "4weight", "8weight".
    Returns the digit as an integer if found, otherwise None.
    z
(\d)weightr   N)lowerresearchgroup)r   	str_match r   g/home/ubuntu/transcripts/venv/lib/python3.10/site-packages/transformers/quantizers/quantizer_torchao.pyfuzzy_match_size1   s
   
r   c                 C   sj   ddl m} ddlm} t| |r| jj d|   dS t| |r3| jj d| j dt| j	 dS d S )Nr   )AffineQuantizedTensor)LinearActivationQuantizedTensor()z(activation=	, weight=)
torchao.dtypesr   7torchao.quantization.linear_activation_quantized_tensorr   
isinstance	__class____name___quantization_typeinput_quant_funcoriginal_weight_tensor)weightr   r   r   r   r   r&   @   s   

"r&   c                 C   sZ   t | j}|d u rd| jjd  d| jjd  dS d| jjd  d| jjd  d| S )Nzin_features=r   z, out_features=r   z, weight=Noner    )r&   r)   shape)selfr)   r   r   r   _linear_extra_reprK   s   
"&r,   c                       s  e Zd ZdZdZ fddZdd Zdd Zd	d
 Zddde	ddde
f fddZdee	ee	B f dee	ee	B f fddZd*d+ddZddde	defddZdd ZdefddZedefdd Zedefd!d"Zd#ee	 fd$d%Zd&d' Zd(d) Z  ZS ),TorchAoHfQuantizerz?
    Quantizer for torchao: https://github.com/pytorch/ao/
    Fc                    st   t  j|fi | d | _| jj}t|tr)dddd}||v r'|| | _d S d S t|jj	}|dkr5dnd| _d S )Ng      ?r   )int4_weight_onlyint8_weight_only#int8_dynamic_activation_int8_weight4)
super__init__quantized_param_sizequantization_config
quant_typer#   strr   r$   r%   )r+   r5   kwargsr6   map_to_param_size
size_digitr$   r   r   r3   ^   s   
zTorchAoHfQuantizer.__init__c                 O   s   t  stdd| _|d}t|tr6d| v s d| v r6t|dkr6d| _| jr6d| v r6t	d| jrX|d	}|rZt
tj
d
}|t
dk r\td| dd S d S d S )NzSLoading an torchao quantized model requires torchao library (`pip install torchao`)F
device_mapdiskcpur   TzYou are attempting to perform disk offload with a pre-quantized torchao model This is not supported yet . Please remove the disk device from the device_map.weights_onlytorchz2.5.0zlIn order to use torchao pre-quantized model, you need to have torch>=2.5.0. However, the current version is zc. You can also set with `weights_only=False` in `from_pretrained` if you don't want to update torch)r   ImportErroroffloadgetr#   dictvalueslenpre_quantized
ValueErrorr   parse	importlibmetadataRuntimeError)r+   argsr8   r<   r?   torch_versionr   r   r   validate_environmento   s,   

$

z'TorchAoHfQuantizer.validate_environmentc                 C   s2   | j jdkr|tjkrtd| d tj}|S )Nr.   zSetting dtype to zr for int4_weight_only quantization, but only bfloat16 is supported right now. Overwriting torch_dtype to bfloat16.)r5   r6   r@   bfloat16loggerwarning_once)r+   dtyper   r   r   update_dtype   s   

zTorchAoHfQuantizer.update_dtypec                 C   s(   t dtkrt| S tdt )zv
        We flatten the state dict of tensor subclasses so that it is compatible with the safetensors format.
        r   zaIn order to use safetensors with torchao, please use torchao version >= 0.15.0. Current version: )r   rI   TORCHAO_VERSIONr   
state_dictrL   )r+   modelr   r   r   get_state_dict_and_metadata   s
   z.TorchAoHfQuantizer.get_state_dict_and_metadatarW   r	   
param_nameparamztorch.Tensorr   c                    s,   |  ||r| jdur| jS t |||S )z4Return the element size (in bytes) for `param_name`.N)param_needs_quantizationr4   r2   param_element_size)r+   rW   rY   rZ   r;   r   r   r\      s   z%TorchAoHfQuantizer.param_element_size
max_memoryc                 C   s   dd |  D }|S )Nc                 S   s   i | ]	\}}||d  qS )g?r   ).0keyvalr   r   r   
<dictcomp>   s    z8TorchAoHfQuantizer.adjust_max_memory.<locals>.<dictcomp>)items)r+   r]   r   r   r   adjust_max_memory   s   z$TorchAoHfQuantizer.adjust_max_memoryNc                    s   |  || jj|j| _| jjr9|   fdd| D | fdd| D fdd| jD | _|d urD| | d S d S )Nc                    $   g | ]\}}t |t  kr|qS r   idr^   namemodule)	input_embr   r   
<listcomp>      $ zKTorchAoHfQuantizer._process_model_before_weight_loading.<locals>.<listcomp>c                    rd   r   re   rg   )
output_embr   r   rk      rl   c                    s   g | ]
}|  vr|qS r   r   )r^   x)input_emb_namesoutput_emb_namesr   r   rk      s    )	get_modules_to_not_convertr5   modules_to_not_convert_keep_in_fp32_modulesinclude_input_output_embeddingsget_input_embeddingsnamed_modulesget_output_embeddingsset_metadata)r+   rW   checkpoint_filesr8   r   )rj   ro   rm   rp   r   $_process_model_before_weight_loading   s   z7TorchAoHfQuantizer._process_model_before_weight_loadingc                 K   s   t || jsdS t||\}}tjjg}| jjr|tjj	 | j
 tdkr^ddlm}m} t| jj|r^|dd\}	}
||	| jjs\||| jjs\d| jjjv r^t|t|r^dS t|t|oh|d	kS )
NFr   r   )FqnToConfigfqn_matches_fqn_config.r   _defaultTr)   )r   rr   r   r@   nnLinearr5   rt   append	Embedding_get_ao_versionr   rI   torchao.quantizationr{   r|   r#   r6   rsplitfqn_to_configtuple)r+   rW   rY   r8   ri   tensor_name_QUANTIZABLEr{   r|   
module_fqnparam_name_fqnr   r   r   r[      s&   
z+TorchAoHfQuantizer.param_needs_quantizationc                 K   s   d S Nr   )r+   rW   r8   r   r   r   #_process_model_after_weight_loading   s   z6TorchAoHfQuantizer._process_model_after_weight_loadingc                 C   s*   t dtk}t dtkstd |S )Nr   ztorchao quantized model only supports serialization for torchao version >= 0.15.0, please upgrade your version to save the quantized model)r   rI   rU   rQ   warning)r+   _is_torchao_serializabler   r   r   is_serializable   s   z"TorchAoHfQuantizer.is_serializablec                 C   s   ddg}| j j|v S )Nr/   r0   )r5   r6   )r+   "supported_quant_types_for_trainingr   r   r   is_trainable   s   zTorchAoHfQuantizer.is_trainablec                 C   s   dS )NTr   )r+   r   r   r   is_compileable   s   z!TorchAoHfQuantizer.is_compileablery   c              	   C   sl   |d  dr4i }|D ]#}t|dd}| pi }|| W d    n1 s)w   Y  q|| _d S d S )Nr   z.safetensorspt)	framework)endswithr
   rK   update)r+   ry   rK   
checkpointf	metadata_r   r   r   rx      s   
zTorchAoHfQuantizer.set_metadatac                 C   s   ddl m} || S )Nr   )TorchAoQuantize)integrations.torchaor   )r+   r   r   r   r   get_quantize_ops   s   z#TorchAoHfQuantizer.get_quantize_opsc                 C   s0   ddl m} | jrtg dd|| gdgS g S )Nr   )TorchAoDeserialize)_weight_qdata_weight_scale_and_zero_weight_scale_weight_zero_point_weight_act_pre_scaler)   )source_patternstarget_patterns
operations)r   r   rG   r   )r+   r   r   r   r   get_weight_conversions   s   z)TorchAoHfQuantizer.get_weight_conversionsr   )rW   r	   )r%   
__module____qualname____doc__requires_calibrationr3   rO   rT   rX   r7   floatr\   rD   intrc   rz   boolr[   r   r   propertyr   r   listrx   r   r   __classcell__r   r   r;   r   r-   W   s(    	*	
r-   )#rJ   r   typingr   	packagingr   baser   quantizers_utilsr   r   modeling_utilsr	   safetensorsr
   utilsr   r   r   core_model_loadingr   r@   rI   rK   1torchao.prototype.safetensors.safetensors_supportr   
get_loggerr%   rQ   r7   r   r&   r,   rU   r-   r   r   r   r   <module>   s0   
