o
    d۷i<                  
   @   sJ  d dl Z d dlZd dlmZ d dlmZ d dlmZmZm	Z	m
Z
mZmZmZmZmZ d dlZd dlZd dlmZmZ edddZerHd d	lmZ g d
ZdZdeded fddZdedefddZdIdedefddZdejj dedejj fddZ!dedefddZ"ded edefd!d"Z#	dJd#ee d$ee de	egef fd%d&Z$G d'd( d(Z%	 dKd)ej&d*ee' d+e'deej& fd,d-Z(dKd.d/Z)d0ej&dej&fd1d2Z*d3ej&d4ee' dej&fd5d6Z+e j,d7e-d8ed9efd:d;Z.e j,d<ee d8ed=ee fd>d?Z/G d@dA dAe0Z1dBede'fdCdDZ2dBede'fdEdFZ3dBede'fdGdHZ4dS )L    N)wraps)MappingProxyType)	TYPE_CHECKINGAnyCallableDictIterableListMappingOptionalTypeVar)
AutoConfigPretrainedConfigTr   )boundModelCompressor)"infer_compressor_from_model_configfix_fsdp_module_nametensor_follows_mask_structurereplace_moduleis_compressed_tensors_configgetattr_chain
deprecated	Aliasablecombine_shardsshard_tensorpack_bitmasksunpack_bitmasks
patch_attrpatch_attrsParameterizedDefaultDictget_num_attn_headsget_num_kv_headsget_head_dim_fsdp_wrapped_modulepretrained_model_name_or_pathreturnr   c                 C   sf   ddl m} ddlm} t| }||}|du rdS |d}|j|fi |}|j||d}|S )a  
    Given a path to a model config, extract a sparsity config if it exists and return
    the associated ModelCompressor

    :param pretrained_model_name_or_path: path to model config on disk or HF hub
    :return: matching compressor if config contains a sparsity config
    r   r   )CompressionConfigNformatconfig)	compressed_tensors.compressorsr   compressed_tensors.configr(   r   from_pretrainedparse_sparsity_configgetload_from_registry)r&   r   r(   r+   sparsity_configr)   
compressor r4   V/home/ubuntu/vllm_env/lib/python3.10/site-packages/compressed_tensors/utils/helpers.pyr   C   s   



r   namec                 C   s   |  td d dt dS )z
    Remove FSDP wrapper prefixes from a module name
    Accounts for scenario where FSDP_WRAPPER_NAME is
    at the end of the name, as well as in the middle.
    :param name: name to strip
    :return: stripped name
    . )replaceFSDP_WRAPPER_NAME)r6   r4   r4   r5   r   [   s   r   2:4maskc                 C   sP   t tt|d\}}| d|} | dkjdd}t||k s&t	 dS )a,  
    :param tensor: tensor to check
    :param mask: mask structure to check for, in the format "n:m"
    :return: True if the tensor follows the mask structure, False otherwise.
        Note, some weights can incidentally be zero, so we check for
        atleast n zeros in each chunk of size m
    :r      dimT)
tuplemapintsplitviewsumtorchallitem
ValueError)tensorr<   nmzero_countsr4   r4   r5   r   h   s   	r   model
new_modulec                 C   sT   d|v r| ddd }|t|d d  }| |}nd}| }|}t||| d S )Nr7   r?   r   r8   )rsplitlenget_submodulesetattr)rP   r6   rQ   parent_name
child_nameparentr4   r4   r5   r      s   r   compression_configc                 C   s.   zddl m} t| |W S  ty   Y dS w )z
    Returns True if CompressedTensorsConfig is available from transformers and
    compression_config is an instance of CompressedTensorsConfig

    See: https://github.com/huggingface/transformers/pull/31704
    r   )CompressedTensorsConfigF)&transformers.utils.quantization_configrZ   
isinstanceImportError)rY   rZ   r4   r4   r5   r      s   r   obj	chain_strc           	      O   s   t |dkrd}|d }nd|v rd}|d }nd}|d}| }|D ]}t||s9|r0|  S t| d| t||}q#|S )z
    Chain multiple getattr calls, separated by `.`

    :param obj: base object whose attributes are being retrieved
    :param chain_str: attribute names separated by `.`
    :param default: default value, throw error otherwise
    r?   Tr   defaultFr7   z object has no attribute )rS   rE   hasattrAttributeErrorgetattr)	r^   r_   argskwargshas_defaultr`   
attr_namesres	attr_namer4   r4   r5   r      s    



r   future_namemessagec                    s   dt dt f fdd}|S )z
    Decorator to mark functions as deprecated

    :param new_function: Function called in place of deprecated function
    :param message: Deprecation message, replaces default deprecation message
    funcr'   c                    sF   d u r j  dd urd d7 t  fdd}|S )Nz6 is deprecated and will be removed in a future releasez. Please use z	 instead.c                     s   t jtdd  | i |S )N   )
stacklevel)warningswarnDeprecationWarning)rd   re   )rl   rk   r4   r5   wrapped   s   z.deprecated.<locals>.decorator.<locals>.wrapped)__name__r   )rl   rr   rj   rk   )rl   r5   	decorator   s   
zdeprecated.<locals>.decorator)r   )rj   rk   ru   r4   rt   r5   r      s   
r   c                   @   s:   e Zd ZdZedeeef fddZdd Zdd Z	d	S )
r   z
    A mixin for enums to allow aliasing of enum members

    Example:
    >>> class MyClass(Aliasable, int, Enum):
    >>>     ...
    r'   c                   C   s   t  N)NotImplementedErrorr4   r4   r4   r5   get_aliases   s   zAliasable.get_aliasesc                 C   sl   t || jr |  }| j|jkp|| j| j||j|jkS |  }|| j| j}|||}||kS rv   )r\   	__class__rx   valuer0   )selfotheraliases
self_valueother_valuer4   r4   r5   __eq__   s   zAliasable.__eq__c                 C   s   | j | j| j}t|S rv   )r}   r0   rz   hash)r{   canonical_valuer4   r4   r5   __hash__   s   zAliasable.__hash__N)
rs   
__module____qualname____doc__staticmethodr   strrx   r   r   r4   r4   r4   r5   r      s    r   rL   shard_sizesrA   c                 C   sT   t || |krtdg }d}|D ]}|| }| |||}|| |}q|S )a  
    Shards a tensor into a list of tensors along a given dimension.

    raises: ValueError: If the sum of shard_sizes does not match the
        size of the tensor along the given dimension.

    :param tensor: The input tensor to shard.
    :param shard_sizes : List of sizes for each shard along the specified dimension.
    :param dim : The dimension along which to shard the tensor.
    :returns: A list of tensors sharded along the specified dimension.
    zSSum of shard_sizes must equal the size of the tensor along the specified dimension.r   )rG   sizerK   narrowappend)rL   r   rA   shards	start_idxr   end_idxshardr4   r4   r5   r      s   
r   c                    s   | st ddd | D }t|dkrt dt| d j}t fdd| D | < tj|| d j| d jd	}d}| D ]}|j  }|	 ||
| ||7 }q=|S )
z
    Combine decompressed shards along a given dimension using `narrow`.

    :param shards: List of decompressed shard tensors.
    :param dim: Dimension to combine along (default: 0).
    :return: Combined decompressed tensor.
    zThe list of shards is empty.c                 S   s   h | ]}|j qS r4   )dtype.0r   r4   r4   r5   	<setcomp>$  s    z!combine_shards.<locals>.<setcomp>r?   z$All shards must have the same dtype.r   c                 3   s    | ]}|j   V  qd S rv   )shaper   r@   r4   r5   	<genexpr>*  s    z!combine_shards.<locals>.<genexpr>)r   device)rK   rS   listr   rG   rH   zerosr   r   r   copy_)r   rA   shard_dtypestotal_shapecombinedshard_offsetr   
shard_sizer4   r@   r5   r     s   

r   	bytemasksc                 C   s"   t j|   ddd}t|}|S )a  
    Converts a bytemask tensor to a bitmask tensor to reduce memory. Shape RxC will be
    compressed to R x ceil(C/8)

    :param bytemasks: mask tensor where each byte corresponds to a weight
    :return: mask tensor where each bit corresounds to a weight
    r>   little)axisbitorder)numpypackbitsrH   
from_numpy)r   packed_bits_numpypacked_bits_torchr4   r4   r5   r   9  s   
r   packed_bitmasksoriginal_shapec                 C   s8   t j|    d|d dd}t||t}|S )a#  
    Converts a bitmask tensor back to a bytemask tensor for use during decompression

    :param packed_bitmasks: mask tensor where each bit corresponds to a weight
    :param original_shape: dense shape to decompress to
    :return: boolean mask of weights in the original dense shape
    r>   r   )r   countr   )r   
unpackbitscpurH   r   reshapeastypebool)r   r   unpacked_bitsunpacked_bitmasks_torchr4   r4   r5   r   G  s   
r   baseattrrz   c              
   c   sr    t  }t| ||}t| || zdV  W ||ur!t| || dS t| | dS ||ur3t| || w t| | w )a  
    Patch the value of an object attribute. Original value is restored upon exit

    :param base: object which has the attribute to patch
    :param attr: name of the the attribute to patch
    :param value: used to replace original value

    Usage:
    >>> from types import SimpleNamespace
    >>> obj = SimpleNamespace()
    >>> with patch_attr(obj, "attribute", "value"):
    ...     assert obj.attribute == "value"
    >>> assert not hasattr(obj, "attribute")
    N)objectrc   rU   delattr)r   r   rz   	_sentineloriginal_valuer4   r4   r5   r   a  s   r   basesvaluesc                 c   s\    t  }t| |D ]\}}|t||| qdV  W d   dS 1 s'w   Y  dS )a  
    Same as `patch_attr` but for a list of objects to patch
    Patch attribute for a list of objects with list of values.
    Original values are restored upon exit

    :param bases: objects which has the attribute to patch
    :param attr: name of the the attribute to patch
    :param values: used to replace original values. Must be same
        length as bases

    Usage:
    >>> from types import SimpleNamespace
    >>> obj1 = SimpleNamespace()
    >>> obj2 = SimpleNamespace()
    >>> with patch_attr([obj1, obj2], "attribute", ["value1", "value2"]):
    ...     assert obj1.attribute == "value1"
    ...     assert obj2.attribute == "value2"
    >>> assert not hasattr(obj1, "attribute")
    >>> assert not hasattr(obj2, "attribute")
    N)
contextlib	ExitStackzipenter_contextr   )r   r   r   stackr   rz   r4   r4   r5   r    ~  s   
"r    c                   @   sV   e Zd ZdZdeegef fddZdedefddZei d	d
e	defddZ
dS )r!   a
  
    Similar to `collections.DefaultDict`, but upon fetching a key which is missing,
    the key is passed as arguments to the `default_factory`

    :param default_factory: function which takes a key as input and returns the
        corresponding default value
    default_factoryc                 C   s   || _ ti | _d S rv   )r   r   _factory_kwargs)r{   r   r4   r4   r5   __init__  s   z!ParameterizedDefaultDict.__init__keyr'   c                 C   s>   t |tr| j|i | j}n
| j|fi | j}|| |< |S rv   )r\   rB   r   r   )r{   r   rz   r4   r4   r5   __missing__  s
   
z$ParameterizedDefaultDict.__missing__)factory_kwargsr   c                G   s8   t | d| | | W  d   S 1 sw   Y  dS )a"  
        Similar to `__getitem__`, but allows passing kwargs to factory function

        :param \*args: args whose tuple will value will be treated as key
        :param factory_kwargs: keyword arguments to pass to `default_factory`
        :return: dictionary entry for given key
        r   N)r   )r{   r   rd   r4   r4   r5   r0     s   $zParameterizedDefaultDict.getN)rs   r   r   r   r   r   r   r   r   r
   r0   r4   r4   r4   r5   r!     s
     r!   r+   c                 C   >   t | dr| jS t | drt | dr| j| j S td|  )z
    Get the number of attention heads used by a model

    :param config: model config
    :return: num_attention_heads of model
    num_attention_headshidden_sizehead_dimzCannot determine num_attention_heads from config. Config must define either `num_attention_heads` or both `hidden_size` and `head_dim`. )ra   r   r   r   rK   r*   r4   r4   r5   r"        
r"   c                 C   s   t | dr| jS td|  )z
    Get the number of key-value attention heads used by a model

    :param config: model config
    :return: num_key_value_heads of model
    num_key_value_headsz\Cannot determine num_key_value_heads from config. Config must define `num_key_value_heads`. )ra   r   rK   r*   r4   r4   r5   r#     s   
r#   c                 C   r   )z
    Get the number of dimensions used by the attention heads of a model

    :param config: model config
    :return: head_dim of model
    r   r   r   z}Cannot determine head_dim from config. Config must define either `head_dim` or both `hidden_size` and `num_attention_heads`. )ra   r   r   r   rK   r*   r4   r4   r5   r$     r   r$   )r;   )NN)r   )5r   ro   	functoolsr   typesr   typingr   r   r   r   r   r	   r
   r   r   r   rH   transformersr   r   r   r,   r   __all__r:   r   r   r   r   r   nnModuler   r   r   r   r   TensorrD   r   r   r   r   contextmanagerr   r   r    dictr!   r"   r#   r$   r4   r4   r4   r5   <module>   sv   ,
 
 

 !
 !