o
    Xi5                     @  s   d Z ddlmZ ddgZddlZddlZddlZddlZ	e
eZdddZdd ZG dd de	jjZG dd de	jjZdS )z>Pass for removing duplicated initializer tensors from a graph.    )annotationsDeduplicateInitializersPass!DeduplicateHashedInitializersPassNinitializerir.Value
size_limitintreturnboolc                 C  sd   |   s|  rtd| j dS | j}|du r!td| j dS |j|kr0td| j| dS dS )z=Check if the initializer should be skipped for deduplication.zJSkipped deduplication of initializer '%s' as it is a graph input or outputTNzqSkipped deduplication of initializer '%s' as it has no constant value. The model may contain invalid initializerszDSkipped initializer '%s' as it exceeds the size limit of %d elementsF)is_graph_inputis_graph_outputloggerwarningnameconst_valuesizedebug)r   r   	const_val r   c/home/ubuntu/.local/lib/python3.10/site-packages/onnx_ir/passes/common/initializer_deduplication.py_should_skip_initializer   s*   
r   c                 C  s$   | j  rt|   S |  S )a  StringTensor does not support tobytes. Use 'string_data' instead.

    However, 'string_data' yields a list of bytes which cannot be hashed, i.e.,
    cannot be used to index into a dict. To generate keys for identifying
    tensors in initializer deduplication the following converts the list of
    bytes to an array of fixed-length strings which can be flattened into a
    bytes-string. This, together with the tensor shape, is sufficient for
    identifying tensors for deduplication, but it differs from the
    representation used for serializing tensors (that is string_data) by adding
    padding bytes so that each string occupies the same number of consecutive
    bytes in the flattened .tobytes representation.
    )dtype	is_stringnparraystring_datatobytes)valr   r   r   _tobytes2   s   
r   c                      .   e Zd ZdZdd fddZdddZ  ZS )r   as  Remove duplicated initializer tensors from the main graph and all subgraphs.

    This pass detects initializers with identical shape, dtype, and content,
    and replaces all duplicate references with a canonical one.

    Initializers are deduplicated within each graph. To deduplicate initializers
    in the model globally (across graphs), use :class:`~onnx_ir.passes.common.LiftSubgraphInitializersToMainGraphPass`
    to lift the initializers to the main graph first before running pass.

    .. versionadded:: 0.1.3
    .. versionchanged:: 0.1.7
        This pass now deduplicates initializers in subgraphs as well.
       r   r   c                      t    || _d S Nsuper__init__r   selfr   	__class__r   r   r%   S   s   

z$DeduplicateInitializersPass.__init__modelir.Modelr	   ir.passes.PassResultc           	      C  s   d}|  D ]T}i }t|j D ]H}t|| jrq|j}|d us#J |jt|jt	|f}||v rUd}|| }|
| |jd usDJ |j|j td|j|j q|||< qqtjj||dS )NFT8Replaced initializer '%s' with existing initializer '%s'r*   modified)graphstupleinitializersvaluesr   r   r   r   shaper   replace_all_uses_withr   popr   infoirpasses
PassResult)	r'   r*   r/   graphr2   r   r   keyinitializer_to_keepr   r   r   callW   s.   

z DeduplicateInitializersPass.call)r    r   r   r*   r+   r	   r,   __name__
__module____qualname____doc__r%   r>   __classcell__r   r   r(   r   r   D   s    c                      r   )r   a  Remove duplicated initializer tensors (using a hashed method) from the graph.

    This pass detects initializers with identical shape, dtype, and hashed content,
    and replaces all duplicate references with a canonical one.

    This pass should have a lower peak memory usage than :class:`DeduplicateInitializersPass`
    as it does not store the full tensor data in memory, but instead uses a hash of the tensor data.

    .. versionadded:: 0.1.7
            r   r   c                   r!   r"   r#   r&   r(   r   r   r%      s   

z*DeduplicateHashedInitializersPass.__init__r*   r+   r	   r,   c                 C  s  d}|  D ]|}i }t|j D ]p}t|| jrq|j}|d us#J t }|	 }|
| | }	t|j	 }
|j|
|	f}||v r}t|| jt|krZtd|| | qd}|| }|| |jd uslJ |j|j td|j|j q|||< qqtjj||dS )NFzVInitializer deduplication failed: hashes match but values differ with values %s and %sTr-   r.   )r0   r1   r2   r3   r   r   r   hashlibsha512numpyupdate	hexdigestr4   r   r   r   r   r5   r   r6   r7   r8   r9   r:   )r'   r*   r/   r;   r2   r   r   hashedtensor_datatensor_digesttensor_dimsr<   r=   r   r   r   r>      sF   


'z&DeduplicateHashedInitializersPass.call)rG   r?   r@   rA   r   r   r(   r   r   u   s    )r   r   r   r   r	   r
   )rE   
__future__r   __all__rH   loggingrJ   r   onnx_irr8   	getLoggerrB   r   r   r   r9   InPlacePassr   r   r   r   r   r   <module>   s   

1