o
    wi]%                     @  s   d dl mZ d dlmZ d dlZd dlmZmZ d dl	m
Z
mZmZmZmZmZmZ d dlmZmZmZmZ d dlmZ 	ddddZG dd deZG dd deZG dd deZG dd deZG dd deZdS )    )annotations)ClassVarN)TensorProtosubbyte)
float4e2m1float8e4m3fnfloat8e4m3fnuz
float8e5m2float8e5m2fnuzint4uint4)float32_to_float8e4m3float32_to_float8e5m2np_dtype_to_tensor_dtypetensor_dtype_to_np_dtype)OpRunvalue
np.ndarrayshapetuple[int, ...]axisint
block_size
int | Nonereturnc                 C  s  t | jdkr	| S t | jdkr| jdkr| d S |sUt | jdks$J dgt | }z| j||< | t|W S  tyT } ztd| d| j d| d|d}~ww |dkr]tdtj| ||d	} || | j| kry| j	t
|| |d
} | j|krtdt|| j|ksJ | S )a  Reshape/Replicate scale/zero-point to be broadcastable to shape.

    Args:
        value: the array to be reshaped/replicated
        shape: the rarget shape
        axis: quantization axis, applicable for per-axis and blocked quantization
        block_size: size of quantization block, applicable only for blocked quantization

    Returns:
        value array after reshape/replicate according to quantization mode.
    r      zaxis is out of boundary, axis=z, value.shape=z, shape=.Nz&block_size must be a positive integer.)repeatsr   )indicesr   zInvalid shapes for Blocked Quantization. Input 2 shape should identical to Input 1 shape, except for one dimension, in which blocking is performed)lenr   sizereshapetuple
IndexError
ValueErrornprepeattakerangebroadcast_shapes)r   r   r   r   dimse r,   b/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/onnx/reference/ops/op_quantize_linear.pyreshape_input   sB   

r.   c                   @  s   e Zd ZU eeZeeZejdej	dej
dejdiZded< ejej	ej
ejejejejejejejejfZdddZ						ddddZdS ) _CommonQuantizeLinear)r      )i   )r   i  )i i  z0ClassVar[dict[TensorProto.DataType, tuple[int]]]quant_integer_ranges
zero_pointr   r   r   c                 C  s*  d }|j tkr|j jd d dkrtj}|S |j tkr*|j jd d dkr*tj}|S |j tkr>|j jd d dkr>tj}|S |j t	krR|j jd d dkrRtj
}|S |j tkrf|j jd d dkrftj}|S |j tkrz|j jd d dkrztj}|S |j tkr|j jd d dkrtj}|S t|j }|S )	Nr   e4m3fne4m3fnuze5m2e5m2fnuzr   r   r   )dtyper   descrr   FLOAT8E4M3FNr   FLOAT8E4M3FNUZr	   
FLOAT8E5M2r
   FLOAT8E5M2FNUZr   UINT4r   INT4r   
FLOAT4E2M1r   )selfr3   zero_point_typer,   r,   r-   get_zero_point_typeh   s8   





z)_CommonQuantizeLinear.get_zero_point_typeNr   Txy_scalenp.ndarray | Noner   saturateboolr   r   output_dtype	precisiontuple[np.ndarray]c	                   s&  t ||j||}| |d ur%| |}	|r#||	kr#td| d|	 |	  p)tj  tjvr7td  d|d urCt ||j||nd}|rVt|}
|	|
|	|
 }n|| } tj
v rt|	tj}||7 }t }tj
  }t||d |d 	|fS  tjkrtj||d}|	tfS  tjkrtj|d|d	}|	tfS  tjkrtj||d}|	tfS  tjkrtj|dd|d
}|	tfS  tjtjfv rt|	tj}||7 } fdd}t|}||}|fS  tjkr||7 }t|}|	tfS td  d)Nz+Mismatched output data-types: output_dtype=z, zero_point type=zUnexpected type: output_dtype=z# is not a supported quantized type.r   r   )rG   T)uzrG   )fnrL   rG   c                   s   t j|  tjkdS )N)signed)r   float32_to_4bit_unpackedr   r?   )rD   tensor_typer,   r-   <lambda>   s    
z,_CommonQuantizeLinear._run.<locals>.<lambda>) r.   r   rC   r$   r   UINT8r/   quant_typesr   astyper2   r%   rintint32clipr:   r   r   r;   r   r<   r   r	   r=   r
   r>   r?   	vectorizer@   r   float32_to_float4e2m1_unpackedr   )rA   rD   rE   r3   r   rG   r   rI   rJ   rB   precision_npxir8   quant_rangef8single_funcfunci4f4r,   rP   r-   _run   st   












z_CommonQuantizeLinear._run)r3   r   r   r   )Nr   TNNN)rD   r   rE   r   r3   rF   r   r   rG   rH   r   r   rI   r   rJ   r   r   rK   )__name__
__module____qualname__r%   rY   r   r   r   rS   INT8UINT16INT16r2   __annotations__r>   r?   r:   r;   r<   r=   r@   rT   rC   rc   r,   r,   r,   r-   r/   Q   s8   
 


$r/   c                         e Zd Zd fdd	Z  ZS )QuantizeLinear_10Nc                   s*   t |jdkrtdt j||||dS )Nr   %Input 2 must be a vector or a number.)r   r   r   r$   superrc   )rA   rD   rE   r3   r   	__class__r,   r-   rc      s   zQuantizeLinear_10._run)NNrd   re   rf   rc   __classcell__r,   r,   rp   r-   rl          rl   c                      rk   )QuantizeLinear_19Nc                   s,   t |jdkrtdt j|||||dS )Nr   rm   )r   rG   rn   )rA   rD   rE   r3   r   rG   rp   r,   r-   rc      s   zQuantizeLinear_19._run)NNNrr   r,   r,   rp   r-   ru      rt   ru   c                      s(   e Zd Zddddd fdd
Z  ZS )QuantizeLinear_21Nr   rG   r   rI   c                  s   t  j|||||dS )Nrw   ro   rc   )rA   r   rG   r   rI   argsrp   r,   r-   rc      s   zQuantizeLinear_21._runrr   r,   r,   rp   r-   rv      s     rv   c                      s*   e Zd Zdddddd fdd
Z  ZS )QuantizeLinear_23Nr   rG   r   rI   rJ   c                  s   t  j||||||dS )Nr{   rx   )rA   r   rG   r   rI   rJ   ry   rp   r,   r-   rc      s   
zQuantizeLinear_23._runrr   r,   r,   rp   r-   rz      s    rz   )N)
r   r   r   r   r   r   r   r   r   r   )
__future__r   typingr   numpyr%   onnxr   r   onnx._custom_element_typesr   r   r   r	   r
   r   r   onnx.helperr   r   r   r   onnx.reference.op_runr   r.   r/   rl   ru   rv   rz   r,   r,   r,   r-   <module>   s   $	2 