o
    ß¥µi¥  ã                   @   s,   d dl Z ddlmZ G dd„ de jjƒZdS )é    Né   )Úextract_weight_to_halfc                   @   s@   e Zd Zedejdejdejfdd„ƒZedejfdd„ƒZd	S )
ÚW8A16LinearÚinpÚquant_wÚscale_wc                 C   s|   |  ¡ | _|  ¡ | _|| _|  d¡}| ¡  d|  d¡¡}t|||ƒ}| | ¡ ¡}|  	|||¡ |j| jd d… |f Ž S )Nr   éÿÿÿÿ)
ÚsizeÚ	inp_shapeÚweight_shapeÚweight_bit_widthÚ
contiguousÚviewr   ÚmmÚtÚsave_for_backward)Úctxr   r   r   r   Úout_featuresÚweightÚoutput© r   új/home/ubuntu/.local/lib/python3.10/site-packages/modelscope/models/nlp/glm_130b/quantization/functional.pyÚforward	   s   


zW8A16Linear.forwardÚgrad_outputc                 C   sb   | j \}}}t||| jƒ}| ¡  d| d¡¡}| |¡}| ¡  |¡}| | j¡| | j	¡d fS )Nr   r   )
Úsaved_tensorsr   r   r   r   r	   r   r   r
   r   )r   r   r   r   r   r   Ú
grad_inputÚgrad_weightr   r   r   Úbackward   s   
ÿÿzW8A16Linear.backwardN)Ú__name__Ú
__module__Ú__qualname__ÚstaticmethodÚtorchÚTensorr   r   r   r   r   r   r      s    ÿr   )r"   Úkernelsr   ÚautogradÚFunctionr   r   r   r   r   Ú<module>   s   