o
    ߥi                     @   sp   d dl Z d dlmZmZmZmZmZmZ d dlm	Z	 ddl
mZ ddlmZ G dd	 d	eZG d
d deZdS )    N)ColumnParallelLinearRowParallelLinearcopy_to_model_parallel_region!gather_from_model_parallel_region!reduce_from_model_parallel_region scatter_to_model_parallel_region)	Parameter   )compress_int4_weight   )W8A16Linearc                       ,   e Zd Zddef fddZdd Z  ZS )QuantizedColumnParallelLinearNweight_bit_widthc                      t t| j|i | || _| jj}| `|d u r=tj|d |d | d tj|d d| _tj|d |d |d d| _	n1|
 jddjd	|d  d   | _	t|| j	d d d f  tj| _|d
krnt| j| _t| j|d dd| _t| j	|d dd| _	d S Nr   r      device)dtyper   params_dtype)dimr	      F)requires_grad)superr   __init__r   weightshapetorchemptyint8weight_scaleabsmaxvalueshalfroundtor
   r   selfr   r   argskwargsr   	__class__ f/home/ubuntu/.local/lib/python3.10/site-packages/modelscope/models/nlp/glm_130b/quantization/layers.pyr      @   
z&QuantizedColumnParallelLinear.__init__c                 C   sL   t |}t|| j| j| j}| jd ur|| j }| jr"t|}|S |}|S N)	r   r   applyr   r!   r   biasgather_outputr   )r)   input_input_paralleloutput_paralleloutputr.   r.   r/   forward/   s   


z%QuantizedColumnParallelLinear.forwardr1   __name__
__module____qualname__intr   r9   __classcell__r.   r.   r,   r/   r          r   c                       r   )QuantizedRowParallelLinearNr   c                    r   r   )r   rA   r   r   r   r   r   r   r    r!   r"   r#   r$   r%   r&   r'   r
   r   r(   r,   r.   r/   r   B   r0   z#QuantizedRowParallelLinear.__init__c                 C   sR   | j r|}nt|}t|| j| j| j}t|}| jd ur%|| j }|S |}|S r1   )	input_is_parallelr   r   r2   r   r!   r   r   r3   )r)   r5   r6   r7   output_r8   r.   r.   r/   r9   a   s   


z"QuantizedRowParallelLinear.forwardr1   r:   r.   r.   r,   r/   rA   @   r@   rA   )r   SwissArmyTransformer.mpur   r   r   r   r   r   torch.nn.parameterr   kernelsr
   
functionalr   r   rA   r.   r.   r.   r/   <module>   s    2