o
    TiP                     @   s(   d dl Z d dlmZ G dd deZdS )    N)DeepSpeedTransformerInferencec                       s4   e Zd ZdZ					d	 fdd	Zdd Z  ZS )
DeepSpeedLlama2Inferencez4Initialize the DeepSpeed OPT Transformer Layer.
    N   Fc                    s   t  |||||| d S )N)super__init__)selfconfigmp_groupquantize_scalesquantize_groupsmerge_countmlp_extra_grouping	__class__ j/home/ubuntu/.local/lib/python3.10/site-packages/deepspeed/model_implementations/transformers/ds_llama2.pyr      s   z!DeepSpeedLlama2Inference.__init__c                 O   s  |d }d }d}|  |  |jd dkrd | _| j}|j}| jjtjtjtj	fv rB|jtj
krB| jtj	kr:tjn| j}||}t 3 | ||d ||d d d | j| jd \}	}
}}}|
|f| _| |	||| jj}||}W d    |S 1 s|w   Y  |S )Nr   Tr   )allocate_workspacesizeshape
layer_pastdtyper   torchfloat16bfloat16int8floathalftono_grad	attentionnorm_wnorm_bmlpattn_ob)r   argskwargsinput
input_maskget_presentr   
input_typetarget_dtypeattention_outputkeyvaluecontext_outputtn_ctxinp_normoutputr   r   r   forward   s>   




z DeepSpeedLlama2Inference.forward)NNr   r   F)__name__
__module____qualname____doc__r   r1   __classcell__r   r   r   r   r   
   s    	r   )r   ;deepspeed.model_implementations.transformers.ds_transformerr   r   r   r   r   r   <module>   s   