o
    Tip                     @   s   d dl T d dlmZ d dlmZ ddlmZ ddlmZ ddlm	Z	 dd	lm
Z
 dd
lmZ dhaG dd deeeZG dd deZdS )   )*)MetaTensorContainer)HybridEngineContainer    )DeepSpeedBloomInference   )TransformerPolicy)transformer_param_names)
maybe_copymaybe_get_loraNc                       sH   e Zd Z fddZdddZdddZd	d
 Zdd Zdd Z  Z	S )DS_BloomContainerc                    s   ddl m} |d}|d}t|d }t|d }|dks'|dkr4|dkr4dd l}|d| d	 t jdi | d
| _d| _	d S )Nr   )versiontransformers.r      +   zTransformers version zr exceeds version 4.43.4! After transformers version 4.43.4, BLOOM inference with DeepSpeed is no longer supported.TF )
importlib.metadatar   splitintsysexitsuper__init__bigscience_bloomtriangular_masking)selfkwargsr   v_transformersversmajorminorr   	__class__r   \/home/ubuntu/.local/lib/python3.10/site-packages/deepspeed/module_inject/containers/bloom.pyr      s   


zDS_BloomContainer.__init__Nc                 C   s>   |d ur|n| j }t|| jd| _| j| jj_d| jj_| jS )N)mp_groupF)ds_model_configr   r&   modulescale_attentionconfiginvert_mask)r   r*   _configr   r   r%   create_module(   s
   
zDS_BloomContainer.create_moduleFc                 C   s8   | | jjj| j| jj_| | jjj| j| jj_d S N)copyr(   	attention	attn_qkvwqkvw	attn_qkvbqkvb)r   
mp_replacereversed_dimr   r   r%   attention_qkv_mp0   s   z"DS_BloomContainer.attention_qkv_mpc                 C   s8   |   \}}}}|| jf|| jf|| jf|| jfg}|S )D
        Necessary to implement for `HybridEngineContainer`
        )get_lora_params_h4h_w_4hh_wr2   dense_w)r   fc1_lorafc2_loraqkv_loraout_loraretr   r   r%   get_lora_matched_pair4   s   $z'DS_BloomContainer.get_lora_matched_pairc                 C   s<   dd | j jjj| j jjj| j jjj| j jjjfD | _dS )r8   c                 S   s   g | ]}t |qS r   r   ).0pr   r   r%   
<listcomp>@   s    z5DS_BloomContainer.set_lora_params.<locals>.<listcomp>N)	policyclient_modulemlpdense_h_to_4hdense_4h_to_hself_attentionquery_key_valuedenselora_paramsr   r   r   r%   set_lora_params<   s
   z!DS_BloomContainer.set_lora_paramsc                 C   s   d}t ddD ]}t|j|||t| |||  d| jj| jjd	 qt ddD ]}t|j|||t| |||   q't ddD ]}t|j|||t| |||   q?t ddD ]}t||||t| |||   qWd S )	N)z%self_attention.query_key_value.weightz#self_attention.query_key_value.biaszself_attention.dense.weightzself_attention.dense.biaszmlp.dense_h_to_4h.weightzmlp.dense_h_to_4h.biaszmlp.dense_4h_to_h.weightzmlp.dense_4h_to_h.biaszpost_attention_layernorm.weightzpost_attention_layernorm.biaszinput_layernorm.weightzinput_layernorm.biasr   r   T)qkvmegatron_v2	split_qkvr   
      )ranger
   r0   r	   rF   is_megatron_v2rS   rH   )r   r(   sdweight_quantizerr5   prefixparam_namesir   r   r%   load_paramsG   s.   
	

 zDS_BloomContainer.load_paramsr.   F)
__name__
__module____qualname__r   r-   r7   rB   rP   r]   __classcell__r   r   r#   r%   r      s    

r   c                       sF   e Zd ZdZd fdd	Zdd Zddd	Zdd
dZdd Z  Z	S )BLOOMLayerPolicyNTFc              
      s   t  j|d||d || _zdd l}|jjjjt_	t
|jjjjh W d S  tyB } ztd|  d t_	W Y d }~d S d }~ww )NT)linear_layeruse_load_prefixrS   r   zNWARNING! Setting BLOOMLayerPolicy._orig_layer_class to None due to Exception: )r   r   rG   r   modelsbloommodeling_bloom
BloomBlockrc   _orig_layer_classsupported_modelsupdate
BloomModel	Exceptionprint)r   rG   	inferencere   rS   r   er#   r   r%   r   m   s   zBLOOMLayerPolicy.__init__c                 C   s   | j jj| j jj| j jjtfS r.   )rG   rK   hidden_size	num_headsinput_layernormepsDEFAULT_INTERMEDIATE_SIZErO   r   r   r%   get_hidden_headsy   s
   z!BLOOMLayerPolicy.get_hidden_headsc                 C   ,   | j jjj| j jjj| j jjj| j jjjfS r.   )rG   rK   rL   weightbiasrM   r   enable_trainingr   r   r%   r0      
   



zBLOOMLayerPolicy.attentionc                 C   rx   r.   )rG   rH   rI   ry   rz   rJ   r{   r   r   r%   rH      r}   zBLOOMLayerPolicy.mlpc                 C   s$   | j jj| j jj| j jj| j jjfS r.   )rG   post_attention_layernormry   rz   rt   rO   r   r   r%   	layernorm   s
   zBLOOMLayerPolicy.layernorm)TTFr^   )
r_   r`   ra   rj   r   rw   r0   rH   r   rb   r   r   r#   r%   rc   j   s    

rc   )basefeatures.meta_tensorr   features.hybrid_enginer   5deepspeed.model_implementations.transformers.ds_bloomr   rF   r   r	   r
   r   rk   BaseTransformerContainerr   rc   r   r   r   r%   <module>   s   W