o
    i6                     @   s  d Z ddlZddlmZ ddlmZ ddlZddlmZ ddlm	Z	 ddl
mZ ddlmZmZ dd	lmZmZmZ dd
lmZ ddlmZ ddlmZmZmZ ddlmZ ddlmZ ddlm Z m!Z! ddl"m#Z# ddl$m%Z% ddl&m'Z'm(Z( ddl)m*Z*m+Z+m,Z,m-Z-m.Z. de/dej0fddZ1G dd dej2Z3G dd dej2Z4G dd dej2Z5eG dd  d ej2Z6G d!d" d"ej2e'e(Z7d#ee8e9ej0f  dee8e9ej0f  fd$d%Z:dS )&z?Inference-only BLOOM model compatible with HuggingFace weights.    N)Iterable)islice)nn)BloomConfig)support_torch_compile)CacheConfig
VllmConfig)get_pp_groupget_tensor_model_parallel_rank$get_tensor_model_parallel_world_size)
get_act_fn)	Attention)ColumnParallelLinearQKVParallelLinearRowParallelLinear)LogitsProcessor)QuantizationConfig)ParallelLMHeadVocabParallelEmbedding)default_weight_loader)IntermediateTensors   )
SupportsPPSupportsQuant)AutoWeightsLoaderis_pp_missing_parameter'make_empty_intermediate_tensors_factorymake_layersmaybe_prefixtotal_num_headsreturnc                 C   s   dt t |  }tjddt |d     tjd}tjdd| tjd}t||}|| kritjddt d| d     tjd}t	|| | }tjddd|  dtjd}tj
|t||gdd}|S )N      )dtyper   )startendstepr#   r   )dim)mathfloorlog2torchtensorfloat32arangeint32powmincat)r   closest_power_of_2basepowersslopes
extra_basenum_remaining_headsextra_powers r:   V/home/ubuntu/vllm_env/lib/python3.10/site-packages/vllm/model_executor/models/bloom.py_get_alibi_slopes?   s(   r<   c                	       Z   e Zd Z			ddededB dedB def fddZd	ej	d
ej	dej	fddZ
  ZS )BloomAttentionN configcache_configquant_configprefixc              	      s
  t    |j| _|j| _| j| j | _| j| j | jksJ t }| j| dks+J | j| | _t| j| j| jd|| dd| _	t
| j| jd|| dd| _t }|| j }|d | j }t| j}	|	||  }	| jd }
t| j| j|
|	||| dd	| _d S )
Nr   Tz.query_key_value)biasrB   rC   z.denser   g      z.attn)alibi_slopesrA   rB   rC   )super__init__hidden_sizen_headr   head_dimr   	num_headsr   query_key_valuer   denser
   r<   tolistr   attn)selfr@   rA   rB   rC   tp_world_sizetp_rank
head_starthead_endrE   scaling	__class__r:   r;   rG   X   sL   
	


zBloomAttention.__init__position_idshidden_statesr    c           
      C   sD   ~|  |\}}|jddd\}}}| |||}| |\}	}|	S )Nr"   )chunksr'   )rL   chunkrO   rM   )
rP   rX   rY   qkv_qkvattn_outputoutputr:   r:   r;   forward   s   zBloomAttention.forwardNNr?   __name__
__module____qualname__r   r   r   strrG   r+   Tensorrd   __classcell__r:   r:   rV   r;   r>   W   s(    3r>   c                       sJ   e Zd Z		ddededB def fddZdejd	ejfd
dZ	  Z
S )BloomMLPNr?   r@   rB   rC   c                    sV   t    |j}t|d| || dd| _td| _td| ||| dd| _d S )N   z.dense_h_to_4h)rB   rC   geluz.dense_4h_to_h)	rF   rG   rH   r   dense_h_to_4hr   	gelu_implr   dense_4h_to_h)rP   r@   rB   rC   rH   rV   r:   r;   rG      s   

zBloomMLP.__init__xr    c                 C   s*   |  |\}}| |}| |\}}|S N)rp   rq   rr   )rP   rs   r^   r:   r:   r;   rd      s   
zBloomMLP.forward)Nr?   )rg   rh   ri   r   r   rj   rG   r+   rk   rd   rl   r:   r:   rV   r;   rm      s    rm   c                	       r=   )
BloomBlockNr?   r@   rA   rB   rC   c                    sn   t    |j}tj||jd| _t|||| dd| _tj||jd| _	t
||| dd| _|j| _d S )Nepsz.self_attentionrC   z.mlp)rF   rG   rH   r   	LayerNormlayer_norm_epsiloninput_layernormr>   self_attentionpost_attention_layernormrm   mlp(apply_residual_connection_post_layernorm)rP   r@   rA   rB   rC   rH   rV   r:   r;   rG      s   
zBloomBlock.__init__rX   rY   r    c                 C   s\   |  |}| jr|}n|}| j||d}|| }| |}| jr#|}n|}| || }|S )N)rX   rY   )r{   r   r|   r}   r~   )rP   rX   rY   layernorm_outputresidualattention_outputrc   r:   r:   r;   rd      s   

zBloomBlock.forwardre   rf   r:   r:   rV   r;   ru      s(    ru   c                       s   e Zd Zdddedef fddZdejdejfd	d
Z	ddejdB dejde	dB dejdB deje	B f
ddZ
deeeejf  dee fddZ  ZS )
BloomModelr?   rx   vllm_configrC   c                   s   t    |jj|j |j| _j| _t	j
| j| _tj| jjd| _tj fdd| dd\| _| _| _tj| jjd| _tdgj| _d S )Nrv   c                    s   t  | dS )Nrx   )ru   rx   rA   r@   rB   r:   r;   <lambda>  s    z%BloomModel.__init__.<locals>.<lambda>z.hrx   rY   )rF   rG   model_config	hf_configrA   rB   r@   rH   	embed_dimr   
vocab_sizeword_embeddingsr   ry   rz   word_embeddings_layernormr   num_hidden_layersstart_layer	end_layerhln_fr   make_empty_intermediate_tensors)rP   r   rC   rV   r   r;   rG      s,   
	
zBloomModel.__init__	input_idsr    c                 C   s
   |  |S rt   )r   rP   r   r:   r:   r;   embed_input_ids  s   
zBloomModel.embed_input_idsNrX   intermediate_tensorsinputs_embedsc                 C   s   t  jr|d ur|}n| |}| |}n
|d usJ |d }t| j| j| jD ]}|||}q)t  js;t	d|iS | 
|}|S )NrY   )r	   is_first_rankr   r   r   r   r   r   is_last_rankr   r   )rP   r   rX   r   r   rY   layerr:   r:   r;   rd     s   

zBloomModel.forwardweightsc                 C   s   t | jdd}t }|D ]V\}}t|| rq|| }d|v rSt|dd }| jj}|d urS|j}	||	d | |ddf |	|d d   }|	||d }|
|	}t|dt}
|
|| || q|S )	NF)remove_duplicaterL   
output_dimr"   rZ   r   weight_loader)dictnamed_parameterssetr   getattrr@   num_attention_headsshapeview	transposereshaper   add)rP   r   params_dictloaded_paramsnameloaded_weightparamr   rK   loaded_weight_shaper   r:   r:   r;   load_weights,  s0   



zBloomModel.load_weightsrt   )rg   rh   ri   r   rj   rG   r+   rk   r   r   rd   r   tupler   r   rl   r:   r:   rV   r;   r      s     "
,r   c                       s   e Zd Zdddedef fddZdejdejfd	d
Z		ddejdB dejde	dB dejdB deje	B f
ddZ
dejdejdB fddZdeeeejf  dee fddZ  ZS )BloomForCausalLMr?   rx   r   rC   c                   s   t    |jj}|j}|| _|| _t|t|dd| _| jj	r&| jj
| _nt| jj| jjt|dd| _t|j| _| jj| _d S )Ntransformer)r   rC   lm_headrx   )rF   rG   r   r   rB   r@   r   r   r   tie_word_embeddingsr   r   r   r   rH   r   logits_processorr   )rP   r   rC   r@   rB   rV   r:   r;   rG   M  s$   

zBloomForCausalLM.__init__r   r    c                 C   s   | j |S rt   )r   r   r   r:   r:   r;   r   d  s   z BloomForCausalLM.embed_input_idsN	positionsr   r   c                 C   s   |  ||||}|S rt   )r   )rP   r   r   r   r   rY   r:   r:   r;   rd   g  s   zBloomForCausalLM.forwardrY   c                 C   s   |  | j|}|S rt   )r   r   )rP   rY   logitsr:   r:   r;   compute_logitss  s   zBloomForCausalLM.compute_logitsr   c                 C   s    t | dgd}t|}||S )Nzlm_head.weight)skip_prefixes)r   _add_transformer_prefixr   )rP   r   loaderr:   r:   r;   r   z  s   
zBloomForCausalLM.load_weights)NN)rg   rh   ri   r   rj   rG   r+   rk   r   r   rd   r   r   r   r   r   rl   r:   r:   rV   r;   r   L  s,    

,r   r   c                 c   s0    | D ]\}}| dsd| }||fV  qd S )Nztransformer.)
startswith)r   r   r,   r:   r:   r;   r     s   
r   );__doc__r(   collections.abcr   	itertoolsr   r+   r   transformersr   vllm.compilation.decoratorsr   vllm.configr   r   vllm.distributedr	   r
   r   %vllm.model_executor.layers.activationr   $vllm.model_executor.layers.attentionr   !vllm.model_executor.layers.linearr   r   r   +vllm.model_executor.layers.logits_processorr   'vllm.model_executor.layers.quantizationr   3vllm.model_executor.layers.vocab_parallel_embeddingr   r   -vllm.model_executor.model_loader.weight_utilsr   vllm.sequencer   
interfacesr   r   utilsr   r   r   r   r   intrk   r<   Moduler>   rm   ru   r   r   r   rj   r   r:   r:   r:   r;   <module>   s@   	A8]4