o
    
۾iD                     @   s  d Z ddlmZ ddlmZ ddlZddlmZ ddlmZm	Z	 ddl
mZ ddlmZmZ dd	lmZmZ dd
lmZ ddlmZ ddlmZmZmZ ddlmZ ddlmZ ddlmZ ddl m!Z! ddl"m#Z#m$Z$m%Z% ddl&m'Z' ddl(m)Z) ddl*m+Z+ ddl,m-Z-m.Z.m/Z/ ddl0m1Z1m2Z2m3Z3m4Z4m5Z5m6Z6 ej7e)j8ddd Z9G dd dej:Z;G dd dej:Z<G dd  d ej:Z=G d!d" d"ej:Z>eG d#d$ d$ej:Z?G d%d& d&ej:e-e.e/Z@dS )'zPyTorch Cohere model.    )Iterable)isliceN)nn)Cohere2ConfigCohereConfig)support_torch_compile)CacheConfig
VllmConfig)get_pp_group$get_tensor_model_parallel_world_size)
SiluAndMul)	Attention)MergedColumnParallelLinearQKVParallelLinearRowParallelLinear)LogitsProcessor)QuantizationConfig)get_rope)VocabParallelEmbedding)default_weight_loadermaybe_remap_kv_scale_namerow_parallel_weight_loader)set_weight_attrs)current_platform)IntermediateTensors   )SupportsLoRA
SupportsPPSupportsQuant)AutoWeightsLoaderextract_layer_indexis_pp_missing_parameter'make_empty_intermediate_tensors_factorymake_layersmaybe_prefix)backendc                 C   sh   | j }| tj} | jddd}| | djddd}| | t||  } |tj|  } | |S )NT)keepdim   )dtypetotorchfloat32meanpowrsqrt)hidden_statesweightvariance_epsiloninput_dtyper-   variance r5   W/home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/commandr.pylayer_norm_funcC   s   
r7   c                       s(   e Zd Zd fdd	ZdddZ  ZS )		LayerNormNh㈵>c                    s6   t    tt|| _|| _t| jdt	i d S )Nweight_loader)
super__init__r   	Parameterr+   onesr1   r2   r   r   )selfparam_shapeeps	__class__r5   r6   r<   O   s   
zLayerNorm.__init__c                 C   s   t || j| j}||fS N)r7   r1   r2   )r?   r0   	residualsr5   r5   r6   forwardU   s   
zLayerNorm.forward)Nr9   rD   )__name__
__module____qualname__r<   rF   __classcell__r5   r5   rB   r6   r8   N   s    r8   c                       s@   e Zd Z		d
deeB dedB def fddZdd	 Z  Z	S )	CohereMLPN configquant_configprefixc                    sn   t    || _|j| _|j| _t| j| jgd d|| dd| _t| j| jd|| dd| _t	 | _
d S )Nr(   Fz.gate_up_projbiasrN   rO   z
.down_proj)r;   r<   rM   hidden_sizeintermediate_sizer   gate_up_projr   	down_projr   act_fn)r?   rM   rN   rO   rB   r5   r6   r<   ^   s&   

zCohereMLP.__init__c                 C   s*   |  |\}}| |}| |\}}|S rD   )rT   rV   rU   )r?   xgate_up_r5   r5   r6   rF   x   s   
zCohereMLP.forward)NrL   )
rG   rH   rI   r   r   r   strr<   rF   rJ   r5   r5   rB   r6   rK   ]   s    rK   c                	       sf   e Zd Z			ddeeB dedB dedB def fddZd	d
 Z	de
jde
jde
jfddZ  ZS )CohereAttentionNrL   rM   cache_configrN   rO   c              
      s  t    t }|| _|j| _|j| _|j| _| j| | _| j| j | _	|j
| _| j|kr7| j| dks6J n	|| j dks@J td| j| | _| j| j	 | _| j| j	 | _| j	d | _t|dd pht|dd| _t|dd| _t| j| j	| j| jd|| d	d
| _t| j| j	 | jd|| dd
| _t| j	| j|jdd| _t|t| _d | _| jst|}|j | dkr|j| _t!| j| j	| j| j||| j| dd| _"| jrt#| j| j	f|j$d| _%t#| j| j	f|j$d| _&d S d S )Nr   r   g      model_max_lengthmax_position_embeddingsi    use_qk_normFz	.qkv_projrP   z.o_proj)max_positionrope_parametersis_neox_stylesliding_attentionz.attn)num_kv_headsr\   rN   per_layer_sliding_windowrO   r@   rA   )'r;   r<   r   rM   attention_dropoutrR   num_attention_headstotal_num_heads	num_headshead_dimnum_key_value_headstotal_num_kv_headsmaxrd   q_sizekv_sizescalinggetattrr^   r_   r   qkv_projr   o_projr   ra   
rotary_emb
isinstancer   v1sliding_windowr    layer_typesr   attnr8   layer_norm_epsq_normk_norm)r?   rM   r\   rN   rO   tp_size	layer_idxrB   r5   r6   r<      s   


	


zCohereAttention.__init__c                 C   s   |j g |jd d d| jR  }|j g |jd d d| jR  }| |\}}| |\}}|j g |jd d dR  }|j g |jd d dR  }||fS )Nr&   )viewshaperk   r|   r}   )r?   qkrY   r5   r5   r6   _apply_qk_norm   s   $$zCohereAttention._apply_qk_norm	positionsr0   returnc           
      C   s   |  |\}}|j| j| j| jgdd\}}}| jr"| ||\}}| js(| jr1| |||\}}| 	|||}| 
|\}	}|	S )Nr&   )dim)rs   splitro   rp   r_   r   rw   rx   ru   rz   rt   )
r?   r   r0   qkvrY   r   r   vattn_outputoutputr5   r5   r6   rF      s    zCohereAttention.forwardNNrL   )rG   rH   rI   r   r   r   r   rZ   r<   r   r+   TensorrF   rJ   r5   r5   rB   r6   r[      s*    S	r[   c                
       sr   e Zd Z			ddeeB dedB dedB def fddZd	e	j
d
e	j
de	j
dB dee	j
e	j
f fddZ  ZS )CohereDecoderLayerNrL   rM   r\   rN   rO   c                    sV   t    |j| _t|||| dd| _t||| dd| _t|j|jd| _	d S )Nz
.self_attn)rN   rO   z.mlprf   )
r;   r<   rR   r[   	self_attnrK   mlpr8   r{   input_layernorm)r?   rM   r\   rN   rO   rB   r5   r6   r<      s   
zCohereDecoderLayer.__init__r   r0   residualr   c                 C   s@   |}|  ||\}}| j||d}| |}|| | }||fS )N)r   r0   )r   r   r   )r?   r   r0   r   hidden_states_attentionhidden_states_mlpr5   r5   r6   rF     s   
zCohereDecoderLayer.forwardr   )rG   rH   rI   r   r   r   r   rZ   r<   r+   r   tuplerF   rJ   r5   r5   rB   r6   r      s,    r   c                       s   e Zd Zdddedef fddZdejdejfd	d
Z	ddejdB dejde	dB dejdB deje	B f
ddZ
deeeejf  dee fddZ  ZS )CohereModelrL   rO   vllm_configrO   c                   s   t    |jj|j |j| _| _j| _tjj	| _
tj fdd| dd\| _| _| _tj	jd| _tddgj	| _d S )Nc                    s   t  | dS )Nr   )r   r   r\   rM   rN   r5   r6   <lambda>*  s    z&CohereModel.__init__.<locals>.<lambda>z.layersr   rf   r0   r   )r;   r<   model_config	hf_configr\   rN   rM   
vocab_sizer   rR   embed_tokensr#   num_hidden_layersstart_layer	end_layerlayersr8   r{   normr"   make_empty_intermediate_tensors)r?   r   rO   rB   r   r6   r<     s*   


zCohereModel.__init__	input_idsr   c                 C   s
   |  |S rD   )r   r?   r   r5   r5   r6   embed_input_ids6  s   
zCohereModel.embed_input_idsNr   intermediate_tensorsinputs_embedsc           	      C   s   t  jr|d ur|}n| |}d }n|d usJ |d }|d }t| j| j| jD ]
}||||\}}q*t  js@t||dS | 	||\}}|S )Nr0   r   )r0   r   )
r
   is_first_rankr   r   r   r   r   is_last_rankr   r   )	r?   r   r   r   r   r0   r   layerrY   r5   r5   r6   rF   9  s(   

zCohereModel.forwardweightsc                 C   sF  g d}t |  }t }|D ]\}}| jd urA| j| }rA|| }t|dt}	| dkr2|n|d }|	|| || q|D ].\}
}}||vrMqC|	||
}|
dr]||vr]qCt|| rcqC|| }|j}	|	|||  n)|
dr|||vr|qt||}|d u rqt|| rq|| }t|dt}	|	|| || q|S )N))rs   q_projr   )rs   k_projr   )rs   v_projr   )rT   	gate_projr   )rT   up_projr   r:   r   z.bias)dictnamed_parameterssetrN   get_cache_scalerr   r   r   addreplaceendswithr!   r:   r   )r?   r   stacked_params_mappingparams_dictloaded_paramsnameloaded_weight
scale_nameparamr:   
param_name
shard_nameshard_idr5   r5   r6   load_weightsW  sL   






zCohereModel.load_weightsrD   )rG   rH   rI   r	   rZ   r<   r+   r   r   r   rF   r   r   r   r   rJ   r5   r5   rB   r6   r     s     
,r   c                       s   e Zd Zg dddgdZddiZddd	ed
ef fddZdej	dej	fddZ
e 		ddej	dB dej	dedB dej	dB dej	eB f
ddZdej	dej	dB fddZdeeeej	f  dee fddZ  ZS )CohereForCausalLM)r   r   r   r   r   )rs   rT   r   input_embeddingsrL   r   r   rO   c                   sb   t    |jj}|j}|| _|jsJ || _t|j|j	d| _
t|t|dd| _| jj| _d S )N)scalemodel)r   rO   )r;   r<   r   r   rN   rM   tie_word_embeddingsr   r   logit_scalelogits_processorr   r$   r   r   )r?   r   rO   rM   rN   rB   r5   r6   r<     s   


zCohereForCausalLM.__init__r   r   c                 C   s   | j |S rD   )r   r   r   r5   r5   r6   r     s   z!CohereForCausalLM.embed_input_idsNr   r   r   c                 C   s   |  ||||}|S rD   )r   )r?   r   r   r   r   r0   r5   r5   r6   rF     s   zCohereForCausalLM.forwardr0   c                 C   s<   t | jjd}|r| | jj|}|S | | jjj|}|S )Nr1   )hasattrr   r   r   
base_layer)r?   r0   is_not_loralogitsr5   r5   r6   compute_logits  s   
z CohereForCausalLM.compute_logitsr   c                 C   s   t | ddgd}||S )Nlm_headzrotary_emb.inv_freq)skip_prefixes)r   r   )r?   r   loaderr5   r5   r6   r     s   
zCohereForCausalLM.load_weights)NN)rG   rH   rI   packed_modules_mappingembedding_modulesr	   rZ   r<   r+   r   r   no_gradr   rF   r   r   r   r   r   rJ   r5   r5   rB   r6   r     s:    
,r   )A__doc__collections.abcr   	itertoolsr   r+   r   transformersr   r   vllm.compilation.decoratorsr   vllm.configr   r	   vllm.distributedr
   r   %vllm.model_executor.layers.activationr   $vllm.model_executor.layers.attentionr   !vllm.model_executor.layers.linearr   r   r   +vllm.model_executor.layers.logits_processorr   'vllm.model_executor.layers.quantizationr   +vllm.model_executor.layers.rotary_embeddingr   3vllm.model_executor.layers.vocab_parallel_embeddingr   -vllm.model_executor.model_loader.weight_utilsr   r   r   vllm.model_executor.utilsr   vllm.platformsr   vllm.sequencer   
interfacesr   r   r   utilsr   r    r!   r"   r#   r$   compilesimple_compile_backendr7   Moduler8   rK   r[   r   r   r   r5   r5   r5   r6   <module>   s>    


"m+w