o
    
۾i)                     @   s   d dl mZ d dlmZmZ d dlmZ eeZde	e
ef de	e
ef defddZd	e	de	fd
dZd	e	de	fddZd	e	de	fddZd	e	de	fddZd	e	de	fddZd	e	de	fddZd	e	de	fddZdS )    )Any)PretrainedConfigWhisperConfig)init_loggerconfig_dictdefaultsreturnc           
         s  t   t  t drt  t d}|o& d dp$ddk} ddkr4dg d< n@|rg|rgt  d	 d< d
g d< d v sMJ dddg}t fdd|D sfJ dd| n|rodg d< ndg d< t drt  t drddg}t fdd|D sJ dd|  dpi dp d}t dpi dpi d}|r|rJ d|rt	  |rt
  | D ]
\}} || qt }	td|	 |	S )Nquantizationmoenum_shared_expertsr   
model_typemambaMamba2ForCausalLMarchitecturesdeepseek_v3MistralLarge3ForCausalLMllama_4_scalingz+MistralLarge3 expect llama4 scaling config. original_max_position_embeddingsbetac                       g | ]}| d  v qS r    .0keyr   r   [/home/ubuntu/.local/lib/python3.10/site-packages/vllm/transformers_utils/configs/mistral.py
<listcomp>&       
z%adapt_config_dict.<locals>.<listcomp>z/llama_4_scaling config should define the keys: ,MixtralForCausalLMMistralForCausalLMyarnc                    r   r   r   r   r   r   r   r   9   r   
multimodalvision_encoder_argsvision_encoderwhisper_model_argsencoder_argsz'Vision and audio are mutually exclusivezInitialized config %s)_remap_general_mistral_args_remap_mistral_sliding_windowboolget _remap_mistral_quantization_args_remap_moe_argsalljoin_remap_mistral_yarn_args_remap_mistral_vision_args_remap_mistral_audio_argsitems
setdefaultr   	from_dictloggerdebug)
r   r   is_moeis_mistral_large_3llama_4_scaling_config_keys	is_visionis_audiokvconfigr   r   r   adapt_config_dict   s|   


	



r@   r?   c                 C   sV   |  dr| d}n| d}|  d}ddgt| t|d} |r)|| d< | S )Nr#   r%   quantization_configpixtralPixtralForConditionalGeneration)r   r   text_configvision_config)r+   popr   r5   )r?   rE   quant_configr   r   r   r1   \   s   


r1   c                 C   s   dddddd}|  dpi }ddd	| d
< | dd  }r$|| d
 d< | D ]\}}||v r9||| d
 |< q(t|dksGJ d| | S )Nfactorr   	beta_fast	beta_slowapply_yarn_scaling)rH   r   r   alphaapply_scaler"      )	rope_typemscale_all_dimrope_parameters
rope_thetar   zUnparsed yarn config: )r+   rF   r3   len)r?   yarn_config_mapyarn_configrR   old_namenew_namer   r   r   r0   n   s$   
r0   c                 C   s   ddddddd}dd	d
d|  ddfdd}| D ]\}}|| v r*| || |< q| D ]\}\}}| ||| |< q/| S )Nhidden_sizerms_norm_epsnum_key_value_headsnum_hidden_layersnum_attention_headsintermediate_size)dimnorm_eps
n_kv_headsn_layersn_heads
hidden_dim)r   transformer)
activationsilu)tied_embeddingsFmax_seq_lenmax_position_embeddings  )ri   rj   )r   
hidden_acttie_word_embeddingsrh   ri   r+   r3   rF   )r?   config_mappingtop_level_mapping_with_defaultr   new_keydefault_valuer   r   r   r(      s(   
r(   c                 C   s   |  d }rZt|tr<| d t| }|| }dd |D | d< tt|d h dks0J |ttd |d | d< | S t|trS|  dd u rSdg| d  | d< | S td| | S )	Nsliding_windowr[   c                 S   s   g | ]
}|d u r
dndqS )Nfull_attentionsliding_attentionr   )r   
layer_typer   r   r   r      s    z1_remap_mistral_sliding_window.<locals>.<listcomp>layer_typesrN   rt   z!Unsupported sliding_window type: )	r+   
isinstancelistrS   setnextfilterint
ValueError)r?   rr   pattern_repeatsrv   r   r   r   r)      s   

r)   c                 C   st   |  dr8| di }| ddkr0| d}|dv sJ d|dk}d|r(d	nd
d| d< | S td| d| S )Nr	   qformat_weightfp8_e4m3qscheme_act)	NO_SCALESTENSORNzAOnly NO_SCALES and TENSOR (default) are supported for qscheme_actr   fp8dynamicstatic)quant_methodactivation_schemerA   zFound unknown quantization='z' in config)r+   rF   r}   )r?   r	   r   
is_dynamicr   r   r   r,      s   




r,   c                 C   sj  | d  d}|d }|d }|d }|dr#|}||d  | d< nd	}|dr,d
nd}| d}d|gt| td(i d|d d d|d d d|d d d|d d d|d|d d|d d|d d|d d|d d|d d|d ddd |ddd!|d!d d"|d#|d#d$d%|d d%d&|| d&  d'} |r|| d< | S ))Nr#   r&   r'   downsample_argsdownsample_factorcausalr^   projection_sizerN   VoxtralRealtimeGenerationVoxtralForConditionalGenerationrA   voxtralnum_mel_binsaudio_encoding_argswindow_sizesampling_rate
hop_lengthd_modelencoder_layersra   encoder_ffn_dimrc   encoder_attention_headsrb   encoder_head_dimhead_dim
vocab_sizemax_source_positionsis_encoder_decoderF	is_causalrr   block_pool_size	pos_embed
sinusoidalglobal_log_mel_maxri   )r   r   rD   audio_configr   )rF   r+   r   r5   r   )r?   whisper_argsr'   r   r   r   architecturerG   r   r   r   r2      sz   

	
r2   c              
   C   sl   ddddddddd	d
	}|  di }| D ]\}}||v r'||}|| |< qd | d< d| d< d| d< | S )Nmoe_layer_freqfirst_k_dense_replacenum_experts_per_tokn_routed_expertsmoe_intermediate_sizerouted_scaling_factorn_shared_expertsn_group
topk_group)	route_every_nr   r   num_expertsexpert_hidden_dimrouted_scaler   num_expert_groupsnum_expert_groups_per_tokr
   topk_methodTnorm_topk_probsoftmaxscoring_funcrm   )r?   moe_config_map
moe_configrV   rW   valuer   r   r   r-     s(   
r-   N)typingr   transformersr   r   vllm.loggerr   __name__r6   dictstrr@   r1   r0   r(   r)   r,   r2   r-   r   r   r   r   <module>   s$   


P8