o
    -i}%                     @   s   d dl mZ d dlmZmZ d dlmZ eeZde	e
ef de	e
ef defddZd	e	de	fd
dZd	e	de	fddZd	e	de	fddZd	e	de	fddZd	e	de	fddZd	e	de	fddZdS )    )Any)PretrainedConfigWhisperConfig)init_loggerconfig_dictdefaultsreturnc           
         s  t   t drt  t d}|o" d dp ddk} ddkr0dg d< n@|rc|rct  d	 d< d
g d< d v sIJ dddg}t fdd|D sbJ dd| n|rkdg d< ndg d< t dr{t  t drddg}t fdd|D sJ dd|  dpi dp d}t dpi dpi d}|r|rJ d|rt  |rt	  |
 D ]
\}} || qt }	td|	 |	S )Nquantizationmoenum_shared_expertsr   
model_typemambaMamba2ForCausalLMarchitecturesdeepseek_v3MistralLarge3ForCausalLMllama_4_scalingz+MistralLarge3 expect llama4 scaling config. original_max_position_embeddingsbetac                       g | ]}| d  v qS r    .0keyr   r   d/home/ubuntu/veenaModal/venv/lib/python3.10/site-packages/vllm/transformers_utils/configs/mistral.py
<listcomp>%       
z%adapt_config_dict.<locals>.<listcomp>z/llama_4_scaling config should define the keys: ,MixtralForCausalLMMistralForCausalLMyarnc                    r   r   r   r   r   r   r   r   8   r   
multimodalvision_encoder_argsvision_encoderwhisper_model_argsencoder_argsz'Vision and audio are mutually exclusivezInitialized config %s)_remap_general_mistral_argsboolget _remap_mistral_quantization_args_remap_moe_argsalljoin_remap_mistral_yarn_args_remap_mistral_vision_args_remap_mistral_audio_argsitems
setdefaultr   	from_dictloggerdebug)
r   r   is_moeis_mistral_large_3llama_4_scaling_config_keys	is_visionis_audiokvconfigr   r   r   adapt_config_dict   sz   


	



r?   r>   c                 C   sV   |  dr| d}n| d}|  d}ddgt| t|d} |r)|| d< | S )Nr#   r%   quantization_configpixtralPixtralForConditionalGeneration)r   r   text_configvision_config)r*   popr   r4   )r>   rD   quant_configr   r   r   r0   [   s   


r0   c                 C   s   dddddd}|  dpi }ddd	| d
< | dd  }r$|| d
 d< | D ]\}}||v r9||| d
 |< q(t|dksGJ d| | S )Nfactorr   	beta_fast	beta_slowapply_yarn_scaling)rG   r   r   alphaapply_scaler"      )	rope_typemscale_all_dimrope_parameters
rope_thetar   zUnparsed yarn config: )r*   rE   r2   len)r>   yarn_config_mapyarn_configrQ   old_namenew_namer   r   r   r/   m   s$   
r/   c                 C   s   ddddddd}dd	d
d|  ddfdd}| D ]\}}|| v r*| || |< q| D ]\}\}}| ||| |< q/| S )Nhidden_sizerms_norm_epsnum_key_value_headsnum_hidden_layersnum_attention_headsintermediate_size)dimnorm_eps
n_kv_headsn_layersn_heads
hidden_dim)r   transformer)
activationsilu)tied_embeddingsFmax_seq_lenmax_position_embeddings  )rh   ri   )r   
hidden_acttie_word_embeddingsrg   rh   r*   r2   rE   )r>   config_mappingtop_level_mapping_with_defaultr   new_keydefault_valuer   r   r   r(      s(   
r(   c                 C   st   |  dr8| di }| ddkr0| d}|dv sJ d|dk}d|r(d	nd
d| d< | S td| d| S )Nr	   qformat_weightfp8_e4m3qscheme_act)	NO_SCALESTENSORNzAOnly NO_SCALES and TENSOR (default) are supported for qscheme_actrt   fp8dynamicstatic)quant_methodactivation_schemer@   zFound unknown quantization='z' in config)r*   rE   
ValueError)r>   r	   rs   
is_dynamicr   r   r   r+      s   




r+   c           
      C   s  | d  d}|d }|d }|d }|dr#|}||d  | d< nd	}|d
d }|d u r2d }n| r;t|}ntd||drIdnd}| d}	d|gt| td)i d|d d d|d d d|d d d|d d d|d|d d|d d|d d|d d|d d|d d|d d d!d"|dd!d#|d$|d%|d%d&d'|| d'  d(} |	r|	| d< | S )*Nr#   r&   r'   downsample_argsdownsample_factorcausalr]   projection_sizerM   ragged_attentionz#Unsupported: _maybe_sliding_window=VoxtralStreamingGenerationVoxtralForConditionalGenerationr@   voxtralnum_mel_binsaudio_encoding_argswindow_sizesampling_rate
hop_lengthd_modelencoder_layersr`   encoder_ffn_dimrb   encoder_attention_headsra   encoder_head_dimhead_dim
vocab_sizemax_source_positionsis_encoder_decoderF	is_causalsliding_windowblock_pool_size	pos_embed
sinusoidalrh   )r   r   rC   audio_configr   )rE   r*   isdigitintNotImplementedErrorr   r4   r   )
r>   whisper_argsr'   r}   r~   r   _maybe_sliding_windowr   architecturerF   r   r   r   r1      s~   


	
r1   c              
   C   sl   ddddddddd	d
	}|  di }| D ]\}}||v r'||}|| |< qd | d< d| d< d| d< | S )Nmoe_layer_freqfirst_k_dense_replacenum_experts_per_tokn_routed_expertsmoe_intermediate_sizerouted_scaling_factorn_shared_expertsn_group
topk_group)	route_every_nr   r   num_expertsexpert_hidden_dimrouted_scaler   num_expert_groupsnum_expert_groups_per_tokr
   topk_methodTnorm_topk_probsoftmaxscoring_funcrl   )r>   moe_config_map
moe_configrU   rV   valuer   r   r   r,      s(   
r,   N)typingr   transformersr   r   vllm.loggerr   __name__r5   dictstrr?   r0   r/   r(   r+   r1   r,   r   r   r   r   <module>   s"   


O=