o
    پi                     @   s   d Z ddlZddlmZmZmZmZmZ ddlZddl	m
Z
 ddlmZmZ ddlmZ ddlmZmZ ddlmZ ddlmZmZ dd	lmZmZmZ dd
lmZ ddlmZ ddl m!Z! ddl"m#Z# e$e%Z&G dd de
j'Z(e(Z)dS )zEInference-only Qwen2-Audio model compatible with HuggingFace weights.    N)AnyIterableListOptionalTuple)Qwen2AudioEncoderConfigQwen2Config)Qwen2AudioConfig)Qwen2AudioEncoderQwen2AudioMultiModalProjector)QuantizationConfig)/MultiModalityDataPaddingPatternMultimodalTokensgeneral_mm_embed_routine)ModalityMultimodalDataItemMultimodalInputs)ForwardBatch)default_weight_loader)Qwen2ForCausalLM)
add_prefixc                
       s   e Zd Zg dZddddddZ			dd
edee deddf fddZ	de
e defddZde
e dejfddZdejdejdededejf
ddZdeeeejf  fddZ  ZS ) "Qwen2AudioForConditionalGeneration)z.gate_proj.z.down_proj.z	.up_proj.z.q_proj.z.k_proj.z.v_proj.z.o_proj.)qkv_projr   )r      )r      )gate_up_projr   )r   r   )q_projk_projv_proj	gate_projup_projN configquant_configprefixreturnc                    s   t    || _t| jdd d u rt| jj| j_t| jdd d u r*t| jj| j_t	|j| _
t|| _t|j|td|d| _t | _d S )Naudio_configtext_configmodel)r#   )super__init__r!   getattrr   _name_or_pathr%   r   r&   r
   audio_towerr   multi_modal_projectorr   r   language_modelr   pattern)selfr!   r"   r#   	__class__ Q/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/models/qwen2_audio.pyr)   L   s    

z+Qwen2AudioForConditionalGeneration.__init__	input_ids	mm_inputsc                 C   s   | j ||S )N)r/   pad_input_tokens)r0   r5   r6   r3   r3   r4   pad_input_idsg   s   z0Qwen2AudioForConditionalGeneration.pad_input_idsitemsc                 C   s   t jdd |D dd| jj}| |j}| |}t dd |D }g }t||D ]\}}||d |	   q-t j|ddS )Nc                 S      g | ]}|j qS r3   )feature.0itemr3   r3   r4   
<listcomp>l       zHQwen2AudioForConditionalGeneration.get_audio_feature.<locals>.<listcomp>r   )dimc                 S   r:   r3   )audio_feature_lensr<   r3   r3   r4   r?   s   r@   )
torchcattyper,   dtypelast_hidden_stater-   zipappendr>   )r0   r9   input_featuresaudio_embedsrB   
new_embedsidr3   r3   r4   get_audio_featurej   s   
z4Qwen2AudioForConditionalGeneration.get_audio_feature	positionsforward_batchkwargsc                 K   s    t ||| jtj| ji|d}|S )N)r5   rQ   r.   data_embedding_funcsrP   )r   r.   r   AUDIOrO   )r0   r5   rP   rQ   rR   hidden_statesr3   r3   r4   forwardz   s   
z*Qwen2AudioForConditionalGeneration.forwardweightsc              	   C   s  g d}t | jdd}|D ]v\}}d|v rqd|v sd|v r q| jjjr*d|v r*q|D ],\}}}||vs9d|v r:q,|||}	|	d	rJ|	|vrJq,||	 }
|
j}||
||  n+z|d	re||vreW q|| }
W n tyx   t	|
   w t|
d
t}||
| qd S )N))r   r   q)r   r   k)r   r   v)r   r   r   )r   r   r   F)remove_duplicatezrotary_emb.inv_freqzrotary_emb.cos_cachedzrotary_emb.sin_cachedzlm_head.weightr,   z.biasweight_loader)dictnamed_parametersr!   r&   tie_word_embeddingsreplaceendswithr\   KeyErrorprintkeysr*   r   )r0   rW   stacked_params_mappingparams_dictnameloaded_weight
param_nameweight_nameshard_idname_tmpparamr\   r3   r3   r4   load_weights   s>   
z/Qwen2AudioForConditionalGeneration.load_weights)Nr    )__name__
__module____qualname__#default_bitsandbytes_target_modules#bitsandbytes_stacked_params_mappingr	   r   r   strr)   r   intr   r8   r   rC   TensorrO   r   r   rV   r   r   rn   __classcell__r3   r3   r1   r4   r   8   sB    
$r   )*__doc__loggingtypingr   r   r   r   r   rC   torch.nnnntransformersr   r   9transformers.models.qwen2_audio.configuration_qwen2_audior	   4transformers.models.qwen2_audio.modeling_qwen2_audior
   r   *sglang.srt.layers.quantization.base_configr   sglang.srt.managers.mm_utilsr   r   "sglang.srt.managers.schedule_batchr   r   r   ,sglang.srt.model_executor.forward_batch_infor   $sglang.srt.model_loader.weight_utilsr   sglang.srt.models.qwen2r   sglang.srt.utilsr   	getLoggerro   loggerModuler   
EntryClassr3   r3   r3   r4   <module>   s&   
 