o
    .i
                     @   sv   d dl Z d dlmZ d dlmZ d dlmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZ eeZG dd	 d	ZdS )
    N)
VllmConfig)set_forward_context)init_logger)	get_model)is_mixture_of_experts)SamplingMetadatac                   @   s   e Zd ZdZdedejfddZ	ddejde	d	e
eejf ee
eejf  B dB d
ejfddZdejd
dfddZe ded
dfddZdS )MedusaProposerz>
    Medusa proposer class for generating token sequences
    vllm_configdevicec                 C   s2   || _ || _|jj| _|jj | _|j	j
| _
d S N)r	   r
   scheduler_configmax_num_batched_tokensmax_num_tokensspeculative_configdraft_model_configget_hidden_sizehidden_sizemodel_configdtype)selfr	   r
    r   W/home/ubuntu/veenaModal/venv/lib/python3.10/site-packages/vllm/v1/spec_decode/medusa.py__init__   s   

zMedusaProposer.__init__Ntarget_hidden_statessampling_metadataslot_mappingsreturnc                 C   s2   |  |}| j |}tjdd |D dd}|S )Nc                 S   s   g | ]}|j d dqS )dim)argmax).0logitr   r   r   
<listcomp>3   s    z*MedusaProposer.propose.<locals>.<listcomp>   r   )modelcompute_logitstorchstack)r   r   r   r   blockslogitsdraft_tokensr   r   r   propose%   s   
	zMedusaProposer.proposetarget_modelc                 C   sn   ddl m} |d t| j| jjjd| _W d    n1 s w   Y  t| jr3| jjj	r5J dd S d S )Nr   )set_model_tagmedusa_head)r	   r   z EPLB for Medusa is not supported)
vllm.compilation.backendsr.   r   r	   r   r   r%   r   parallel_configenable_eplb)r   r-   r.   r   r   r   
load_model7   s   

zMedusaProposer.load_model
num_tokensc                 C   s\   t j| j| jf| j| jd}td | j|d | | W d    d S 1 s'w   Y  d S )N)r   r
   )r4   )	r'   zerosr   r   r   r
   r   r	   r%   )r   r4   hidden_statesr   r   r   	dummy_runD   s   
"zMedusaProposer.dummy_runr   )__name__
__module____qualname____doc__r   r'   r
   r   Tensorr   dictstrlistr,   nnModuler3   inference_modeintr7   r   r   r   r   r      s0    

r   )r'   torch.nnr@   vllm.configr   vllm.forward_contextr   vllm.loggerr    vllm.model_executor.model_loaderr   %vllm.model_executor.models.interfacesr   vllm.v1.sample.metadatar   r8   loggerr   r   r   r   r   <module>   s   