o
    
۾i
                     @   sv   d dl Z d dlmZ d dlmZ d dlmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZ eeZG dd	 d	ZdS )
    N)
VllmConfig)set_forward_context)init_logger)	get_model)is_mixture_of_experts)SamplingMetadatac                   @   s   e Zd ZdZdedejfddZ	ddejde	d	e
eejf ee
eejf  B dB d
ejfddZdejd
dfddZe ded
dfddZdS )MedusaProposerz>
    Medusa proposer class for generating token sequences
    vllm_configdevicec                 C   sL   || _ |jd usJ d|j| _|| _|jj| _| jj | _	|j
j| _d S )NzSpeculative config must be set)r	   speculative_configspec_configr
   scheduler_configmax_num_batched_tokensmax_num_tokensdraft_model_configget_hidden_sizehidden_sizemodel_configdtype)selfr	   r
    r   N/home/ubuntu/.local/lib/python3.10/site-packages/vllm/v1/spec_decode/medusa.py__init__   s   
zMedusaProposer.__init__Ntarget_hidden_statessampling_metadataslot_mappingsreturnc                 C   s2   |  |}| j |}tjdd |D dd}|S )Nc                 S   s   g | ]}|j d dqS )dim)argmax).0logitr   r   r   
<listcomp>5   s    z*MedusaProposer.propose.<locals>.<listcomp>   r   )modelcompute_logitstorchstack)r   r   r   r   blockslogitsdraft_tokensr   r   r   propose'   s   
	zMedusaProposer.proposetarget_modelc                 C   sl   ddl m} |d t| j| jjd| _W d    n1 sw   Y  t| jr2| jjj	r4J dd S d S )Nr   )set_model_tagmedusa_head)r	   r   z EPLB for Medusa is not supported)
vllm.compilation.backendsr.   r   r	   r   r   r%   r   parallel_configenable_eplb)r   r-   r.   r   r   r   
load_model9   s   

zMedusaProposer.load_model
num_tokensc                 C   s\   t j| j| jf| j| jd}td | j|d | | W d    d S 1 s'w   Y  d S )N)r   r
   )r4   )	r'   zerosr   r   r   r
   r   r	   r%   )r   r4   hidden_statesr   r   r   	dummy_runF   s   
"zMedusaProposer.dummy_run)N)__name__
__module____qualname____doc__r   r'   r
   r   Tensorr   dictstrlistr,   nnModuler3   inference_modeintr7   r   r   r   r   r      s0    

r   )r'   torch.nnr@   vllm.configr   vllm.forward_contextr   vllm.loggerr    vllm.model_executor.model_loaderr   %vllm.model_executor.models.interfacesr   vllm.v1.sample.metadatar   r8   loggerr   r   r   r   r   <module>   s   