o
    
۾iS(                  
   @   s   d dl mZmZ d dlmZmZ d dlZd dlmZ d dl	m
Z
mZmZ er,d dlmZ edZG dd	 d	eZG d
d deZG dd deZdeeef de
dB deeee dB ee gedB f defddZdS )    )CallableSequence)TYPE_CHECKINGTypeVarN)SamplingParams)BatchUpdateLogitsProcessorMoveDirectionality)
VllmConfigTc                   @   sl   e Zd ZdddejdefddZdefdd	Zd
ede	fddZ
dedB fddZdejdejfddZdS )MinPLogitsProcessorvllm_configr
   deviceis_pin_memoryc                 C   s~   |j j}d| _tj|ftjd|d| _| j | _t	|j
dk| _| jr1tj|ftj|d| _n| j| _| jd d | _d S )Nr   cpu)dtyper   
pin_memoryr   r   )scheduler_configmax_num_seqsmin_p_counttorchzerosfloat32min_p_cpu_tensornumpy	min_p_cpur   typeuse_double_tensoremptymin_p_devicemin_p)selfr   r   r   max_num_reqs r$   [/home/ubuntu/.local/lib/python3.10/site-packages/vllm/v1/sample/logits_processor/builtin.py__init__   s   

zMinPLogitsProcessor.__init__returnc                 C      dS )z#Min-p never impacts greedy samplingTr$   r"   r$   r$   r%   is_argmax_invariant-   s   z'MinPLogitsProcessor.is_argmax_invariantindexc                 C   s   t | j| S N)floatr   )r"   r+   r$   r$   r%   get_min_p_by_index1   s   z&MinPLogitsProcessor.get_min_p_by_indexbatch_updateNc                 C   s  |sd S d}|j D ]0\}}}}|j}| j| }||kr9d}|| j|< |r.|s.|  jd7  _q	|s9|r9|  jd8  _q	| jr|jrYd}|jD ]}| j| rXd| j|< |  jd8  _qE|jD ]:\}}	}
| j| | j|	 }}||krd}|| j|	< |
tjkr|| j|< |
tjkr|rd| j|< |r|  jd8  _q\|j	}| jr|s| jj
d |kr| jd | | _| jr| jj| jd | dd | jd d S d S d S )NFT   r   )non_blocking)addedr!   r   r   removedmovedr	   SWAPUNIDIRECTIONAL
batch_sizeshaper    r   copy_r   
unsqueeze_)r"   r/   needs_updater+   params_r!   min_p_beforeadxbdxdirectmin_p_amin_p_bsizer$   r$   r%   update_state4   sT   









z MinPLogitsProcessor.update_statelogitsc                 C   sV   | j s|S tjjj|dd}tj|ddd}|| j}||k }||t	d  |S )N)dimT)rH   keepdiminf)
r   r   nn
functionalsoftmaxamaxmul_r!   masked_fill_r-   )r"   rF   probability_valuesmax_probabilitiesadjusted_min_pinvalid_token_maskr$   r$   r%   applyd   s   zMinPLogitsProcessor.apply)__name__
__module____qualname__r   r   boolr&   r*   intr-   r.   r   rE   TensorrU   r$   r$   r$   r%   r      s    
0r   c                   @   sp   e Zd ZdejdefddZdefddZded	B fd
dZ	de
dejdejfddZdejdejfddZd	S )LogitBiasLogitsProcessorr   r   c                 C   s@   || _ || _i | _td| _| g tj| g tjf| _d S )Nr$   )	r   r   biasesr   tensorbias_tensor_device_tensorint32logits_slice)r"   r=   r   r   r$   r$   r%   r&   v   s   
z!LogitBiasLogitsProcessor.__init__r'   c                 C   r(   )ziLogit bias can rebalance token probabilities and change the
        outcome of argmax in greedy sampling.Fr$   r)   r$   r$   r%   r*         z,LogitBiasLogitsProcessor.is_argmax_invariantr/   Nc                 C   s   t | j|dd }|rLg }g }g }| j D ]\}}||gt|  ||  ||  q| |tj	| _
| |tj| |tjf| _d S d S )Nc                 S   s
   | j pd S r,   )
logit_bias)r<   r=   __r$   r$   r%   <lambda>   s   
 z7LogitBiasLogitsProcessor.update_state.<locals>.<lambda>)process_dict_updatesr]   itemsextendlenkeysvaluesr`   r   r   r_   ra   rb   )r"   r/   r;   reqstok_idsr]   reqlbr$   r$   r%   rE      s    
z%LogitBiasLogitsProcessor.update_statedatar   c                 C       t j|d|| jdj| jddS Nr   )r   r   r   T)r   r1   r   r^   r   tor   r"   rq   r   r$   r$   r%   r`      
   
z'LogitBiasLogitsProcessor._device_tensorrF   c                 C   s   | j r|| j  | j7  < |S r,   )r]   rb   r_   r"   rF   r$   r$   r%   rU      s   zLogitBiasLogitsProcessor.apply)rV   rW   rX   r   r   rY   r&   r*   r   rE   listr   r[   r`   rU   r$   r$   r$   r%   r\   u   s    r\   c                   @   s   e Zd ZdddejdefddZdefdd	Zed
e	de
e dB de
e deeee ee f dB fddZdedB fddZde
dejdejfddZdejdejfddZdS )MinTokensLogitsProcessorr   r
   r   r   c                 C   sP   || _ || _i | _| g tj| g tjf| _tjtd tj	| j d| _
d S )NrJ   r   )r   r   min_toksr`   r   ra   rb   r^   r-   r   neg_inf_tensor)r"   r   r   r   r$   r$   r%   r&      s   z!MinTokensLogitsProcessor.__init__r'   c                 C   r(   )zoBy censoring stop tokens, min-tokens can change the outcome
        of the argmax operation in greedy sampling.Fr$   r)   r$   r$   r%   r*      rc   z,MinTokensLogitsProcessor.is_argmax_invariantr<   r=   Noutput_tok_idsc                 C   s&   | j }|rt||krd S ||| jfS r,   )
min_tokensrj   all_stop_token_ids)r<   r=   r}   r~   r$   r$   r%   add_request   s   z$MinTokensLogitsProcessor.add_requestr/   c           
      C   s   t | j|| j}| jr$tdd | j D }|r$d}|D ]}| j|= q|rWg }g }| j D ]\}\}}}	||gt|	  ||	 q/| |tj	| |tj	f| _
d S d S )Nc                 s   s*    | ]\}\}}}t ||kr|V  qd S r,   )rj   ).0r+   r{   out_tok_idsr=   r$   r$   r%   	<genexpr>   s    z8MinTokensLogitsProcessor.update_state.<locals>.<genexpr>T)rg   r{   r   tuplerh   ri   rj   r`   r   ra   rb   )
r"   r/   r;   	to_remover+   rm   rn   ro   r=   stop_tok_idsr$   r$   r%   rE      s*   


z%MinTokensLogitsProcessor.update_staterq   r   c                 C   rr   rs   rt   rv   r$   r$   r%   r`      rw   z'MinTokensLogitsProcessor._device_tensorrF   c                 C   s   | j r|| j| j |S r,   )r{   
index_put_rb   r|   rx   r$   r$   r%   rU      s   zMinTokensLogitsProcessor.apply)rV   rW   rX   r   r   rY   r&   r*   staticmethodr   ry   rZ   r   r   setr   r   rE   r   r[   r`   rU   r$   r$   r$   r%   rz      s,    

rz   req_entriesr/   	new_stater'   c                 C   s   |sdS d}|j D ]!\}}}}|||| }dur || |< d}q	| |ddur*d}q	| ri|jD ]
}| |dr:d}q0|jD ]*\}	}
}| |	d}| |
d}|durY|| |
< d}|durhd}|tjkrh|| |	< q>|S )zBUtility function to update dict state for sparse LogitsProcessors.FNT)r2   popr3   r4   r	   r5   )r   r/   r   updatedr+   r<   prompt_tok_idsr}   statea_indexb_indexrA   a_entryb_entryr$   r$   r%   rg      s6   

rg   )collections.abcr   r   typingr   r   r   vllmr   )vllm.v1.sample.logits_processor.interfacer   r   r	   vllm.configr
   r   r   r\   rz   dictrZ   ry   rY   rg   r$   r$   r$   r%   <module>   s(   `1J
 