o
    پiO                     @   s(   d dl Z d dlmZ G dd deZdS )    N)_BatchedPenalizerc                   @   sj   e Zd ZdZdefddZdd Zdejfdd	Z	d
ejfddZ
dejfddZdddZdddZdS )BatchedMinNewTokensPenalizerzV
    Min new tokens penalizer penalizes tokens based on the length of the output.
    returnc                 C   s   t dd | j D S )Nc                 s   s    | ]	}|j jd kV  qdS )r   Nsampling_paramsmin_new_tokens.0req r   a/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/sampling/penaltylib/min_new_tokens.py	<genexpr>   s    
z<BatchedMinNewTokensPenalizer._is_required.<locals>.<genexpr>)anyorchestratorreqsselfr   r   r   _is_required   s   z)BatchedMinNewTokensPenalizer._is_requiredc              	      s   t jdd  j D t j jjdd _t jj	j
j fdd j D d jjd}t jt j  jjd ft j jjdjd|t j|t jtd	 jjd
dd d d  jjf  _t jt j dft j jjd _d S )Nc                 S   s   g | ]}|j jqS r   r   r   r   r   r   
<listcomp>   s    z9BatchedMinNewTokensPenalizer._prepare.<locals>.<listcomp>datadtypedevice   c                    sH   g | ] }t jt|jjpt |jjpt B |jjhB t j	 j
jd qS )r   )torchtensorlistr   stop_token_idsset	tokenizeradditional_stop_token_idseos_token_idint64r   r   r   r   r   r   r      s    T)	sequencesbatch_firstpadding_value)sizer   r   z-inf)inputr   
fill_valuer   )dimindexsrc)r   r   r   r   int32r   
unsqueeze_r   nnutilsrnnpad_sequence
vocab_sizezeroslenfloat32scatter_add_	full_likefloatstop_token_penaltieslen_output_tokens)r   padded_stop_token_idsr   r   r   _prepare   sL   

z%BatchedMinNewTokensPenalizer._prepare
output_idsc                 C   s   |  j d7  _ d S )Nr   )r:   )r   r=   r   r   r   _cumulate_output_tokensB   s   z4BatchedMinNewTokensPenalizer._cumulate_output_tokenslogitsc                 C   s,   | j | jk |}||  | j| 7  < d S N)r:   r   	expand_asr9   )r   r?   maskr   r   r   _applyE   s   z#BatchedMinNewTokensPenalizer._applykeep_indicesc                 C   s(   | j | | _ | j| | _| j| | _d S r@   r   r9   r:   )r   rD   r   r   r   _filterI   s   z$BatchedMinNewTokensPenalizer._filtertheirc                 C   sL   t j| j|jgdd| _t j| j|jgdd| _t j| j|jgdd| _d S )Nr   )r)   )r   catr   r9   r:   )r   rG   r   r   r   _mergeN   s   z#BatchedMinNewTokensPenalizer._mergeNc                 C   s"   dD ]}t | |rt| | qd S )NrE   )hasattrdelattr)r   namer   r   r   	_teardownZ   s
   

z&BatchedMinNewTokensPenalizer._teardown)rG   r   )r   N)__name__
__module____qualname____doc__boolr   r<   r   Tensorr>   rC   rF   rI   rM   r   r   r   r   r      s    2
r   )r   +sglang.srt.sampling.penaltylib.orchestratorr   r   r   r   r   r   <module>   s    