o
    
۾i9                     @   s   d dl Z d dlZd dlmZ d dlmZmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZmZ d dlmZ erkd dlZd dlmZ d dlZd dlmZ d dl m!Z! nede" dZee#Z$G dd dZ%dS )    N)Iterable)FutureThreadPoolExecutor)TYPE_CHECKING)
VllmConfig)init_logger)ReasoningParserManager)cached_tokenizer_from_config)
LazyLoader)GuidanceBackend)StructuredOutputBackendStructuredOutputGrammar)XgrammarBackend)ReasoningParser)Requesttorchc                	   @   s   e Zd ZdZdefddZdd	d
ZdddefddZde	e
eeef  ddfddZdee
eeef  defddZdeedf dee deeee f ddfddZdddefddZdddefddZdddZdS ) StructuredOutputManagerz4Engine-level manager for structured output requests.vllm_configc                 C   s  d | _ d | _|| _|jjdk| _d | _tjdtj	d| _
| jjj}d| _| j|k r>d| _tdtt d d}t|d	| _| jjjstdt d d }t|d	| _t| jjd
| _| jjj}|rnt|dkrnt| | jjj}|rt |}|| jd| _| jjj!| _!d S )Nexternal_launcher)dtype               )max_workers)model_config   )	tokenizer)"backendreasonerr   parallel_configdistributed_executor_backend_use_async_grammar_compilation_grammar_bitmaskr   tensorint32
_full_maskscheduler_configmax_num_seqsfill_bitmask_parallel_threshold fill_bitmask_parallel_batch_sizemaxminmultiprocessing	cpu_countr   executor_for_fillmaskr   skip_tokenizer_initexecutorr	   r   structured_outputs_configreasoning_parser_pluginlenr   import_reasoning_parserreasoning_parserget_reasoning_parserenable_in_reasoning)selfr   max_batch_sizer   r5   r8   reasoner_cls r>   V/home/ubuntu/.local/lib/python3.10/site-packages/vllm/v1/structured_output/__init__.py__init__&   sD   



z StructuredOutputManager.__init__requestr   returnNc                 C   s&  |j d u rd S tr|jd ur|jjd usJ | jd u r||jd us"J |jjj}| jj }|dkr<t	| j| j
|d| _n@|dkrKt| j| j
|d| _n1|dkr`ddlm} || j| j
|d| _n|dkruddlm} || j| j
|d| _ntd	| | jr| j| j|}n| |}||j _d S )
Nxgrammar)r   
vocab_sizeguidanceoutlinesr   )OutlinesBackendzlm-format-enforcer)LMFormatEnforcerBackendz'Unsupported structured output backend: )structured_output_requestr   sampling_paramsstructured_outputsr    _backendr   r   get_vocab_sizer   r   r   *vllm.v1.structured_output.backend_outlinesrG   4vllm.v1.structured_output.backend_lm_format_enforcerrH   
ValueErrorr$   r3   submit_create_grammargrammar)r;   rA   r    rD   rG   rH   rS   r>   r>   r?   grammar_init`   sP   








z$StructuredOutputManager.grammar_initc                 C   s,   |j j}|\}}| jd usJ | j||S N)rI   structured_output_keyr    compile_grammar)r;   rA   keyrequest_typegrammar_specr>   r>   r?   rR      s   z'StructuredOutputManager._create_grammarbatchc                 C   sP   | j d usJ |D ]\}}}|r| s|| j | q	| j | | j q	d S rU   )r%   is_terminatedfill_bitmaskfill_r(   )r;   r[   rS   indexapply_bitmaskr>   r>   r?   _fill_bitmasks   s   z&StructuredOutputManager._fill_bitmasksc                 C   s   | j | j|S rU   )r1   rQ   ra   )r;   r[   r>   r>   r?   _async_submit_fill_bitmask   s   z2StructuredOutputManager._async_submit_fill_bitmaskrequestsstructured_output_request_idsscheduled_spec_decode_tokensznpt.NDArray[np.int32] | Nonec                 C   s"  |sd S d}| j jd ur| j jj}| jd u r-| jd usJ | j jj}| j|d|  | _d}t|| j	kr|dkrg }g }|D ]=}	||	 }
|
j
}trX|d usQJ |jd usXJ |j}| |
}||||f t|| jkry|| | g }|d7 }q@|r|| | |D ]}|  qnj|D ]g}	||	 }
|
j
}tr|d usJ |jd usJ |j}| |
}d}||	d}t|dD ]/}| |||ff |dkrd}|r| s||	|g}|sJ ||	|f|d7 }|d7 }q|dkr|| q| j}||jd k r|d | }| S )Nr   r   r>   )r   r   F)r   speculative_confignum_speculative_tokensr%   r    r)   r*   allocate_token_bitmaskr6   r+   rI   r   rS   should_fill_bitmaskappendr,   rb   resultget	itertoolschainra   r\   accept_tokensrollbackshapenumpy)r;   rc   rd   re   max_num_spec_tokensr<   cumulative_indexpromisesr[   req_idrA   rI   rS   r`   promisestate_advancements
req_tokenstokenacceptedbitmask_tensorr>   r>   r?   grammar_bitmask   sz   








z'StructuredOutputManager.grammar_bitmaskc                 C   sP   | j d ur&| jr
dS |jd usJ |jjd u r"| j |jpg |j_|jjS dS )NT)r!   r:   rI   reasoning_endedis_reasoning_endprompt_token_ids)r;   rA   r>   r>   r?   ri     s   
z+StructuredOutputManager.should_fill_bitmaskc                 C   s   |j sdS tr|jd usJ |jjd usJ | jd u rdS | jr"dS |j}|jr*dS |j|j }|j	}| j
|||d  rAd|_dS )NFT)use_structured_outputr   rI   rS   r!   r:   r~   num_computed_tokensnum_output_placeholdersall_token_idsis_reasoning_end_streaming)r;   rA   structured_req
delta_fromr   r>   r>   r?   should_advance.  s&   
z&StructuredOutputManager.should_advancec                 C   s   | j d ur| j   d S d S rU   )r    destroy)r;   r>   r>   r?   clear_backendP  s   
z%StructuredOutputManager.clear_backend)rA   r   rB   N)rB   N)__name__
__module____qualname____doc__r   r@   rT   r   rR   r   tupleintboolra   listr   rb   dictstrr}   ri   r   r   r>   r>   r>   r?   r   #   s6    
::



b"r   )&rm   r/   collections.abcr   concurrent.futuresr   r   typingr   vllm.configr   vllm.loggerr   vllm.reasoningr   vllm.tokenizersr	   vllm.utils.import_utilsr
   *vllm.v1.structured_output.backend_guidancer   'vllm.v1.structured_output.backend_typesr   r   *vllm.v1.structured_output.backend_xgrammarr   rr   npnumpy.typingnptr   r   vllm.v1.requestr   globalsr   loggerr   r>   r>   r>   r?   <module>   s,   