o
    پi&@                  
   @   s,  d dl Z d dlZd dlZd dlZd dlmZmZmZ d dlZ	d dl
mZ d dlmZmZ d dlmZmZ d dlmZ d dlmZ z
d dlZd dlZW n ey` Z z
e ZZW Y dZ[ndZ[ww eeZdd	 Zd
gZe jG dd dZ G dd deZ!	ddee"ee" f fddZ#	dddZ$dS )    N)ListOptionalUnion)BaseBackend)ChatTemplateget_chat_template_by_model_path)ChoicesDecisionChoicesSamplingMethod)StreamExecutor)SglSamplingParamsc                 C   s   g }| j }| D ]$\}}| |g}tdd |D s |dv r-|| t|dkr- nq	| j}dd |dd D }d	||d
 < |S )z#Get logit bias for integer numbers.c                 S   s   g | ]}|  qS  )isdigit.0cr   r   N/home/ubuntu/.local/lib/python3.10/site-packages/sglang/lang/backend/openai.py
<listcomp>        z)create_logit_bias_int.<locals>.<listcomp>) i,  c                 S   s   i | ]}|d qS )d   r   )r   tr   r   r   
<dictcomp>%       z)create_logit_bias_int.<locals>.<dictcomp>Ni+  r   z<|endoftext|>)_mergeable_ranksitemsdecodeallappendlen_special_tokens)	tokenizerint_token_idstokenstokentoken_idsspecial_tokensmaskr   r   r   create_logit_bias_int   s   
r(   zgpt-3.5-turbo-instructc                   @   s&   e Zd ZU eed< eed< dd ZdS )
TokenUsageprompt_tokenscompletion_tokensc                 C   s   d | _ | _d S )Nr   )r*   r+   selfr   r   r   reset4   s   zTokenUsage.resetN)__name__
__module____qualname__int__annotations__r.   r   r   r   r   r)   /   s   
 r)   c                
       s   e Zd Z			d"dedee dee def fddZd	d
 Zde	de
defddZ	d#dede	defddZdefddZdd ZdefddZdede	fddZdedee dededef
d d!Z  ZS )$OpenAINF
model_nameis_chat_modelchat_templateis_azurec                    s   t    tttrt|rtj|i || _n	tj|i || _|| _zt	
|| _W n ty;   t	d| _Y nw t| j| _|pGt|| _|d urQ|| _n|tv rYd| _nd| _| jjd d | _tdd| _i | _g | _d| _d S )Ncl100k_baseFT	assistantr      )super__init__
isinstanceopenai	ExceptionAzureOpenAIclientr4   r5   tiktokenencoding_for_modelr    KeyErrorget_encodingr(   logit_bias_intr   r7   r6   INSTRUCT_MODEL_NAMESrole_prefix_and_suffixchat_prefixr)   token_usagespec_kwargsspec_formatspec_max_num_tries)r-   r5   r6   r7   r8   argskwargs	__class__r   r   r=   9   s4   
	

zOpenAI.__init__c                 C   s   | j S N)r7   r,   r   r   r   get_chat_templatej   s   zOpenAI.get_chat_templatesampling_paramsnum_api_spec_tokensspec_var_namec                 C   s   d| j vr|| j d< n	| j d |ksJ | }| D ])\}}|dv r%q|dv r/td q|| j vr:|| j |< q|| j | ksEJ dq| jd|d |d di fS )	N
max_tokens)stop)rX   zLThe parameter max_tokens will be overwritten by speculated number of tokens.zNsampling parameters should be consistent if turn on api speculative execution. rY   textrY   name)rL   to_openai_kwargsr   warningswarnrM   r   )r-   rU   rV   rW   paramskeyvaluer   r   r   _prepare_spec_executionm   s*   

zOpenAI._prepare_spec_executionr%   c              
   C   s  |j d u r^| jr$|jd u r|j| jstd|j}n| ||j|S |j}|	 }| j
ds<| j
ds<d| j
v rC|dd  n|dd  td| j| j| j| j
|d|}|i fS |j tddfv r| jrmJ d	|	 }|d
 td| j| j| j| j
|jd dd|}t|trdd |D }|i fS d| d }|i fS |j tdfv r| jrJ d	|	 }|d
 td| j| j| j| j
|j| jdgd|}|i fS td|j  )NzThis use case is not supported if api speculative execution is off. For OpenAI chat models, sgl.gen must be right after sgl.assistant. Example of adding api speculative execution: @function(num_api_spec_tokens=128).o1o3rX   max_completion_tokensrB   rK   is_chatmodelpromptstrstringz,constrained type not supported on chat modelrY   ")rB   rK   ri   rj   rk   rY   c                 S   s   g | ]}d | d  qS )rn   r   r   xr   r   r   r          z#OpenAI.generate.<locals>.<listcomp>r2   r   )rB   rK   ri   rj   rk   
logit_biasrY   Unknown dtype: r   )dtyper6   rV   text_endswithrJ   RuntimeError	messages_rd   r^   r5   
startswithpopopenai_completionrB   rK   rl   r>   listr2   rG   
ValueError)r-   r%   rU   rW   rk   rP   compr   r   r   generate   s   




1



zOpenAI.generaterc   c                 C   s"   | j sJ | j|d d d d S )Nr[   )r6   rM   r   )r-   rc   r   r   r   	spec_fill   s   
zOpenAI.spec_fillc                 C   s   t | jD ]G\}}|d }|dkr"||r|t|d  }q dS ||d }|dkr<|d | |d< ||d  }q|t| jd krJ||d< q dS dS )Nr\   rZ   FrY      T)	enumeraterM   ry   r   find)r-   r~   itermr\   posr   r   r   spec_pattern_match   s   

zOpenAI.spec_pattern_matchc              	   C   s   |j d u s|j| jsd S d}tdd | jD sGt| jD ]'}td| j	| j
| j| j|jd| j}t|tr=|d n|}| |rF nq| jD ]&}| j|d 7  _|d }|d urp|d |j|< i |j|< |j|   qJi | _g | _d S )	NrZ   c                 s   s    | ]	}|d  du V  qdS )r]   Nr   ro   r   r   r   	<genexpr>   s    z+OpenAI.role_end_generate.<locals>.<genexpr>rh   r   r\   r]   r   )rV   ru   rv   rJ   r   rM   rangerN   r{   rB   rK   r6   r5   rx   rL   r>   r|   r   	variables	meta_infovariable_eventset)r-   r%   r~   r   comp_for_matchr   r]   r   r   r   role_end_generate   s8   	



zOpenAI.role_end_generatec                 C   sr   |j d u r1| jr|j| jstd|j}n|j}| }td| j	| j
| j| j|d|}|S td|j  )NzaThis use case is not supported. For OpenAI chat models, sgl.gen must be right after sgl.assistantrh   rs   r   )rt   r6   ru   rv   rJ   rw   rx   r^   openai_completion_streamrB   rK   r5   r}   )r-   r%   rU   rk   rP   	generatorr   r   r   generate_stream  s(   
zOpenAI.generate_streamchoicestemperaturechoices_methodreturnc                    s   j rtdt|} fdd|D }dg| }dd |D } j|j}	tdd |D }
t|
D ]}i }t|D ]}|| rJd||| | < q< jj	j
 j|	|d|d	}|jd j} j|d } j j|jj7  _|jj j_d
}t|D ]+}|| r|t|| d krd
||< ||| | kr||  d7  < d}q|d
||< q||sJ t|dkr n|	| q4t|t| d|idS )z9Note: `choices_method` is not used by the OpenAI backend.zrselect/choices is not supported for chat models. Please try to use a non-chat model such as gpt-3.5-turbo-instructc                    s   g | ]} j |qS r   )r    encodero   r,   r   r   r   G  rq   z!OpenAI.select.<locals>.<listcomp>r   c                 S   s   g | ]}t |d kqS )r   r   ro   r   r   r   r   I  rq   c                 S   s   g | ]}t |qS r   r   ro   r   r   r   r   L  r   r   r   )rj   rk   rr   rX   r   FTscores)decisionr   )r6   NotImplementedErrorr   r    r   ru   maxr   rB   completionscreater5   r   r\   rK   r*   usager+   npsumr   r   argmax)r-   r%   r   r   r   	n_choices	token_idsr   validr*   max_lensteprr   r   retret_str	ret_tokenhitr   r,   r   select8  sZ   
zOpenAI.select)NNFrS   )r/   r0   r1   rl   r   boolr   r=   rT   r   r2   rd   r
   r   r   r   r   r   r   floatr	   r   r   __classcell__r   r   rQ   r   r4   8   sd    1
#
T
#
r4   r;   r   c           
      K   s  d|v rt d |d= t|D ]}z{|rHd|v r$|d d u r$|d | jjjdd|i|}t|jdkr?|jd j	j
}n9dd |jD }n0| jjdd	|i|}t|ttfrcd
d |jD }n|jd j}t|jdkrxdd |jD }| j|jj7  _| j|jj7  _W  |S  tjtjtjfy }	 ztd|	 d td ||d kr|	W Y d }	~	qd }	~	w ty }	 ztd|	 d |	d }	~	ww |S )Nebnf?EBNF is not officially supported by OpenAI endpoints. Ignoring.rY   messagesr   r   c                 S   s   g | ]}|j jqS r   )messagecontentr   r   r   r   r     r   z%openai_completion.<locals>.<listcomp>rk   c                 S      g | ]}|j qS r   r\   r   r   r   r   r     r   c                 S   r   r   r   r   r   r   r   r     r   OpenAI Error: . Waiting 5 seconds...   RuntimeError .r   )r_   r`   r   rz   chatr   r   r   r   r   r   r>   r|   tupler\   r*   r   r+   r?   APIErrorAPIConnectionErrorRateLimitErrorloggererrortimesleepr@   )
rB   rK   ri   retriesrk   rP   attemptr   r~   er   r   r   r{     sF   



r{   c                 k   s   d|v rt d |d= t|D ]}z|r_d|v r%|d d u r%|d | jjjd|dddid|}|D ]&}t|jdkrAq7z	|jd j	j
}	W n tyU   d }	Y nw |	pYdi fV  q7n)| jjd|dddid	|}|D ]}t|jdkrzqp|jd j}	|	pdi fV  qp| j|jj7  _| j|jj7  _W  d S  tjtjtjfy }
 ztd
|
 d td ||d kr|
W Y d }
~
qd }
~
w ty }
 ztd|
 d |
d }
~
ww d S )Nr   r   rY   Tinclude_usage)r   streamstream_optionsr   rZ   )rk   r   r   r   r   r   r   r   r   r   )r_   r`   r   rz   r   r   r   r   r   deltar   
IndexErrorr\   r*   r   r+   r?   r   r   r   r   r   r   r   r@   )rB   rK   ri   r   rk   rP   r   r   r   r   r   r   r   r   r     sl   


	
r   )Nr;   N)%dataclassesloggingr   r_   typingr   r   r   numpyr    sglang.lang.backend.base_backendr   sglang.lang.chat_templater   r   sglang.lang.choicesr   r	   sglang.lang.interpreterr
   sglang.lang.irr   r?   rC   ImportErrorr   	getLoggerr/   r   r(   rH   	dataclassr)   r4   rl   r{   r   r   r   r   r   <module>   sB    
  J
+