o
    ï°“i  ã                   @   sž  d dl Z d dlmZmZmZmZmZmZ zd dlm	Z	 d dl
mZmZ d dlmZ W n ey5   edƒ‚w zd dlZW n eyG   edƒ‚w dd	lmZ dd
lmZmZ ddlmZ G dd„ deƒZG dd„ dƒZdededeeeeef  fdd„Zdedee defdd„Z		d'dededee defdd„ZG dd„ dƒZ deeef dede fd d!„Z!d"e	deeef ded#e"deee"f f
d$d%„Z#g d&¢Z$dS )(é    N)ÚAnyÚCallableÚListÚOptionalÚTupleÚUnion)ÚAutoModelForCausalLM)ÚLogitsProcessorÚ PrefixConstrainedLogitsProcessor)ÚPreTrainedTokenizerBasezWtransformers is not installed. Please install it with "pip install transformers[torch]"zfpytorch is not installed. See https://pytorch.org/get-started/locally/ for installation instructions."é   )ÚCharacterLevelParser)ÚTokenEnforcerÚTokenEnforcerTokenizerData)ÚFormatEnforcerAnalyzerc                   @   s:   e Zd Zdeddfdd„Zdejdejdejfdd	„ZdS )
ÚLogitsSaverWarperÚanalyzerÚreturnNc                 C   ó
   || _ d S ©N)r   )Úselfr   © r   ú^/home/ubuntu/.local/lib/python3.10/site-packages/lmformatenforcer/integrations/transformers.pyÚ__init__   ó   
zLogitsSaverWarper.__init__Ú	input_idsÚscoresc                 C   s6   |  ¡ }|  ¡ }t||ƒD ]\}}| j ||¡ q|S r   )ÚtolistÚzipr   Úreport_raw_logits)r   r   r   Ú
cpu_inputsÚ
cpu_scoresÚsingle_batch_inputsÚsingle_batch_scoresr   r   r   Ú__call__   s
   zLogitsSaverWarper.__call__)	Ú__name__Ú
__module__Ú__qualname__r   r   ÚtorchÚ
LongTensorÚFloatTensorr$   r   r   r   r   r      s     r   c                   @   s:   e Zd ZU eed< dedefdd„Zddd„Zd	d
„ Z	dS )ÚLogitsSaverManagerÚwarperÚmodelr   c                 C   s   || _ d | _d | _|| _d S r   )r-   r,   Ú
old_warperr   )r   r-   r   r   r   r   r   !   s   
zLogitsSaverManager.__init__Nc                    s$   ˆj jˆ_‡ ‡fdd„}|ˆj _d S )Nc                    sF   ˆ  | ¡}tˆjƒˆ_| dˆj¡ ˆ d ur!tˆ dƒ}| d|¡ |S )Nr   é   )r.   r   r   r,   Úinsertr
   )Úgeneration_configÚwarpersÚ	processor©Úfilter_funcr   r   r   Únew_logits_warper*   s   

zCLogitsSaverManager.replace_logits_warper.<locals>.new_logits_warper)r-   Ú_get_logits_warperr.   )r   r5   r6   r   r4   r   Úreplace_logits_warper'   s   
z(LogitsSaverManager.replace_logits_warperc                 C   s   | j | j_d S r   )r.   r-   r7   )r   r   r   r   Úunreplace_logits_warper4   s   z*LogitsSaverManager.unreplace_logits_warperr   )
r%   r&   r'   r   Ú__annotations__r   r   r   r8   r9   r   r   r   r   r+      s
   
 
r+   Ú	tokenizerÚ
vocab_sizer   c                 C   sr   |   d¡d }g }t|ƒD ])}|| jv rq|  ||g¡dd … }|  |g¡}t|ƒt|ƒk}| |||f¡ q|S )NÚ0éÿÿÿÿr/   )ÚencodeÚrangeÚall_special_idsÚdecodeÚlenÚappend)r;   r<   Útoken_0Úregular_tokensÚ	token_idxÚdecoded_after_0Údecoded_regularÚis_word_start_tokenr   r   r   Ú_build_regular_tokens_list8   s   
rK   Útokensc                 C   s   |   |¡}| d¡}|S )Nu   ï¿½)rB   Úrstrip)r;   rL   ÚdecodedÚcleanedr   r   r   Ú_decode_functionF   s   

rP   FÚuse_bitmaskc                 C   s4   |pt | ƒ}t| |ƒ}t t| ¡}t||| j||ƒS r   )rC   rK   Ú	functoolsÚpartialrP   r   Úeos_token_id)r;   rQ   r<   rF   Ú	decode_fnr   r   r   Ú#build_token_enforcer_tokenizer_dataL   s   
rV   c                   @   s6   e Zd Zdefdd„Zdedejdee fdd„Z	d	S )
Ú!TransformersPrefixAllowedTokensFnÚtoken_enforcerc                 C   r   r   )rX   )r   rX   r   r   r   r   W   r   z*TransformersPrefixAllowedTokensFn.__init__Úbatch_idÚsentr   c                 C   s   |  ¡ }| j |¡jS r   )r   rX   Úget_allowed_tokensÚallowed_tokens)r   rY   rZ   Útoken_sequencer   r   r   r$   Z   s   z*TransformersPrefixAllowedTokensFn.__call__N)
r%   r&   r'   r   r   Úintr(   ÚTensorr   r$   r   r   r   r   rW   V   s     rW   Útokenizer_dataÚcharacter_level_parserc                 C   s$   t | tƒr	t| ƒ} t| |ƒ}t|ƒS )a  Build the prefix allowed tokens function that transformers will use to filter the tokens generated by the model. The result
    can be passed to the prefix_allowed_tokens_fn parameter of the generate() method of transformers models or pipeline configurations.)Ú
isinstancer   rV   r   rW   )r`   ra   rX   r   r   r   Ú+build_transformers_prefix_allowed_tokens_fn_   s   

rc   r-   Úkwargsc                 K   sä   t ||ƒ}|d jd dk}| dd¡dk}|p| }| dd¡}| dd¡}	|o,|	o,|}
|
rdt|jƒ}t| |ƒ}| |¡ |}z| jdi |¤Ž}W | ¡  n| ¡  w | 	|d	 d  
¡ ¡}||_|S | jdi |¤d
|i¤Ž}|S )a›  Generate text from a model while enforcing a given format, generating enforcing diagnostic information. 
    This can be used instead of calling model.generate().
    If return_dict_in_generate and output_scores parameters are True, diagnostic information will be returned in the result.
    If you don't need this, consider using prefix_allowed_tokens_fn + build_transformers_prefix_allowed_tokens_fn() insteadr   r   r/   Ú	num_beamsÚreturn_dict_in_generateFÚoutput_scoresNÚ	sequencesÚprefix_allowed_tokens_fnr   )rc   ÚshapeÚgetr   rX   r+   r8   Úgenerater9   Úgenerate_report_dictr   Úenforced_scores)r-   r;   ra   rd   Ú"transformers_filter_allowed_tokensÚis_multi_inputsÚis_multi_beamsÚsupport_diagnosticsrf   rg   Úshould_run_in_advanced_moder   Úlogits_saverÚgenerate_kwargsÚoutputÚdf_dictr   r   r   Úgenerate_enforcedi   s(   
	



þrx   )rc   rx   rV   )FN)%rR   Útypingr   r   r   r   r   r   Útransformersr   Ú&transformers.generation.logits_processr	   r
   Útransformers.tokenization_utilsr   ÚImportErrorr(   Úcharacterlevelparserr   Útokenenforcerr   r   r   r   r   r+   r^   ÚstrÚboolrK   rP   rV   rW   rc   Údictrx   Ú__all__r   r   r   r   Ú<module>   s\     ÿÿ$þÿþ
ý
	ÿ
ÿ

ÿþý

ý'