o
    ۾i                     @   sB   d Z ddlmZmZ ddlZddlZddlZG dd dejZdS )z
This file helps integrate xgrammar in HF transformers package by extending
transformers.LogitsProcessor, which is to be fed to `model.generate()`.
    )ListUnionNc                   @   sJ   e Zd ZdZdeejeej f fddZde	j
de	jde	jfdd	Zd
S )LogitsProcessora  
    LogitsProcessor for processing logits in transformers' generate() method.

    Example usage
    -------------
        .. code:: python

            model_name = "Qwen/Qwen2.5-0.5B-Instruct"
            tokenizer = AutoTokenizer.from_pretrained(model_name)
            config = AutoConfig.from_pretrained(model_name)
            # This can be larger than tokenizer.vocab_size due to paddings
            full_vocab_size = config.vocab_size
            tokenizer_info = xgr.TokenizerInfo.from_huggingface(tokenizer, vocab_size=full_vocab_size)

            grammar_compiler = xgr.GrammarCompiler(tokenizer_info)
            compiled_grammar = grammar_compiler.compile_builtin_json_grammar()
            xgr_logits_processor = xgr.contrib.hf.LogitsProcessor(compiled_grammar)
            model.generate(prompt, logits_processor=[xgr_logits_processor])

        For an end-to-end example, see folder `examples/hf_transformers/`.

    Notes
    -----
        - Note that this LogitsProcessor can only be used once. For each `generate()` call,
            instantiate a new one.
        - Note that this implementation may contain extra overhead.
    compiled_grammarc                 C   sB   g | _ t|tr
|n|g| _| jd jj| _d| _d| _d| _	dS )a  Initialize the LogitsProcessor.

        Parameters
        ----------
        compiled_grammar : xgr.CompiledGrammar | List[xgr.CompiledGrammar]
            One or more grammars compiled according to the given grammar and the model's tokenizer_info.
        r   NF)
matchers
isinstancelistcompiled_grammarstokenizer_info
vocab_sizefull_vocab_sizetoken_bitmask	prefilled
batch_size)selfr    r   G/home/ubuntu/.local/lib/python3.10/site-packages/xgrammar/contrib/hf.py__init__+   s   
zLogitsProcessor.__init__	input_idsscoresreturnc                    sv  t  jdkr@|jd  _t  jdkr jn j j  _t  j jks*J d fddt jD  _t j j _	|jd  jkrYt
dd|jd  d j d	  js`d
 _nt jD ]} j|  s~|| d } j| |s~J qet jD ]} j|  s j|  j	| q|jj}|dkr|d}t| j	|j |dkr||}|S )z
        Accept token sampled in the last iteration, fill in bitmask, and apply bitmask to logits.

        Returns:
            scores: Logits modified with bitmask.
        r      z@The number of compiled grammars must be equal to the batch size.c                    s   g | ]
}t  j| qS r   )xgrGrammarMatcherr	   ).0ir   r   r   
<listcomp>N   s    z,LogitsProcessor.__call__.<locals>.<listcomp>z;Expect input_ids.shape[0] to be LogitsProcessor.batch_size.zGot z for the former, and z for the latter.Tcudacpu)lenr   shaper   r	   ranger   allocate_token_bitmaskr   r   RuntimeErrorr   is_terminatedaccept_tokenfill_next_token_bitmaskdevicetypetoapply_token_bitmask_inplace)r   r   r   r   sampled_tokendevice_typer   r   r   __call__<   sJ   



zLogitsProcessor.__call__N)__name__
__module____qualname____doc__r   r   CompiledGrammarr   r   torch
LongTensorFloatTensorr/   r   r   r   r   r      s     r   )	r3   typingr   r   r5   transformersxgrammarr   r   r   r   r   r   <module>   s    