o
    ﰓia                  	   @   s   d dl Z d dlmZmZmZmZ d dlZd dlmZ d dl	m
Z
mZ d dlmZmZ G dd dZdeeeeef  fd	d
ZdedefddZ	ddeeef de
dedefddZdS )    N)ListOptionalTupleUnion)PreTrainedTokenizerBase)CharacterLevelParserFormatEnforcerAnalyzer)TokenEnforcerTokenEnforcerTokenizerDatac                   @   sH   e Zd ZdefddZdd Zdedeee  dej	d	ej	fd
dZ
dS )TRTLLMLogitsProcessortoken_enforcerc                 C   s2   || _ |r	t|nd | _d | _tj | _|| _d S )N)r   r   analyzermaskmathinfmask_valeos_token_id)selfr   r   analyze r   X/home/ubuntu/.local/lib/python3.10/site-packages/lmformatenforcer/integrations/trtllm.py__init__
   s
   

zTRTLLMLogitsProcessor.__init__c                    s    fdd|  D S )Nc                    s.   g | ]}|t  jtr jn jgvr|qS r   )
isinstancer   list).0xr   r   r   
<listcomp>   s   
 z/TRTLLMLogitsProcessor._trim.<locals>.<listcomp>)tolist)r   inputr   r   r   _trim   s   zTRTLLMLogitsProcessor._trimstepbatch_input_idslogitsreturnc                 C   s   t t|D ]C}| jr| j|| ||   | j| || j}| j	d ur1| j	
| j n
t|| | j| _	d| j	|< || | j	 ||< q|S )Nr   )rangelenr   report_raw_logitsr   r   get_allowed_tokensr    allowed_tokensr   fill_r   torch	full_like)r   r!   r"   r#   idxr)   r   r   r   __call__   s   

zTRTLLMLogitsProcessor.__call__N)__name__
__module____qualname__r	   r   r    intr   r+   Tensorr.   r   r   r   r   r   	   s    *r   r$   c           	      C   s   t | dr	|  } t | dr| j} | dd g}g }| j}t|D ]1}|| jv r*q"tj||g tj	d}| 
|dd  }| 
|}t|t|k}||||f q"|S )Nget_tokenizer	tokenizer0dtype   )hasattrr4   r5   encode
vocab_sizer%   all_special_idsr+   tensorlongdecoder&   append)	r5   token_0regular_tokensr=   	token_idxtensor_after_0decoded_after_0decoded_regularis_word_start_tokenr   r   r   _build_regular_tokens_list'   s    



rJ   r5   c                    s@   t  }dtt dtf fdd}d}t|| j|t }|S )z\Build the TokenEnforcerTokenizerData from a tokenizer in order to cache it between instancestokensr$   c                    s   t j| t jd} |S )Nr8   )r+   r?   r@   rA   )rK   r?   r5   r   r   _decode@   s   
z,build_trtlmm_tokenizer_data.<locals>._decodeF)rJ   r   r2   strr
   r   r&   )r5   rD   rM   use_bitmasktokenizer_datar   rL   r   build_trtlmm_tokenizer_data<   s
   rQ   Fcharacter_level_parserr   c                 C   s0   t | tr| }nt| }t||}t|| j|S )zf
    Build logits processor for feeding it into generate function (use_py_session should be True)
    )r   r
   rQ   r	   r   r   )r5   rR   r   rP   r   r   r   r   build_trtllm_logits_processorI   s
   

rS   )F)r   typingr   r   r   r   r+   transformersr   lmformatenforcerr   r   lmformatenforcer.tokenenforcerr	   r
   r   r2   rN   boolrJ   rQ   rS   r   r   r   r   <module>   s"    