o
    V۷i{	                     @   s   d dl mZmZmZ d dlZd dlmZ ddl	m
Z
 ddlmZmZmZ d dlmZ ejdejdejd	ejfd
dZdejdeej d	ejfddZdS )    )TupleListcastN   )	LLMatcher)get_bitmask_shapeallocate_token_bitmaskfill_next_token_bitmask)NDArraydatamaskreturnc                 C   s~   d}t jjdg ddg|d}t jtd g| jd}|| ||gd| jfg| jd	 | jd
 d	fd| jg| jgd}|d
 }|S )Na  
        uint batch = thread_position_in_grid.y;  // Batch index
        uint elem = thread_position_in_grid.x;   // Element index within batch

        // Bounds check to prevent out-of-bounds access
        // assert(batch < inp_shape[0] && elem < inp_shape[1]);

        uint word_idx = elem / 32;  // Which u32 word
        uint bit_idx = elem % 32;   // Which bit in the word

        // Bounds check for mask access
        // assert(word_idx < mask_shape[1] && batch < mask_shape[0]);

        uint bit = word_idx < mask_shape[1] && (mask[batch * mask_shape[1] + word_idx] >> bit_idx) & 1;
        out[batch * inp_shape[1] + elem] = bit ? inp[batch * inp_shape[1] + elem] : neg_inf[0];
    bitmask_apply_batched)inpr   neg_infout)nameinput_namesoutput_namessourceinf)dtypeTr   r   )   r   r   )inputstemplategridthreadgroupoutput_shapesoutput_dtypes)mxfastmetal_kernelarrayfloatr   shape)r   r   r   kernelr   outputsa r)   D/home/ubuntu/vllm_env/lib/python3.10/site-packages/llguidance/mlx.pyapply_token_bitmask_kernel   s(   

r+   logitsmask_npc                 C   s   t |}t| jdkrt j| dd} t|jdkr!t j|dd}|jt jks+J dt| jdks6J d| j\}}|j\}}||ksHJ dtt jt| |}|S )Nr   r   )axiszMask must be int32   zLogits must be 2DzBatch size mismatch)	r    r#   lenr%   expand_dimsr   int32r   r+   )r,   r-   r   batchvocabm_batchm_vocabrr)   r)   r*   apply_token_bitmask6   s   


r8   )typingr   r   r   numpynpmlx.corecorer    _libr   r   r   r	   numpy.typingr
   custom_functionr#   r+   r2   r8   r)   r)   r)   r*   <module>   s   *