o
    ۷iY                     @   s8  d dl Z d dlZd dlmZ d dlmZ d dlmZ ddl	m
Z
 ddlmZ eeZdZG dd dZG d	d
 d
ZG dd deZG dd deZG dd deZG dd deZG dd deZG dd deZG dd deZG dd deZG dd deZG dd deZG dd  d eZG d!d" d"eZdS )#    N)sparse   )add_start_docstrings)
get_loggerad  
    Args:
        input_ids (`jnp.ndarray` of shape `(batch_size, sequence_length)`):
            Indices of input sequence tokens in the vocabulary.

            Indices can be obtained using [`PreTrainedTokenizer`]. See [`PreTrainedTokenizer.encode`] and
            [`PreTrainedTokenizer.__call__`] for details.

            [What are input IDs?](../glossary#input-ids)
        scores (`jnp.ndarray` of shape `(batch_size, config.vocab_size)`):
            Prediction scores of a language modeling head. These can be logits for each vocabulary when not using beam
            search or log softmax for each vocabulary token when using beam search
        kwargs (`dict[str, Any]`, *optional*):
            Additional logits processor specific kwargs.

    Return:
        `jnp.ndarray` of shape `(batch_size, config.vocab_size)`: The processed prediction scores.

c                   @   4   e Zd ZdZeedejdejdejfddZdS )FlaxLogitsProcessorzSAbstract base class for all logit processors that can be applied during generation.	input_idsscoresreturnc                 C      t | j d)z"Flax method for processing logits.H is an abstract class. Only classes inheriting this class can be called.NotImplementedError	__class__selfr   r	    r   a/home/ubuntu/vllm_env/lib/python3.10/site-packages/transformers/generation/flax_logits_process.py__call__6      
zFlaxLogitsProcessor.__call__N	__name__
__module____qualname____doc__r   !LOGITS_PROCESSOR_INPUTS_DOCSTRINGjnpndarrayr   r   r   r   r   r   3       "r   c                   @   r   )FlaxLogitsWarperzjAbstract base class for all logit warpers that can be applied during generation with multinomial sampling.r   r	   r
   c                 C   r   )zFlax method for warping logits.r   r   r   r   r   r   r   A   r   zFlaxLogitsWarper.__call__Nr   r   r   r   r   r   >   r   r   c                	   @   s8   e Zd ZdZeedejdejdedejfddZ	dS )	FlaxLogitsProcessorLista.  
    This class can be used to create a list of [`FlaxLogitsProcessor`] or [`FlaxLogitsWarper`] to subsequently process
    a `scores` input tensor. This class inherits from list and adds a specific *__call__* method to apply each
    [`FlaxLogitsProcessor`] or [`FlaxLogitsWarper`] to the inputs.
    r   r	   cur_lenr
   c                    s   | D ]C}t |jj}t|dkr?t fddt| dd  D s4tdt|  d|j	 d||||fi  }q||||}q|S )N   c                 3   s    | ]}| v V  qd S Nr   ).0argkwargsr   r   	<genexpr>U   s    z3FlaxLogitsProcessorList.__call__.<locals>.<genexpr>r   z,Make sure that all the required parameters: z for z$ are passed to the logits processor.)
inspect	signaturer   
parameterslenalllistkeys
ValueErrorr   )r   r   r	   r!   r'   	processorfunction_argsr   r&   r   r   P   s   &z FlaxLogitsProcessorList.__call__N)
r   r   r   r   r   r   r   r   intr   r   r   r   r   r    I   s    &r    c                   @   >   e Zd ZdZdefddZdejdejdedejfd	d
Z	dS )FlaxTemperatureLogitsWarperz
    [`FlaxLogitsWarper`] for temperature (exponential scaling output probability distribution).

    Args:
        temperature (`float`):
            The value used to module the logits distribution.
    temperaturec                 C   s*   t |tr	|dkstd| || _d S )Nr   z:`temperature` has to be a strictly positive float, but is )
isinstancefloatr0   r6   )r   r6   r   r   r   __init__i   s   
z$FlaxTemperatureLogitsWarper.__init__r   r	   r!   r
   c                 C   s   || j  }|S r#   )r6   r   r   r	   r!   r   r   r   r   o   s   
z$FlaxTemperatureLogitsWarper.__call__N)
r   r   r   r   r8   r9   r   r   r3   r   r   r   r   r   r5   `   s    $r5   c                   @   sR   e Zd ZdZed dfdededefddZd	ejd
ejdedejfddZ	dS )FlaxTopPLogitsWarpera=  
    [`FlaxLogitsWarper`] that performs top-p, i.e. restricting to top tokens summing to prob_cut_off <= prob_cut_off.

    Args:
        top_p (`float`):
            If set to < 1, only the smallest set of most probable tokens with probabilities that add up to `top_p` or
            higher are kept for generation.
        filter_value (`float`, *optional*, defaults to -inf):
            All filtered values will be set to this float value.
        min_tokens_to_keep (`int`, *optional*, defaults to 1):
            Minimum number of tokens that cannot be filtered.
    Inf   top_pfilter_valuemin_tokens_to_keepc                 C   s^   t |tr|dk s|dkrtd| t |tr|dk r$td| || _|| _|| _d S )Nr   g      ?z.`top_p` has to be a float > 0 and < 1, but is r=   z:`min_tokens_to_keep` has to be a positive integer, but is )r7   r8   r0   r3   r>   r?   r@   )r   r>   r?   r@   r   r   r   r9      s   
zFlaxTopPLogitsWarper.__init__r   r	   r!   r
   c                 C   s   t ||jd \}}t|| j}tjj|ddj	dd}|| j
k }t|d}||jd d df dO }|jd d d | jf d}t|||}	tj ||	d }
|
S )Naxisr=   r   T)laxtop_kshaper   	full_liker?   jaxnnsoftmaxcumsumr>   rollatsetr@   wheresort_key_val)r   r   r	   r!   topk_scorestopk_indicesmask_scorescumulative_probs
score_masktopk_next_scoresnext_scoresr   r   r   r      s   
zFlaxTopPLogitsWarper.__call__N
r   r   r   r   r8   r3   r9   r   r   r   r   r   r   r   r;   t   s    "$
r;   c                   @   sR   e Zd ZdZed dfdededefddZd	ejd
ejdedejfddZ	dS )FlaxTopKLogitsWarpera  
    [`FlaxLogitsWarper`] that performs top-k, i.e. restricting to the k highest probability elements.

    Args:
        top_k (`int`):
            The number of highest probability vocabulary tokens to keep for top-k-filtering.
        filter_value (`float`, *optional*, defaults to -inf):
            All filtered values will be set to this float value.
        min_tokens_to_keep (`int`, *optional*, defaults to 1):
            Minimum number of tokens that cannot be filtered.
    r<   r=   rE   r?   r@   c                 C   s6   t |tr	|dkrtd| t||| _|| _d S )Nr   z6`top_k` has to be a strictly positive integer, but is )r7   r3   r0   maxrE   r?   )r   rE   r?   r@   r   r   r   r9      s   
zFlaxTopKLogitsWarper.__init__r   r	   r!   r
   c                 C   s   |j \}}t|| | j}t| j|j d }t||\}}	tt|| d d d f ||f	 }
|	 }|		 |
 }|j
| |}|||}|S )NrA   )rF   r   fullr?   minrE   rD   broadcast_toarangeflattenrM   rN   reshape)r   r   r	   r!   
batch_size
vocab_sizenext_scores_flattopkrQ   rR   shifttopk_scores_flattopk_indices_flatrW   r   r   r   r      s   
*zFlaxTopKLogitsWarper.__call__NrX   r   r   r   r   rY      s    "$rY   c                   @   s>   e Zd ZdZdefddZdejdejdedejfd	d
ZdS )!FlaxForcedBOSTokenLogitsProcessorz
    [`FlaxLogitsProcessor`] that enforces the specified token as the first generated token.

    Args:
        bos_token_id (`int`):
            The id of the token to force as the first generated token.
    bos_token_idc                 C   s
   || _ d S r#   )ri   )r   ri   r   r   r   r9      s   
z*FlaxForcedBOSTokenLogitsProcessor.__init__r   r	   r!   r
   c                 C   sN   t |jtd }dt |d  }t ||jd d | jf d|}|S Ninfr=   r   )	r   r[   rF   r8   bool_rO   rM   ri   rN   r   r   r	   r!   
new_scoresapply_penaltyr   r   r   r      s   $z*FlaxForcedBOSTokenLogitsProcessor.__call__N	r   r   r   r   r3   r9   r   r   r   r   r   r   r   rh      s    $rh   c                   @   B   e Zd ZdZdedefddZdejdejded	ejfd
dZdS )!FlaxForcedEOSTokenLogitsProcessorae  
    [`FlaxLogitsProcessor`] that enforces the specified token as the last generated token when `max_length` is reached.

    Args:
        max_length (`int`):
            The maximum length of the sequence to be generated.
        eos_token_id (`int`):
            The id of the token to force as the last generated token when `max_length` is reached.
    
max_lengtheos_token_idc                 C   s   || _ || _d S r#   )rs   rt   )r   rs   rt   r   r   r   r9      s   
z*FlaxForcedEOSTokenLogitsProcessor.__init__r   r	   r!   r
   c                 C   sT   t |jtd }dt || j d  }t ||jd d | jf 	d|}|S rj   )
r   r[   rF   r8   rl   rs   rO   rM   rt   rN   rm   r   r   r   r      s   $z*FlaxForcedEOSTokenLogitsProcessor.__call__Nrp   r   r   r   r   rr      s    
$rr   c                   @   rq   )FlaxMinLengthLogitsProcessora3  
    [`FlaxLogitsProcessor`] enforcing a min-length by setting EOS probability to 0.

    Args:
        min_length (`int`):
            The minimum length below which the score of `eos_token_id` is set to `-float("Inf")`.
        eos_token_id (`int`):
            The id of the *end-of-sequence* token.
    
min_lengthrt   c                 C   sP   t |tr	|dk rtd| t |tr|dk r td| || _|| _d S )Nr   z2`min_length` has to be a positive integer, but is z4`eos_token_id` has to be a positive integer, but is )r7   r3   r0   rv   rt   )r   rv   rt   r   r   r   r9      s   
z%FlaxMinLengthLogitsProcessor.__init__r   r	   r!   r
   c                 C   sF   dt || j dd }t ||jd d | jf td |}|S )Nr=   r   rk   )r   cliprv   rO   rM   rt   rN   r8   r   r   r	   r!   ro   r   r   r   r     s   *z%FlaxMinLengthLogitsProcessor.__call__Nrp   r   r   r   r   ru      s    
$
ru   c                   @   s&   e Zd ZdZdd ZdefddZdS )(FlaxSuppressTokensAtBeginLogitsProcessora  
    [`FlaxLogitsProcessor`] suppressing a list of tokens as soon as the `generate` function starts generating using
    `begin_index` tokens. This should ensure that the tokens defined by `begin_suppress_tokens` are not sampled at the
    beginning of the generation.

    Args:
        begin_suppress_tokens (`list[int]`):
            Tokens to not sample.
        begin_index (`int`):
            Index where the tokens are suppressed.
    c                 C   s   t || _|| _d S r#   )r.   begin_suppress_tokensbegin_index)r   rz   r{   r   r   r   r9     s   

z1FlaxSuppressTokensAtBeginLogitsProcessor.__init__r!   c                 C   sB   dt || j  }t ||jd d | jf td |}|S )Nr=   rk   )r   rl   r{   rO   rM   rz   rN   r8   rx   r   r   r   r   !  s   *z1FlaxSuppressTokensAtBeginLogitsProcessor.__call__N)r   r   r   r   r9   r3   r   r   r   r   r   ry     s    ry   c                   @   r4   )!FlaxSuppressTokensLogitsProcessorz
    [`FlaxLogitsProcessor`] suppressing a list of tokens at each decoding step. The processor will set their log probs
    to be `-inf` so they are not sampled.

    Args:
        suppress_tokens (`list`):
            Tokens to not sample.
    suppress_tokensc                 C   s   t || _d S r#   )r.   r}   )r   r}   r   r   r   r9   3  s   z*FlaxSuppressTokensLogitsProcessor.__init__r   r	   r!   r
   c                 C   s    |j d| jf td }|S )N.rk   )rM   r}   rN   r8   r:   r   r   r   r   6  s   z*FlaxSuppressTokensLogitsProcessor.__call__N)
r   r   r   r   r.   r9   r   r   r3   r   r   r   r   r   r|   )  s    	$r|   c                   @   s8   e Zd ZdZdd Zdejdejdedejfdd	Zd
S )FlaxForceTokensLogitsProcessora  
    [`FlaxLogitsProcessor`] that takes a list of pairs of integers which indicates a mapping from generation indices to
    token indices that will be forced before sampling. The processor will set their log probs to 0 and all other tokens
    to `-inf` so that they are sampled at their corresponding index.

    Args:
        force_token_map (`list`):
            Map giving token ids and indices where they will be forced to be sampled.
    c                 C   sb   t |}tjt| d tjdd }| D ]\}}|d ur(|j| |}qt|| _	d S )Nr=   dtyperA   )
dictr   onesrZ   r/   int32itemsrM   rN   force_token_array)r   force_token_mapr   indextokenr   r   r   r9   G  s    z'FlaxForceTokensLogitsProcessor.__init__r   r	   r!   r
   c                    sB   fdd t jjd kfdd fddS )Nc                    sX    j d }j|  }tj  jdtd  }tj|df jd}t||d|f}|S )Nr   r   rk   r=   )	rF   r   r   	ones_liker   r8   zerosrD   dynamic_update_slice)generation_idxra   current_tokenrn   updates)r	   r   r   r   _force_tokenS  s   

z=FlaxForceTokensLogitsProcessor.__call__.<locals>._force_tokenr   c                          S r#   r   r   r	   r   r   <lambda>_      z9FlaxForceTokensLogitsProcessor.__call__.<locals>.<lambda>c                      s*   t j dk fddfddS )Nr   c                      s    S r#   r   r   )r   r!   r   r   r   d  s    zKFlaxForceTokensLogitsProcessor.__call__.<locals>.<lambda>.<locals>.<lambda>c                      r   r#   r   r   r   r   r   r   f  r   )rD   condr   r   r   r!   r	   r   r   r   r   a  s
    
)rD   r   r   rF   r:   r   r   r   r   R  s   	
z'FlaxForceTokensLogitsProcessor.__call__N)	r   r   r   r   r9   r   r   r3   r   r   r   r   r   r~   <  s    
$r~   c                   @   s    e Zd ZdZdd Zdd ZdS )#FlaxWhisperTimeStampLogitsProcessora{  
    Whisper specific Processor. This processor can be used to force a list of tokens. The processor will set their log
    probs to `inf` so that they are sampled at their corresponding index.

    Args:
        generate_config (`GenerateConfig`):
            The generate config used to generate the output. The following parameters are required:
                eos_token_id (`int`, *optional*, defaults to 50257):
                    The id of the *end-of-sequence* token.
                no_timestamps_token_id (`int`, *optional*, defaults to 50363):
                    The id of the `"<|notimestamps|>"` token.
                max_initial_timestamp_index (`int`, *optional*, defaults to 1):
                    Used to set the maximum value of the initial timestamp. This is used to prevent the model from
                    predicting timestamps that are too far in the future.
    c                 C   sp   |j | _ |j| _|jd | _|d | _|jr|  jd7  _t|dr'|j| _n|j| _| jd u r6|j| _d S d S )Nr=   r   max_initial_timestamp_index)rt   no_timestamps_token_idtimestamp_beginr{   is_multilingualhasattrr   rb   )r   generate_configmodel_configdecoder_input_lengthr   r   r   r9   }  s   



z,FlaxWhisperTimeStampLogitsProcessor.__init__c           	         s   |j d d jf td } fdd}t|||}t jkdd}tj	d u|d}j
j	 }t||j d d |d d f td |}tjj|dd}fd	d
}t|||}|S )Nrk   c                    s   t  j dkdd}t |  d  jk|d}t  j dk dd}t |  d  jkd|}t |t |dk|jjd  td |jd j td |S )Nr=   TFr   r   rk   )r   rO   r{   r   rM   rN   r8   rt   )input_ids_kscores_klast_was_timestamppenultimate_was_timestampr!   r   r   r   handle_pairs  s*   zBFlaxWhisperTimeStampLogitsProcessor.__call__.<locals>.handle_pairsTFr=   rA   rB   c                    sX   t jj|  jd  dd}t| d  j }t||k|jd  j t	d |S )NrA   rB   rk   )
rH   rI   	logsumexpr   r   rZ   rO   rM   rN   r8   )
logprobs_kr   timestamp_logprobmax_text_token_logprob)r   r   r   handle_cumulative_probs  s   zMFlaxWhisperTimeStampLogitsProcessor.__call__.<locals>.handle_cumulative_probs)rM   r   rN   r8   rH   vmapr   rO   r{   r   r   rI   log_softmax)	r   r   r	   r!   r   apply_max_initial_timestamplast_allowedlogprobsr   r   r   r   r     s&    $	z,FlaxWhisperTimeStampLogitsProcessor.__call__N)r   r   r   r   r9   r   r   r   r   r   r   l  s    r   c                   @   sl   e Zd ZdZdefddZdejdedefdd	Zd
ejdejfddZ	dejdejdedejfddZ
dS ) FlaxNoRepeatNGramLogitsProcessora9  
    [`FlaxLogitsProcessor`] that enforces no repetition of n-grams. See
    [Fairseq](https://github.com/pytorch/fairseq/blob/a07cb6f40480928c9e0548b737aadd36ee66ac76/fairseq/sequence_generator.py#L345).

    Args:
        ngram_size (`int`):
            All ngrams of size `ngram_size` can only occur once.
    
ngram_sizec                 C   s*   t |tr	|dkrtd| || _d S )Nr   z;`ngram_size` has to be a strictly positive integer, but is )r7   r3   r0   r   )r   r   r   r   r   r9     s   
z)FlaxNoRepeatNGramLogitsProcessor.__init__r   rb   r!   c              	      s   j \ }|jd  }|jd  } fdd} | jd f}tjd | |tj|jd}	t |  | k 	d}
t
j|
|	f f|fj  dS )a  
        get a matrix of size (batch_size,) + (vocab_size,)*n (for n-grams) that
        represent the n-grams that occurred previously.
        The BCOO representation allow to store only the few non-zero entries, instead of the full (huge) matrix
        r=   c                    sD   |   |  |j |  t g fddtjD  S )Nc                    s"   g | ]}t  | f qS r   )r   array)r$   j)br   posr   r   
<listcomp>  s   " zZFlaxNoRepeatNGramLogitsProcessor.get_previous_ngrams.<locals>.body_fun.<locals>.<listcomp>)rM   rN   r   r   ranger   )ivalra   r   r   )r   r   r   body_fun  s   
zFFlaxNoRepeatNGramLogitsProcessor.get_previous_ngrams.<locals>.body_funr   r   float32)rF   )rF   r   rH   rD   	fori_loopr   r   r   r^   astyper   BCOO)r   r   rb   r!   seq_len
seq_ngrams
cur_ngramsr   rF   all_update_indicesdatar   r   r   get_previous_ngrams  s   
 z4FlaxNoRepeatNGramLogitsProcessor.get_previous_ngramslatest_tokensr
   c                 C   s$   t jtjdd }t |||S )zt
        Determines which tokens must be banned given latest tokens and the previously seen
        ngrams.
        c                 S   s   |t |  S r#   )tuple)r   previous_ngramsr   r   r   inner_fn  s   zIFlaxNoRepeatNGramLogitsProcessor.get_banned_tokens_mask.<locals>.inner_fn)r   sparsifyrH   r   bcoo_todense)r   r   r   r   r   r   r   get_banned_tokens_mask  s   z7FlaxNoRepeatNGramLogitsProcessor.get_banned_tokens_maskr	   c                    s8    fdd}t j jd k|fdd}|S )Nc               
      s   j \} }| }tjj d jd fjd}tj|tj	d jd  fj d jd fd}
||d}t|td S )Nr   r=   r   )r   r   boolrk   )rF   r   r   r   r   r   rH   rD   r   dynamic_slicer   r   rO   r8   )_rb   r   r   banned_tokens_indices_maskr!   r   r	   r   r   r   true_fn  s   
 $	z:FlaxNoRepeatNGramLogitsProcessor.__call__.<locals>.true_fnr=   c                      r   r#   r   r   r   r   r   r     r   z;FlaxNoRepeatNGramLogitsProcessor.__call__.<locals>.<lambda>)rH   rD   r   r   )r   r   r	   r!   r   outputr   r   r   r     s   "z)FlaxNoRepeatNGramLogitsProcessor.__call__N)r   r   r   r   r3   r9   r   r   r   r   r   r   r   r   r   r     s    	"$r   )r)   rH   jax.laxrD   	jax.numpynumpyr   jax.experimentalr   utilsr   utils.loggingr   r   loggerr   r   r   r.   r    r5   r;   rY   rh   rr   ru   ry   r|   r~   r   r   r   r   r   r   <module>   s.   ,#0a