o
    ei}#                    @   s   d Z ddlmZ ddlZddlmZ ddlmZmZm	Z	 ddl
mZ G dd dejjZG d	d
 d
ejjZG dd deZG dd deZG dd deZG dd deZG dd deZG dd deZG dd deZG dd deZG dd deZdS )zDecoding methods for seq2seq autoregressive model.

Authors
 * Adel Moumen 2022, 2023, 2024
 * Ju-Chieh Chou 2020
 * Peter Plantinga 2020
 * Mirco Ravanelli 2020
 * Sung-Lin Yeh 2020
    )cached_propertyN)Categorical)_update_meminflate_tensormask_by_condition)undo_paddingc                       0   e Zd ZdZ fddZdd Zdd Z  ZS )AlivedHypothesesax  This class handle the data for the hypotheses during the decoding.

    Arguments
    ---------
    alived_seq : torch.Tensor
        The sequence of tokens for each hypothesis.
    alived_log_probs : torch.Tensor
        The log probabilities of each token for each hypothesis.
    sequence_scores : torch.Tensor
        The sum of log probabilities for each hypothesis.
    c                    s    t    || _|| _|| _d S N)super__init__
alived_seqalived_log_probssequence_scores)selfr   r   r   	__class__ Z/home/ubuntu/transcripts/venv/lib/python3.10/site-packages/speechbrain/decoders/seq2seq.pyr   %   s   

zAlivedHypotheses.__init__c                 C   s   | j | | j| | j| fS r
   r   r   r   )r   indexr   r   r   __getitem__+   s   zAlivedHypotheses.__getitem__c                 C   s   d| j  d| j d| j dS )NzAlivedHypotheses(alived_seq=z, alived_log_probs=z, sequence_scores=)r   r   r   r   r   __str__2   s   zAlivedHypotheses.__str__)__name__
__module____qualname____doc__r   r   r   __classcell__r   r   r   r   r	      s
    r	   c                       sP   e Zd ZdZ fddZdd Zdd Zdd	 Zd
d Zdd Z	dd Z
  ZS )S2SBaseSearchera  S2SBaseSearcher class to be inherited by other
    decoding approaches for seq2seq model.

    Arguments
    ---------
    bos_index : int
        The index of the beginning-of-sequence (bos) token.
    eos_index : int
        The index of end-of-sequence (eos) token.
    min_decode_ratio : float
        The ratio of minimum decoding steps to the length of encoder states.
    max_decode_ratio : float
        The ratio of maximum decoding steps to the length of encoder states.
    c                    s&   t    || _|| _|| _|| _d S r
   )r   r   	bos_index	eos_indexmin_decode_ratiomax_decode_ratio)r   r!   r"   r#   r$   r   r   r   r   F   s
   

zS2SBaseSearcher.__init__c                 C      t )a  This method should implement the forward algorithm of decoding method.

        Arguments
        ---------
        enc_states : torch.Tensor
            The precomputed encoder states to be used when decoding.
            (ex. the encoded speech representation to be attended).
        wav_len : torch.Tensor
            The speechbrain-style relative length.

        Returns
        -------
        hyps
            The predicted tokens, as a list of lists or, if return_topk is True,
            a Tensor of shape (batch, topk, max length of token_id sequences).
        top_lengths
            The length of each topk sequence in the batch.
        top_scores
            This final scores of topk hypotheses.
        top_log_probs
            The log probabilities of each hypotheses.
        NotImplementedError)r   
enc_stateswav_lenr   r   r   forwardO   s   zS2SBaseSearcher.forwardc                 C   r%   )aI  This method should implement one step of
        forwarding operation in the autoregressive model.

        Arguments
        ---------
        inp_tokens : torch.Tensor
            The input tensor of the current step.
        memory : No limit
            The memory variables input for this step.
            (ex. RNN hidden states).
        enc_states : torch.Tensor
            The encoder states to be attended.
        enc_lens : torch.Tensor
            The actual length of each enc_states sequence.

        Returns
        -------
        log_probs : torch.Tensor
            Log-probabilities of the current step output.
        memory : No limit
            The memory variables generated in this step.
            (ex. RNN hidden states).
        attn : torch.Tensor
            The attention weight for doing penalty.
        r&   )r   
inp_tokensmemoryr(   enc_lensr   r   r   forward_stepi   s   zS2SBaseSearcher.forward_stepc                 C   r%   )a  This method should implement the resetting of
        memory variables for the seq2seq model.
        E.g., initializing zero vector as initial hidden states.

        Arguments
        ---------
        batch_size : int
            The size of the batch.
        device : torch.device
            The device to put the initial variables.

        Return
        ------
        memory : No limit
            The initial memory variable.
        r&   r   
batch_sizedevicer   r   r   	reset_mem   s   zS2SBaseSearcher.reset_memc                 C   s   ||fS )z<set the minimum/maximum length of enc_states to be attended.r   )r   min_decode_stepsmax_decode_stepsr   r   r   change_max_decoding_length   s   z*S2SBaseSearcher.change_max_decoding_lengthc                 C   s
   | j jjS )zset the number of output tokens.
        Overrides this function if the fc layer is embedded
        in the model, e.g., Whisper.
        )fcwout_featuresr   r   r   r   	set_n_out   s   
zS2SBaseSearcher.set_n_outc                 C      dS zThis method is supposed to be overridden by the child class.
        For instance, if the decoder has a maximal number of tokens that it can
        attend to, this method should return True when the maximal number of tokens
        is reached.
        Fr   r   r,   r   r   r   _check_end_condition      z$S2SBaseSearcher._check_end_condition)r   r   r   r   r   r*   r.   r2   r5   r9   r=   r   r   r   r   r   r    6   s    	r    c                   @   s(   e Zd ZdZe dd Zdd ZdS )S2SGreedySearcherzpThis class implements the general forward-pass of
    greedy decoding approach. See also S2SBaseSearcher().
    c                 C   s  t |jd |  }|j}|jd }| j||d}||| j	 }g }t|jd | j
 }	t|jd | j }
| |	|
\}	}
|| }t|	|
D ]P}| ||||\}}}| jdkri|jdd}n
t|| j d }t jjj| dd}|| ||| jkB }t j ||< | j||< | s| |r nqPt j|dd}|jdd\}}|t j k}d||< | j||< | |||\}}}}t |dddf |}||||fS )a   This method performs a greedy search.

        Arguments
        ---------
        enc_states : torch.Tensor
            The precomputed encoder states to be used when decoding.
            (ex. the encoded speech representation to be attended).
        wav_len : torch.Tensor
            The speechbrain-style relative length.

        Returns
        -------
        hyps : List[List[int]]
            List containing the hypotheses.
        top_lengths : torch.Tensor (batch)
            This tensor contains the length of each hypothesis.
        top_scores : torch.Tensor (batch)
            The score of each hypotheses.
        top_log_probs : torch.Tensor (batch, max length of token_id sequences)
            The log probabilities of each hypotheses.
           r   r1   dim)logitsN)!torchroundshapeintr1   r2   	new_zerosfill_r!   longr#   r$   r5   boolranger.   temperatureargmaxr   samplenn
functionallog_softmaxfloatappendr"   infallr=   stackmax_get_top_predictionr   )r   r(   r)   r-   r1   r0   r,   r+   log_probs_lstr3   r4   	has_endedsteprE   _	log_probsscorespredictionsmasktop_hypstop_lengths
top_scorestop_log_probshypsr   r   r   r*      sZ   





zS2SGreedySearcher.forwardc                 C   s   | d}| d}|g| }t|D ]}|| }|| jkjdd}	t|	dkr0|	d  ||< qtj|tj|j	d}|}
|| }|
d|
d|
d|

dfS )a  This method sorts the scores and return corresponding hypothesis and log probs.

        Arguments
        ---------
        hyps : torch.Tensor (batch, max length of token_id sequences)
            This tensor stores the predicted hypothesis.
        scores : torch.Tensor (batch)
            The score of each hypotheses.
        log_probs : torch.Tensor (batch, max length of token_id sequences)
            The log probabilities of each hypotheses.

        Returns
        -------
        top_hyps : torch.Tensor (batch, max length of token_id sequences)
            This tensor stores the best predicted hypothesis.
        top_lengths : torch.Tensor (batch)
            This tensor contains the length of each hypothesis.
        top_scores : torch.Tensor (batch)
            The score of each hypotheses.
        top_log_probs : torch.Tensor (batch, max length of token_id sequences)
            The log probabilities of each hypotheses.
        r   r@   Fas_tupledtyper1   )sizerN   r"   nonzerolenitemrF   tensorrU   r1   	unsqueeze)r   rh   ra   r`   r0   
max_lengthre   
pred_indexpredpred_lengthrg   r   r   r   r[     s&   



z%S2SGreedySearcher._get_top_predictionN)r   r   r   r   rF   no_gradr*   r[   r   r   r   r   r?      s
    
Rr?   c                       2   e Zd ZdZd	 fdd	Zdd Zdd Z  ZS )
S2STransformerGreedySearchera  This class implements the greedy decoding
    for Transformer.

    Arguments
    ---------
    modules : list with the following one:
        model : torch.nn.Module
            A TransformerASR model.
        seq_lin : torch.nn.Module
            A linear output layer for the seq2seq model.
    temperature : float
        Temperature to use during decoding.
    **kwargs
        Arguments to pass to S2SGreedySearcher
            c                    @   t  jdi | |d | _|d | _tjjdd| _|| _d S Nr   r@   rB   rC   r   	r   r   modelr6   rF   rR   
LogSoftmaxsoftmaxrO   r   modulesrO   kwargsr   r   r   r   K  
   


z%S2STransformerGreedySearcher.__init__c                 C   r:   )z0Needed to reset the memory during greedy search.Nr   r/   r   r   r   r2   T     z&S2STransformerGreedySearcher.reset_memc                 C   sD   t ||}| j|||\}}| |}|dddddf ||fS )z3Performs a step in the implemented greedy searcher.NrB   )r   r~   decoder6   )r   r+   r,   r(   r-   ru   attnrE   r   r   r   r.   X  s   

z)S2STransformerGreedySearcher.forward_steprz   r   r   r   r   r   r2   r.   r   r   r   r   r   ry   :  s
    	ry   c                       st   e Zd ZdZ							d fdd	Zdd	 Zd
d Zdd Zedd Z	dd Z
dd Zdd Zdd Z  ZS )S2SWhisperGreedySearchera  
    This class implements the greedy decoding
    for Whisper neural nets made by OpenAI in
    https://cdn.openai.com/papers/whisper.pdf.

    Arguments
    ---------
    model: HuggingFaceWhisper
        The Whisper model.
    temperature: float
        The temperature to use during decoding.
    use_kv_cache: bool (default: True)
        Whether to use key-value cache.
    suppress_blank: bool (default: True)
        This will suppress blank outputs.
    suppress_tokens: str or list (default: "-1")
        list of tokens ids (or comma-separated token ids) to suppress
        "-1" will suppress a set of symbols as defined in `model.non_speech_tokens()`
    sample_len: int (default: None)
        Maximum number of tokens to sample.
    prefix: str or list (default: None)
        Prefix to add to the input tokens.
        See: https://github.com/openai/whisper/discussions/117#discussioncomment-3727051
    prompt: str or list (default: None)
        Prompt to add to the input tokens.
        See: https://github.com/openai/whisper/discussions/117#discussioncomment-3727051
    **kwargs
        see S2SBaseSearcher, arguments are directly passed.
    rz   T-1Nc	           
         s   t  jd|j|jd|	 || _|| _|| _d | _|| _|| _	|| _
|| _| jjjjj| _|p4| jd | _|  | _t| j| _| jj| _| jd | _d | _d | _d S )Nr!   r"      rB   r   r   r   boseosr~   rO   use_kv_cachekv_cachesuppress_blanksuppress_tokensprefixpromptdecoderconfigrs   max_attn_tokens
sample_len_get_initial_tokensinitial_tokensro   sample_beginr"   r!   no_speech_probslang_tokens)
r   r~   rO   r   r   r   r   r   r   r   r   r   r   r     s,   


z!S2SWhisperGreedySearcher.__init__c                 C   
   || _ dS z,Set the language to be used during decoding.Nr   r   r   r   r   r   set_lang_tokens     
z(S2SWhisperGreedySearcher.set_lang_tokensc                 C   2   | j | |  | _t| j| _| jd | _dS z(Set the task to be used during decoding.rB   Nr~   set_taskr   r   ro   r   r!   r   taskr   r   r   r        
z!S2SWhisperGreedySearcher.set_taskc                 C   ,   || _ |  | _t| j| _| jd | _dS z*Set the prompt to be used during decoding.rB   Nr   r   r   ro   r   r!   r   r   r   r   r   
set_prompt     
z#S2SWhisperGreedySearcher.set_promptc                 C      | j }t|trdd |dD }d|v r%dd |D }|| jj n|du s/t|dkr2g }n	t|ts;J d|| jj	| jj
| jj| jj| jjg ttt|S )	RGet the tokens to suppress during decoding if self.config.suppress_tokens is None.c                 S      g | ]}t |qS r   rI   .0tr   r   r   
<listcomp>      zCS2SWhisperGreedySearcher.get_tokens_to_suppress.<locals>.<listcomp>,rB   c                 S      g | ]}|d kr|qS r   r   r   r   r   r   r         Nr   suppress_tokens must be a listr   
isinstancestrsplitextendr~   non_speech_tokensro   list
transcribe	translater   bos_prevbos_lmtuplesortedsetr   r   r   r   r   get_tokens_to_suppress  ,   

z/S2SWhisperGreedySearcher.get_tokens_to_suppressc                 C      | j jj}| j}| j}|r9t|tr| j jjd|  ddn|}| j	dur5| j
d | j	 }|| d }|| }|rbt|trM| j jjd|  ddn|}| j jg|| j
d d  d  | }t|S z2Get the initial tokens to be used during decoding. Fadd_special_tokensNr   r@   r~   	tokenizerprefix_tokensr   r   r   r   encodestripr   r   r   r   r   tokensr   r   r   max_prefix_lenprompt_tokensr   r   r   r     8   



z,S2SWhisperGreedySearcher._get_initial_tokensc                 C   t   | j rd| _tjg| | _| jdd }t|g| |}| jdur8| j|dd| j	| j
jd f< d| _|S zJThis method set the first tokens to be decoder_input_tokens during search.NrB   r@   r   r   rF   nanr   r   rq   tor   r   r~   r   
lang_tokenr   r0   r1   memory_tokensmemr   r   r   r2     s   
z"S2SWhisperGreedySearcher.reset_memc                 C   s  t ||}| jj||| jd\}}}|jd | jkr;|dd| j| jjf 	 j
dd}	|	dd| jjf  | _|dddf }| jrI|| _| jri|jd | jkritj |dd| jjjddd| jg f< | jr| jjjdu rw| j}
n| jj}
tj |ddt|
f< |||fS 	0Performs a step in the implemented beamsearcher.)past_key_valuesr@   NrB   rC   r   Fr   )r   r~   forward_decoderr   rH   r   r   r   r   rU   r   	no_speechtolistr   r   r   rF   rW   r   r   r"   r   r   r   get_suppress_tokensr   )r   r+   r,   r(   r-   r   rE   r   kvprobs_at_bostokens_to_suppressr   r   r   r.   
  sD   

z%S2SWhisperGreedySearcher.forward_stepc                 C   s   |j d | j| j kS z0This method checks if the max length is reached.r@   )rH   r   r   r<   r   r   r   r=   2  s   z-S2SWhisperGreedySearcher._check_end_condition)rz   TTr   NNN)r   r   r   r   r   r   r   r   r   r   r   r2   r.   r=   r   r   r   r   r   r   `  s&    !'
!(r   c                       rx   )
S2SRNNGreedySearchera  
    This class implements the greedy decoding
    for AttentionalRNNDecoder (speechbrain/nnet/RNN.py).
    See also S2SBaseSearcher() and S2SGreedySearcher().

    Arguments
    ---------
    embedding : torch.nn.Module
        An embedding layer.
    decoder : torch.nn.Module
        Attentional RNN decoder.
    linear : torch.nn.Module
        A linear output layer.
    temperature : float
        The temperature to use during decoding.
    **kwargs
        see S2SBaseSearcher, arguments are directly passed.

    Example
    -------
    >>> import speechbrain as sb
    >>> from speechbrain.decoders import S2SRNNGreedySearcher
    >>> emb = torch.nn.Embedding(5, 3)
    >>> dec = sb.nnet.RNN.AttentionalRNNDecoder(
    ...     "gru", "content", 3, 3, 1, enc_dim=7, input_size=3
    ... )
    >>> lin = sb.nnet.linear.Linear(n_neurons=5, input_size=3)
    >>> searcher = S2SRNNGreedySearcher(
    ...     embedding=emb,
    ...     decoder=dec,
    ...     linear=lin,
    ...     bos_index=0,
    ...     eos_index=1,
    ...     min_decode_ratio=0,
    ...     max_decode_ratio=1,
    ... )
    >>> batch_size = 2
    >>> enc = torch.rand([batch_size, 6, 7])
    >>> wav_len = torch.ones([batch_size])
    >>> top_hyps, top_lengths, _, _ = searcher(enc, wav_len)
    rz   c                    s>   t  jdi | || _|| _|| _|| _tjjdd| _	d S NrB   rC   r   )
r   r   embdecr6   rO   rF   rR   r   r   r   	embeddingr   linearrO   r   r   r   r   r   b  s   zS2SRNNGreedySearcher.__init__c                 C   ,   d}| j j  tj|| j j|d}||fS )zcWhen doing greedy search, keep hidden state (hs) and context vector (c)
        as memory.
        NrA   r   r   resetrF   zerosattn_dimr   r0   r1   hscr   r   r   r2   j  s   zS2SRNNGreedySearcher.reset_memc                 C   sF   |\}}|  |}| j|||||\}}}}	| |}
|
||f|	fS )r   )r   r   r.   r6   )r   r+   r,   r(   r-   r  r  edec_outr7   rE   r   r   r   r.   s  s   


z!S2SRNNGreedySearcher.forward_stepr   r   r   r   r   r   r   7  s
    *	r   c                       s   e Zd ZdZ									d; fd	d
	Zdd Zdd Zdd Zdd Zdd Z	dd Z
dd Zdd Zdd Zdd Zdd  Zd!d" Zd#d$ Zd%d& Zd'd( Zd)d* Zd+d, Zd-d. Zd/d0 Zd1d2 Zd3d4 Zd5d6 Zd7d8 Zd9d: Z  ZS )<S2SBeamSearchera3  This class implements the beam-search algorithm for the seq2seq model.
    See also S2SBaseSearcher().

    Arguments
    ---------
    bos_index : int
        The index of beginning-of-sequence token.
    eos_index : int
        The index of end-of-sequence token.
    min_decode_ratio : float
        The ratio of minimum decoding steps to length of encoder states.
    max_decode_ratio : float
        The ratio of maximum decoding steps to length of encoder states.
    beam_size : int
        The width of beam.
    scorer: speechbrain.decoders.scorers.ScorerBuilder
        Scorer instance. Default: None.
    return_topk : bool
        Whether to return topk hypotheses. The topk hypotheses will be
        padded to the same length. Default: False.
    topk : int
        If return_topk is True, then return topk hypotheses. Default: 1.
    using_eos_threshold : bool
        Whether to use eos threshold. Default: True.
    eos_threshold : float
        The threshold coefficient for eos token. Default: 1.5.
        See 3.1.2 in reference: https://arxiv.org/abs/1904.02619
    length_normalization : bool
        Whether to divide the scores by the length. Default: True.
    using_max_attn_shift: bool
        Whether using the max_attn_shift constraint. Default: False.
    max_attn_shift: int
        Beam search will block the beams that attention shift more
        than max_attn_shift. Default: 60.
        Reference: https://arxiv.org/abs/1904.02619
    minus_inf : float
        The value of minus infinity to block some path
        of the search. Default: -1e20.
    NFr@   T      ?<   @xc                    s   t  |||| || _|| _|| _|| _|| _|	| _|
| _|| _	|| _
d| _d| _|| _| jd urs|r@| jjd dkr@td| jjd dkrui | jj| jj}|d j}t|||hdk rdtd| jjd | _d| j | _d S d S d S )N      ?rz   lengthz=Length normalization is not compatible with length rewarding.ctc   zMSet blank, eos and bos to different indexes for joint ATT/CTC or CTC decoding)r   r   	beam_sizescorerreturn_topktopklength_normalizationusing_eos_thresholdeos_thresholdusing_max_attn_shiftmax_attn_shiftattn_weight
ctc_weight	minus_infweights
ValueErrorfull_scorerspartial_scorersblank_indexro   )r   r!   r"   r#   r$   r  r  r  r  r  r  r  r  r  r  all_scorersr  r   r   r   r     sF   

zS2SBeamSearcher.__init__c                    s0   dd |D } fddt t|D }||kS )aV  This method checks whether hyps has been full.

        Arguments
        ---------
        hyps : List
            This list contains batch_size number.
            Each inside list contains a list stores all the hypothesis for this sentence.

        Returns
        -------
        bool
            Whether the hyps has been full.
        c                 S   r   r   ro   )r   lstr   r   r   r     r   z5S2SBeamSearcher._check_full_beams.<locals>.<listcomp>c                    s   g | ]} j qS r   )r  r   r_   r   r   r   r     s    )rN   ro   )r   rh   hyps_len
beams_sizer   r   r   _check_full_beams  s   z!S2SBeamSearcher._check_full_beamsc                 C   sD   t j|dd\}}||| j k}||| j k}|| d}||fS )a  This method checks whether attention shift is more than attn_shift.

        Arguments
        ---------
        attn : torch.Tensor
            The attention to be checked.
        prev_attn_peak : torch.Tensor
            The previous attention peak place.

        Returns
        -------
        cond : torch.BoolTensor
            Each element represents whether the beam is within the max_shift range.
        attn_peak : torch.Tensor
            The peak of the attn tensor.
        r@   rC   )rF   rZ   r  rr   )r   r   prev_attn_peakr_   	attn_peaklt_condmt_condcondr   r   r   _check_attn_shift  s
   z!S2SBeamSearcher._check_attn_shiftc                 C   s6   t j|dd\}}|dd| jf }|| j| k}|S )aI  This method checks whether eos log-probabilities exceed threshold.

        Arguments
        ---------
        log_probs : torch.Tensor
            The log-probabilities.

        Returns
        -------
        cond : torch.BoolTensor
            Each element represents whether the eos log-probabilities will be kept.
        rB   rC   N)rF   rZ   r"   r  )r   r`   	max_probsr_   	eos_probsr*  r   r   r   _check_eos_threshold
  s   z$S2SBeamSearcher._check_eos_thresholdc                 C   sV   t tj| jd| jd tj| jd| jdtj| j| jdtdd| j	ddS )zThis method initializes the AlivedHypotheses object.

        Returns
        -------
        AlivedHypotheses
            The alived hypotheses filled with the initial values.
        r   rA   -infrz   r   )
r	   rF   emptyn_bhr1   rL   rK   rU   index_fill_beam_offsetr   r   r   r   init_hypotheses  s   
zS2SBeamSearcher.init_hypothesesc                 C   s4   | j dkr| ||||\}}}| j | }|||fS )a  This method computes a forward_step if attn_weight is superior to 0.

        Arguments
        ---------
        inp_tokens : torch.Tensor
            The input tensor of the current step.
        memory : No limit
            The memory variables input for this step.
            (ex. RNN hidden states).
        enc_states : torch.Tensor
            The encoder states to be attended.
        enc_lens : torch.Tensor
            The actual length of each enc_states sequence.
        attn : torch.Tensor
            The attention weight.
        log_probs : torch.Tensor
            The log-probabilities of the current step output.

        Returns
        -------
        log_probs : torch.Tensor
            Log-probabilities of the current step output.
        memory : No limit
            The memory variables generated in this step.
            (ex. RNN hidden states).
        attn : torch.Tensor
            The attention weight.
        r   )r  r.   )r   r+   r,   r(   r-   r   r`   r   r   r   _attn_weight_step,  s   



z!S2SBeamSearcher._attn_weight_stepc                 C   s.   | j r| ||\}}t||| jd}||fS )aH  This method will block the beams that attention shift more
        than max_attn_shift.

        Arguments
        ---------
        attn : torch.Tensor
            The attention weight.
        prev_attn_peak : torch.Tensor
            The previous attention peak place.
        log_probs : torch.Tensor
            The log-probabilities of the current step output.

        Returns
        -------
        log_probs : torch.Tensor
            Log-probabilities of the current step output.
        prev_attn_peak : torch.Tensor
            The previous attention peak place.
        
fill_value)r  r+  r   r  )r   r   r&  r`   r*  r   r   r   _max_attn_shift_stepR  s   z$S2SBeamSearcher._max_attn_shift_stepc                 C   s,   | j dur| j ||||| j\}}||fS )a  This method call the scorers if scorer is not None.

        Arguments
        ---------
        inp_tokens : torch.Tensor
            The input tensor of the current step.
        scorer_memory : No limit
            The memory variables input for this step.
            (ex. RNN hidden states).
        attn : torch.Tensor
            The attention weight.
        log_probs : torch.Tensor
            The log-probabilities of the current step output.

        Returns
        -------
        log_probs : torch.Tensor
            Log-probabilities of the current step output.
        scorer_memory : No limit
            The memory variables generated in this step.
        N)r  scorer  )r   r+   scorer_memoryr   r`   r   r   r   _scorer_stepm  s
   
zS2SBeamSearcher._scorer_stepc                 C   s    ||k r| j |dd| jf< |S )a  This method set the log_probs of eos to minus infinity if the step is less than min_decode_steps.

        Arguments
        ---------
        log_probs : torch.Tensor
            The log-probabilities of the current step output.
        step : int
            The current decoding step.
        min_decode_steps : int
            The minimum decoding steps.

        Returns
        -------
        log_probs : torch.Tensor
            Log-probabilities of the current step output.
        N)r  r"   )r   r`   r^   r3   r   r   r   _set_eos_minus_inf_step  s   z'S2SBeamSearcher._set_eos_minus_inf_stepc                 C   s@   | j r| |}t|dd| jf || jd|dd| jf< |S )ax  This method set the log_probs of eos to minus infinity if the eos log-probabilities is less than eos_threshold.

        Arguments
        ---------
        log_probs : torch.Tensor
            The log-probabilities of the current step output.

        Returns
        -------
        log_probs : torch.Tensor
            Log-probabilities of the current step output.
        Nr6  )r  r.  r   r"   r  )r   r`   r*  r   r   r   _eos_threshold_step  s   
z#S2SBeamSearcher._eos_threshold_stepc                 C   s   | j dkr| j||d}|S )a  This method permute the memory if attn_weight is superior to 0.

        Arguments
        ---------
        memory : No limit
            The memory variables input for this step.
            (ex. RNN hidden states).
        predecessors : torch.Tensor
            The index of which beam the current top-K output came from in (t-1) steps.

        Returns
        -------
        memory : No limit
            The memory variables generated in this step.
            (ex. RNN hidden states).
        r   )r   )r  permute_mem)r   r,   predecessorsr   r   r    _attn_weight_permute_memory_step  s   
z0S2SBeamSearcher._attn_weight_permute_memory_stepc                 C   s    | j dur| j j|||d}|S )a1  This method permute the scorer_memory if scorer is not None.

        Arguments
        ---------
        scorer_memory : No limit
            The memory variables input for this step.
            (ex. RNN hidden states).
        predecessors : torch.Tensor
            The index of which beam the current top-K output came from in (t-1) steps.
        candidates : torch.Tensor
            The index of the current top-K output.

        Returns
        -------
        scorer_memory : No limit
            The memory variables generated in this step.
        N)r   
candidates)r  permute_scorer_mem)r   r:  r?  rA  r   r   r   _scorer_permute_memory_step  s
   
z+S2SBeamSearcher._scorer_permute_memory_stepc                 C   s   | j rtj|d|d}|S )a  This method permute the prev_attn_peak if using_max_attn_shift is True.

        Arguments
        ---------
        prev_attn_peak : torch.Tensor
            The previous attention peak place.
        predecessors : torch.Tensor
            The index of which beam the current top-K output came from in (t-1) steps.

        Returns
        -------
        prev_attn_peak : torch.Tensor
            The previous attention peak place.
        r   rD   r   )r  rF   index_select)r   r&  r?  r   r   r   #_max_attn_shift_permute_memory_step  s
   z3S2SBeamSearcher._max_attn_shift_permute_memory_stepc                 C   s6   | j | j| jd}d}| jdur| j||}||fS )a  Call reset memory for each module.

        Arguments
        ---------
        enc_states : torch.Tensor
            The encoder states to be attended.
        enc_lens : torch.Tensor
            The actual length of each enc_states sequence.

        Returns
        -------
        memory : No limit
            The memory variables generated in this step.
        scorer_memory : No limit
            The memory variables generated in this step.
        rA   N)r2   r1  r1   r  reset_scorer_mem)r   r(   r-   r,   r:  r   r   r   _update_reset_memory  s
   
z$S2SBeamSearcher._update_reset_memoryc                 C   s0   |  ||}| |||}| ||}|||fS )a  Call permute memory for each module. It allows us to synchronize the memory with the output.

        Arguments
        ---------
        memory : No limit
            The memory variables input for this step.
            (ex. RNN hidden states).
        scorer_memory : No limit
            The memory variables input for this step.
            (ex. RNN hidden states).
        predecessors : torch.Tensor
            The index of which beam the current top-K output came from in (t-1) steps.
        candidates : torch.Tensor
            The index of the current top-K output.
        prev_attn_peak : torch.Tensor
            The previous attention peak place.

        Returns
        -------
        memory : No limit
            The memory variables generated in this step.
        scorer_memory : No limit
            The memory variables generated in this step.
        prev_attn_peak : torch.Tensor
            The previous attention peak place.
        )r@  rC  rF  )r   r,   r:  r?  rA  r&  r   r   r   _update_permute_memory  s   
z&S2SBeamSearcher._update_permute_memoryc                 C   sv   t jt j|jd|d|dgdd|_|t | jd|f | j}t jt j|j	d|d|dgdd|_	|S )a  This method update sequences and log probabilities by adding the new inp_tokens.

        Arguments
        ---------
        log_probs : torch.Tensor
            The log-probabilities of the current step output.
        inp_tokens : torch.Tensor
            The input tensor of the current step.
        predecessors : torch.Tensor
            The index of which beam the current top-K output came from in (t-1) steps.
        candidates : torch.Tensor
            The index of the current top-K output.
        alived_hyps : AlivedHypotheses
            The alived hypotheses.

        Returns
        -------
        alived_hyps : AlivedHypotheses
            The alived hypotheses.
        r   rD  r@   rB   rC   )
rF   catrE  r   rr   aranger0   reshaper1  r   )r   r`   r+   r?  rA  alived_hypsbeam_log_probsr   r   r   _update_sequences_and_log_probs7  s,   
z/S2SBeamSearcher._update_sequences_and_log_probsc                 C   s   |j dd| j}|| }| jr||d  }|| jdj| jdd\}}|| j | j	}|| j	}||_ | jrD|j |d  |_ t
j|| jdd| jd| | j	}|||||fS )a  Compute scores and next input tokens.

        Arguments
        ---------
        alived_hyps : AlivedHypotheses
            The alived hypotheses.
        log_probs : torch.Tensor
            The log-probabilities of the current step output.
        step : int
            The current decoding step.

        Returns
        -------
        scores : torch.Tensor
            The scores of the current step output.
        candidates : torch.Tensor
            The index of the current top-K output.
        predecessors : torch.Tensor
            The index of which beam the current top-K output came from in (t-1) steps.
        inp_tokens : torch.Tensor
            The input tensor of the current step.
        alived_hyps : AlivedHypotheses
            The alived hypotheses.
        r@   rB   rC   floorrounding_mode)r   rr   expandn_outr  viewr0   r  r  r1  rF   divr3  	expand_as)r   rM  r`   r^   ra   rA  r+   r?  r   r   r   #_compute_scores_and_next_inp_tokensk  s2   
z3S2SBeamSearcher._compute_scores_and_next_inp_tokensc              
   C   s~  t |jd |  }|j| _|jd | _| j| j | _|  | _	| 
||\}}t|| jdd}t|| jdd}t j| j| jd| j }t j| j| jd| j | _t j| j| jd| j}|d| jd dd t| jD }t|jd | j | _t|jd | j | _| | j| j\| _| _t j| j| jd}	d}
t j| j| j	fd| jd}|  }|||||||
|	||f
S )	a  Initialize the beam search data.

        Arguments
        ---------
        enc_states : torch.Tensor
            The encoder states to be attended.
        wav_len : torch.Tensor
            The actual length of each enc_states sequence.

        Returns
        -------
        alived_hyps : AlivedHypotheses
            The alived hypotheses.
        inp_tokens : torch.Tensor
            The input tensor of the current step.
        log_probs : torch.Tensor
            The log-probabilities of the current step output.
        eos_hyps_and_log_probs_scores : list
            Generated hypotheses (the ones that have reached eos) and log probs scores.
        memory : No limit
            The memory variables generated in this step.
        scorer_memory : No limit
            The memory variables generated in this step.
        attn : torch.Tensor
            The attention weight.
        prev_attn_peak : torch.Tensor
            The previous attention peak place.
        enc_states : torch.Tensor
            The encoder states to be attended.
        enc_lens : torch.Tensor
            The actual length of each enc_states sequence.
        r@   r   )timesrD   rA   rz   c                 S   s   g | ]}g qS r   r   r"  r   r   r   r     s    z9S2SBeamSearcher.init_beam_search_data.<locals>.<listcomp>N)rF   rG   rH   rI   r1   r0   r  r1  r9   rT  rH  r   r   rK   r!   rL   rK  r3  r0  r  r2  rN   r#   r3   r$   r4   r5   fullr4  )r   r(   r)   r-   r,   r:  r+   r   eos_hyps_and_log_probs_scoresr&  r   r`   rM  r   r   r   init_beam_search_data  sT   !
z%S2SBeamSearcher.init_beam_search_datac                 C   s   | | j}tj|dd\}|jd dkrS|D ];}| }tj|| jdd}t|| | jkr0q|j	|ddf }	|j
|ddf }
||  }|| |	|
|f q|S )as  This method will update hyps and scores if inp_tokens are eos.

        Arguments
        ---------
        inp_tokens : torch.Tensor
            The current output.
        alived_hyps : AlivedHypotheses
            alived_seq : torch.Tensor
            alived_log_probs : torch.Tensor
        eos_hyps_and_log_probs_scores : list
            Generated hypotheses (the ones that have reached eos) and log probs scores.
        scores : torch.Tensor
            Scores at the current step.

        Returns
        -------
        is_eos : torch.BoolTensor
            Each element represents whether the token is eos.
        Tri   r   rP  rQ  N)eqr"   rF   rn   rH   rp   rV  r  ro   r   r   clonerV   )r   r+   rM  r[  ra   is_eoseos_indicesr   batch_idhypr`   final_scoresr   r   r   $_update_hyps_and_scores_if_eos_token  s&   
z4S2SBeamSearcher._update_hyps_and_scores_if_eos_tokenc                 C   sl  g g g g f\}}}}t |}tt |D ] }t||  \}}	}
||7 }||
7 }||	7 }|dd |D 7 }qtjjjj|ddd}tjjjj|ddd}tj|tj	|j
d}tj|dd|d}|d	 |d	 }|j| jdd\}}|| jd	 || j }tj|d|d
}||| jd}tj|d|d
}||| j}tj|d|d
}||| jd}||||fS )a"  This method sorts the scores and return corresponding hypothesis and log probs.

        Arguments
        ---------
        eos_hyps_and_log_probs_scores : list
            Generated hypotheses (the ones that have reached eos) and log probs scores.

        Returns
        -------
        topk_hyps : torch.Tensor (batch, topk, max length of token_id sequences)
            This tensor stores the topk predicted hypothesis.
        topk_lengths : torch.Tensor (batch, topk)
            This tensor contains the final scores of topk hypotheses.
        topk_scores : torch.Tensor (batch, topk)
            The length of each topk sequence in the batch.
        topk_log_probs : torch.Tensor (batch, topk, max length of token_id sequences)
            The log probabilities of each hypotheses.
        c                 S   r   r   r   )r   rb  r   r   r   r   ]  r   z8S2SBeamSearcher._get_topk_prediction.<locals>.<listcomp>Tr   )batch_firstpadding_valuerk   rC   rB   r@   rD  )ro   rN   ziprF   rR   utilsrnnpad_sequencerq   rU   r1   rY   rU  rm   r  r3  rr   rE  )r   r[  rd   rg   rf   re   r0   irh   r`   ra   topk_scoresindices	topk_hypstopk_lengthstopk_log_probsr   r   r   _get_topk_predictionA  s<   


z$S2SBeamSearcher._get_topk_predictionc              	   C   s   |  |||	|
||\}}}| | jd}| |||\}}| ||| j}| |}| ||||\}}| 	|||\}}}}}| 
|||||\}}}| |||||}| ||||}|j|td |||||||||f	S )a|  A search step for the next most likely tokens.

        Arguments
        ---------
        alived_hyps : AlivedHypotheses
            The alived hypotheses.
        inp_tokens : torch.Tensor
            The input tensor of the current step.
        log_probs : torch.Tensor
            The log-probabilities of the current step output.
        eos_hyps_and_log_probs_scores : list
            Generated hypotheses (the ones that have reached eos) and log probs scores.
        memory : No limit
            The memory variables input for this step.
            (ex. RNN hidden states).
        scorer_memory : No limit
            The memory variables input for this step.
            (ex. RNN hidden states).
        attn : torch.Tensor
            The attention weight.
        prev_attn_peak : torch.Tensor
            The previous attention peak place.
        enc_states : torch.Tensor
            The encoder states to be attended.
        enc_lens : torch.Tensor
            The actual length of each enc_states sequence.
        step : int
            The current decoding step.

        Returns
        -------
        alived_hyps : AlivedHypotheses
            The alived hypotheses.
        inp_tokens : torch.Tensor
            The input tensor of the current step.
        log_probs : torch.Tensor
            The log-probabilities of the current step output.
        eos_hyps_and_log_probs_scores : list
            Generated hypotheses (the ones that have reached eos) and log probs scores.
        memory : No limit
            The memory variables generated in this step.
        scorer_memory : No limit
            The memory variables generated in this step.
        attn : torch.Tensor
            The attention weight.
        prev_attn_peak : torch.Tensor
            The previous attention peak place.
        scores : torch.Tensor
            The scores of the current step output.
        rB   r/  )r5  r^  rL  r0   r8  r<  r3   r=  r;  rX  rI  rO  rd  r   masked_fill_rU   )r   rM  r+   r`   r[  r,   r:  r   r&  r(   r-   r^   log_probs_clonera   rA  r?  r_  r   r   r   search_step}  sV   @





zS2SBeamSearcher.search_stepc                 C   s<   |  |stj| j| jd| j }| |||| |S )a,  Fill the alived_hyps that have not reached eos with eos.

        Arguments
        ---------
        alived_hyps : AlivedHypotheses
            The alived hypotheses.
        eos_hyps_and_log_probs_scores : list
            Generated hypotheses (the ones that have reached eos) and log probs scores.
        scores : torch.Tensor
            The scores of the current step output.

        Returns
        -------
        eos_hyps_and_log_probs_scores : list
            Generated hypotheses (the ones that have reached eos) and log probs scores.
        rA   )	r%  rF   r   r1  r1   rK   r"   rL   rd  )r   rM  r[  ra   r+   r   r   r    _fill_alived_hyps_with_eos_token  s   
z0S2SBeamSearcher._fill_alived_hyps_with_eos_tokenc                 C   s  |  ||\
}}}}}}}	}
}}t| jD ](}| |r n | |||||||	|
|||\	}}}}}}}	}
}| |r= nq| |||}| |\}}}}| jrW||||fS |dddddf }|dddf }|dddf }|dddddf }t	||}||||fS )al  Applies beamsearch and returns the predicted tokens.

        Arguments
        ---------
        enc_states : torch.Tensor
            The encoder states to be attended.
        wav_len : torch.Tensor
            The actual length of each enc_states sequence.

        Returns
        -------
        hyps : list
            The predicted tokens.
        best_lens : torch.Tensor
            The length of each predicted tokens.
        best_scores : torch.Tensor
            The scores of each predicted tokens.
        best_log_probs : torch.Tensor
            The log probabilities of each predicted tokens.
        Nr   )
r\  rN   r4   r%  rt  r=   ru  rq  r  r   )r   r(   r)   rM  r+   r`   r[  r,   r:  r   r&  r-   r^   ra    finals_hyps_and_log_probs_scoresrn  ro  rl  rp  	best_hyps	best_lensbest_scoresbest_log_probsrh   r   r   r   r*     sv   
 


zS2SBeamSearcher.forwardc                 C   r:   r;   r   r   rM  r   r   r   r=   t  r>   z$S2SBeamSearcher._check_end_conditionc                 C   r%   )a  This method permutes the seq2seq model memory
        to synchronize the memory index with the current output.

        Arguments
        ---------
        memory : No limit
            The memory variable to be permuted.
        index : torch.Tensor
            The index of the previous path.

        Returns
        -------
        The variable of the memory being permuted.
        r&   r   r,   r   r   r   r   r>  |  s   zS2SBeamSearcher.permute_mem)	NFr@   Tr  TFr  r	  )r   r   r   r   r   r%  r+  r.  r4  r5  r8  r;  r<  r=  r@  rC  rF  rH  rI  rO  rX  r\  rd  rq  rt  ru  r*   r=   r>  r   r   r   r   r   r  ~  sH    /6&*4?h/<z ]r  c                       :   e Zd ZdZd fdd	Zdd Zdd Zd	d
 Z  ZS )S2SRNNBeamSearcheraf  
    This class implements the beam search decoding
    for AttentionalRNNDecoder (speechbrain/nnet/RNN.py).
    See also S2SBaseSearcher(), S2SBeamSearcher().

    Arguments
    ---------
    embedding : torch.nn.Module
        An embedding layer.
    decoder : torch.nn.Module
        Attentional RNN decoder.
    linear : torch.nn.Module
        A linear output layer.
    temperature : float
        Temperature factor applied to softmax. It changes the probability
        distribution, being softer when T>1 and sharper with T<1.
    **kwargs
        see S2SBeamSearcher, arguments are directly passed.

    Example
    -------
    >>> import speechbrain as sb
    >>> vocab_size = 5
    >>> emb = torch.nn.Embedding(vocab_size, 3)
    >>> dec = sb.nnet.RNN.AttentionalRNNDecoder(
    ...     "gru", "content", 3, 3, 1, enc_dim=7, input_size=3
    ... )
    >>> lin = sb.nnet.linear.Linear(n_neurons=vocab_size, input_size=3)
    >>> coverage_scorer = sb.decoders.scorer.CoverageScorer(vocab_size)
    >>> scorer = sb.decoders.scorer.ScorerBuilder(
    ...     full_scorers = [coverage_scorer],
    ...     partial_scorers = [],
    ...     weights= dict(coverage=1.5)
    ... )
    >>> searcher = S2SRNNBeamSearcher(
    ...     embedding=emb,
    ...     decoder=dec,
    ...     linear=lin,
    ...     bos_index=4,
    ...     eos_index=4,
    ...     min_decode_ratio=0,
    ...     max_decode_ratio=1,
    ...     beam_size=2,
    ...     scorer=scorer,
    ... )
    >>> batch_size = 2
    >>> enc = torch.rand([batch_size, 6, 7])
    >>> wav_len = torch.ones([batch_size])
    >>> hyps, _, _, _ = searcher(enc, wav_len)
    r
  c                    s>   t  jdi | || _|| _|| _tjjdd| _|| _	d S r   )
r   r   r   r   r6   rF   rR   r   r   rO   r   r   r   r   r     s   
zS2SRNNBeamSearcher.__init__c                 C   r   )-Needed to reset the memory during beamsearch.NrA   r   r  r   r   r   r2     s   zS2SRNNBeamSearcher.reset_memc                 C   s   t  7 |\}}| |}| j|||||\}}}}	| | || j }
| jjdkr4t j	|	dd}	W d   n1 s>w   Y  |
||f|	fS )r   multiheadlocationr@   rC   N)
rF   rw   r   r   r.   r   r6   rO   	attn_typemean)r   r+   r,   r(   r-   r  r  r  r  r7   r`   r   r   r   r.     s   



zS2SRNNBeamSearcher.forward_stepc                 C   s   |\}}t |tr"tj|d d|d}tj|d d|d}||f}ntj|d|d}tj|d|d}| jjdkrFtj| jjjd|d| jj_||fS )%Memory permutation during beamsearch.r   r@   rD  location)r   r   rF   rE  r   r  r   	prev_attn)r   r,   r   r  r  hs_0hs_1r   r   r   r>    s   

zS2SRNNBeamSearcher.permute_memr
  )	r   r   r   r   r   r2   r.   r>  r   r   r   r   r   r~    s    3r~  c                       r}  )S2STransformerBeamSearchera  This class implements the beam search decoding
    for Transformer.
    See also S2SBaseSearcher(), S2SBeamSearcher().

    Arguments
    ---------
    modules : list with the following one:
        model : torch.nn.Module
            A Transformer model.
        seq_lin : torch.nn.Module
            A linear output layer.
    temperature : float
        Temperature factor applied to softmax. It changes the probability
        distribution, being softer when T>1 and sharper with T<1.
    **kwargs
        Arguments to pass to S2SBeamSearcher

    Example
    -------
    >>> from speechbrain.nnet.linear import Linear
    >>> from speechbrain.lobes.models.transformer.TransformerASR import TransformerASR
    >>> from speechbrain.decoders import S2STransformerBeamSearcher
    >>> batch_size=8
    >>> n_channels=6
    >>> input_size=40
    >>> d_model=128
    >>> tgt_vocab=140
    >>> src = torch.rand([batch_size, n_channels, input_size])
    >>> tgt = torch.randint(0, tgt_vocab, [batch_size, n_channels])
    >>> net = TransformerASR(
    ...    tgt_vocab, input_size, d_model, 8, 1, 1, 1024, activation=torch.nn.GELU
    ... )
    >>> ctc_lin = Linear(input_shape=(1, 40, d_model), n_neurons=tgt_vocab)
    >>> lin = Linear(input_shape=(1, 40, d_model), n_neurons=tgt_vocab)
    >>> searcher = S2STransformerBeamSearcher(
    ...     modules=[net, lin],
    ...     bos_index=1,
    ...     eos_index=2,
    ...     min_decode_ratio=0.0,
    ...     max_decode_ratio=1.0,
    ...     using_eos_threshold=False,
    ...     beam_size=7,
    ...     temperature=1.15,
    ... )
    >>> enc, dec = net.forward(src, tgt)
    >>> hyps, _, _, _  = searcher(enc, torch.ones(batch_size))
    r
  c                    r{   r|   r}   r   r   r   r   r   %  r   z#S2STransformerBeamSearcher.__init__c                 C   r:   )r  Nr   r/   r   r   r   r2   .  r   z$S2STransformerBeamSearcher.reset_memc                 C   s   t j|d|d}|S )r  r   rD  )rF   rE  r|  r   r   r   r>  2  s   z&S2STransformerBeamSearcher.permute_memc                 C   sP   t ||}| j|||\}}| | || j }|dddddf ||fS r   NrB   )r   r~   r   r   r6   rO   r   r+   r,   r(   r-   ru   r   	prob_distr   r   r   r.   7  s   
z'S2STransformerBeamSearcher.forward_stepr  )	r   r   r   r   r   r2   r>  r.   r   r   r   r   r   r    s    0	r  c                       s   e Zd ZdZ							d fdd	Zdd	 Zd
d Zdd Zedd Z	dd Z
dd Zdd Zdd Zdd Zdd Zdd Z  ZS )S2SWhisperBeamSearchera  This class implements the beam search decoding
    for Whisper neural nets made by OpenAI in
    https://cdn.openai.com/papers/whisper.pdf.

    The beam search is stateful, meaning that some variables are stored
    in the searcher. If you want to reuse the searcher in different
    contexts, you should make sure that the variables are updated
    accordingly.

    Arguments
    ---------
    module : list with the following one:
        model : torch.nn.Module
            A whisper model. It should have a decode() method.
    temperature: float
        The temperature to use during decoding.
    use_kv_cache: bool (default: True)
        Whether to use key-value cache.
    suppress_blank: bool (default: True)
        This will suppress blank outputs.
    suppress_tokens: str or list (default: "-1")
        list of tokens ids (or comma-separated token ids) to suppress
        "-1" will suppress a set of symbols as defined in `model.non_speech_tokens()`
    sample_len: int (default: None)
        Maximum number of tokens to sample.
    prefix: str or list (default: None)
        Prefix to add to the input tokens.
        See: https://github.com/openai/whisper/discussions/117#discussioncomment-3727051
    prompt: str or list (default: None)
        Prompt to add to the input tokens.
        See: https://github.com/openai/whisper/discussions/117#discussioncomment-3727051
    **kwargs
        see S2SBeamSearcher, arguments are directly passed.
    r
  Tr   Nc	           
         s   t  jd|d j|d jd|	 |d | _|| _|| _d | _|| _|| _	|| _
|| _| jjjjj| _|p:| jd | _|  | _t| j| _| jj| _| jd | _d | _d | _d S )Nr   r   r   rB   r   r   )
r   modulerO   r   r   r   r   r   r   r   r   r   r   r   c  s,   



zS2SWhisperBeamSearcher.__init__c                 C   r   r   r   r   r   r   r   r     r   z&S2SWhisperBeamSearcher.set_lang_tokensc                 C   r   r   r   r   r   r   r   r     r   zS2SWhisperBeamSearcher.set_taskc                 C   r   r   r   r   r   r   r   r     r   z!S2SWhisperBeamSearcher.set_promptc                 C   r   )	r   c                 S   r   r   r   r   r   r   r   r     r   zAS2SWhisperBeamSearcher.get_tokens_to_suppress.<locals>.<listcomp>r   rB   c                 S   r   r   r   r   r   r   r   r     r   Nr   r   r   r   r   r   r   r     r   z-S2SWhisperBeamSearcher.get_tokens_to_suppressc                 C   r   r   r   r   r   r   r   r     r   z*S2SWhisperBeamSearcher._get_initial_tokensc                 C   r   r   r   r   r   r   r   r2     s   
z S2SWhisperBeamSearcher.reset_memc                 C   s*   t j|d|d}| jr| | j|| _|S )zPermutes the memory.r   rD  )rF   rE  r   _reorder_cacher   r|  r   r   r   r>    s   z"S2SWhisperBeamSearcher.permute_memc                    s.   d}|D ]}|t  fdd|D f7 }q|S )a  Reorder the key-value cache.

        Arguments
        ---------
        past_key_values : tuple
            The key-value cache.
        beam_idx : torch.Tensor
            The index of the previous path.

        Returns
        -------
        The reordered key-value cache.
        r   c                 3   s    | ]	}| d  V  qdS )r   N)rE  )r   
past_statebeam_idxr   r   	<genexpr>  s
    

z8S2SWhisperBeamSearcher._reorder_cache.<locals>.<genexpr>)r   )r   r   r  reordered_past
layer_pastr   r  r   r    s   z%S2SWhisperBeamSearcher._reorder_cachec                 C   s   | j j jjjjd S ) set the number of output tokens.r   )r~   r   embed_tokensweightrH   r   r   r   r   r9     s   z S2SWhisperBeamSearcher.set_n_outc                 C   s4  t ||}| jj||| jd\}}}|jd | jkr;|dd| j| jjf 	 j
dd}	|	dd| jjf  | _|dddf }| jrI|| _| jri|jd | jkritj |dd| jjjddd| jg f< | jr| jjjdu rw| j}
n| jj}
tj |ddt|
f< tjjj|	 dd| j }|||fS r   )r   r~   r   r   rH   r   r   r   r   rU   r   r   r   r   r   r   rF   rW   r   r   r"   r   r   r   r   r   rR   rS   rT   rO   )r   r+   r,   r(   r-   r   rE   r   r   r   r   r`   r   r   r   r.     sL   

z#S2SWhisperBeamSearcher.forward_stepc                 C   s   |j jd | j| j kS r   )r   rH   r   r   r{  r   r   r   r=   @  s   

z+S2SWhisperBeamSearcher._check_end_condition)r
  TTr   NNN)r   r   r   r   r   r   r   r   r   r   r   r2   r>  r  r9   r.   r=   r   r   r   r   r   r  ?  s,    &'
!-r  c                       r   )S2SHFTextBasedBeamSearchera  This class implements the beam search decoding
    for the text-based HF seq2seq models, such as mBART or NLLB.
    It is NOT significantly different from S2STransformerBeamSearcher.
    This is why it inherits S2STransformerBeamSearcher.
    The main difference might arise when one wishes to use directly
    the lm_head of the text-based HF model rather than making a new
    projection layer (self.fc = None).

    Arguments
    ---------
    modules : list with the following one:
        model : torch.nn.Module
            A Transformer model.
        seq_lin : torch.nn.Module
            A linear output layer.
            Normally set to None for this usecase.
    vocab_size : int
        The dimension of the lm_head.
    **kwargs
        Arguments to pass to S2SBeamSearcher
    c                    s   t  j|fi | || _d S r
   )r   r   
vocab_size)r   r   r  r   r   r   r   r   _  s   
z#S2SHFTextBasedBeamSearcher.__init__c                 C   s^   t ||}| j|||\}}| jdur| |}| || j }|dddddf ||fS r  )r   r~   r   r6   r   rO   r  r   r   r   r.   c  s   


z'S2SHFTextBasedBeamSearcher.forward_stepc                 C   s   | j S )r  )r  r   r   r   r   r9   l  s   z$S2SHFTextBasedBeamSearcher.set_n_out)r   r   r   r   r   r.   r9   r   r   r   r   r   r  H  s
    	r  )r   	functoolsr   rF   torch.distributionsr   speechbrain.decoders.utilsr   r   r   speechbrain.utils.data_utilsr   rR   Moduler	   r    r?   ry   r   r   r  r~  r  r  r  r   r   r   r   <module>   s:    
x & XG        eK  