o
    %ݫi                    @   s   d Z ddlZddlZddlZddlmZ G dd dZG dd deZ	G dd	 d	eZ
G d
d deZG dd deZG dd deZG dd deZG dd dZG dd deZG dd deZG dd deZG dd deZG dd dZdS )zg
Token scorer abstraction and specifications.

Authors:
 * Adel Moumen 2022, 2023
 * Sung-Lin Yeh 2021
    N)CTCPrefixScorec                   @   s(   e Zd ZdZdd Zdd Zdd ZdS )	BaseScorerInterfacea
  A scorer abstraction to be inherited by other
    scoring approaches for beam search.

    A scorer is a module that scores tokens in vocabulary
    based on the current timestep input and the previous
    scorer states. It can be used to score on full vocabulary
    set (i.e., full scorers) or a pruned set of tokens (i.e. partial scorers)
    to prevent computation overhead. In the latter case, the partial scorers
    will be called after the full scorers. It will only scores the
    top-k candidates (i.e., pruned set of tokens) extracted from the full scorers.
    The top-k candidates are extracted based on the beam size and the
    scorer_beam_scale such that the number of candidates is
    int(beam_size * scorer_beam_scale). It can be very useful
    when the full scorers are computationally expensive (e.g., KenLM scorer).

    Inherit this class to implement your own scorer compatible with
    speechbrain.decoders.seq2seq.S2SBeamSearcher().

    See:
        - speechbrain.decoders.scorer.CTCPrefixScorer
        - speechbrain.decoders.scorer.RNNLMScorer
        - speechbrain.decoders.scorer.TransformerLMScorer
        - speechbrain.decoders.scorer.KenLMScorer
        - speechbrain.decoders.scorer.CoverageScorer
        - speechbrain.decoders.scorer.LengthScorer
    c                 C      t )a  This method scores the new beams based on the
        information of the current timestep.

        A score is a tensor of shape (batch_size x beam_size, vocab_size).
        It is the log probability of the next token given the current
        timestep input and the previous scorer states.

        It can be used to score on pruned top-k candidates
        to prevent computation overhead, or on full vocabulary set
        when candidates is None.

        Arguments
        ---------
        inp_tokens : torch.Tensor
            The input tensor of the current timestep.
        memory : No limit
            The scorer states for this timestep.
        candidates : torch.Tensor
            (batch_size x beam_size, scorer_beam_size).
            The top-k candidates to be scored after the full scorers.
            If None, scorers will score on full vocabulary set.
        attn : torch.Tensor
            The attention weight to be used in CoverageScorer or CTCScorer.

        Returns
        -------
        torch.Tensor
            (batch_size x beam_size, vocab_size), Scores for the next tokens.
        memory : No limit
            The memory variables input for this timestep.
        NotImplementedErrorself
inp_tokensmemory
candidatesattn r   O/home/ubuntu/.local/lib/python3.10/site-packages/speechbrain/decoders/scorer.pyscore,   s    zBaseScorerInterface.scorec                 C      dS )as  This method permutes the scorer memory to synchronize
        the memory index with the current output and perform
        batched beam search.

        Arguments
        ---------
        memory : No limit
            The memory variables input for this timestep.
        index : torch.Tensor
            (batch_size, beam_size). The index of the previous path.
        Nr   r   r
   indexr   r   r   permute_memO      zBaseScorerInterface.permute_memc                 C   r   )ay  This method should implement the resetting of
        memory variables for the scorer.

        Arguments
        ---------
        x : torch.Tensor
            The precomputed encoder states to be used when decoding.
            (ex. the encoded speech representation to be attended).
        enc_lens : torch.Tensor
            The speechbrain-style relative length.
        Nr   r   xenc_lensr   r   r   	reset_mem]   r   zBaseScorerInterface.reset_memN)__name__
__module____qualname____doc__r   r   r   r   r   r   r   r      s
    #r   c                   @   2   e Zd ZdZdddZdd Zdd Zd	d
 ZdS )	CTCScorera  A wrapper of CTCPrefixScore based on the BaseScorerInterface.

    This Scorer is used to provides the CTC label-synchronous scores
    of the next input tokens. The implementation is based on
    https://www.merl.com/publications/docs/TR2017-190.pdf.

    See:
        - speechbrain.decoders.scorer.CTCPrefixScore

    Arguments
    ---------
    ctc_fc : torch.nn.Module
        A output linear layer for ctc.
    blank_index : int
        The index of the blank token.
    eos_index : int
        The index of the end-of-sequence (eos) token.
    ctc_window_size : int
        Compute the ctc scores over the time frames using windowing
        based on attention peaks. If 0, no windowing applied. (default: 0)

    Example
    -------
    >>> import torch
    >>> from speechbrain.nnet.linear import Linear
    >>> from speechbrain.lobes.models.transformer.TransformerASR import TransformerASR
    >>> from speechbrain.decoders import S2STransformerBeamSearcher, CTCScorer, ScorerBuilder
    >>> batch_size=8
    >>> n_channels=6
    >>> input_size=40
    >>> d_model=128
    >>> tgt_vocab=140
    >>> src = torch.rand([batch_size, n_channels, input_size])
    >>> tgt = torch.randint(0, tgt_vocab, [batch_size, n_channels])
    >>> net = TransformerASR(
    ...    tgt_vocab, input_size, d_model, 8, 1, 1, 1024, activation=torch.nn.GELU
    ... )
    >>> ctc_lin = Linear(input_shape=(1, 40, d_model), n_neurons=tgt_vocab)
    >>> lin = Linear(input_shape=(1, 40, d_model), n_neurons=tgt_vocab)
    >>> eos_index = 2
    >>> ctc_scorer = CTCScorer(
    ...    ctc_fc=ctc_lin,
    ...    blank_index=0,
    ...    eos_index=eos_index,
    ... )
    >>> scorer = ScorerBuilder(
    ...     full_scorers=[ctc_scorer],
    ...     weights={'ctc': 1.0}
    ... )
    >>> searcher = S2STransformerBeamSearcher(
    ...     modules=[net, lin],
    ...     bos_index=1,
    ...     eos_index=eos_index,
    ...     min_decode_ratio=0.0,
    ...     max_decode_ratio=1.0,
    ...     using_eos_threshold=False,
    ...     beam_size=7,
    ...     temperature=1.15,
    ...     scorer=scorer
    ... )
    >>> enc, dec = net.forward(src, tgt)
    >>> hyps, _, _, _ = searcher(enc, torch.ones(batch_size))
    r   c                 C   s.   || _ || _|| _|| _tjjjdd| _d S NT)	apply_log)	ctc_fcblank_index	eos_indexctc_window_sizesbnnetactivationsSoftmaxsoftmax)r   r!   r"   r#   r$   r   r   r   __init__   s
   zCTCScorer.__init__c                 C   s   | j ||||\}}||fS )a  This method scores the new beams based on the
        CTC scores computed over the time frames.

        See:
            - speechbrain.decoders.scorer.CTCPrefixScore

        Arguments
        ---------
        inp_tokens : torch.Tensor
            The input tensor of the current timestep.
        memory : No limit
            The scorer states for this timestep.
        candidates : torch.Tensor
            (batch_size x beam_size, scorer_beam_size).
            The top-k candidates to be scored after the full scorers.
            If None, scorers will score on full vocabulary set.
        attn : torch.Tensor
            The attention weight to be used in CoverageScorer or CTCScorer.

        Returns
        -------
        scores : torch.Tensor
        memory
        )	ctc_scoreforward_step)r   r	   r
   r   r   scoresr   r   r   r      s   zCTCScorer.scorec                 C   s   | j ||\}}||fS )a  This method permutes the scorer memory to synchronize
        the memory index with the current output and perform
        batched CTC beam search.

        Arguments
        ---------
        memory : No limit
            The memory variables input for this timestep.
        index : torch.Tensor
            (batch_size, beam_size). The index of the previous path.

        Returns
        -------
        r, psi : see ``ctc_score.permute_mem``
        )r+   r   )r   r
   r   rpsir   r   r   r      s   zCTCScorer.permute_memc                 C   s0   |  |}| |}t||| j| j| j| _dS )av  This method implement the resetting of
        memory variables for the CTC scorer.

        Arguments
        ---------
        x : torch.Tensor
            The precomputed encoder states to be used when decoding.
            (ex. the encoded speech representation to be attended).
        enc_lens : torch.Tensor
            The speechbrain-style relative length.
        N)r!   r)   r   r"   r#   r$   r+   )r   r   r   logitsr   r   r   r      s
   


zCTCScorer.reset_memNr   r   r   r   r   r*   r   r   r   r   r   r   r   r   l   s    
@r   c                   @   r   )RNNLMScoreraJ	  A wrapper of RNNLM based on BaseScorerInterface.

    The RNNLMScorer is used to provide the RNNLM scores of the next input tokens
    based on the current timestep input and the previous scorer states.

    Arguments
    ---------
    language_model : torch.nn.Module
        A RNN-based language model.
    temperature : float
        Temperature factor applied to softmax. It changes the probability
        distribution, being softer when T>1 and sharper with T<1. (default: 1.0)

    Example
    -------
    >>> from speechbrain.nnet.linear import Linear
    >>> from speechbrain.lobes.models.RNNLM import RNNLM
    >>> from speechbrain.nnet.RNN import AttentionalRNNDecoder
    >>> from speechbrain.decoders import S2SRNNBeamSearcher, RNNLMScorer, ScorerBuilder
    >>> input_size=17
    >>> vocab_size=11
    >>> emb = torch.nn.Embedding(
    ...     embedding_dim=input_size,
    ...     num_embeddings=vocab_size,
    ... )
    >>> d_model=7
    >>> dec = AttentionalRNNDecoder(
    ...     rnn_type="gru",
    ...     attn_type="content",
    ...     hidden_size=3,
    ...     attn_dim=3,
    ...     num_layers=1,
    ...     enc_dim=d_model,
    ...     input_size=input_size,
    ... )
    >>> n_channels=3
    >>> seq_lin = Linear(input_shape=[d_model, n_channels], n_neurons=vocab_size)
    >>> lm_weight = 0.4
    >>> lm_model = RNNLM(
    ...     embedding_dim=d_model,
    ...     output_neurons=vocab_size,
    ...     dropout=0.0,
    ...     rnn_neurons=128,
    ...     dnn_neurons=64,
    ...     return_hidden=True,
    ... )
    >>> rnnlm_scorer = RNNLMScorer(
    ...     language_model=lm_model,
    ...     temperature=1.25,
    ... )
    >>> scorer = ScorerBuilder(
    ...     full_scorers=[rnnlm_scorer],
    ...     weights={'rnnlm': lm_weight}
    ... )
    >>> beam_size=5
    >>> searcher = S2SRNNBeamSearcher(
    ...     embedding=emb,
    ...     decoder=dec,
    ...     linear=seq_lin,
    ...     bos_index=1,
    ...     eos_index=2,
    ...     min_decode_ratio=0.0,
    ...     max_decode_ratio=1.0,
    ...     topk=2,
    ...     using_eos_threshold=False,
    ...     beam_size=beam_size,
    ...     temperature=1.25,
    ...     scorer=scorer
    ... )
    >>> batch_size=2
    >>> enc = torch.rand([batch_size, n_channels, d_model])
    >>> wav_len = torch.ones([batch_size])
    >>> hyps, _, _, _ = searcher(enc, wav_len)
          ?c                 C   ,   || _ | j   || _tjjjdd| _d S r   lmevaltemperaturer%   r&   r'   r(   r)   r   language_modelr9   r   r   r   r*   D     
zRNNLMScorer.__init__c                 C   sX   t   | j||d\}}| || j }W d   ||fS 1 s#w   Y  ||fS )a  This method scores the new beams based on the
        RNNLM scores computed over the previous tokens.

        Arguments
        ---------
        inp_tokens : torch.Tensor
            The input tensor of the current timestep.
        memory : No limit
            The scorer states for this timestep.
        candidates : torch.Tensor
            (batch_size x beam_size, scorer_beam_size).
            The top-k candidates to be scored after the full scorers.
            If None, scorers will score on full vocabulary set.
        attn : torch.Tensor
            The attention weight to be used in CoverageScorer or CTCScorer.

        Returns
        -------
        log_probs : torch.Tensor
            Output probabilities.
        hs : torch.Tensor
            LM hidden states.
        )hxN)torchno_gradr7   r)   r9   )r   r	   r
   r   r   r0   hs	log_probsr   r   r   r   J  s   

zRNNLMScorer.scorec                 C   sR   t |trtj|d d|d}tj|d d|d}||f}|S tj|d|d}|S )  This method permutes the scorer memory to synchronize
        the memory index with the current output and perform
        batched beam search.

        Arguments
        ---------
        memory : No limit
            The memory variables input for this timestep.
        index : torch.Tensor
            (batch_size, beam_size). The index of the previous path.

        Returns
        -------
        memory
        r      dimr   )
isinstancetupler>   index_select)r   r
   r   memory_0memory_1r   r   r   r   g  s   
zRNNLMScorer.permute_memc                 C   r   x  This method implement the resetting of
        memory variables for the RNNLM scorer.

        Arguments
        ---------
        x : torch.Tensor
            The precomputed encoder states to be used when decoding.
            (ex. the encoded speech representation to be attended).
        enc_lens : torch.Tensor
            The speechbrain-style relative length.
        Nr   r   r   r   r   r     r   zRNNLMScorer.reset_memNr4   r2   r   r   r   r   r3      s    
Kr3   c                   @   r   )TransformerLMScorera
  A wrapper of TransformerLM based on BaseScorerInterface.

    The TransformerLMScorer is used to provide the TransformerLM scores
    of the next input tokens based on the current timestep input and the
    previous scorer states.

    Arguments
    ---------
    language_model : torch.nn.Module
        A Transformer-based language model.
    temperature : float
        Temperature factor applied to softmax. It changes the probability
        distribution, being softer when T>1 and sharper with T<1. (default: 1.0)

    Example
    -------
    >>> from speechbrain.nnet.linear import Linear
    >>> from speechbrain.lobes.models.transformer.TransformerASR import TransformerASR
    >>> from speechbrain.lobes.models.transformer.TransformerLM import TransformerLM
    >>> from speechbrain.decoders import S2STransformerBeamSearcher, TransformerLMScorer, CTCScorer, ScorerBuilder
    >>> input_size=17
    >>> vocab_size=11
    >>> d_model=128
    >>> net = TransformerASR(
    ...     tgt_vocab=vocab_size,
    ...     input_size=input_size,
    ...     d_model=d_model,
    ...     nhead=8,
    ...     num_encoder_layers=1,
    ...     num_decoder_layers=1,
    ...     d_ffn=256,
    ...     activation=torch.nn.GELU
    ... )
    >>> lm_model = TransformerLM(
    ...     vocab=vocab_size,
    ...     d_model=d_model,
    ...     nhead=8,
    ...     num_encoder_layers=1,
    ...     num_decoder_layers=0,
    ...     d_ffn=256,
    ...     activation=torch.nn.GELU,
    ... )
    >>> n_channels=6
    >>> ctc_lin = Linear(input_size=d_model, n_neurons=vocab_size)
    >>> seq_lin = Linear(input_size=d_model, n_neurons=vocab_size)
    >>> eos_index = 2
    >>> ctc_scorer = CTCScorer(
    ...     ctc_fc=ctc_lin,
    ...     blank_index=0,
    ...     eos_index=eos_index,
    ... )
    >>> transformerlm_scorer = TransformerLMScorer(
    ...     language_model=lm_model,
    ...     temperature=1.15,
    ... )
    >>> ctc_weight_decode=0.4
    >>> lm_weight=0.6
    >>> scorer = ScorerBuilder(
    ...     full_scorers=[transformerlm_scorer, ctc_scorer],
    ...     weights={'transformerlm': lm_weight, 'ctc': ctc_weight_decode}
    ... )
    >>> beam_size=5
    >>> searcher = S2STransformerBeamSearcher(
    ...     modules=[net, seq_lin],
    ...     bos_index=1,
    ...     eos_index=eos_index,
    ...     min_decode_ratio=0.0,
    ...     max_decode_ratio=1.0,
    ...     using_eos_threshold=False,
    ...     beam_size=beam_size,
    ...     temperature=1.15,
    ...     scorer=scorer
    ... )
    >>> batch_size=2
    >>> wav_len = torch.ones([batch_size])
    >>> src = torch.rand([batch_size, n_channels, input_size])
    >>> tgt = torch.randint(0, vocab_size, [batch_size, n_channels])
    >>> enc, dec = net.forward(src, tgt)
    >>> hyps, _, _, _ = searcher(enc, wav_len)
    r4   c                 C   r5   r   r6   r:   r   r   r   r*     r<   zTransformerLMScorer.__init__c                 C   s   t  @ |du rt j|dd|jd}t j||dgdd}t| j	 j
s0| j|j | |}| || j }W d   n1 sGw   Y  |dddddf |fS )a  This method scores the new beams based on the
        TransformerLM scores computed over the previous tokens.

        Arguments
        ---------
        inp_tokens : torch.Tensor
            The input tensor of the current timestep.
        memory : No limit
            The scorer states for this timestep.
        candidates : torch.Tensor
            (batch_size x beam_size, scorer_beam_size).
            The top-k candidates to be scored after the full scorers.
            If None, scorers will score on full vocabulary set.
        attn : torch.Tensor
            The attention weight to be used in CoverageScorer or CTCScorer.

        Returns
        -------
        log_probs : torch.Tensor
        memory
        Nr   devicerC   rE   )r>   r?   emptysizerP   cat	unsqueezenextr7   
parametersis_cudator)   r9   )r   r	   r
   r   r   r0   rA   r   r   r   r     s   

zTransformerLMScorer.scorec                 C      t j|d|d}|S )rB   r   rD   r>   rH   r   r   r   r   r   	  s   zTransformerLMScorer.permute_memc                 C   r   rK   r   r   r   r   r   r     r   zTransformerLMScorer.reset_memNrM   r2   r   r   r   r   rN     s    
Q#rN   c                   @   s0   e Zd ZdZdd Zdd Zdd Zdd	 Zd
S )KenLMScoreraI
  KenLM N-gram scorer.

    This scorer is based on KenLM, which is a fast and efficient
    N-gram language model toolkit. It is used to provide the n-gram scores
    of the next input tokens.

    This scorer is dependent on the KenLM package. It can be installed
    with the following command:
            > pip install https://github.com/kpu/kenlm/archive/master.zip

    Note: The KenLM scorer is computationally expensive. It is recommended
    to use it as a partial scorer to score on the top-k candidates instead
    of the full vocabulary set.

    Arguments
    ---------
    lm_path : str
        The path of ngram model.
    vocab_size: int
        The total number of tokens.
    token_list : list
        The tokens set.

    Example
    -------
    # >>> from speechbrain.nnet.linear import Linear
    # >>> from speechbrain.nnet.RNN import AttentionalRNNDecoder
    # >>> from speechbrain.decoders import S2SRNNBeamSearcher, KenLMScorer, ScorerBuilder
    # >>> input_size=17
    # >>> vocab_size=11
    # >>> lm_path='path/to/kenlm_model.arpa' # or .bin
    # >>> token_list=['<pad>', '<bos>', '<eos>', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i']
    # >>> emb = torch.nn.Embedding(
    # ...     embedding_dim=input_size,
    # ...     num_embeddings=vocab_size,
    # ... )
    # >>> d_model=7
    # >>> dec = AttentionalRNNDecoder(
    # ...     rnn_type="gru",
    # ...     attn_type="content",
    # ...     hidden_size=3,
    # ...     attn_dim=3,
    # ...     num_layers=1,
    # ...     enc_dim=d_model,
    # ...     input_size=input_size,
    # ... )
    # >>> n_channels=3
    # >>> seq_lin = Linear(input_shape=[d_model, n_channels], n_neurons=vocab_size)
    # >>> kenlm_weight = 0.4
    # >>> kenlm_model = KenLMScorer(
    # ...     lm_path=lm_path,
    # ...     vocab_size=vocab_size,
    # ...     token_list=token_list,
    # ... )
    # >>> scorer = ScorerBuilder(
    # ...     full_scorers=[kenlm_model],
    # ...     weights={'kenlm': kenlm_weight}
    # ... )
    # >>> beam_size=5
    # >>> searcher = S2SRNNBeamSearcher(
    # ...     embedding=emb,
    # ...     decoder=dec,
    # ...     linear=seq_lin,
    # ...     bos_index=1,
    # ...     eos_index=2,
    # ...     min_decode_ratio=0.0,
    # ...     max_decode_ratio=1.0,
    # ...     topk=2,
    # ...     using_eos_threshold=False,
    # ...     beam_size=beam_size,
    # ...     temperature=1.25,
    # ...     scorer=scorer
    # ... )
    # >>> batch_size=2
    # >>> enc = torch.rand([batch_size, n_channels, d_model])
    # >>> wav_len = torch.ones([batch_size])
    # >>> hyps, _, _, _ = searcher(enc, wav_len)
    c                 C   sx   z	dd l }|| _ W n ty   d}t|w | j || _|| _t| j| _d| _t	||kr7d}t
||| _d S )Nr   zCouldn't import KenLM
            It is an optional dependency; it is not installed with SpeechBrain
            by default. Install it with:
            > pip install https://github.com/kpu/kenlm/archive/master.zip
            g@xz:The size of the token_list and vocab_size are not matched.)kenlmImportErrorModelr7   
vocab_sizenparangefull_candidates	minus_inflen
ValueErrorid2char)r   lm_pathra   
token_listr^   MSGr   r   r   r*   {  s   

zKenLMScorer.__init__c                 C   sF  | d}dttj }|du r$| j }t|g| }t|}n|\}}|du r2| jg| }t|| j	f| j
 }	tj|| j	ftd}
t|| j	fd }t|D ]<}|| dkr^qU|| }|| D ]*}| j|  }| j }|| j||| }||	||f< ||
||f< d|||f< qfqUt|	 |j}	|	|
|ffS )a  This method scores the new beams based on the
        n-gram scores.

        Arguments
        ---------
        inp_tokens : torch.Tensor
            The input tensor of the current timestep.
        memory : No limit
            The scorer states for this timestep.
        candidates : torch.Tensor
            (batch_size x beam_size, scorer_beam_size).
            The top-k candidates to be scored after the full scorers.
            If None, scorers will score on full vocabulary set.
        attn : torch.Tensor
            The attention weight to be used in CoverageScorer or CTCScorer.

        Returns
        -------
        scores : torch.Tensor
        (new_memory, new_scoring_table) : tuple
        r   r4   N)dtyperQ   rC   )rT   rb   log10er^   Statearrayonesrd   ra   re   zerosobjectrangerh   itemr7   	BaseScorer>   
from_numpyfloatrZ   rP   )r   r	   r
   r   r   n_bhscalestatescoring_tabler-   
new_memorynew_scoring_tableiparent_statetoken_idchar	out_stater   r   r   r   r     s4   


zKenLMScorer.scorec                 C   s~   |\}}|   }|jd }| j| }|tt|d|j| j  }|d}|d}|| }|d}|| }||fS )a  This method permutes the scorer memory to synchronize
        the memory index with the current output and perform
        batched beam search.

        Arguments
        ---------
        memory : No limit
            The memory variables input for this timestep.
        index : torch.Tensor
            (batch_size, beam_size). The index of the previous path.

        Returns
        -------
        state : torch.Tensor
        scoring_table : torch.Tensor
        rC   rQ   )	cpunumpyshapebatch_indexrb   broadcast_toexpand_dimsra   reshape)r   r
   r   r{   r|   	beam_sizebeam_offset	hyp_indexr   r   r   r     s    




zKenLMScorer.permute_memc                 C   s,   | j  }| j| t|d| _dS )ax  This method implement the resetting of
        memory variables for the KenLM scorer.

        Arguments
        ---------
        x : torch.Tensor
            The precomputed encoder states to be used when decoding.
            (ex. the encoded speech representation to be attended).
        enc_lens : torch.Tensor
            The speechbrain-style relative length.
        r   N)r^   ro   r7   NullContextWriterb   rc   rT   r   )r   r   r   r{   r   r   r   r     s   
zKenLMScorer.reset_memNr2   r   r   r   r   r]   +  s    O7$r]   c                   @   r   )CoverageScorera	  A coverage penalty scorer to prevent looping of hyps,
    where ```coverage``` is the cumulative attention probability vector.
    Reference: https://arxiv.org/pdf/1612.02695.pdf,
               https://arxiv.org/pdf/1808.10792.pdf

    Arguments
    ---------
    vocab_size: int
        The total number of tokens.
    threshold: float
        The penalty increases when the coverage of a frame is more
        than given threshold. (default: 0.5)

    Example
    -------
    >>> from speechbrain.nnet.linear import Linear
    >>> from speechbrain.lobes.models.RNNLM import RNNLM
    >>> from speechbrain.nnet.RNN import AttentionalRNNDecoder
    >>> from speechbrain.decoders import S2SRNNBeamSearcher, RNNLMScorer, CoverageScorer, ScorerBuilder
    >>> input_size=17
    >>> vocab_size=11
    >>> emb = torch.nn.Embedding(
    ...     num_embeddings=vocab_size,
    ...     embedding_dim=input_size
    ... )
    >>> d_model=7
    >>> dec = AttentionalRNNDecoder(
    ...     rnn_type="gru",
    ...     attn_type="content",
    ...     hidden_size=3,
    ...     attn_dim=3,
    ...     num_layers=1,
    ...     enc_dim=d_model,
    ...     input_size=input_size,
    ... )
    >>> n_channels=3
    >>> seq_lin = Linear(input_shape=[d_model, n_channels], n_neurons=vocab_size)
    >>> lm_weight = 0.4
    >>> coverage_penalty = 1.0
    >>> lm_model = RNNLM(
    ...     embedding_dim=d_model,
    ...     output_neurons=vocab_size,
    ...     dropout=0.0,
    ...     rnn_neurons=128,
    ...     dnn_neurons=64,
    ...     return_hidden=True,
    ... )
    >>> rnnlm_scorer = RNNLMScorer(
    ...     language_model=lm_model,
    ...     temperature=1.25,
    ... )
    >>> coverage_scorer = CoverageScorer(vocab_size=vocab_size)
    >>> scorer = ScorerBuilder(
    ...     full_scorers=[rnnlm_scorer, coverage_scorer],
    ...     weights={'rnnlm': lm_weight, 'coverage': coverage_penalty}
    ... )
    >>> beam_size=5
    >>> searcher = S2SRNNBeamSearcher(
    ...     embedding=emb,
    ...     decoder=dec,
    ...     linear=seq_lin,
    ...     bos_index=1,
    ...     eos_index=2,
    ...     min_decode_ratio=0.0,
    ...     max_decode_ratio=1.0,
    ...     topk=2,
    ...     using_eos_threshold=False,
    ...     beam_size=beam_size,
    ...     temperature=1.25,
    ...     scorer=scorer
    ... )
    >>> batch_size=2
    >>> enc = torch.rand([batch_size, n_channels, d_model])
    >>> wav_len = torch.ones([batch_size])
    >>> hyps, _, _, _ = searcher(enc, wav_len)
          ?c                 C   s   || _ || _d| _d S )Nr   )ra   	threshold	time_step)r   ra   r   r   r   r   r*   J  s   
zCoverageScorer.__init__c                 C   s   | d}|  jd7  _|du rtj||jd}t|  dkr(tj|dd}n|| }t|| 	| j
d}|| d| j
  }||dd| j}d| | j |fS )a  This method scores the new beams based on the
        Coverage scorer.

        Arguments
        ---------
        inp_tokens : torch.Tensor
            The input tensor of the current timestep.
        coverage : No limit
            The scorer states for this timestep.
        candidates : torch.Tensor
            (batch_size x beam_size, scorer_beam_size).
            The top-k candidates to be scored after the full scorers.
            If None, scorers will score on full vocabulary set.
        attn : torch.Tensor
            The attention weight to be used in CoverageScorer or CTCScorer.

        Returns
        -------
        score : torch.Tensor
        coverage
        r   rC   NrO      rR   rQ   )rT   r   r>   
zeros_likerP   rf   summaxclonefill_r   viewrV   expandra   )r   r	   coverager   r   ry   penaltyr   r   r   r   P  s   
zCoverageScorer.scorec                 C   r[   )a  This method permutes the scorer memory to synchronize
        the memory index with the current output and perform
        batched beam search.

        Arguments
        ---------
        coverage : No limit
            The memory variables input for this timestep.
        index : torch.Tensor
            (batch_size, beam_size). The index of the previous path.

        Returns
        -------
        coverage
        r   rD   r\   )r   r   r   r   r   r   r   {  s   zCoverageScorer.permute_memc                 C   s
   d| _ dS )rL   r   N)r   r   r   r   r   r     s   
zCoverageScorer.reset_memN)r   r2   r   r   r   r   r     s    
M+r   c                   @   s    e Zd ZdZdd Zdd ZdS )LengthScorera	  A length rewarding scorer.

    The LengthScorer is used to provide the length rewarding scores.
    It is used to prevent the beam search from favoring short hypotheses.

    Note: length_normalization is not compatible with this scorer. Make sure
    to set is to False when using LengthScorer.

    Arguments
    ---------
    vocab_size: int
        The total number of tokens.

    Example
    -------
    >>> from speechbrain.nnet.linear import Linear
    >>> from speechbrain.lobes.models.RNNLM import RNNLM
    >>> from speechbrain.nnet.RNN import AttentionalRNNDecoder
    >>> from speechbrain.decoders import S2SRNNBeamSearcher, RNNLMScorer, CoverageScorer, ScorerBuilder
    >>> input_size=17
    >>> vocab_size=11
    >>> emb = torch.nn.Embedding(
    ...     num_embeddings=vocab_size,
    ...     embedding_dim=input_size
    ... )
    >>> d_model=7
    >>> dec = AttentionalRNNDecoder(
    ...     rnn_type="gru",
    ...     attn_type="content",
    ...     hidden_size=3,
    ...     attn_dim=3,
    ...     num_layers=1,
    ...     enc_dim=d_model,
    ...     input_size=input_size,
    ... )
    >>> n_channels=3
    >>> seq_lin = Linear(input_shape=[d_model, n_channels], n_neurons=vocab_size)
    >>> lm_weight = 0.4
    >>> length_weight = 1.0
    >>> lm_model = RNNLM(
    ...     embedding_dim=d_model,
    ...     output_neurons=vocab_size,
    ...     dropout=0.0,
    ...     rnn_neurons=128,
    ...     dnn_neurons=64,
    ...     return_hidden=True,
    ... )
    >>> rnnlm_scorer = RNNLMScorer(
    ...     language_model=lm_model,
    ...     temperature=1.25,
    ... )
    >>> length_scorer = LengthScorer(vocab_size=vocab_size)
    >>> scorer = ScorerBuilder(
    ...     full_scorers=[rnnlm_scorer, length_scorer],
    ...     weights={'rnnlm': lm_weight, 'length': length_weight}
    ... )
    >>> beam_size=5
    >>> searcher = S2SRNNBeamSearcher(
    ...     embedding=emb,
    ...     decoder=dec,
    ...     linear=seq_lin,
    ...     bos_index=1,
    ...     eos_index=2,
    ...     min_decode_ratio=0.0,
    ...     max_decode_ratio=1.0,
    ...     topk=2,
    ...     using_eos_threshold=False,
    ...     beam_size=beam_size,
    ...     temperature=1.25,
    ...     length_normalization=False,
    ...     scorer=scorer
    ... )
    >>> batch_size=2
    >>> enc = torch.rand([batch_size, n_channels, d_model])
    >>> wav_len = torch.ones([batch_size])
    >>> hyps, _, _, _ = searcher(enc, wav_len)
    c                 C   s
   || _ d S N)ra   )r   ra   r   r   r   r*     s   
zLengthScorer.__init__c                 C   s*   t jdg|j|jd|d| jdfS )a  This method scores the new beams based on the
        Length scorer.

        Arguments
        ---------
        inp_tokens : torch.Tensor
            The input tensor of the current timestep.
        memory : No limit
            The scorer states for this timestep.
        candidates : torch.Tensor
            (batch_size x beam_size, scorer_beam_size).
            The top-k candidates to be scored after the full scorers.
            If None, scorers will score on full vocabulary set.
        attn : torch.Tensor
            The attention weight to be used in CoverageScorer or CTCScorer.

        Returns
        -------
        torch.Tensor
            Scores
        None
        r4   )rP   rl   r   N)r>   tensorrP   rl   r   rT   ra   r   r   r   r   r     s   zLengthScorer.scoreN)r   r   r   r   r*   r   r   r   r   r   r     s    Nr   c                   @   sH   e Zd ZdZe e e dfddZdd Zdd Zd	d
 Z	dd Z
dS )ScorerBuildera  Builds scorer instance for beamsearch.

    The ScorerBuilder class is responsible for building a scorer instance for
    beam search. It takes weights for full and partial scorers, as well as
    instances of full and partial scorer classes. It combines the scorers based
    on the weights specified and provides methods for scoring tokens, permuting
    scorer memory, and resetting scorer memory.

    This is the class to be used for building scorer instances for beam search.

    See speechbrain.decoders.seq2seq.S2SBeamSearcher()

    Arguments
    ---------
    weights : dict
        Weights of full/partial scorers specified.
    full_scorers : list
        Scorers that score on full vocabulary set.
    partial_scorers : list
        Scorers that score on pruned tokens to prevent computation overhead.
        Partial scoring is performed after full scorers.
    scorer_beam_scale : float
        The scale decides the number of pruned tokens for partial scorers:
        int(beam_size * scorer_beam_scale).

    Example
    -------
    >>> from speechbrain.nnet.linear import Linear
    >>> from speechbrain.lobes.models.transformer.TransformerASR import TransformerASR
    >>> from speechbrain.lobes.models.transformer.TransformerLM import TransformerLM
    >>> from speechbrain.decoders import S2STransformerBeamSearcher, TransformerLMScorer, CoverageScorer, CTCScorer, ScorerBuilder
    >>> input_size=17
    >>> vocab_size=11
    >>> d_model=128
    >>> net = TransformerASR(
    ...     tgt_vocab=vocab_size,
    ...     input_size=input_size,
    ...     d_model=d_model,
    ...     nhead=8,
    ...     num_encoder_layers=1,
    ...     num_decoder_layers=1,
    ...     d_ffn=256,
    ...     activation=torch.nn.GELU
    ... )
    >>> lm_model = TransformerLM(
    ...     vocab=vocab_size,
    ...     d_model=d_model,
    ...     nhead=8,
    ...     num_encoder_layers=1,
    ...     num_decoder_layers=0,
    ...     d_ffn=256,
    ...     activation=torch.nn.GELU,
    ... )
    >>> n_channels=6
    >>> ctc_lin = Linear(input_size=d_model, n_neurons=vocab_size)
    >>> seq_lin = Linear(input_size=d_model, n_neurons=vocab_size)
    >>> eos_index = 2
    >>> ctc_scorer = CTCScorer(
    ...     ctc_fc=ctc_lin,
    ...     blank_index=0,
    ...     eos_index=eos_index,
    ... )
    >>> transformerlm_scorer = TransformerLMScorer(
    ...     language_model=lm_model,
    ...     temperature=1.15,
    ... )
    >>> coverage_scorer = CoverageScorer(vocab_size=vocab_size)
    >>> ctc_weight_decode=0.4
    >>> lm_weight=0.6
    >>> coverage_penalty = 1.0
    >>> scorer = ScorerBuilder(
    ...     full_scorers=[transformerlm_scorer, coverage_scorer],
    ...     partial_scorers=[ctc_scorer],
    ...     weights={'transformerlm': lm_weight, 'ctc': ctc_weight_decode, 'coverage': coverage_penalty}
    ... )
    >>> beam_size=5
    >>> searcher = S2STransformerBeamSearcher(
    ...     modules=[net, seq_lin],
    ...     bos_index=1,
    ...     eos_index=eos_index,
    ...     min_decode_ratio=0.0,
    ...     max_decode_ratio=1.0,
    ...     using_eos_threshold=False,
    ...     beam_size=beam_size,
    ...     topk=3,
    ...     temperature=1.15,
    ...     scorer=scorer
    ... )
    >>> batch_size=2
    >>> wav_len = torch.ones([batch_size])
    >>> src = torch.rand([batch_size, n_channels, input_size])
    >>> tgt = torch.randint(0, vocab_size, [batch_size, n_channels])
    >>> enc, dec = net.forward(src, tgt)
    >>> hyps, _, _, _  = searcher(enc, wav_len)
    r   c           	      C   s   t |t |t | ksJ d|| _dd t  D }dd |D }dd |D }dd |D }i ||| _tt||| _tt||| _| 	| d S )Nz$Weights and scorers are not matched.c                 S   (   g | ]}| d r| dd qS )Scorerscorerr   endswithlowersplit.0kr   r   r   
<listcomp>|      z*ScorerBuilder.__init__.<locals>.<listcomp>c                 S   "   g | ]}|j j d d qS r   r   	__class__r   r   r   r   implr   r   r   r         c                 S   r   r   r   r   r   r   r   r     r   c                 S      i | ]}|d qS         r   r   r   r   r   
<dictcomp>      z*ScorerBuilder.__init__.<locals>.<dictcomp>)
rf   scorer_beam_scaleglobalskeysweightsdictzipfull_scorerspartial_scorers_validate_scorer)	r   r   r   r   r   all_scorer_namesfull_scorer_namespartial_scorer_namesinit_weightsr   r   r   r*   p  s(   
zScorerBuilder.__init__c                 C   s   t  }| j D ]*\}}|dkr|jj|dd|jf< |||| d|\}	||< ||	| j|  7 }q|jt	|| j
 dd\}
}| j D ]\}}|||| ||\}	||< ||	| j|  7 }qF||fS )aB  This method scores tokens in vocabulary based on defined full scorers
        and partial scorers. Scores will be added to the log probs for beamsearch.

        Arguments
        ---------
        inp_tokens : torch.Tensor
            See BaseScorerInterface().
        memory : dict[str, scorer memory]
            The states of scorers for this timestep.
        attn : torch.Tensor
            See BaseScorerInterface().
        log_probs : torch.Tensor
            (batch_size x beam_size, vocab_size). The log probs at this timestep.
        beam_size : int
            The beam size.

        Returns
        -------
        log_probs : torch.Tensor
            (batch_size x beam_size, vocab_size). Log probs updated by scorers.
        new_memory : dict[str, scorer memory]
            The updated states of scorers.
        ctcNrQ   rR   )r   r   itemsr+   re   r"   r   r   topkintr   r   )r   r	   r
   r   rA   r   r}   r   r   r   _r   r   r   r   r     s   
zScorerBuilder.scorec                 C   sz   | j  D ]!\}}|dks|dkr||| |||< q||| |||< q| j D ]\}}||| |||< q,|S )a  Update memory variables of scorers to synchronize
        the memory index with the current output and perform
        batched beam search.

        Arguments
        ---------
        memory : dict[str, scorer memory]
            The states of scorers for this timestep.
        index : torch.Tensor
            (batch_size x beam_size). The index of the previous path.
        candidates : torch.Tensor
            (batch_size, beam_size). The index of the topk candidates.

        Returns
        -------
        memory : dict
        r   r^   )r   r   r   r   )r   r
   r   r   r   r   r   r   r   permute_scorer_mem  s   z ScorerBuilder.permute_scorer_memc                 C   s8   t  }i | j| j D ]\}}|||||< q|S )a  Reset memory variables for scorers.

        Arguments
        ---------
        x : torch.Tensor
            See BaseScorerInterface().
        enc_lens : torch.Tensor
            See BaseScorerInterface().

        Returns
        -------
        memory : dict
        )r   r   r   r   r   )r   r   r   r
   r   r   r   r   r   reset_scorer_mem  s   zScorerBuilder.reset_scorer_memc                 C   s   t | jt |krtd|d| jd   kr dks%td td| jd dkrBd| j vr7td| jd dkrDtdd	S d	S )
zThese error messages indicate scorers are not properly set.

        Arguments
        ---------
        scorer_names : list
            Prefix of scorers defined in speechbrain.decoders.scorer.
        )The keys of weights should be named in {}r   r   r4   z%ctc_weight should not > 1.0 and < 0.0z:CTC scorer should be a full scorer when it's weight is 1.0r   zBPure CTC scorer doesn't have attention weights for coverage scorerN)rf   r   rg   formatr   r   )r   scorer_namesr   r   r   r     s&   zScorerBuilder._validate_scorerN)r   r   r   r   r   listr*   r   r   r   r   r   r   r   r   r     s    b
#0r   c                   @   s2   e Zd ZdZdd Zdd Zdd Zdd	d
ZdS )BaseRescorerInterfacea  A scorer abstraction intended for inheritance by other scoring approaches used in beam search.

    In this approach, a neural network is employed to assign scores to potential text transcripts.
    The beam search decoding process produces a collection of the top K hypotheses.
    These candidates are subsequently sent to a language model (LM) for ranking.
    The ranking is carried out by the LM, which assigns a score to each candidate.

    The score is computed as follows:

    score = beam_search_score + lm_weight * rescorer_score

    See:
        - speechbrain.decoders.scorer.RNNLMRescorer
        - speechbrain.decoders.scorer.TransformerLMRescorer
        - speechbrain.decoders.scorer.HuggingFaceLMRescorer
    c                 C      |S )zThis method should implement the normalization of the text before scoring.

        Arguments
        ---------
        text : list of str
            The text to be normalized.

        Returns
        -------
        Normalized text
        r   r   textr   r   r   normalize_text  r   z$BaseRescorerInterface.normalize_textc                 C   r   )zThis method should implement the preprocessing of the hypotheses before scoring.

        Arguments
        ---------
        hyps : list of str
            The hypotheses to be preprocessed.
        r   r   hypsr   r   r   preprocess_func-     z%BaseRescorerInterface.preprocess_funcc                 C   r   )zThis method should implement the rescoring of the hypotheses.

        Arguments
        ---------
        hyps : list of str
            The hypotheses to be rescored.
        r   r   r   r   r   rescore_hyps7  r   z"BaseRescorerInterface.rescore_hypsNc                 C   r   )a%  This method should implement the moving of the scorer to a device.

        If device is None, the scorer should be moved to the default device provided
        in the constructor.

        Arguments
        ---------
        device : str
            The device to move the scorer to.
        r   r   rP   r   r   r   	to_deviceA  s   zBaseRescorerInterface.to_devicer   )r   r   r   r   r   r   r   r   r   r   r   r   r     s    

r   c                   @   N   e Zd ZdZ					dddZdd Zdd
dZdd Ze	 dd Z
d	S )RNNLMRescorera  A wrapper of RNNLM based on the BaseRescorerInterface.

    Arguments
    ---------
    language_model : torch.nn.Module
        A RNN-based language model.
    tokenizer : SentencePieceProcessor
        A SentencePiece tokenizer.
    device : str
        The device to move the scorer to.
    temperature : float
        Temperature factor applied to softmax. It changes the probability
        distribution, being softer when T>1 and sharper with T<1. (default: 1.0)
    bos_index : int
        The index of the beginning-of-sequence (bos) token.
    eos_index : int
        The index of the end-of-sequence (eos) token.
    pad_index : int
        The index of the padding token.

    Note
    ----
    This class is intended to be used with a pretrained TransformerLM model.
    Please see: https://huggingface.co/speechbrain/asr-crdnn-rnnlm-librispeech

    By default, this model is using SentencePiece tokenizer.

    Example
    -------
    >>> import torch
    >>> from sentencepiece import SentencePieceProcessor
    >>> from speechbrain.lobes.models.RNNLM import RNNLM
    >>> from speechbrain.utils.parameter_transfer import Pretrainer
    >>> source = "speechbrain/asr-crdnn-rnnlm-librispeech"
    >>> lm_model_path = source + "/lm.ckpt"
    >>> tokenizer_path = source + "/tokenizer.ckpt"
    >>> # define your tokenizer and RNNLM from the HF hub
    >>> tokenizer = SentencePieceProcessor()
    >>> lm_model = RNNLM(
    ...    output_neurons = 1000,
    ...    embedding_dim = 128,
    ...    activation = torch.nn.LeakyReLU,
    ...    dropout = 0.0,
    ...    rnn_layers = 2,
    ...    rnn_neurons = 2048,
    ...    dnn_blocks = 1,
    ...    dnn_neurons = 512,
    ...    return_hidden = True,
    ... )
    >>> pretrainer = Pretrainer(
    ...     collect_in = getfixture("tmp_path"),
    ...    loadables = {
    ...     "lm" : lm_model,
    ...     "tokenizer" : tokenizer,
    ...     },
    ...    paths = {
    ...     "lm" : lm_model_path,
    ...     "tokenizer" : tokenizer_path,
    ... })
    >>> _ = pretrainer.collect_files()
    >>> pretrainer.load_collected()
    >>> from speechbrain.decoders.scorer import RNNLMRescorer, RescorerBuilder
    >>> rnnlm_rescorer = RNNLMRescorer(
    ...    language_model = lm_model,
    ...    tokenizer = tokenizer,
    ...    temperature = 1.0,
    ...    bos_index = 0,
    ...    eos_index = 0,
    ...    pad_index = 0,
    ... )
    >>> # Define a rescorer builder
    >>> rescorer = RescorerBuilder(
    ...    rescorers=[rnnlm_rescorer],
    ...    weights={"rnnlm":1.0}
    ... )
    >>> # topk hyps
    >>> topk_hyps = [["HELLO", "HE LLO", "H E L L O"]]
    >>> topk_scores = [[-2, -2, -2]]
    >>> rescored_hyps, rescored_scores = rescorer.rescore(topk_hyps, topk_scores)
    >>> # NOTE: the returned hypotheses are already sorted by score.
    >>> rescored_hyps # doctest: +SKIP
    [['HELLO', 'H E L L O', 'HE LLO']]
    >>> # NOTE: as we are returning log-probs, the more it is closer to 0, the better.
    >>> rescored_scores # doctest: +SKIP
    [[-17.863974571228027, -25.12890625, -26.075977325439453]]
    cudar4   r   c                 C   J   || _ | j   || _|| _tjjjdd| _|| _	|| _
|| _|| _d S r   r7   r8   	tokenizerr9   r%   r&   r'   r(   r)   rP   	bos_indexr#   	pad_indexr   r;   r   rP   r9   r   r#   r   r   r   r   r*     s   


zRNNLMRescorer.__init__c                 C      |  S )a  This method should implement the normalization of the text before scoring.

        Default to uppercasing the text because the (current) language models are trained on
        LibriSpeech which is all uppercase.

        Arguments
        ---------
        text : str
            The text to be normalized.

        Returns
        -------
        str
            The normalized text.
        upperr   r   r   r   r        zRNNLMRescorer.normalize_textNc                 C   *   |du r| j | j dS | j | dS )a  This method moves the scorer to a device.

        If device is None, the scorer is moved to the default device provided
        in the constructor.

        Arguments
        ---------
        device : str
            The device to move the scorer to.
        Nr7   rZ   rP   r   r   r   r   r     s   zRNNLMRescorer.to_devicec              	   C      g }|D ]}|D ]
}| | | qqg }|D ]}| t| jg| j| | jg  qdd |D }tjj	j
j|d| jd| j  j}||fS )t  This method preprocesses the hypotheses before scoring.

        Arguments
        ---------
        topk_hyps : list of list of str
            The hypotheses to be preprocessed.

        Returns
        -------
        padded_hyps : torch.Tensor
            The padded hypotheses.
        enc_hyps_length : list of int
            The length of each hypothesis.
        c                 S      g | ]}|j d  qS r1   r   r   enc_seqr   r   r   r         z1RNNLMRescorer.preprocess_func.<locals>.<listcomp>Tbatch_firstpadding_valueappendr   r>   r   r   r   encode_as_idsr#   nnutilsrnnpad_sequencer   rZ   r7   rX   __next__rP   r   	topk_hypsdecoded_seqbatchseqenc_hypsenc_hyps_lengthpadded_hypsr   r   r   r     .   

zRNNLMRescorer.preprocess_funcc           
         s   |  |\}  fdd D }tj|tj|jd}t| j js)| j	|j | |\}}| 
|| j }|ddddf d|ddddf dd}tj||ddddf  dd}	|	S )	8  This method implement the rescoring of the hypotheses.

        Arguments
        ---------
        topk_hyps : list of list of str
            The hypotheses to be rescored.

        Returns
        -------
        log_probs_scores : torch.Tensor[B * Topk, 1]
            The rescored hypotheses scores
        c                    &   g | ]  fd dt tD qS )c                       g | ]
}| k r
d ndqS rC   r   r   r   r   lengthr   r   r         z9RNNLMRescorer.rescore_hyps.<locals>.<listcomp>.<listcomp>rt   r   r   r  r  r   r         z.RNNLMRescorer.rescore_hyps.<locals>.<listcomp>rl   rP   NrQ   r   rC   rR   )r   r>   r   boolrP   rW   r7   rX   rY   rZ   r)   r9   gatherrV   squeezenansum)
r   r   r  	bool_maskbool_mask_tensorr0   r   rA   target_log_probslog_probs_scoresr   r  r   r     s&   

zRNNLMRescorer.rescore_hypsr   r4   r   r   r   r   r   r   r   r   r*   r   r   r   r>   r?   r   r   r   r   r   r   O  s    [

)r   c                   @   r   )TransformerLMRescorera  A wrapper of TransformerLM based on the BaseRescorerInterface.

    Arguments
    ---------
    language_model : torch.nn.Module
        A Transformer-based language model.
    tokenizer : SentencePieceProcessor
        A SentencePiece tokenizer.
    device : str
        The device to move the scorer to.
    temperature : float
        Temperature factor applied to softmax. It changes the probability
        distribution, being softer when T>1 and sharper with T<1. (default: 1.0)
    bos_index : int
        The index of the beginning-of-sequence (bos) token.
    eos_index : int
        The index of the end-of-sequence (eos) token.
    pad_index : int
        The index of the padding token.

    Note
    ----
    This class is intended to be used with a pretrained TransformerLM model.
    Please see: https://huggingface.co/speechbrain/asr-transformer-transformerlm-librispeech

    By default, this model is using SentencePiece tokenizer.

    Example
    -------
    >>> import torch
    >>> from sentencepiece import SentencePieceProcessor
    >>> from speechbrain.lobes.models.transformer.TransformerLM import TransformerLM
    >>> from speechbrain.utils.parameter_transfer import Pretrainer
    >>> source = "speechbrain/asr-transformer-transformerlm-librispeech"
    >>> lm_model_path = source + "/lm.ckpt"
    >>> tokenizer_path = source + "/tokenizer.ckpt"
    >>> tokenizer = SentencePieceProcessor()
    >>> lm_model = TransformerLM(
    ...     vocab=5000,
    ...     d_model=768,
    ...     nhead=12,
    ...     num_encoder_layers=12,
    ...     num_decoder_layers=0,
    ...     d_ffn=3072,
    ...     dropout=0.0,
    ...     activation=torch.nn.GELU,
    ...     normalize_before=False,
    ... )
    >>> pretrainer = Pretrainer(
    ...     collect_in = getfixture("tmp_path"),
    ...     loadables={
    ...         "lm": lm_model,
    ...         "tokenizer": tokenizer,
    ...     },
    ...     paths={
    ...         "lm": lm_model_path,
    ...         "tokenizer": tokenizer_path,
    ...     }
    ... )
    >>> _ = pretrainer.collect_files()
    >>> pretrainer.load_collected()
    >>> from speechbrain.decoders.scorer import TransformerLMRescorer, RescorerBuilder
    >>> transformerlm_rescorer = TransformerLMRescorer(
    ...     language_model=lm_model,
    ...     tokenizer=tokenizer,
    ...     temperature=1.0,
    ...     bos_index=1,
    ...     eos_index=2,
    ...     pad_index=0,
    ... )
    >>> rescorer = RescorerBuilder(
    ...     rescorers=[transformerlm_rescorer],
    ...     weights={"transformerlm": 1.0}
    ... )
    >>> topk_hyps = [["HELLO", "HE LLO", "H E L L O"]]
    >>> topk_scores = [[-2, -2, -2]]
    >>> rescored_hyps, rescored_scores = rescorer.rescore(topk_hyps, topk_scores)
    >>> # NOTE: the returned hypotheses are already sorted by score.
    >>> rescored_hyps # doctest: +SKIP
    [["HELLO", "HE L L O", "HE LLO"]]
    >>> # NOTE: as we are returning log-probs, the more it is closer to 0, the better.
    >>> rescored_scores  # doctest: +SKIP
    [[-17.863974571228027, -25.12890625, -26.075977325439453]]
    r   r4   r   c                 C   r   r   r   r   r   r   r   r*     s   


zTransformerLMRescorer.__init__c                 C   r   )ai  This method should implement the normalization of the text before scoring.

        Default to uppercasing the text because the language models are trained on
        LibriSpeech.

        Arguments
        ---------
        text : str
            The text to be normalized.

        Returns
        -------
        str
            The normalized text.
        r   r   r   r   r   r     r   z$TransformerLMRescorer.normalize_textNc                 C   r   ah  This method moves the scorer to a device.

        If device is None, the scorer is moved to the default device provided
        in the constructor.

        This method is dynamically called in the recipes when the stage is equal
        to TEST.

        Arguments
        ---------
        device : str
            The device to move the scorer to.
        Nr   r   r   r   r   r        zTransformerLMRescorer.to_devicec              	   C   r   )r   c                 S   r   r1   r   r   r   r   r   r     r   z9TransformerLMRescorer.preprocess_func.<locals>.<listcomp>Tr   r   r   r   r   r   r     r  z%TransformerLMRescorer.preprocess_funcc           	         s  |  |\}  fdd D }tj|tj|jd}t| j js)| j	|j | |}| 
|| j }td|dddd| jf< |ddddf d|ddddf dd}||ddddf jdd	 }tj||ddddf  dd	}|S )
r  c                    r  )c                    r	  r
  r   r  r  r   r   r     r  zATransformerLMRescorer.rescore_hyps.<locals>.<listcomp>.<listcomp>r  r  r  r  r   r      r  z6TransformerLMRescorer.rescore_hyps.<locals>.<listcomp>r  -infNrQ   r   rC   rR   )r   r>   r   r  rP   rW   r7   rX   rY   rZ   r)   r9   rx   r   r  rV   r  	logsumexpr  )	r   r   r  r  r  r0   rA   r  r  r   r  r   r     s.   


z"TransformerLMRescorer.rescore_hypsr  r   r  r   r   r   r   r  5  s    Y

)r  c                   @   sN   e Zd ZdZ	dddZdddZdd	 Zd
d Zdd Ze	
 dd ZdS )HuggingFaceLMRescorera  A wrapper of HuggingFace's TransformerLM based on the BaseRescorerInterface.

    Arguments
    ---------
    model_name : str
        The name of the model to be loaded.
    device : str
        The device to be used for scoring. (default: "cuda")

    Example
    -------
    >>> from speechbrain.decoders.scorer import HuggingFaceLMRescorer, RescorerBuilder
    >>> source = "gpt2-medium"
    >>> huggingfacelm_rescorer = HuggingFaceLMRescorer(
    ...     model_name=source,
    ... )
    >>> rescorer = RescorerBuilder(
    ...     rescorers=[huggingfacelm_rescorer],
    ...     weights={"huggingfacelm": 1.0}
    ... )
    >>> topk_hyps = [["Hello everyone.", "Hell o every one.", "Hello every one"]]
    >>> topk_scores = [[-2, -2, -2]]
    >>> rescored_hyps, rescored_scores = rescorer.rescore(topk_hyps, topk_scores)
    >>> # NOTE: the returned hypotheses are already sorted by score.
    >>> rescored_hyps # doctest: +SKIP
    [['Hello everyone.', 'Hello every one', 'Hell o every one.']]
    >>> # NOTE: as we are returning log-probs, the more it is closer to 0, the better.
    >>> rescored_scores # doctest: +SKIP
    [[-20.03631591796875, -27.615638732910156, -42.662353515625]]
    r   c                 C   s   || _ || _z
ddlm}m} W n ty   tdw |j| j dd | _|j| j dd| _	| j	j
d u rPd| j	_
| j	d| j	j
gi | jjt| j	d	d
 | j	j| _| j	j| _d S )Nr   )AutoModelForCausalLMAutoTokenizerz:Please install transformers with: pip install transformersT)
is_decoder)use_fastz<|pad|>additional_special_tokens    )pad_to_multiple_of)
model_namerP   transformersr$  r%  r_   from_pretrainedr8   r7   r   	pad_tokenadd_special_tokensresize_token_embeddingsrf   	bos_token	eos_token)r   r+  rP   r$  r%  r   r   r   r*   B  s6   

zHuggingFaceLMRescorer.__init__Nc                 C   r   r  r   r   r   r   r   r   e  r   zHuggingFaceLMRescorer.to_devicec                 C   r   )a  This method should implement the normalization of the text before scoring.

        Arguments
        ---------
        text : str
            The text to be normalized.

        Returns
        -------
        normalized_text : str
            The normalized text.
            In this case we do not apply any normalization. However, this method
            can be overridden to apply any normalization.
        r   r   r   r   r   r   x  s   z$HuggingFaceLMRescorer.normalize_textc                 C   s   | j | | j S )zThis method adds the special tokens to the text.

        Arguments
        ---------
        text : str
            The text to be augmented.

        Returns
        -------
        augmented_text : str
            The augmented text.
        )r1  r2  r   r   r   r   _add_special_tokens  s   z)HuggingFaceLMRescorer._add_special_tokensc                 C   sN   g }|D ]}|D ]
}| | | qqtt| j|}| jj|ddd}|S )a  This method preprocesses the hypotheses before scoring.

        Arguments
        ---------
        topk_hyps : list of str
            The hypotheses to be preprocessed.

        Returns
        -------
        encoding : tensor
            The encoding of the hypotheses.
        ptT)return_tensorspadding)r   r   r   mapr3  r   batch_encode_plus)r   r   normalized_hypsr  r  text_augmented_with_tokensencodingr   r   r   r     s   
z%HuggingFaceLMRescorer.preprocess_funcc                 C   s   |  |}|d | jj}|d | jj}| j||dd }td|dddd| jjdf< |ddddf d|ddd	df d	d}||ddddf j
dd
 }tj||ddd	df  dd
}|S )r  	input_idsattention_mask)r=  r   r!  NrQ   r   rC   rR   )r   rZ   r7   rP   rx   r   pad_token_idr  rV   r  r"  r>   r  )r   r   r;  idsr=  r0   r  r  r   r   r   r     s   
"6 z"HuggingFaceLMRescorer.rescore_hyps)r   r   )r   r   r   r   r*   r   r   r3  r   r>   r?   r   r   r   r   r   r#  "  s    "

#r#  c                   @   s<   e Zd ZdZe e fddZdd Zdd Zdd	d
Z	dS )RescorerBuildera  Builds rescorer instance for beamsearch.

    The RescorerBuilder class is responsible for building a scorer instance for
    beam search. It takes weights and rescorers classes. It combines the scorers based
    on the weights specified and provides methods for rescoring text.

    This is the class to be used for building rescorer instances for beam search.

    Arguments
    ---------
    weights : dict
        Weights of rescorers specified.
    rescorers : list
        Rescorers that re-ranks topk hypotheses.
    c                 C   sz   t |t |ksJ d|| _dd t  D }dd |D }dd |D }i ||| _tt||| _| | d S )Nz&Weights and rescorers are not matched.c                 S   r   )Rescorerrescorerr   r   r   r   r   r   r     r   z,RescorerBuilder.__init__.<locals>.<listcomp>c                 S   r   )rB  r   r   r   r   r   r   r     r   c                 S   r   r   r   r   r   r   r   r     r   z,RescorerBuilder.__init__.<locals>.<dictcomp>)rf   r   r   r   r   r   	rescorersr   )r   r   rC  all_rescorer_namesfull_rescorer_namesr   r   r   r   r*     s    zRescorerBuilder.__init__c              	   C   s   |  }| j D ]6\}}||}d}tt|D ]$}tt|| D ]}	|| |	  | j| ||   7  < |d7 }q$qq	tdd t	||D g}
g }g }|
D ]}|D ]}|\}}|
t| |
t| qWqS||fS )a  This method rescores the topk candidates.

        Arguments
        ---------
        topk_candidates : list of list of str
            The topk candidates to be rescored.
        topk_scores : list of list of float
            The scores of the topk candidates.

        Returns
        -------
        output_candidates : list of list of str
            The rescored candidates.
        output_scores : list of list of float
            The rescored scores.
        r   rC   c                 s   s0    | ]\}}t tt ||d d dd V  qdS )c                 S   s   | d S )NrC   r   )r   r   r   r   <lambda>$  s    z3RescorerBuilder.rescore.<locals>.<genexpr>.<lambda>T)keyreverseN)r   sorted)r   sublistr   r   r   r   	<genexpr>!  s    
z*RescorerBuilder.rescore.<locals>.<genexpr>)copyrC  r   r   rt   rf   r   ru   r   r   r   )r   topk_candidatestopk_scores
new_scoresr   r   r-   index_scoresr   jsorted_candidatesoutput_candidatesoutput_scoresrJ  ru   textsr   r   r   rescore  s2   

zRescorerBuilder.rescorec                 C   s$   t | jt |krtd|dS )zThese error messages indicate rescorers are not properly set.

        Arguments
        ---------
        rescorer_names : list
            Prefix of rescorers defined in speechbrain.decoders.scorer.
        r   N)rf   r   rg   r   )r   rescorer_namesr   r   r   r   5  s   z RescorerBuilder._validate_scorerNc                 C   s"   | j  D ]	\}}|| qdS )zMoves rescorers to device.

        Useful to avoid having on GPU rescorers while being
        on TRAIN and VALID Stages.

        Arguments
        ---------
        device : str
            The device to be used for scoring. (default: None)
        N)rC  r   r   )r   rP   r   r   r   r   r   move_rescorers_to_deviceD  s   z(RescorerBuilder.move_rescorers_to_devicer   )
r   r   r   r   r   r   r*   rV  r   rX  r   r   r   r   r@    s    
3r@  )r   r   rb   r>   speechbrainr%   speechbrain.decoders.ctcr   r   r   r3   rN   r]   r   r   r   r   r   r  r#  r@  r   r   r   r   <module>   s6    \    R #q B g n 4