o
    ei-                     @   s   d Z ddlZddlZddlmZmZmZ ddlZddlm	Z	 ddl
mZ ddl
mZ e	eZG dd	 d	ejZG d
d deZdS )aa  Graph compiler class to create, store, and use k2 decoding graphs in
speechbrain. Limits the output words to the ones in the lexicon.

This code is an extension, and therefore heavily inspired or taken from
icefall's (https://github.com/k2-fsa/icefall) graph compiler.

Authors:
  * Pierre Champion 2023
  * Zeyu Zhao 2023
  * Georgios Karakasidis 2023
    N)ListOptionalTuple)
get_logger   )k2)lexiconc                
   @   s   e Zd ZdZejdejfddZejde	j
fddZ	ejdd Zej		dd
ee dedeejejf fddZddee defddZ	ddee defddZdS )GraphCompilerzR
    This abstract class is used to compile graphs for training and decoding.
    returnc                 C      dS )z@
        Return the topology used to compile the graph.
        N selfr   r   g/home/ubuntu/transcripts/venv/lib/python3.10/site-packages/speechbrain/k2_integration/graph_compiler.pytopo       zGraphCompiler.topoc                 C   r   )z?
        Return the lexicon used to compile the graph.
        Nr   r   r   r   r   r   '   r   zGraphCompiler.lexiconc                 C   r   )z>
        Return the device used to compile the graph.
        Nr   r   r   r   r   device.   r   zGraphCompiler.deviceTtextsis_trainingc                 C   r   )a  
        Compile the graph for the given texts.

        Arguments
        ---------
        texts: List[str]
            A list of strings. Each string contains a sentence for an utterance.
            A sentence consists of spaces separated words. An example `texts`
            looks like:

                ['hello world', 'CTC training with k2']

        is_training: bool
            Indictating whether this is for training or not
            (OOV warning in training).
        Returns
        -------
        graph: GraphCompiler
            An FsaVec, the composition result of `self.ctc_topo` and the
            transcript FSA.
        target_lens: Torch.tensor
            It is an long tensor of shape (batch,). It contains lengths of
            each target sequence.
        Nr   )r   r   r   r   r   r   compile5   s   zGraphCompiler.compileNF	cache_dircachec                 C   s0  t d t| jjd}| jd}tt	|j
d tt	|j
d  }|rQ|durQ|d | d }tj|rQt d| d tjtj|dd	}|S t d
 tj||dd}t d t|}t d t|}t d|j
  |dur|d | d }t d|  t| | |S )a  
        Compile the decoding graph by composing H with L.
        This is for decoding without language model.

        Arguments
        ---------
        cache_dir: str
            The path to store the composition in a .pt format.
        cache: bool
            Whether or not to load the composition from the .pt format (in the
            cache_dir dir).

        Returns
        -------
        HL: k2.Fsa
            The HL composition
        Arc sorting Lcpur   Nz/.HL_.ptzLoading HL '\' from its cached .pt format. Set 'caching: False' in the yaml if this is not what you want.map_locationzComposing H and Ltokensinner_labelszConnecting HLzArc sorting HLz
HL.shape: zCaching HL to: )loggerinfor   arc_sortr   Ltor   strhashshapeospathexistswarningFsa	from_dicttorchloadcomposeconnectdebugsaveas_dict)r   r   r   r$   H	file_hashr*   HLr   r   r   
compile_HLS   s0   
$





zGraphCompiler.compile_HLc           
      C   s  t d t| jjd}t|d}| jd}tt	|j
d tt	|j
d  tt	|j
d  }|rb|durb|d | d }tj|rbt d| d tjtj|dd	}|S t d
 t||}	t d t|	}	t d t|	}	t d t|	}	| j|	}	t|	}	t|	}	|	jd|	_t d t|	}	t d tj||	dd}t d t|}t d t|}t d|j
  |dur|d | d }t d|  t| | |S )a   
        Compile the decoding graph by composing H with LG.
        This is for decoding with small language model.

        Arguments
        ---------
        G: k2.Fsa
            The language model FSA.
        cache_dir: str
            The path to store the composition in a .pt format.
        cache: bool
            Whether or not to load the composition from the .pt format (in the
            cache_dir dir).

        Returns
        -------
        HL: k2.Fsa
            The HLG composition
        r   r   r   Nz/.HLG_r   zLoading HLG 'r   r   zIntersecting L and GzConnecting LGzDeterminizing LGz"Connecting LG after k2.determinizezArc sorting LGzComposing H and LGr   r   zConnecting HLGzArc sorting HLGzHLG.shape: zCaching HLG to: )r!   r"   r   r#   r   
L_disambigr%   r   r&   r'   r(   r)   r*   r+   r,   r-   r.   r/   r0   r1   r2   determinizeremove_LG_disambig_symbolsremove_epsilon
aux_labelsremove_values_eqr3   r4   r5   )
r   Gr   r   r$   r6   r7   r*   HLGLGr   r   r   compile_HLG   sX   

















zGraphCompiler.compile_HLGT)NF)__name__
__module____qualname____doc__abcabstractpropertyr   r-   r   r   Lexiconr   abstractmethodr   r&   boolr   r/   Tensorr   r   r9   rC   r   r   r   r   r	      s0    
4r	   c                	   @   s|   e Zd ZdZ	ddejdejdefddZ	e
dd	 Ze
d
d Ze
dd Z	ddee dedeejejf fddZdS )CtcGraphCompilera  
    This class is used to compile decoding graphs for CTC training.

    Arguments
    ---------
    _lexicon: Lexicon
        It is built from `data/lang/lexicon.txt`.
    device: torch.device
        The device to use for operations compiling transcripts to FSAs.
    need_repeat_flag: bool
        If True, will add an attribute named `_is_repeat_token_` to ctc_topo
        indicating whether this token is a repeat token in ctc graph.
        This attribute is needed to implement delay-penalty for phone-based
        ctc loss. See https://github.com/k2-fsa/k2/pull/1086 for more
        details. Note: The above change MUST be included in k2 to enable this
        flag so make sure you have an up-to-date version.

    Example
    -------
    >>> import torch
    >>> from speechbrain.k2_integration.losses import ctc_k2
    >>> from speechbrain.k2_integration.graph_compiler import CtcGraphCompiler
    >>> from speechbrain.k2_integration.lexicon import Lexicon
    >>> from speechbrain.k2_integration.prepare_lang import prepare_lang

    >>> # Create a random batch of log-probs
    >>> batch_size = 4

    >>> log_probs = torch.randn(batch_size, 100, 30)
    >>> log_probs.requires_grad = True
    >>> # Assume all utterances have the same length so no padding was needed.
    >>> input_lens = torch.ones(batch_size)
    >>> # Create a small lexicon containing only two words and write it to a file.
    >>> lang_tmpdir = getfixture('tmpdir')
    >>> lexicon_sample = "hello h e l l o\nworld w o r l d\n<UNK> <unk>"
    >>> lexicon_file = lang_tmpdir.join("lexicon.txt")
    >>> lexicon_file.write(lexicon_sample)
    >>> # Create a lang directory with the lexicon and L.pt, L_inv.pt, L_disambig.pt
    >>> prepare_lang(lang_tmpdir)
    >>> # Create a lexicon object
    >>> lexicon = Lexicon(lang_tmpdir)
    >>> # Create a random decoding graph
    >>> graph = CtcGraphCompiler(
    ...     lexicon,
    ...     log_probs.device,
    ... )
    >>> isinstance(graph.topo, k2.Fsa)
    True

    F_lexiconr   need_repeat_flagc                 C   sz   || _ || _| j| | jjjdu sJ | j  t| jj}t	j
|dd}||| _
|r;| j
j| j
jk| j
_d S d S )NF)modified)_devicerP   r   r%   L_invrequires_gradr#   maxr   r   ctc_topolabelsr>   _is_repeat_token_)r   rP   r   rQ   max_token_idrW   r   r   r   __init__  s   

zCtcGraphCompiler.__init__c                 C      | j S )z&
        Return the ctc_topo.
        )rW   r   r   r   r   r   "     zCtcGraphCompiler.topoc                 C   r\   )z%
        Return the lexicon.
        )rP   r   r   r   r   r   )  r]   zCtcGraphCompiler.lexiconc                 C   r\   )z,Return the device used for compiling graphs.)rS   r   r   r   r   r   0  s   zCtcGraphCompiler.deviceTr   r   r
   c                 C   s   | j j||d}| j j||d}dd |D }tjdd |D tjd}tt|| j	}tj
| j j|dd}| }	t|	}
t|
}t|}tj| j|dd}|jdu s[J ||fS )a3  
        Build decoding graphs by composing ctc_topo with given transcripts.

        Arguments
        ---------
        texts: List[str]
            A list of strings. Each string contains a sentence for an utterance.
            A sentence consists of spaces separated words. An example `texts`
            looks like:

                ['hello world', 'CTC training with k2']

        is_training: bool
            Indictating whether this is for training or not
            (OOV warning in training).

        Returns
        -------
        graph: GraphCompiler
            An FsaVec, the composition result of `self.ctc_topo` and the
            transcript FSA.
        target_lens: Torch.tensor
            It is an long tensor of shape (batch,). It contains lengths of
            each target sequence.
        )log_unknown_warningc                 S   s   g | ]}t |g qS r   )sum).0innerr   r   r   
<listcomp>Z  s    z,CtcGraphCompiler.compile.<locals>.<listcomp>c                 S   s   g | ]}t |qS r   )len)r`   tr   r   r   rb   ]  s    )dtypeF)treat_epsilons_specially)r   texts_to_word_idstexts_to_token_idsr/   tensorlongr   add_epsilon_self_loops
linear_fsar   	intersectrT   invert_r#   !remove_epsilon_and_add_self_loopsr1   rW   rU   )r   r   r   word_idx	word2tidssentence_idstarget_lensword_fsa_with_self_loopsfsaans_fsatranscript_fsafsa_with_self_loopsgraphr   r   r   r   5  s:   

zCtcGraphCompiler.compileN)FrD   )rE   rF   rG   rH   r   rK   r/   r   rM   r[   propertyr   r   r&   r   r   r-   rN   r   r   r   r   r   rO      s0    7



rO   )rH   rI   r)   typingr   r   r   r/   speechbrain.utils.loggerr    r   r   rE   r!   ABCr	   rO   r   r   r   r   <module>   s     =