o
    wiL!                     @   s   d dl mZ d dlmZmZmZ d dlZd dlmZ dde	dee
 de
d	ed
df
ddZddee
 d	ed
dfddZddee
 d	ed
dfddZddee
 d	ed
dfddZdee
 d
dfddZG dd deZdS )    )	lru_cache)ListOptionalUnionN)k2Tnametokens	blank_numwith_self_loopsreturnk2.Fsac                 C   s   | dkr
t ||}n$| dkrt||}n| dkrt||}n| dkr't|}ntd|  |dkrN|j}|dk}||dk||k@   d8  < |||< ||_t|}|S )	a  Helper function to build a topology.
    It allows to build topologies with a non-zero blank ID.
    Args:
      name:
        The topology name. Choices: default, compact, shared_blank, minimal
      tokens:
        A list of tokens, e.g., phones, characters, etc.
      blank_num:
        Blank number. Must be in tokens
      with_self_loops:
        Whether to add token-to-epsilon self-loops to a topology
    Returns:
      Returns a topology FST.
    defaultcompactshared_blankminimalzUnknown topo name: r      )build_default_topobuild_compact_topobuild_shared_blank_topobuild_minimal_topo
ValueErrorlabelsr   arc_sort)r   r   r	   r
   ansr   
blank_mask r   e/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/nemo/collections/asr/parts/k2/topologies.py
build_topo   s"   

r   c                 C   s   d| vsJ dd| v sJ dt | }|}|rdnd}t|D ]=}t|D ],}||kr>|r=|| d| d| |  d7 }q&|| d| d| |  d| |  d	7 }q&|| d| d
7 }q || 7 }tjj|dd}t|}|S )zZBuild the default CTC topology.
    Zero is assumed to be the ID of the blank symbol.
    r   *We assume -1 is ID of the final transitionr   )We assume 0 is the ID of the blank symbol z0 0 0 0 0.0
  0 0.0
 0.0
 -1 -1 0.0
r   num_aux_labelslenranger   Fsafrom_strr   )r   r
   
num_statesfinal_statearcsijr   r   r   r   r   :   s"   *

r   c           	   
   C   s  d| vsJ dd| v sJ d| d d }t |}t| | }|}d}t||D ]}|d| d| ||   d| ||   d	7 }q)|d| d
7 }td|D ]}|| d| d7 }|rn|| d| d| ||   d7 }qO|| 7 }tjj|dd}t|}|S )zBuild the compact CTC topology.
    Zero is assumed to be the ID of the blank symbol.
    See https://arxiv.org/abs/2110.03098
    r   r   r   r    r   r!   0 r"   r$   r%   z 0 r#   r&   )intr)   r*   r   r+   r,   r   )	r   r
   eps_numselfloops_shiftr-   r.   r/   r0   r   r   r   r   r   R   s&   ."

r   c           	      C   s@  d| vsJ dd| v sJ d|   } | d t| }d}|d }g }|||dddg |||dddg ||g t| D ]2\}}|d7 }|||||dg |||||dg ||||ddg |ru||||ddg qCt|dd d}d	d
 |D }dd
 |D }d|}tjj	|dd}t
|}|S )zBuild the shared blank CTC topology.
    Zero is assumed to be the ID of the blank symbol.
    See https://github.com/k2-fsa/k2/issues/746#issuecomment-856421616
    r   r   r   r    r   c                 S   s   | d S )Nr   r   )arcr   r   r   <lambda>   s    z)build_shared_blank_topo.<locals>.<lambda>)keyc                 S   s   g | ]	}d d |D qS )c                 S   s   g | ]}t |qS r   )str.0r0   r   r   r   
<listcomp>   s    z6build_shared_blank_topo.<locals>.<listcomp>.<listcomp>r   r;   r6   r   r   r   r<      s    z+build_shared_blank_topo.<locals>.<listcomp>c                 S   s   g | ]}d  |qS )r"   )joinr=   r   r   r   r<          
r&   )copyremover)   append	enumeratesortedr>   r   r+   r,   r   )	r   r
   
num_tokensstartfinalr/   r0   pr   r   r   r   r   l   s4   


r   c                 C   s   d| vsJ dd| v sJ dt | }d}d}t|D ]}|d| |  d| |  d	7 }q|d
| d7 }|| 7 }tjj|dd}t|}|S )zBuild the minimal topology.
    Zero is assumed to be the ID of the blank symbol.
    See https://arxiv.org/abs/2110.03098
    r   r   r   r    r   r!   z0 0 r"   r$   r2   r%   r&   r(   )r   rF   r.   r/   r0   r   r   r   r   r      s    

r   c                   @   sl   e Zd ZdZddee dedee fddZdee	j
ee f d	d
fddZeddded	d
fddZdS )RnntEmissionAdapterBuildera  Builder class for RNNT Emission Adapters.

    An Emission Adapter is an FSA used to emulate desired temporal Emissions FSA properties of a trivial Emissions FSA.
    Temporal properties are emulated by <epsilon>-arcs with zero log-weight.
    These additional arcs do not contribute to the lattice scores and can be easily removed from the best path.

    k2 does not have Emissions FSAs. Instead, it has DenseFsaVec, which is not a real FSA.
    Thus, Emission Adapters should be composed with Supervision FSAs.
    IMPOTRANT: <epsilon>-outputs are expected to be present in the DenseFsaVec.

    These RNNT adapters do only the <blank> re-routing (emulate <blank> hopping over U dimension).
    Redundant non-<blank> are not removed by these adapters.

    At initialization, the builder expects a list of tokens, <blank> number and <epsilon> number.
    When called, the builder returns adapters according to the provided text lengths.
    Nr   r	   r4   c                 C   sj   d|vsJ d||v sJ d|d u s||vsJ d|| _ || _|d u r0| j d d | _d S || _d S )Nr   r   zThe blank ID must be in tokensz#The epsion ID must not be in tokensr   )r   r	   r4   )selfr   r	   r4   r   r   r   __init__   s   &z#RnntEmissionAdapterBuilder.__init__adapter_lengthsr   r   c                    s   t  fdd| D S )Nc                    s   g | ]}  |qS r   )_build_single_adapterr:   rK   r   r   r<      r?   z7RnntEmissionAdapterBuilder.__call__.<locals>.<listcomp>)r   create_fsa_vectolist)rK   rM   r   rO   r   __call__   s   z#RnntEmissionAdapterBuilder.__call__i   )maxsizeadapter_lengthc              	   C   s$  |dksJ d|d }|d d }d}t |D ]1}t t| jD ]}|| jkr:|| d|d  d| j|  d7 }q!|| d| d| j d7 }q|| d| d| j d7 }t ||D ]$}|| d||d k rm|d nd d| j d7 }|| d| d7 }q]|| 7 }ttjj|d	d
S )Nr   z(`adapter_length` cannot be less than one   r!   r"   r$   r   z -1 0.0
T)acceptor)	r*   r)   r   r	   r4   r   r   r+   r,   )rK   rT   first_eps_stater.   r/   r0   r1   r   r   r   rN      s    
$0
z0RnntEmissionAdapterBuilder._build_single_adapter)N)__name__
__module____qualname____doc__r   r3   r   rL   r   torchTensorrR   r   rN   r   r   r   r   rJ      s      	rJ   )T)	functoolsr   typingr   r   r   r\   nemo.core.utils.k2_guardr   r9   r3   boolr   r   r   r   r   objectrJ   r   r   r   r   <module>   s   $#!