o
    }oi94                     @   s   d dl mZ d dlmZmZmZmZ d dlZd dlm	Z	 d dl
mZ d dlmZ d dlmZ d dlmZmZmZmZ d d	lmZ d d
lmZ G dd deZG dd deeZdS )    )abstractmethod)AnyOptionalTupleUnionN)
DictConfig)GraphIntersectDenseConfig)
CtcK2Mixin)MLLoss)create_sparse_wrappedget_tot_objf_and_finite_maskinvert_permutation
load_graph)k2)loggingc                       s  e Zd ZdZeddddde dfdeded	ed
ee	 dede
deedef  de
dedef fddZed&ddZ	d'ddddddde
deejejed ed f f
ddZ	d'ddddddde
deejejed ed f f
dd Zd!dd"ed#ejdeejejf fd$d%Z  ZS )(MAPLossa  
    Maximum a Posteriori Probability criterion.
    It implements Lattice-Free Maximum Mutual Information (LF-MMI) and LF-boosted-MMI (LF-bMMI) losses.
    
    Based on https://github.com/k2-fsa/snowfall/blob/master/snowfall/objectives/mmi.py
    
    cfg takes precedence over all optional parameters
    We keep explicit parameter setting to be able to create an instance without the need of a config.
    NdefaultTF        num_classesblank	reductioncfg	topo_typetopo_with_self_loopstoken_lmk2.Fsaintersect_prunedintersect_confboost_coeffc                    s   t  j||||||d |d ur(|d|}|d|}|d|	}	|d|
}
|
| _|r0| jn| j| _|	| _d | _|d u rEt	
d d S t|trNt|n|| _| jd u rZtd| | j d S )N)r   r   r   r   r   r   r   r   r   r   ztoken_lm is empty. 
                            Trainable token_lm is not supported yet. 
                            Please call .update_graph(token_lm) before using.zlm_graph is empty.)super__init__getr   "_intersect_calc_scores_impl_pruned%_intersect_calc_scores_impl_exact_opt_intersect_calc_scores_implr   graph_compilerr   warning
isinstancestrr   lm_graph
ValueErrorupdate_graphselfr   r   r   r   r   r   r   r   r   r   	__class__ Z/home/ubuntu/.local/lib/python3.10/site-packages/nemo/collections/asr/parts/k2/map_loss.pyr    ;   s4   
zMAPLoss.__init__graphc                 C   s   t )N)NotImplementedError)r-   r2   r0   r0   r1   r+   i   s   zMAPLoss.update_graphdense_fsa_veczk2.DenseFsaVec
num_graphs	den_graphreturn_latsreturnc                 C   sn  |j }||j kr||j ksJ |jd }| |ksJ | }| }t||g}tj|tjd}tj	|g| tjd}	t
||	g d|}
t||
}tj|tjddd}|dd d|}tj||| jj||rydndd}|jd	d
d}|ddd }|ddd }|rtj|tjd|d }||t||t||d fS ||ddfS )zInner intersection method.
        Does joint (simultaneous) exact intersection of dense_fsa_vec against num_graphs and den_graph.
        
        Optiolally returns the numerator and the denominator lattices.
        r   dtype      seqframe_idxN)a_fsasb_fsasoutput_beam
a_to_b_mapseqframe_idx_nameTFlog_semiringuse_double_scores)deviceshapedim0cloner   cattorcharangeint32tensorstacktreshapeto	index_fsarepeatintersect_denser   rA   get_tot_scores)r-   r4   r5   r6   r7   rG   num_fsasnum_den_graphsnum_graphs_indexesden_graph_indexesnum_den_graphs_indexesnum_den_reordered_graphsrB   num_den_latsnum_den_tot_scoresnum_tot_scoresden_tot_scores	lat_slicer0   r0   r1   r#   n   s>   


z-MAPLoss._intersect_calc_scores_impl_exact_optc              	   C   s   |j }||j kr||j ksJ |jd }| |ksJ tj||| jj|r'dndd}tj||| jj| jj| jj	| jj
|r?dndd}|jddd}	|jddd}
|rY|	|
||fS |	|
ddfS )	zInner intersection method.
        Does exact intersection of dense_fsa_vec against num_graphs and pruned intersection against den_graph.
        
        Optiolally returns the numerator and the denominator lattices.
        r   r>   N)r?   r@   rA   rC   )r?   r@   search_beamrA   min_active_statesmax_active_statesrC   TFrD   )rG   rH   rI   r   rV   r   rA   intersect_dense_prunedrc   rd   re   rW   )r-   r4   r5   r6   r7   rG   rX   num_latsden_latsr`   ra   r0   r0   r1   r"      s0   



z*MAPLoss._intersect_calc_scores_impl_prunedemissions_graphssupervision_graphssupervisionsc                 C   s  | j dk}| ||d |d |\}}}}t|dddf jtjd}	d| _|| |	 }
t|
| j\}}|r|dur?|dusAJ |	 |j
jd |j
jd d f}|j d}tt||j|j|jg|dd |dd}~tt||j|j|jg|dd |dd}~tj||   d	 }~~t|| j\}}||@ }| jd
kr| j ||	 |  ||  n| j | | }||fS |}| jd
kr||  n| }||fS )ac  Intersects emissions_graphs with supervision_graphs and calculates lattice scores.
        This version implicitly assumes supervision_graphs to be a pair of the numerator and the denominator FSAs.

        It can also calculate accuracy between the numerator and the denominator lattices to use it as additional loss.

        Can be overridden.
        r   r   r<   Nr9   FT)indicesvaluessizemin_col_index)r<   r=   none)r   r$   r   rS   rL   long_MAPLoss__batch_orderr   r   rI   scoresrH   ri   row_idsr   r   index_selectr>   phonesget_arc_postexpsparsesumcoalesceabsto_dense)r-   ri   rj   rk   boostedr`   ra   rg   rh   inverted_batch_order
tot_scoresmmi_tot_scoresmmi_valid_maskrn   rt   
num_sparse
den_sparseacc_lossacc_tot_scoresacc_valid_mask
valid_mask
total_lossr0   r0   r1   _intersect_calc_scores   sR   



zMAPLoss._intersect_calc_scoresr2   r   )T)__name__
__module____qualname____doc__r   r   intr(   r   r   boolr   floatr    r+   r   rL   Tensorr#   r"   r   r   __classcell__r0   r0   r.   r1   r   0   s    
	
-
?
&r   c                       sv   e Zd ZdZddddde dfdeded	ed
ee dede	dee
def  de	dedef fddZdddZ  ZS )
CtcMmiLossa  MMI loss with custom CTC topologies.
    Available topologies:
        -   `default`, with or without self-loops
        -   `compact`, with or without self-loops
        -   `shared_blank`, with or without self-loops
        -   `minimal`, without self-loops

    cfg takes precedence over all optional parameters
    We keep explicit parameter setting to be able to create an instance without the need of a config.
    Nr   TFr   r   r   r   r   r   r   r   r   r   r   r   c                    s$   t  j|||||||||	|
d
 d S )N)
r   r   r   r   r   r   r   r   r   r   )r   r    r,   r.   r0   r1   r      s   
zCtcMmiLoss.__init__r2   c                 C   s   || _ | j  }t|drt|d |j}| | jd kr+td|  d| j ddl	m
} || j| j| j| j|d| _d S )N
aux_labelsr<   z1lm_graph is not compatible with the num_classes: z, r   )MmiGraphCompiler)	aux_graph)r)   rJ   hasattrdelattrlabelsmaxr   r*   unique-nemo.collections.asr.parts.k2.graph_compilersr   r   r   r   r%   )r-   r2   r)   r   compilerr0   r0   r1   r+   4  s   


zCtcMmiLoss.update_graphr   )r   r   r   r   r   r   r(   r   r   r   r   r   r    r+   r   r0   r0   r.   r1   r     s>    	
r   )abcr   typingr   r   r   r   rL   	omegaconfr   %nemo.collections.asr.parts.k2.classesr   )nemo.collections.asr.parts.k2.loss_mixinsr	   %nemo.collections.asr.parts.k2.ml_lossr
   #nemo.collections.asr.parts.k2.utilsr   r   r   r   nemo.core.utils.k2_guardr   
nemo.utilsr   r   r   r0   r0   r0   r1   <module>   s    _