o
    }oi-                     @   s   d dl mZ d dlmZmZmZ d dlZd dlmZ d dl	m
Z
mZmZmZmZmZ d dlmZ G dd deZG d	d
 d
eZdS )    )ABC)ListOptionalTupleN)GradExpNormalize)create_supervisionget_arc_weightsget_uniform_rnnt_prune_rangesmake_non_pad_maskmake_non_pad_mask_3dprep_padded_densefsavec)k2c                   @   s   e Zd ZdZ		ddejdejdeej deej deejejeej eej f f
dd	Zdejd
ejddfddZ	dejdejdejfddZ
	ddddededeeej eej f fddZdS )
CtcK2Mixina  k2 Mixin class that simplifies the construction of various k2-based CTC-like losses.
    
    It does the following:
        -   Prepares and adapts the input tensors (method _prepare_log_probs_and_targets).
        -   Creates Emissions graphs (method _prepare_emissions_graphs).
        -   Extracts the labels and probabilities of the best lattice path (method _extract_labels_and_probabilities).
    N	log_probsinput_lengthstargetstarget_lengthsreturnc                 C   sL   | d| jks
J t|}|||dur"t|| jk |d ||fS d|fS )zcCreates k2-style supervisions and shifts targets by one if the <blank> number is not zero.
        N   )sizenum_classesr   torchwhereblank)selfr   r   r   r   supervisions r   ]/home/ubuntu/.local/lib/python3.10/site-packages/nemo/collections/asr/parts/k2/loss_mixins.py_prepare_log_probs_and_targets)   s   	z)CtcK2Mixin._prepare_log_probs_and_targetsr   k2.DenseFsaVecc                 C   s   | j rt||S t||S )a"  Creates DenseFsaVec, padding it with <epsilon> frames if the topology is `compact`.
        In particular, every second frame of the DenseFsaVec is the <epsilon> frame.

        <epsilon> frame is a frame with <epsilon> log-probability zero and every other log-probability is -inf.
        )
pad_fsavecr   r   DenseFsaVec)r   r   r   r   r   r   _prepare_emissions_graphs<   s
   

z$CtcK2Mixin._prepare_emissions_graphsc                 C   s   t ||| jdkrdS dS )zPyTorch is doing the log-softmax normalization as part of the CTC computation.
        More: https://github.com/k2-fsa/k2/issues/575
        summeannone)r   apply	reductionr   r   r   r   r   r   _maybe_normalize_gradientsH   s   z%CtcK2Mixin._maybe_normalize_gradientsFTshortest_path_fsaszk2.Fsareturn_ilabelsoutput_alignedc           
      C   s   g }g }t |jd D ]`}|| }|jdk|j| jk @ }|r$|j| }	n |j| }	| jdkrD| j|	|	dk< |	|	dk|	| jk @   d8  < |	jtjd}	|sV|sV|	|	| jk }	|	|	 |	t
||  j|jd q||fS )zExtracts the labels and probabilities of the best lattice path,
        dropping <epsilon> arcs and restoring the targets shift, if needed.
        r   r   r   dtypedevice)rangeshapelabelsr   
aux_labelsr   tor   longappendr   expr1   )
r   r+   r,   r-   shortest_pathsprobsishortest_path_fsanon_eps_maskr4   r   r   r   !_extract_labels_and_probabilitiesN   s"   


"z,CtcK2Mixin._extract_labels_and_probabilities)NN)FT)__name__
__module____qualname____doc__r   Tensorr   r   r   r#   r*   boolr   r?   r   r   r   r   r       s6    
r   c                       s   e Zd ZdZdejdejdejdejdeejejejejf f
 fddZdejd	ejdd
fddZdejdejdejfddZ	  Z
S )RnntK2Mixina_  k2 Mixin class that simplifies the construction of various k2-based RNNT-like losses. Inherits CtcK2Mixin.
    
    It does the following:
        -   Prepares and adapts the input tensors.
        -   Creates Emissions graphs.
        -   Extracts the labels and probabilities of the best lattice path (method _extract_labels_and_probabilities).
    r   r   r   r   r   c                    s  t | dks
J | \}}}}|| }	| jdkr| j| k r| jd }
t|||
| j|d}|ddddf |ddddf  }t|dk}| jdkrjtjt	|j
|| ddj
}|d |d f}tj|d dd\}}|d dt |d|dk}|d t|dd|dd|
 |d |d d f t|d|  d f}| }t|f|}||  |8  < tt||}t|}|dt |d|dk}|||
 | t|d|  f}t|d |d ft|d |d ff| _|| | _nd| _d| _t||d ||d}|d}t||	 }| }tj|	| |jd	}| }|| ||< ||  || < |d|| |d|}t ||||S )
a  Before calling super()._prepare_log_probs_and_targets, this method reshapes the log_probs tensor 
        from (B, T, U+1, D) to (B, T', D) where T' = T*(U+1), shifts paddings along T and U towards the end of T', 
        and recomputes input_lengths.

        It also calculates indices on which <epsilon> steps should be applied to the log_probs tensor to emulate 
        <blank> arcs shift of the Emissions graph for the pruned RNNT variant.
           r   r   TNr   dim)return_countsr0   )lenr   predictor_window_sizemaxr	   predictor_step_sizer   r   repeat_interleavestackTunique	unsqueezerepeatarangecumsumfullcat_RnntK2Mixin__step_indices_RnntK2Mixin__supervisions_addr   flattenr$   r
   r1   clonereshapeviewsuperr   )r   r   r   r   r   BrQ   UDTUwindow_size_with_blankranges_begin
step_sizesraw_step_indicesrR   count
shift_maskstep_indices	max_countmax_count_vecpad_indices_row
pad_uniquepad_shift_maskpad_indicesnon_pad_mask_truenon_pad_mask_fakerearranged_indicesrearranged_indices_buffer	__class__r   r   r   u   sn   
(
$"	
 
z*RnntK2Mixin._prepare_log_probs_and_targetsr   r    c                 C   s  | j du s
| jdu r#tj|tj|d|ddf|jdfdd}ntj|d|dtt| j d |d  ftj	d}d|| j < tj|d|d|dd f|jd}tj
ttjjg|d dg |jd||< tj|tj|d|ddf|jdfddd	|d	d || < |ddd	f | j|dddf jtjd  }t|dd	 |dd  dkstj|dd
}||df |dddf< || |ddd	f< n||ddd	f< d| _ d| _t||S )a  Overrides super()._prepare_emissions_graphs.
        Creates DenseFsaVec, adding <epsilon> outputs to the end of the D dimension.

        If pruning is used, this method also pads the DenseFsaVec with <epsilon> frames 
        according to the <epsilon> steps, calculated before.

        <epsilon> frame is a frame with <epsilon> log-probability zero and every other log-probability is -inf.
        Nr   r   r0      rH   r.   Tr   )
descending)rY   rZ   r   rX   zerosr   r1   intrK   rE   tensorfinfofloat32minr^   r6   r7   allargsortr   r"   )r   r   r   log_probs_epsmaskr   orderr   r   r   r#      s6   	&.
,"
&0"z%RnntK2Mixin._prepare_emissions_graphsc                 C   s   |S )zNot required for RNNT.
        r   r)   r   r   r   r*      s   z&RnntK2Mixin._maybe_normalize_gradients)r@   rA   rB   rC   r   rD   r   r   r#   r*   __classcell__r   r   ru   r   rF   l   s    J$'rF   )abcr   typingr   r   r   r   (nemo.collections.asr.parts.k2.grad_utilsr   #nemo.collections.asr.parts.k2.utilsr   r   r	   r
   r   r   nemo.core.utils.k2_guardr   r   rF   r   r   r   r   <module>   s    L