o
    }oi8                     @   sJ   d dl mZmZmZmZ d dlZd dlmZ d dlm	Z	 G dd dZ
dS )    )DictListOptionalTupleN)AccessMixin)loggingc                   @   s   e Zd ZdZdee dee fddZdd Zdd	 Z	d
d Z
dd ZdefddZdefddZdedee defddZdeeejejf  fddZ			d(dejdejd ejd!ed"ed#ed$edeeej ef fd%d&Zd'S ))InterCTCMixina  Adds utilities for computing interCTC loss from https://arxiv.org/abs/2102.03216.

    To use, make sure encoder accesses ``interctc['capture_layers']``
    property in the AccessMixin and registers ``interctc/layer_output_X`` and
    ``interctc/layer_length_X`` for all layers that we want to get loss from.
    Additionally, specify the following config parameters to set up loss::

        interctc:
            # can use different values
            loss_weights: [0.3]
            apply_at_layers: [8]

    Then call

        * ``self.setup_interctc(ctc_decoder_name, ctc_loss_name, ctc_wer_name)``
          in the init method
        * ``self.add_interctc_losses`` after computing regular loss.
        * ``self.finalize_interctc_metrics(metrics, outputs, prefix="val_")``
          in the `multi_validation_epoch_end` method.
        * ``self.finalize_interctc_metrics(metrics, outputs, prefix="test_")``
          in the `multi_test_epoch_end` method.
    loss_weightsapply_at_layersc                 C   s   |  d| |  d| |  ddt|  | ddkr!td|  dt|dk t|t|kr7td	tjd
d|iit| dd d t| drQ| 	  d S t
d d S )Nintermediate_loss_weightsr
   main_loss_weightg      ?g        zMake sure that sum of intermediate loss weights is < 1.0. Note that we don't do any normalization and assign remaining weight to the regular model loss. E.g., if interctc.loss_weights = [0.1, 0.3], regular loss will have weight of 0.6enabledr   zELength of interctc.apply_at_layers has to match interctc.loss_weightsinterctccapture_layers
model_guid)guidpropagate_model_guidzsNot able to propagate model_guid to the submodules. Make sure to call self.propagate_model_guid() in ModelPT class.)set_interctc_paramsumget_interctc_param
ValueErrorlenr   update_access_cfggetattrhasattrr   r   warning)selfr	   r
    r   d/home/ubuntu/.local/lib/python3.10/site-packages/nemo/collections/asr/parts/mixins/interctc_mixin.py_process_config_values.   s$   
z$InterCTCMixin._process_config_valuesc                 C   s\   i | _ | jd}|dur&| jdi | || j d< || j d< || j d< dS | dd dS )	a  Sets up all interctc-specific parameters and checks config consistency.

        Caller has to specify names of attributes to perform CTC-specific WER,
        decoder and loss computation. They will be looked up in the class
        state with ``getattr``.

        The reason we get the names and look up object later is because those
        objects might change without re-calling the setup of this class. So
        we always want to look up the most up-to-date object instead of
        "caching" it here.
        r   Ndecoder_name	loss_namewer_namer   Fr   )_interctc_paramscfggetr   r   )r   r    r!   r"   interctc_configr   r   r   setup_interctcK   s   

zInterCTCMixin.setup_interctcc                 C   s&   |dv rt | | j|d  S | j| S )zzEither directly get parameter from ``self._interctc_params`` or
        call getattr with the corresponding name.
        decoderlosswer_name)r   r#   )r   
param_namer   r   r   r   f   s   
z InterCTCMixin.get_interctc_paramc                 C   s   |dv rt d|| j|< dS )zSetting the parameter to the ``self._interctc_params`` dictionary.

        Raises an error if trying to set decoder, loss or wer as those should
        always come from the main class.
        r(   zgCannot set "decoder", "loss" or "wer" as parameters. They are always looked up in the main class state.N)r   r#   )r   r-   param_valuer   r   r   r   n   s
   z InterCTCMixin.set_interctc_paramc                 C   s   t | ds	tddS )z3Can be used to verify if setup_interctc was called.r#   zuself.setup_interctc(ctc_decoder_name, ctc_loss_name, ctc_wer_name) has to be called before InterCTC loss can be used!N)r   RuntimeErrorr   r   r   r   _verify_setup_was_called{   s
   
z&InterCTCMixin._verify_setup_was_calledreturnc                 C   s   |    | dS )z)Returns whether interCTC loss is enabled.r   )r1   r   r0   r   r   r   is_interctc_enabled   s   
z!InterCTCMixin.is_interctc_enabledr   c                 C   sZ   |    |r%t| ddkrtdt| dt| dkr%td| d| dS )z0Can be used to enable/disable InterCTC manually.r   r   zWInterCTC cannot be enabled since interctc.loss_weights was not specified in the config.r
   z\InterCTC cannot be enabled, since length of "loss_weights" does not match "apply_at_layers".r   N)r1   r   r   r/   r   )r   r   r   r   r   set_interctc_enabled   s   z"InterCTCMixin.set_interctc_enabledmetricsoutputsprefixc                    s  |   r| dD ]]  d  |d v r0t fdd|D  }||d  d  <  d  |d v rft fdd|D  }t fd	d|D  }|| |d  d
  < q	 d|d v rtfdd|D  |d  d< dS dS dS )a)  Finalizes InterCTC WER and loss metrics for logging purposes.

        Should be called inside ``multi_validation_epoch_end`` (with ``prefix="val_"``) or
        ``multi_test_epoch_end`` (with ``prefix="test_"``).

        Note that ``metrics`` dictionary is going to be updated in-place.
        r
   inter_ctc_loss_lr   c                       g | ]}| d    qS )r8   r   .0x	layer_idxr7   r   r   
<listcomp>       z;InterCTCMixin.finalize_interctc_metrics.<locals>.<listcomp>loginter_wer_num_lc                    r9   )rB   r   r:   r=   r   r   r?      r@   c                    r9   )inter_wer_denom_lr   r:   r=   r   r   r?      r@   inter_wer_l
final_lossc                    s   g | ]	}|  d  qS )rE   r   r:   )r7   r   r   r?      s    N)r3   r   torchstackmeanr   )r   r5   r6   r7   r*   wer_num	wer_denomr   r=   r   finalize_interctc_metrics   s   .z'InterCTCMixin.finalize_interctc_metricsc              	   C   s   |   sg S i }t| j D ]}|D ]}|dr'||v r'td| dq|| qg }| dD ]C}z|d|  }|d|  }W n t	yV   td| dw t
|d	ksct
|d	krgtd
|| d|d d|d f q5|S )zReturns a list of captured tensors from encoder: tuples of (output, length).

        Will additionally apply ``ctc_decoder`` to the outputs.
        z	interctc/zlayer z  has been logged multiple times!r
   zinterctc/layer_output_zinterctc/layer_length_zIntermediate layer z was not captured! Check if length of model.encoder.captured_layer_outputs matches length of model.intermediate_loss_weights properties.   zVMake sure encoder.forward is called exactly one time before interCTC loss is computed.r)   r   )encoder_output)r3   r   get_module_registryencodervalues
startswithr/   updater   KeyErrorr   append)r   total_registrymodule_registrykeycaptured_tensorsr>   layer_outputslayer_lengthsr   r   r   get_captured_interctc_tensors   s6   
z+InterCTCMixin.get_captured_interctc_tensorsTF 
loss_value
transcripttranscript_lencompute_wercompute_losslog_wer_num_denom
log_prefixc                 C   sP  |   rtt| dds|i fS i }|r||| d< nd}|  }	|r,|| d9 }t| d|	| dD ]k\}
}}|r_| d|d |||d	 d
}| || d|
 < ||| 7 }|r| dj|d |||d	 d | d	 \}}}| d
  || d|
 |i |r|| d|
 || d|
 |i q8||fS )a  Adding interCTC losses if required.

        Will also register loss/wer metrics in the returned dictionary.

        Args:
            loss_value (torch.Tensor): regular loss tensor (will add interCTC loss to it).
            transcript (torch.Tensor): current utterance transcript.
            transcript_len (torch.Tensor): current utterance transcript length.
            compute_wer (bool): whether to compute WER for the current utterance.
                Should typically be True for validation/test and only True for
                training if current batch WER should be logged.
            compute_loss (bool): whether to compute loss for the current utterance.
                Should always be True in training and almost always True in
                validation, unless all other losses are disabled as well.
                Defaults to True.
            log_wer_num_denom (bool): if True, will additionally log WER num/denom
                in the returned metrics dictionary. Should always be True for
                validation/test to allow correct metrics aggregation. Should
                always be False for training. Defaults to False.
            log_prefix (str): prefix added to all log values. Should be ``""`` for
                training and ``"val_"`` for validation. Defaults to "".

        Returns:
            tuple[Optional[torch.Tensor], Dict]: tuple of new loss tensor and dictionary with logged metrics.
        r   NrE   r   r
   r   r*   r   rL   )	log_probstargetstarget_lengthsinput_lengthsr8   r+   )predictionsre   targets_lengthspredictions_lengthsrD   rB   rC   )r3   r   is_access_enabledr   r[   r   zipdetachrR   computereset)r   r]   r^   r_   r`   ra   rb   rc   r5   rX   r>   intermediate_resultloss_weightinter_loss_valuer+   rI   rJ   r   r   r   add_interctc_losses   sR   #
z!InterCTCMixin.add_interctc_lossesN)TFr\   )__name__
__module____qualname____doc__r   floatintr   r'   r   r   r1   boolr3   r4   r   strrK   r   rF   Tensorr[   r   rs   r   r   r   r   r      s>    -	r   )typingr   r   r   r   rF   nemo.core.classes.mixinsr   
nemo.utilsr   r   r   r   r   r   <module>   s
   