o
    }o™i“)  ã                   @   sd   d dl mZmZmZmZ d dlZd dlmZ d dlm	Z	 ddgZ
G dd„ deƒZG dd„ deƒZdS )	é    )ÚAnyÚDictÚOptionalÚTupleN)ÚMetric)Ú
METRIC_EPSÚClassificationReportÚMultiLabelClassificationReportc                       sŒ   e Zd ZdZdZ				ddedeeef ded	ed
e	e
 f
‡ fdd„Zdejdejddfdd„Zdeejejejejf fdd„Z‡  ZS )r   aã  
    This metric computes the number of True Positive, False Negative, and False Positive examples per class.
    When doing distributed training/evaluation the result of res=ClassificationReport(predictions, labels) calls
    will be all-reduced between all workers using SUM operations.

    If used with PytorchLightning LightningModule, include TPs, FNs, and FPs inside validation_step results.
    Then aggregate them at the end of validation epoch to correctly compute validation precision, recall, f1
    using get_precision_recall_f1().

    Example:
        def validation_step(self, batch, batch_idx):
            ...
            tp, fn, fp, _ = self.classification_report(preds, labels)

            return {'val_loss': val_loss, 'tp': tp, 'fn': fn, 'fp': fp}

        def on_validation_epoch_end(self):
            ...
            # calculate metrics and classification report
            precision, recall, f1, report = self.classification_report.compute()

            logging.info(report)

            self.log('val_loss', avg_loss, prog_bar=True)
            self.log('precision', precision)
            self.log('f1', f1)
            self.log('recall', recall)

    Args:
        num_classes: number of classes in the dataset
        label_ids (optional): label name to label id mapping
        mode: how to compute the average
        dist_sync_on_step: sync across ddp
        process_group: which processes to sync across
    Return:
        aggregated precision, recall, f1, report
    TNÚmacroFÚnum_classesÚ	label_idsÚmodeÚdist_sync_on_stepÚprocess_groupc                    s    t ƒ j||d || _|rdd„ | ¡ D ƒ| _nd | _|| _| jdt |¡ddd | jdt |¡ddd | jd	t |¡ddd | jd
t |¡ddd d S )N)r   r   c                 S   s   i | ]\}}||“qS © r   )Ú.0ÚkÚvr   r   úf/home/ubuntu/.local/lib/python3.10/site-packages/nemo/collections/nlp/metrics/classification_report.pyÚ
<dictcomp>L   s    z1ClassificationReport.__init__.<locals>.<dictcomp>ÚtpÚsumF)ÚdefaultÚdist_reduce_fxÚ
persistentÚfnÚfpÚnum_examples_per_class)	ÚsuperÚ__init__r   ÚitemsÚids_to_labelsr   Ú	add_stateÚtorchÚzeros)Úselfr   r   r   r   r   ©Ú	__class__r   r   r   A   s   
ÿzClassificationReport.__init__ÚpredictionsÚlabelsÚreturnc                 C   sè   g }g }g }t | jƒD ]+}||k}||k}| ||k|  ¡ ¡ | ||k|  ¡ ¡ | ||k|  ¡ ¡ qt |¡ |j¡}	t |¡ |j¡}
t |¡ |j¡}|	|
 }|  j|	7  _|  j	|
7  _	|  j
|7  _
|  j|7  _dS )á
  
        Updates attributes needed for new classification report (true positive, false negative, false postive, examples per class)

        Args:
            predictions: predicted labels 
            labels: actual labels

        Return:
            None
        N)Úranger   Úappendr   r#   ÚtensorÚtoÚdevicer   r   r   r   )r%   r(   r)   ÚTPÚFNÚFPÚlabel_idÚcurrent_labelÚlabel_predictedr   r   r   r   r   r   r   ÚupdateX   s"   zClassificationReport.updatec              
   C   sŽ  t  | j¡}t  | j¡ d¡}t  | jd | j| j t ¡}t  | jd | j| j	 t ¡}t  d| | || t ¡}d 
ddddd	¡}tt| jƒƒD ].}d
|› }| jre|| jv re| j| › d|› d}|d 
||| || || | j| ¡7 }qKt  t  | j¡d t  | j| j ¡t ¡}	t  t  | j¡d t  | j| j	 ¡t ¡}
t  d|	 |
 |	|
 t ¡}t  |¡| }t  |¡| }t  |¡| }t  || j ¡| }t  || j ¡| }t  || j ¡| }|d7 }|d 
d|	|
||¡7 }|d 
d||||¡7 }|d 
d||||¡d 7 }|| _| jdkr||||fS | jdkr'||||fS | jdkr3|	|
||fS | jdkr?||||fS t| j› dƒ‚)zì
        Aggregates and then calculates logs classification report similar to sklearn.metrics.classification_report.
        Typically used during epoch_end.

        Return:
            aggregated precision, recall, f1, report
        r   éd   é   z+
{:50s}   {:10s}   {:10s}   {:10s}   {:10s}ÚlabelÚ	precisionÚrecallÚf1Úsupportz
label_id: z (ú)z/
{:50s}   {:8.2f}   {:8.2f}   {:8.2f}   {:8.0f}z
-------------------z	micro avgz	macro avgzweighted avgÚ
r
   ÚweightedÚmicroÚallzs mode is not supported. Choose "macro" to get aggregated numbers             or "all" to get values for each class.)r#   r   r   ÚnonzeroÚsizeÚtrue_divider   r   r   r   Úformatr,   Úlenr!   Útotal_examplesr   Ú
ValueError)r%   rI   Únum_non_empty_classesr;   r<   r=   ÚreportÚir:   Úmicro_precisionÚmicro_recallÚmicro_f1Úmacro_precisionÚmacro_recallÚmacro_f1Úweighted_precisionÚweighted_recallÚweighted_f1r   r   r   Úcomputex   s`   
ÿ**
ÿ
ÿ
ÿýÿ
ÿzClassificationReport.compute)Nr
   FN)Ú__name__Ú
__module__Ú__qualname__Ú__doc__Úfull_state_updateÚintr   ÚstrÚboolr   r   r   r#   ÚTensorr7   r   rW   Ú__classcell__r   r   r&   r   r      s(    &úþ
ýüûú* c                   @   s*   e Zd ZdZdejdejddfdd„ZdS )r	   a  
    This metric computes the number of True Positive, False Negative, and False Positive examples per class for
    a multi-label dataset.
    
    When doing distributed training/evaluation the result of res=ClassificationReport(predictions, labels) calls
    will be all-reduced between all workers using SUM operations.

    If used with PytorchLightning LightningModule, include TPs, FNs, and FPs inside validation_step results.
    Then aggregate them at the end of validation epoch to correctly compute validation precision, recall, f1
    using get_precision_recall_f1().

    Example:
        def validation_step(self, batch, batch_idx):
            ...
            tp, fn, fp, _ = self.classification_report(preds, labels)

            return {'val_loss': val_loss, 'tp': tp, 'fn': fn, 'fp': fp}

        def on_validation_epoch_end(self):
            ...
            # calculate metrics and classification report
            precision, recall, f1, report = self.classification_report.compute()

            logging.info(report)

            self.log('val_loss', avg_loss, prog_bar=True)
            self.log('precision', precision)
            self.log('f1', f1)
            self.log('recall', recall)

    Args:
        num_classes: number of classes in the dataset
        label_ids (optional): label name to label id mapping
        mode: how to compute the average
        dist_sync_on_step: sync across ddp
        process_group: which processes to sync across
    Return:
        aggregated precision, recall, f1, report
    r(   r)   r*   Nc                 C   sü   |  ¡ }g }g }g }t| jƒD ]1}|| }|| }| ||k|dk  ¡ ¡ | ||k|dk  ¡ ¡ | ||k|dk  ¡ ¡ qt |¡ |j¡}	t |¡ |j¡}
t |¡ |j¡}|	|
 }|  j	|	7  _	|  j
|
7  _
|  j|7  _|  j|7  _dS )r+   é   N)Útr,   r   r-   r   r#   r.   r/   r0   r   r   r   r   )r%   r(   r)   r1   r2   r3   r4   r5   Úlabels_predictedr   r   r   r   r   r   r   r7   æ   s$   z%MultiLabelClassificationReport.update)rX   rY   rZ   r[   r#   r`   r7   r   r   r   r   r	   ½   s    ()Útypingr   r   r   r   r#   Útorchmetricsr   Útorchmetrics.utilities.datar   Ú__all__r   r	   r   r   r   r   Ú<module>   s    &