o
    }oi&                     @   s   d dl Z d dlZd dlZd dlmZ d dlmZmZ d dlZd dl	m
Z
 dgZG dd de
Zdd ZG d	d
 d
e
ZG dd de
ZG dd de
ZdS )    N)Counter)ListUnion)MetricTopKClassificationAccuracyc                       s   e Zd ZdZdZd fdd	Ze dejdejfd	d
Z	dejdejdejfddZ
dd Zedee fddZejdee fddZ  ZS )r   a{  
    This metric computes numerator and denominator for Overall Accuracy between logits and labels.
    When doing distributed training/evaluation the result of res=TopKClassificationAccuracy(logits, labels) calls
    will be all-reduced between all workers using SUM operations.
    Here contains two numbers res=[correctly_predicted, total_samples]. Accuracy=correctly_predicted/total_samples.

    If used with PytorchLightning LightningModule, include correct_count and total_count inside validation_step results.
    Then aggregate (sum) then at the end of validation epoch to correctly compute validation WER.

    Example:
        def validation_step(self, batch, batch_idx):
            ...
            correct_count, total_count = self._accuracy(logits, labels)
            self.val_outputs = {'val_loss': loss_value, 'val_correct_count': correct_count, 'val_total_count': total_count}
            return self.val_outputs

        def on_validation_epoch_end(self):
            ...
            val_loss_mean = torch.stack([x['val_loss'] for x in self.val_outputs]).mean()
            correct_counts = torch.stack([x['val_correct_counts'] for x in self.val_outputs])
            total_counts = torch.stack([x['val_total_counts'] for x in self.val_outputs])

            topk_scores = compute_topk_accuracy(correct_counts, total_counts)

            tensorboard_log = {'val_loss': val_loss_mean}
            for top_k, score in zip(self._accuracy.top_k, topk_scores):
                tensorboard_log['val_epoch_top@{}'.format(top_k)] = score
            
            self.val_outputs.clear()  # free memory
            return {'log': tensorboard_log}

    Args:
        top_k: Optional list of integers. Defaults to [1].

    Returns:
        res: a torch.Tensor object with two elements: [correct_count, total_count]. To correctly compute average
        accuracy, compute acc=correct_count/total_count
    TNFc                    sb   t  j|d |d u rdg}|| _| jdtt| jddd | jdtt| jddd d S )Ndist_sync_on_step   correct_counts_ksumF)defaultdist_reduce_fx
persistenttotal_counts_k)super__init__top_k	add_statetorchzeroslen)selfr   r   	__class__ k/home/ubuntu/.local/lib/python3.10/site-packages/nemo/collections/common/metrics/classification_accuracy.pyr   E   s   "z#TopKClassificationAccuracy.__init__logitsreturnc                 C   s$   t | j}|j|dddd\}}|S )Nr	   T)dimlargestsorted)maxr   topk)r   r   max_k_predictionsr   r   r   top_k_predicted_labelsQ   s   
z1TopKClassificationAccuracy.top_k_predicted_labelslabelsc           
      C   s   t  Z | |}| }||dd|}g }g }| jD ]}|d | d	 
 }|jd }	|| ||	 q!t j||j|jd| _t j||j|jd| _W d    d S 1 saw   Y  d S )Nr	   r   )dtypedevice)r   no_gradr&   teqview	expand_asr   reshapelongr   shapeappendtensorr)   r*   r
   r   )
r   r   r'   r%   correctr
   r   k	correct_ktotal_kr   r   r   updateW   s   




"z!TopKClassificationAccuracy.updatec                 C   sf   t | jt | j  krt | jkstd td| jdgkr*| j | j gS t| j| j}|S )z
        Computes the top-k accuracy.

        Returns:
            A list of length `K`, such that k-th index corresponds to top-k accuracy
            over all distributed processes.
        z*length of counts must match to topk lengthr	   )r   r
   r   r   
ValueErrorfloatcompute_topk_accuracy)r   top_k_scoresr   r   r   computej   s   $z"TopKClassificationAccuracy.computec                 C   s   | j S N)_top_kr   r   r   r   r   }   s   z TopKClassificationAccuracy.top_kvaluec                 C   s>   |d u rdg}t |tkr|g}t |tkrt|}|| _d S Nr	   )typeintlistr@   )r   rB   r   r   r   r      s   
)NF)__name__
__module____qualname____doc__full_state_updater   r   r+   Tensorr&   r9   r>   propertyr   rE   r   setter__classcell__r   r   r   r   r      s    'c                 C   sD   g }t t| D ]}| |  }||  }||t|  q|S )aK  
    Computes the top-k accuracy
    Args:
        correct_counts: Tensor of shape [K], K being the top-k parameter.
        total_counts: Tensor of shape [K], and K being the top-k parameter.
    Returns:
        A list of length `K`, such that k-th index corresponds to top-k accuracy
        over all distributed processes.
    )ranger   itemr3   r;   )r
   r   r=   kicorrect_counttotal_countr   r   r   r<      s   
r<   c                       sB   e Zd Zg df fdd	Zddededefdd	Zd
d Z  ZS )!ExactStringPerCategoryMatchMetricFc                    s   t  j|d t|| _| jdtddd | jdtddd |D ]}| j| dtddd | j| dtddd q$d S )	Nr   r5   r   r   r   r   total_total_correct)r   r   set
categoriesr   r   r4   )r   r[   r   argskwargscategoryr   r   r   r      s   
z*ExactStringPerCategoryMatchMetric.__init__Npredtargetr^   c                 C   s   ||kr|  j d7  _ |  jd7  _|d u rd S || jv rKt| | d}t| | d|d  ||krIt| | d}t| | d|d  d S d S t| d d S )Nr	   rX   rY   z is not in the pre-defined list)r5   rW   r[   getattrsetattrloggingwarn)r   r_   r`   r^   valr   r   r   r9      s   
z(ExactStringPerCategoryMatchMetric.updatec                 C   sr   i }| j  | j |d< | jD ]}t| | dt| | d ||< q| jD ]}t| | d|| d< q'|S )NaccrY   rX   )r5   r;   rW   r[   ra   )r   resultsr^   r   r   r   r>      s   
&
z)ExactStringPerCategoryMatchMetric.computer?   rG   rH   rI   r   strr9   r>   rO   r   r   r   r   rU      s    
rU   c                       s8   e Zd Zd
 fdd	ZdedefddZdd	 Z  ZS )ExactStringMatchMetricFc                    s>   t  j|d | jdtddd | jdtddd d S )Nr   r5   r   r   rV   rW   r   r   r   r   r4   r   r   r\   r]   r   r   r   r         zExactStringMatchMetric.__init__r_   r`   c                 C   s(   ||kr|  j d7  _ |  jd7  _d S rC   )r5   rW   r   r_   r`   r   r   r   r9      s   zExactStringMatchMetric.updatec                 C      | j  | j S r?   r5   r;   rW   rA   r   r   r   r>         zExactStringMatchMetric.computeFrh   r   r   r   r   rj      s    rj   c                       sX   e Zd ZdZd fdd	Zdedeeee f fddZd	d
 Z	dd Z
dd Z  ZS )TokenF1ScorezGTaken from the official evaluation script for v1.1 of the SQuAD datasetFc                    s>   t  j|d | jdtddd | jdtddd d S )Nr   r5           r   rV   rW   r   rk   rl   r   r   r   r      rm   zTokenF1Score.__init__r_   r`   c                    s`   t |tr j |7  _nt |tr' jt fdd|D 7  _ jd7  _d S )Nc                    s   g | ]}  |qS r   )f1_score).0tgtr_   r   r   r   
<listcomp>   s    z'TokenF1Score.update.<locals>.<listcomp>r	   )
isinstanceri   r5   ru   rF   r!   rW   rn   r   rx   r   r9      s
   

"zTokenF1Score.updatec                 C   ro   r?   rp   rA   r   r   r   r>      rq   zTokenF1Score.computec           
      C   s|   |  | }|  | }t|t|@ }t| }|dkr"dS d| t| }d| t| }d| | ||  }	|	S )Nr   rt   g      ?   )	normalizesplitr   r   valuesr   )
r   
predictionground_truthprediction_tokensground_truth_tokenscommonnum_same	precisionrecallf1r   r   r   ru      s   zTokenF1Score.f1_scorec                 C   s4   dd }dd }dd }dd }|||||S )	zALower text and remove punctuation, articles and extra whitespace.c                 S   s   t dd| S )Nz\b(a|an|the)\b )resubtextr   r   r   remove_articles      z/TokenF1Score.normalize.<locals>.remove_articlesc                 S   s   d |  S )Nr   )joinr}   r   r   r   r   white_space_fix   r   z/TokenF1Score.normalize.<locals>.white_space_fixc                    s"   t tj d fdd| D S )N c                 3   s    | ]	}| vr|V  qd S r?   r   )rv   chexcluder   r   	<genexpr>  s    z>TokenF1Score.normalize.<locals>.remove_punc.<locals>.<genexpr>)rZ   stringpunctuationr   r   r   r   r   remove_punc   s   
z+TokenF1Score.normalize.<locals>.remove_puncc                 S   s   |   S r?   )lowerr   r   r   r   r     s   z%TokenF1Score.normalize.<locals>.lowerr   )r   sr   r   r   r   r   r   r   r|      s
   zTokenF1Score.normalizerr   )rG   rH   rI   rJ   r   ri   r   r   r9   r>   ru   r|   rO   r   r   r   r   rs      s    rs   )rc   r   r   collectionsr   typingr   r   r   torchmetricsr   __all__r   r<   rU   rj   rs   r   r   r   r   <module>   s   t$