o
    }oi,4                     @   s  d Z ddlZddlZddlZddlZddlZddlmZm	Z	m
Z
mZmZmZmZmZmZmZmZmZmZmZmZ ddlmZ dgZdZdZdZd	Zd
edefddZ d
ededefddZ!de"fddZ#de"de"de"dede$de$fddZ%dedededede$de$de"fddZ&dS )z
Evaluate predictions JSON file, w.r.t. ground truth file.
This file contains code artifacts adapted from the original implementation:
https://github.com/google-research/google-research/blob/master/schema_guided_dst/evaluate.py
    N)ACTIVE_INTENT_ACCURACYJOINT_CAT_ACCURACYJOINT_GOAL_ACCURACYJOINT_NONCAT_ACCURACYNAN_VALREQUESTED_SLOTS_F1REQUESTED_SLOTS_PRECISIONREQUESTED_SLOTS_RECALLSLOT_TAGGING_F1SLOT_TAGGING_PRECISIONSLOT_TAGGING_RECALLget_active_intent_accuracy#get_average_and_joint_goal_accuracyget_requested_slots_f1get_slot_tagging_f1)loggingget_in_domain_servicesz#ALL_SERVICESz#SEEN_SERVICESz#UNSEEN_SERVICESzdialogues_and_metrics.jsonschema_pathreturnc                 C   sb   t  }t| dd}t|}|D ]	}||d  q|  W d   |S 1 s*w   Y  |S )z
    Get the set of all services present in a schema.
    Args:
        schema_path: schema file path
    Returns:
        service_set: set of services in file
    UTF-8encodingservice_nameN)setopenjsonloadaddclose)r   service_setfschemaservice r#   c/home/ubuntu/.local/lib/python3.10/site-packages/nemo/collections/nlp/data/dialogue/sgd/evaluate.pyget_service_set:   s   


r%   r   c                 C   s   t | |@ }|S )zGet the set of common services between a schema and set of services.
    Args:
        schema_path: path to schema file
        service_set: set of services
    Returns: 
        joint_services: joint services between schema path file and service set
    )r%   )r   r   joint_servicesr#   r#   r$   r   K   s   c              	   C   s   i }t | tr
| }ntt| }|D ]G}t|v rqtd| t|dd,}t	|}t |tr=|D ]}|||d < q3n
t |t
rG|| |  W d   n1 sUw   Y  q|S )zRead the DSTC8/SGD json dialogue data as dictionary with dialog ID as keys.
    Args:
        file_path_patterns: list or directory of files 
    Returns:
        dataset_dict: dataset dictionary with dialog ID as keys
    zLoading file: %sr   r   dialogue_idN)
isinstancelistsortedglobPER_FRAME_OUTPUT_FILENAMEr   debugr   r   r   dictupdater   )file_path_patternsdataset_dictlist_fpfpr    datadialr#   r#   r$   get_dataset_as_dictW   s(   





r6   dataset_refdataset_hypservice_schemasin_domain_servicesjoint_acc_across_turnuse_fuzzy_matchc           $      C   sr  t dd }t| t|  sJ tdt|t|  i }| D ]Y\}}	| | }
t|
d t|	d krCt	d
|tttg}tt|
d |	d D ],\}\}}t dd }|d |d krpt	d	
||d d
krwqS|d |d krtd|d  td|d  t	d
|dd |d D }|d D ]}|d }||vrt	d
|||| }|| }t||}t|||d |}t||}t||||}t|t|jt|jt|ji}|dur|j|t< |j|t< |j|t< || d
|||d }|||< ||d< |d  dd }t!|d |g}|d |v r*|"t# n|"t$ |D ]/}| D ]'\}}|t%kr]|rT||v rT|| |  |9  < q7|| | "| q7q1q|r|D ]}||  D ]\}}|| | "| qoqgqSq'i } | D ],\}}!i }"|! D ]\}}#|#rt&t't()|#d d|"|< qt%|"|< q|"| |< q| |fS )a!  Calculate the DSTC8/SGD metrics.
    Args:
        dataset_ref: The ground truth dataset represented as a dict mapping dialogue id to the corresponding dialogue.
        dataset_hyp: The predictions in the same format as `dataset_ref`.
        service_schemas: A dict mapping service name to the schema for the service.
        in_domain_services: The set of services which are present in the training set.
        joint_acc_across_turn: Whether to compute joint accuracy across turn instead of across service. Should be set to True when conducting multiwoz style evaluation.
        use_fuzzy_match: Whether to use fuzzy string matching when comparing non-categorical slot values. Should be set to False when conducting multiwoz style evaluation.

    Returns:
        all_metric_aggregate: A dict mapping a metric collection name to a dict containing the values
            for various metrics. Each metric collection aggregates the metrics across a specific set of frames in the dialogues.
        per_frame_metric: metrics aggregated for each frame
    c                   S   s
   t tS )N)collectionsdefaultdictr)   r#   r#   r#   r$   <lambda>   s   
 zget_metrics.<locals>.<lambda>z(len(dataset_hyp)=%d, len(dataset_ref)=%dservicesz[Set of services present in ground truth and predictions don't match for dialogue with id {}turnsc                   S   s   t dd S )Nc                   S   s   dS )Ng      ?r#   r#   r#   r#   r$   r?      s    z/get_metrics.<locals>.<lambda>.<locals>.<lambda>)r=   r>   r#   r#   r#   r$   r?      s    speakerz+Speakers don't match in dialogue with id {}USER	utterancezRef utt: %szHyp utt: %sz.Utterances don't match for dialogue with id {}c                 S   s   i | ]}|d  |qS )r"   r#   ).0framer#   r#   r$   
<dictcomp>   s    zget_metrics.<locals>.<dictcomp>framesr"   z5Frame for service {} not found in dialogue with id {}Nz{:s}-{:03d}-{:s}metrics_r   g      Y@   )*r=   r>   r   keysissubsetr   r-   lenitems
ValueErrorformatr   r   r   	enumerateziperrorr   r   r   r   r   r   f1r   	precisionr	   recallr
   r   r   r/   splitALL_SERVICESappendSEEN_SERVICESUNSEEN_SERVICESr   roundfloatnpmean)$r7   r8   r9   r:   r;   r<   metric_collectionsper_frame_metricdial_iddial_hypdial_refjoint_metricsturn_idturn_refturn_hypmetric_collections_per_turnhyp_frames_by_service	frame_refr   r"   	frame_hypactive_intent_accslot_tagging_f1_scoresrequested_slots_f1_scoresgoal_accuracy_dictframe_metricframe_iddomain_namedomain_keys
domain_key
metric_keymetric_valueall_metric_aggregatedomain_metric_valsdomain_metric_aggregate
value_listr#   r#   r$   get_metricsr   s   
$








H r}   prediction_dirdata_direval_datasetc                 C   s~  t tj||ddd}i }t|}|D ]}	|	||	d < q|  W d   n1 s-w   Y  ttj||d}
ttj| d}t|
|||||\}}t	|v ret
dt	 d	t|t	    t|v rzt
dt d
t|t    t|v rt
dt dt|t    t tj| tddd}tj||ddd |  W d   |t S 1 sw   Y  |t S )a  Calculate the DSTC8/SGD metrics for given data.

    Args:
        prediction_dir: prediction location
        data_dir: ground truth data location.
        eval_dataset: evaluation data split
        in_domain_services: The set of services which are present in the training set.
        joint_acc_across_turn: Whether to compute joint goal accuracy across turn instead of across service. Should be set to True when conducting multiwoz style evaluation.
        use_fuzzy_match: Whether to use fuzzy string matching when comparing non-categorical slot values. Should be set to False when conducting multiwoz style evaluation.

    Returns:
        A dict mapping a metric collection name to a dict containing the values
        for various metrics for all dialogues and all services
    zschema.jsonr   r   r   Nzdialogues_*.jsonz*.jsonzDialog metrics for z  : : z   : wrK   ),r   )indent
separators)r   ospathjoinr   r   r   r6   r}   r[   r   infor*   rO   r\   rY   r,   dump)r~   r   r   r:   r;   r<   r    eval_serviceslist_servicesr"   r7   r8   ry   rJ   r#   r#   r$   evaluate   s2   

"""

r   )'__doc__r=   r+   r   r   numpyr_   (nemo.collections.nlp.metrics.sgd_metricsr   r   r   r   r   r   r   r	   r
   r   r   r   r   r   r   
nemo.utilsr   __all__rY   r[   r\   r,   strr   r%   r   r.   r6   boolr}   r   r#   r#   r#   r$   <module>   sZ   D
 