o
    QiB                     @   s   d dl mZmZmZ d dlZd dlZejjdkrdZndZdd Z	dd	 Z
d
d Zdd Zedkrfd dlZdZejedZejdedd ejdedd e Ze	ejZe	ejZe
eeZee dS dS )    )divisionprint_functionabsolute_importN   TFc                 C   s   g }g }t j| ddd<}|D ]0}| }|dkr!|| g }q|d\}}|}tdu r8|d}|d}|||g qW d    |S 1 sKw   Y  |S )Nr	utf_8_sig)encodingEOS	TzUTF-8)codecsopenrstripappendsplitPY_3encode)filenamesentdataflinesurfacecsv_form r   L/home/ubuntu/.local/lib/python3.10/site-packages/nagisa/mecab_system_eval.pyreadFile   s&   



r   c                 C   s  t | t |kstdt | }d}d}ddg}t|D ]}d}d}d}	d}
| | }|| }|t |k |t |k @ r|	|
kr|| d || d krR|d  d7  < || d || d krf|d  d7  < |	t || d 7 }	|
t || d 7 }
|d7 }|d7 }|d7 }|d7 }n)|	|
k r|	t || d 7 }	|d7 }|d7 }n|
t || d 7 }
|d7 }|d7 }|t |k |t |k @ s:|t |k r|d7 }|d7 }|t |k s|t |k r|d7 }|d7 }|t |k sq|d }|d }||||gS )z
    This script is written by referring to the following code.
    https://github.com/taku910/mecab/blob/master/mecab/src/eval.cpp
    zlen(sys_data) != len(ans_data)r      )lenAssertionErrorrange)sys_dataans_data	num_sentsprecrecallnum_correctii_sysi_ansl_sysl_anssys_sentans_sentws_cpt_cr   r   r   
mecab_eval!   sZ   

r/   c                 C   s   | \}}}}t d| | d}t d| | d}|| dkr$t dd}nt d| | ||  d}t d| | d}t d| | d}	||	 dkrOt dd}
nt d| |	 ||	  d}
|||||	|
gS )Nd      r   g           )round)r   r-   r.   r#   r$   ws_pws_rws_fpt_ppt_rpt_fr   r   r   calculate_fvaluesY   s   r:   c                 C   s  | \}}}}t | \}}}}}	}
t|d t| d t| d }t|d t| d t| d }t|d t| d t| d }t|	d t| d t| d }d||t|g}d||t|
g}g d}td| td| td| d S )N(/)zLEVEL 0:z
LEVEL ALL:)z        	precisionr$   Fr
   )r:   strprintjoin)r   r-   r.   r#   r$   r4   r5   r6   r7   r8   r9   ws_p_outws_r_outpt_p_outpt_r_outws_outpt_outheaderr   r   r   
print_evalm   s   $$$$rJ   __main__a  Reimplementation of mecab-system-eval            (https://github.com/taku910/mecab/blob/master/mecab/src/eval.cpp).            However, this script only evaluates on the word segmentation level            and POS-tagging level. Input file format: Word	POS-tag)descriptionz--systemzSystem output's file)typehelpz--answerzAnswer file)
__future__r   r   r   sysr   version_infomajorr   r   r/   r:   rJ   __name__argparsedscArgumentParserparseradd_argumentr@   
parse_argsargssystemr    answerr!   r   r   r   r   r   <module>   s,   8


