o
    Qi                     @   s   d dl Z d dlmZ e je jeZed Zed Z	e
eddZdd eD ZW d   n1 s4w   Y  e
e	ddZd	d eD ZW d   n1 sQw   Y  d
d Zdd ZdddZdddZdddZdddZdS )    N)
check_dirsz/frequent_enrolled_josa.txtz/frequent_noun_suffix.txtutf-8encodingc                 C      h | ]}|r|  qS  strip.0wordr   r   T/home/ubuntu/.local/lib/python3.10/site-packages/soynlp/noun/_noun_postprocessing.py	<setcomp>	       r   c                 C   r   r   r   r
   r   r   r   r      r   c                 C   sj   t |  t| ddd }|d| t|D ]
}|d| qW d    d S 1 s.w   Y  d S )Nar   r   z
{}
z{}
)r   openwriteformatsorted)pathheaderwordsfr   r   r   r   	write_log   s   "r   c                    s    fdd|   D S )Nc                    s"   i | ]\}}| v d kr||qS )Fr   )r   r   scoreremovalsr   r   
<dictcomp>   s    z&_select_true_nouns.<locals>.<dictcomp>)items)nounsr   r   r   r   _select_true_nouns   s   r    c           
      C   s   |sd}t  }| D ]6}t|dkrq	tdt|D ]%}|d | ||d  }}t|dkr/q|| v r>||v r>||  nqq	|rHt||| t| |}	|	|fS )Nz2## Ignored noun candidates from detaching features      )setlenrangeaddr   r    )
r   featureslogpath	logheaderr   r   elrnouns_r   r   r   detaching_features   s$   

r.   c                 C   sL   |sd}t  }| D ]}||v r|| q	|rt||| t| |}||fS )Nz7## Ignored noun candidates these are same with features)r#   r&   r   r    )r   r'   r(   r)   r   r   r-   r   r   r   ignore_features7   s   

r/      c                 C   s   |sd}t  }|  D ]T\}}t|}|dkrqtd|D ]A}	|d |	 ||	d  }
}|tvsB|tv sB|
| vsB|d | |
 d krCq|j|
i }dd |D }t|}||kr^|| qq|rht	||| t
| |}||fS )Nz## Ignored true N+Jr!   r   c                 S   s   g | ]}|t v r|qS r   )josaset)r   r,   r   r   r   
<listcomp>^   r   z!check_N_is_NJ.<locals>.<listcomp>)r#   r   r$   r%   r1   	suffixset
_lr_origingetr&   r   r    )r   lrgraphmin_num_of_josar(   r)   r   r   r   nir+   r,   r'   n_josar-   r   r   r   check_N_is_NJH   s2   

r;   
   ffffff?r!   c                 C   s   i }|S )Nr   )r   sents	min_count	min_scoremax_nngram_nounsr   r   r   rB   i   s   rB   )NN)r0   NN)r<   r=   r!   )ossoynlp.utilsr   r   dirnamerealpath__file__filepathjosapath
suffixpathr   r   r1   r3   r   r    r.   r/   r;   rB   r   r   r   r   <module>   s"    


!