o
    +wi                     @   sX  d dl mZ d dl mZ d dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
mZ ejejeZg dZe	jd  dk rHG dd dejZd(d
dZe	jd  dk rcd dlmZ d)ddZdd Zdd Ze	jd  dk rsdd ZnejZdd Zdd Ze	jd  dkreZdd Zd)ddZ d)ddZ!d)d d!Z"d"d# Z#d$d% Z$G d&d' d'eZ%dS )*    )absolute_import)unicode_literalsN)Thread))   ·/)u   ․r   )u   ㆍr   )u   ･r   )u   ～~)u   ❑-)u   ‘')u   ’r	   )u   “")u   ”r
   )u   「<)u   」>   c                   @   s   e Zd Zdd ZdS )UnicodePrinterc                 C   s:   t |trtjjp
d}||ddfS tj| ||||S )z0Overrided method to enable Unicode pretty print.utf-8TF)	
isinstanceunicodesysstdoutencodingencodeppPrettyPrinterformat)selfobjectcontext	maxlevelslevelr    r   I/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/konlpy/utils.pyr       s   
zUnicodePrinter.formatN)__name__
__module____qualname__r   r   r   r   r   r      s    r   Fc                    s\   |  } fddt|D }|r,|D ]}td|d|td|d |d  f  q|S )u  Find concordances of a phrase in a text.

    The farmost left numbers are indices, that indicate the location
    of the phrase in the text (by means of tokens).
    The following string, is part of the text surrounding the phrase
    for the given index.

    :param phrase: Phrase to search in the document.
    :param text: Target document.
    :param show: If ``True``, shows locations of the phrase on the console.

    .. code-block:: python

        >>> from konlpy.corpus import kolaw
        >>> from konlpy.tag import Mecab
        >>> from konlpy import utils
        >>> constitution = kolaw.open('constitution.txt').read()
        >>> idx = utils.concordance(u'대한민국', constitution, show=True)
        0       대한민국헌법 유구한 역사와
        9       대한국민은 3·1운동으로 건립된 대한민국임시정부의 법통과 불의에
        98      총강 제1조 ① 대한민국은 민주공화국이다. ②대한민국의
        100     ① 대한민국은 민주공화국이다. ②대한민국의 주권은 국민에게
        110     나온다. 제2조 ① 대한민국의 국민이 되는
        126     의무를 진다. 제3조 대한민국의 영토는 한반도와
        133     부속도서로 한다. 제4조 대한민국은 통일을 지향하며,
        147     추진한다. 제5조 ① 대한민국은 국제평화의 유지에
        787     군무원이 아닌 국민은 대한민국의 영역안에서는 중대한
        1836    파견 또는 외국군대의 대한민국 영역안에서의 주류에
        3620    경제 제119조 ① 대한민국의 경제질서는 개인과
        >>> idx
        [0, 9, 98, 100, 110, 126, 133, 147, 787, 1836, 3620]
    c                    s   g | ]
\}} |v r|qS r   r   ).0itermphraser   r   
<listcomp>K       zconcordance.<locals>.<listcomp>z%d	%s r   r   )split	enumerateprintjoinmax)r'   textshowtermsindexesr$   r   r&   r   concordance(   s   ".r4   )csvutilsr   c                 C   s   t | }dd |D S )u,  Reads a csv file.

        :param f: File object.

        .. code-block:: python

            >>> from konlpy.utils import csvread
            >>> with open('some.csv', 'r') as f:
                    print csvread(f)
            [[u'이 / NR', u'차 / NNB'], [u'나가 / VV', u'네 / EFN']]
        c                 S   s   g | ]}|qS r   r   )r#   rowr   r   r   r(   b   s    zcsvread.<locals>.<listcomp>)r5   UnicodeReader)fr   readerr   r   r   csvreadU   s   
r:   c                 C   s   t || S )u:  Writes a csv file.

        :param data: A list of list.

        .. code-block:: python

            >>> from konlpy.utils import csvwrite
            >>> d = [[u'이 / NR', u'차 / NNB'], [u'나가 / VV', u'네 / EFN']]
            >>> with open('some.csv', 'w') as f:
                    csvwrite(d, f)
        )r5   UnicodeWriter	writerows)datar8   r   r   r   csvwrited   s   r>   c                    s$    fddt dg| |dg D S )zPartitions a list to several parts using indices.

    :param list_: The target list.
    :param indices: Indices to partition the target list.
    c                    s   g | ]
\}} || qS r   r   )r#   r$   jlist_r   r   r(   y   r)   zpartition.<locals>.<listcomp>r   N)zip)rA   indicesr   r@   r   	partitions   s   $rD   c                 K   s,   d|  v rt|d d| S t | S )u  Unicode pretty printer.

        .. code-block:: python

            >>> import pprint, konlpy
            >>> pprint.pprint([u"Print", u"유니코드", u"easily"])
            [u'Print', u'유니코드', u'easily']
            >>> konlpy.utils.pprint([u"Print", u"유니코드", u"easily"])
            ['Print', '유니코드', 'easily']

        :param stream: Option to stream to a particular destination. Can be either sys.stdout (default) or sys.stderr. See #179 for details.
        stream)rE   )keysr   pprint)objkwargsr   r   r   rG   |   s   rG   c                 C   s   t D ]
\}}| ||} q| S )z:Replaces some ambiguous punctuation marks to simpler ones.)replace_setreplace)r'   abr   r   r   select   s   rN   c                 C   s   t t| S )ut   Converts a unicode character to hex.

    .. code-block:: python

        >>> char2hex(u'음')
        '0xc74c'
    )hexord)cr   r   r   char2hex   s   rR   c                 C   s   t t| dS )u   Converts a hex character to unicode.

    .. code-block:: python

        >>> print hex2char('c74c')
        음
        >>> print hex2char('0xc74c')
        음
       )unichrint)hr   r   r   hex2char      
rW   c                 C   s   t j| d|dS )zEText file loader.
    To read a file, use ``read_txt()``instead.
    rr   )ioopen)filenamer   r   r   r   load_txt   s   r^   c                 C   s<   t j| d|d}| W  d   S 1 sw   Y  dS )zText file reader.rY   rZ   N)r[   r\   readr]   r   r8   r   r   r   read_txt   s   $ra   c                 C   s>   t j| d|d}t|W  d   S 1 sw   Y  dS )zJSON file reader.rY   rZ   N)r[   r\   jsonloadr`   r   r   r   	read_json   s   $rd   c                 C      t dd| S )zDelete links from input string

    Args:
        string (str): string to delete links

    Returns:
        str: string without links
    zhttp\S+ resubstringr   r   r   delete_links   rX   rl   c                 C   re   )zDelete at marks from input string

    Args:
        string (str): string to delete at marks

    Returns:
        str: string without at marks.
    z@\S+rf   rg   rj   r   r   r   delete_mentions   rX   rm   c                       s(   e Zd ZdZdd Z fddZ  ZS )PropagatingThreadzPropagatingThread is just a fancy wrapper for Thread to manage exceptions.

    Raises:
        self.exception: Exception defined in higher-level.

    Returns:
        self.ret: Thread target object.
    c              
   C   sv   d | _ z!t| dr| j| ji | j| _W d S | j| ji | j| _W d S  t	y: } z
|| _ W Y d }~d S d }~ww )N_Thread__target)
	exceptionhasattrro   _Thread__args_Thread__kwargsret_target_args_kwargsBaseException)r   er   r   r   run   s   
zPropagatingThread.runc                    s    t t|   | jr| j| jS )N)superrn   r.   rp   rt   )r   	__class__r   r   r.      s   zPropagatingThread.join)r    r!   r"   __doc__rz   r.   __classcell__r   r   r|   r   rn      s    	rn   )F)r   )&
__future__r   r   r[   rb   osrh   rG   r   r   	threadingr   pathdirnamerealpath__file__installpathrJ   version_infor   r   r4   konlpyr5   r:   r>   rD   rN   rR   chrrT   rW   r^   ra   rd   rl   rm   rn   r   r   r   r   <module>   sB   
	*





